from collections import Counter, defaultdict
from pprint import pprint

from readrides import read_rides, row_to_dataclass


def count_routes(data):
    """Return the number of distinct ``route`` values found in *data*."""
    return len({row.route for row in data})


def rider_count(data, date=None, route=None):
    """Return the total ``rides`` over the rows matching the given filters.

    Args:
        data: Iterable of rows exposing ``date``, ``route`` and ``rides``.
        date: A single date value or a tuple of them. A falsy value
            (``None``, ``''``, ``()``) disables the date filter.
        route: A single route value or a tuple of them. A falsy value
            disables the route filter.

    Returns:
        The sum of ``row.rides`` for every row that passes both filters.
    """
    # Normalise scalars to 1-tuples so the membership tests below compare
    # whole values. Without the wrap, a bare string would turn ``in`` into a
    # substring test (route '2' would match a filter of '22').
    if date and not isinstance(date, tuple):
        date = (date,)
    if route and not isinstance(route, tuple):
        route = (route,)

    def _matches(row):
        if date and row.date not in date:
            return False
        if route and row.route not in route:
            return False
        return True

    return sum(row.rides for row in data if _matches(row))


def rides_per_route(data):
    """Return a plain dict mapping each route to its summed ``rides``."""
    ride_counts = Counter()
    for row in data:
        ride_counts[row.route] += row.rides
    return dict(ride_counts)


def ten_year_increase(data):
    """Return a Counter of per-route ridership change from 2001 to 2011.

    A row is assigned to a year when its ``date`` contains the substring
    ``'/2001'`` or ``'/2011'``; all other rows are ignored. Only routes seen
    in both years are compared, and only non-negative differences are kept
    (a route whose totals are equal is included with a value of 0).
    """
    ridership = defaultdict(Counter)
    routes = defaultdict(set)
    for row in data:
        if '/2001' in row.date:
            year = 2001
        elif '/2011' in row.date:
            year = 2011
        else:
            continue
        ridership[year][row.route] += row.rides
        routes[year].add(row.route)

    increases = Counter()
    # Restrict to routes present in both years so a route that only exists
    # in 2011 is not reported as an increase from zero.
    for route in (routes[2001] & routes[2011]):
        difference = ridership[2011][route] - ridership[2001][route]
        if difference >= 0:
            increases[route] = difference
    return increases


if __name__ == '__main__':
    filename = 'Data/ctabus.csv'
    data = read_rides(filename, row_to_dataclass)

    print("Number of bus routes: ", count_routes(data))
    print("Ridership count, 22 bus on 2/2/2011:",
          rider_count(data, date='02/02/2011', route='22'))

    print("Total Ridership Per Route:")
    print(" Route | Ridership")
    print(" -------+-----------")
    total_ridership = rides_per_route(data)
    for route, rides in sorted(total_ridership.items()):
        print(f" {route:>5} | {rides}")

    print("Route ridership increases, 2001 - 2011")
    pprint(ten_year_increase(data).most_common(5))