import collections.abc
import csv


class DataCollection(collections.abc.Sequence):
    """Sequence of records stored column-wise.

    Each header names one column, held as a plain list in ``self.data``.
    Integer indexing rebuilds a single record as a ``dict``; slicing
    returns a new ``DataCollection`` holding the selected rows.
    """

    def __init__(self, headers):
        """Create an empty collection with one column per name in *headers*."""
        self.headers = headers
        self.data = {name: [] for name in headers}

    def __len__(self):
        """Return the number of stored records (0 when there are no columns)."""
        if not self.headers:
            return 0
        # All columns are kept at the same length, so the first one suffices.
        return len(self.data[self.headers[0]])

    def __getitem__(self, index):
        """Return one record as a dict, or a sub-collection for a slice.

        Raises:
            IndexError: if an integer *index* is out of range.
        """
        if isinstance(index, slice):
            subset = DataCollection(self.headers)
            for name in self.headers:
                subset.data[name] = self.data[name][index]
            return subset

        if not self.headers:
            # With no columns there is no list lookup to raise IndexError,
            # and Sequence.__iter__ relies on that exception to terminate.
            raise IndexError("DataCollection index out of range")
        return {name: self.data[name][index] for name in self.headers}

    def append(self, d):
        """Append the record *d*, a mapping with a value for every header.

        Raises:
            KeyError: if *d* lacks one of the headers; the collection is
                left unmodified in that case.
        """
        # Look up every value first so a missing key cannot leave the
        # columns with unequal lengths.
        values = [d[name] for name in self.headers]
        for name, value in zip(self.headers, values):
            self.data[name].append(value)


def _read_records(filename, conversions, make_container):
    """Parse *filename* as CSV and append converted records to a container.

    *make_container* is called with the header row and must return an
    object with an ``append`` method; that object is returned.
    """
    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are preserved.
    with open(filename, newline="") as f:
        rows = csv.reader(f)
        # An empty file has no header row; treat it as zero columns.
        headers = next(rows, [])
        records = make_container(headers)
        for row in rows:
            records.append(
                {name: func(val)
                 for name, func, val in zip(headers, conversions, row)})
    return records


def read_csv_as_dicts(filename, conversions):
    """Read a CSV file into a list of dicts, one per data row.

    The first row supplies the keys. *conversions* is an iterable of
    callables applied position-wise to each row's string values.
    Returns an empty list for an empty file.
    """
    return _read_records(filename, conversions, lambda headers: [])


def read_csv_as_columns(filename, conversions):
    """Read a CSV file into a column-oriented ``DataCollection``.

    The first row supplies the column names. *conversions* is an iterable
    of callables applied position-wise to each row's string values.
    Returns an empty collection for an empty file.
    """
    return _read_records(filename, conversions, DataCollection)