88 lines
2.2 KiB
Python
88 lines
2.2 KiB
Python
import collections.abc
|
|
import csv
|
|
from abc import ABC, abstractmethod
|
|
|
|
|
|
class DataCollection(collections.abc.Sequence):
    """Column-oriented record store that reads like a sequence of row dicts.

    Every name in ``headers`` owns one list in ``self.data``.  Indexing with
    an integer assembles a ``{header: value}`` dict for that row; indexing
    with a slice returns a new ``DataCollection`` holding the sliced columns.
    """

    def __init__(self, headers):
        """Create an empty collection with one empty column per header.

        Args:
            headers: Sequence of column names; stored as given in
                ``self.headers``.
        """
        self.headers = headers
        self.data = {name: [] for name in headers}

    def __len__(self):
        """Return the number of rows, or 0 when there are no columns."""
        if not self.headers:
            # Without any column there is nothing to measure; indexing
            # headers[0] here would raise IndexError instead of reporting 0.
            return 0
        return len(self.data[self.headers[0]])

    def __getitem__(self, index):
        """Return one row as a dict, or a sliced ``DataCollection``.

        Args:
            index: An integer row position or a ``slice``.

        Raises:
            IndexError: If an integer index is out of range.
        """
        if isinstance(index, slice):
            subset = DataCollection(self.headers)
            for name in self.headers:
                subset.data[name] = self.data[name][index]
            return subset

        if not self.headers:
            # No column can raise IndexError for us, and the Sequence
            # iteration mixin relies on IndexError to terminate.
            raise IndexError("DataCollection index out of range")
        return {name: self.data[name][index] for name in self.headers}

    def append(self, d):
        """Append one row, taking the value for each header from mapping *d*.

        Args:
            d: Mapping that provides a value for every header.

        Raises:
            KeyError: If *d* lacks one of the headers; nothing is appended
                in that case.
        """
        # Resolve every value before mutating so a missing key cannot leave
        # the columns with different lengths.
        values = [d[name] for name in self.headers]
        for name, value in zip(self.headers, values):
            self.data[name].append(value)
|
|
|
|
|
|
class CSVParser(ABC):
    """Base class for CSV readers.

    ``parse`` handles opening the file and walking its rows; subclasses
    decide what a record looks like by implementing ``make_record``.
    """

    def parse(self, filename):
        """Read *filename* and return a list with one record per data row.

        The first row is treated as the header row and is passed, together
        with each following row, to ``make_record``.

        Args:
            filename: Path of the CSV file to read.

        Returns:
            A list of whatever ``make_record`` returns, in file order.  A
            file with no rows at all yields an empty list.
        """
        # newline="" lets the csv module do its own line handling, so that
        # newlines embedded in quoted fields come through unchanged.
        with open(filename, newline="") as f:
            rows = csv.reader(f)
            headers = next(rows, None)
            if headers is None:
                # Empty file: no header row, hence no records.
                return []
            return [self.make_record(headers, row) for row in rows]

    @abstractmethod
    def make_record(self, headers, row):
        """Build one record from the header list and one row of fields."""
|
|
|
|
|
|
class DictCSVParser(CSVParser):
    """Parser whose records are dicts keyed by header name."""

    def __init__(self, types):
        """Remember the per-column conversion callables in *types*."""
        self.types = types

    def make_record(self, headers, row):
        """Convert each field of *row* and key it by the matching header.

        Headers, converters and fields are paired positionally; pairing
        stops at the shortest of the three.
        """
        record = {}
        for name, convert, text in zip(headers, self.types, row):
            record[name] = convert(text)
        return record
|
|
|
|
|
|
class InstanceCSVParser(CSVParser):
    """Parser whose records are built by a class's ``from_row`` method."""

    def __init__(self, cl):
        """Remember *cl*, the class used to build each record."""
        self.cls = cl

    def make_record(self, headers, row):
        """Return ``self.cls.from_row(row)``; *headers* is not used."""
        build = self.cls.from_row
        return build(row)
|
|
|
|
|
|
def read_csv_as_dicts(filename, conversions):
    """Read a CSV file into a list of dicts.

    Each field is converted with the callable at the same position in
    *conversions* and stored under its column header.
    """
    return DictCSVParser(conversions).parse(filename)
|
|
|
|
|
|
def read_csv_as_instances(filename, cls):
    """Read a CSV file into a list of objects built by ``cls.from_row``."""
    return InstanceCSVParser(cls).parse(filename)
|
|
|
|
|
|
def read_csv_as_columns(filename, conversions):
    """Read a CSV file into a column-oriented ``DataCollection``.

    The first row supplies the headers.  Every later row is converted field
    by field with the callable at the same position in *conversions* and
    appended to the collection.

    Args:
        filename: Path of the CSV file to read.
        conversions: Callables applied positionally to each row's fields.

    Returns:
        A ``DataCollection`` holding one column per header.

    Raises:
        StopIteration: If the file has no header row to read.
    """
    # newline="" lets the csv module do its own line handling, so that
    # newlines embedded in quoted fields come through unchanged.
    with open(filename, newline="") as f:
        rows = csv.reader(f)
        headers = next(rows)
        columns = DataCollection(headers)
        for row in rows:
            columns.append(
                {name: func(val) for name, func, val in zip(headers, conversions, row)}
            )
    return columns
|