This commit is contained in:
Mike Bloy 2023-11-24 11:57:24 -06:00
parent 530cba6953
commit 2e2512ab52
2 changed files with 127 additions and 83 deletions

123
reader.py
View File

@ -1,87 +1,44 @@
import collections.abc
import csv
from abc import ABC, abstractmethod
from typing import Iterable, Optional, Sequence
class DataCollection(collections.abc.Sequence):
    """Column-oriented table exposed as a sequence of row dicts.

    Values are kept as one list per header name in ``self.data``. Indexing
    with an integer rebuilds that row as a ``dict``; indexing with a slice
    returns a new ``DataCollection`` holding the sliced columns.
    """

    def __init__(self, headers):
        """Create an empty collection with one column per name in *headers*."""
        self.headers = headers
        self.data = {name: [] for name in headers}

    def __len__(self):
        """Return the number of rows, i.e. the length of the first column."""
        # With no columns there are no rows; indexing ``headers[0]`` here
        # would raise IndexError out of ``len()``.
        if not self.headers:
            return 0
        return len(self.data[self.headers[0]])

    def __getitem__(self, index):
        """Return row *index* as a dict, or a new collection for a slice.

        Raises:
            IndexError: if an integer *index* is out of range.
        """
        if isinstance(index, slice):
            subset = DataCollection(self.headers)
            for name in self.headers:
                subset.data[name] = self.data[name][index]
            return subset
        if not self.headers:
            # The Sequence mixins (iteration, ``in``, ``index``) stop only
            # when IndexError is raised; without columns nothing below
            # would ever raise it, so iteration would never end.
            raise IndexError("DataCollection index out of range")
        return {name: self.data[name][index] for name in self.headers}

    def append(self, d):
        """Append mapping *d* as a new row; it needs a key for every header."""
        for name in self.headers:
            self.data[name].append(d[name])
class CSVParser(ABC):
    """Template for CSV readers; subclasses decide how a row becomes a record."""

    def parse(self, filename):
        """Read the CSV file *filename* and return a list of records.

        The first row is taken as the headers; every following row is handed
        to ``make_record`` together with those headers. A file with no rows
        at all yields an empty list.
        """
        # newline="" leaves newline handling to the csv module, so line
        # breaks embedded in quoted fields come through unchanged.
        with open(filename, newline="") as f:
            rows = csv.reader(f)
            headers = next(rows, None)
            if headers is None:
                return []
            return [self.make_record(headers, row) for row in rows]

    @abstractmethod
    def make_record(self, headers, row):
        """Build one record from *row*, given the file's *headers*."""
class DictCSVParser(CSVParser):
    """CSV parser that turns each row into a dict keyed by header name."""

    def __init__(self, types):
        """Remember *types*, the per-column conversion callables."""
        self.types = types

    def make_record(self, headers, row):
        """Convert *row* column by column and key the results by header."""
        record = {}
        for column, convert, raw in zip(headers, self.types, row):
            record[column] = convert(raw)
        return record
class InstanceCSVParser(CSVParser):
    """CSV parser that builds one object per row through ``from_row``."""

    def __init__(self, cl):
        """Store *cl*, the class whose ``from_row`` creates each record."""
        self.cls = cl

    def make_record(self, headers, row):
        """Hand *row* to ``from_row``; *headers* is not used here."""
        build = self.cls.from_row
        return build(row)
def read_csv_as_dicts(filename, conversions):
    """Parse *filename* into a list of dicts via ``DictCSVParser``.

    *conversions* is passed to the parser as its per-column converters.
    """
    return DictCSVParser(conversions).parse(filename)
def read_csv_as_instances(filename, cls):
    """Parse *filename* into a list of *cls* objects via ``InstanceCSVParser``."""
    return InstanceCSVParser(cls).parse(filename)
def read_csv_as_columns(filename, conversions):
    """Read a CSV file into a column-oriented ``DataCollection``.

    The first row supplies the headers. Each later row is converted column
    by column with the callables in *conversions* and appended as a dict.
    A file without a header row makes ``next`` raise ``StopIteration``.
    """
    # newline="" leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields come through unchanged.
    with open(filename, newline="") as f:
        rows = csv.reader(f)
        headers = next(rows)
        value = DataCollection(headers)
        for row in rows:
            value.append(
                {name: func(val) for name, func, val in zip(headers, conversions, row)}
            )
    return value


def csv_as_dicts(
    lines: Iterable[str], types: Sequence[type], headers: Optional[Sequence[str]] = None
):
    """Convert CSV text lines into a list of dicts.

    Args:
        lines: Any iterable of CSV-formatted strings (an open file works).
        types: One conversion callable per column, applied to the raw text.
        headers: Column names. When omitted, the first row of *lines* is
            consumed and used as the names.

    Returns:
        One dict per data row, keyed by header. Empty input gives ``[]``.
    """
    rows = csv.reader(lines)
    if headers is None:
        # A default keeps empty input from leaking StopIteration to callers.
        headers = next(rows, None)
        if headers is None:
            return []
    return [
        {name: func(val) for name, func, val in zip(headers, types, row)}
        for row in rows
    ]
def read_csv_as_dicts(
    filename: str, types: Sequence[type], headers: Optional[Sequence[str]] = None
):
    """
    Read CSV data into list of dictionaries with optional type conversion.

    Opens *filename* and passes the open file, *types* and *headers* on to
    ``csv_as_dicts``, returning whatever it returns.
    """
    # newline="" leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields come through unchanged.
    with open(filename, newline="") as file:
        return csv_as_dicts(file, types, headers)
def csv_as_instances(lines: Iterable[str], cls: type, has_headers: bool = True):
    """Convert CSV text lines into a list of objects built by ``cls.from_row``.

    Args:
        lines: Any iterable of CSV-formatted strings (an open file works).
        cls: Class providing ``from_row(row)``; it receives each parsed row.
        has_headers: When true, the first row is skipped as a header line.

    Returns:
        One ``from_row`` result per data row. Empty input gives ``[]``.
    """
    rows = csv.reader(lines)
    if has_headers:
        # Discard the header row; the default keeps empty input from
        # leaking StopIteration to callers.
        next(rows, None)
    return [cls.from_row(row) for row in rows]
def read_csv_as_instances(filename: str, cls: type, has_headers: bool = True):
    """
    Read CSV data into list of instances.

    Opens *filename* and passes the open file, *cls* and *has_headers* on to
    ``csv_as_instances``, returning whatever it returns.
    """
    # newline="" leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields come through unchanged.
    with open(filename, newline="") as file:
        return csv_as_instances(file, cls, has_headers)

87
reader_classes.py Normal file
View File

@ -0,0 +1,87 @@
import collections.abc
import csv
from abc import ABC, abstractmethod
class DataCollection(collections.abc.Sequence):
    """Column-oriented table exposed as a sequence of row dicts.

    Values are kept as one list per header name in ``self.data``. Indexing
    with an integer rebuilds that row as a ``dict``; indexing with a slice
    returns a new ``DataCollection`` holding the sliced columns.
    """

    def __init__(self, headers):
        """Create an empty collection with one column per name in *headers*."""
        self.headers = headers
        self.data = {name: [] for name in headers}

    def __len__(self):
        """Return the number of rows, i.e. the length of the first column."""
        # With no columns there are no rows; indexing ``headers[0]`` here
        # would raise IndexError out of ``len()``.
        if not self.headers:
            return 0
        return len(self.data[self.headers[0]])

    def __getitem__(self, index):
        """Return row *index* as a dict, or a new collection for a slice.

        Raises:
            IndexError: if an integer *index* is out of range.
        """
        if isinstance(index, slice):
            subset = DataCollection(self.headers)
            for name in self.headers:
                subset.data[name] = self.data[name][index]
            return subset
        if not self.headers:
            # The Sequence mixins (iteration, ``in``, ``index``) stop only
            # when IndexError is raised; without columns nothing below
            # would ever raise it, so iteration would never end.
            raise IndexError("DataCollection index out of range")
        return {name: self.data[name][index] for name in self.headers}

    def append(self, d):
        """Append mapping *d* as a new row; it needs a key for every header."""
        for name in self.headers:
            self.data[name].append(d[name])
class CSVParser(ABC):
    """Template for CSV readers; subclasses decide how a row becomes a record."""

    def parse(self, filename):
        """Read the CSV file *filename* and return a list of records.

        The first row is taken as the headers; every following row is handed
        to ``make_record`` together with those headers. A file with no rows
        at all yields an empty list.
        """
        # newline="" leaves newline handling to the csv module, so line
        # breaks embedded in quoted fields come through unchanged.
        with open(filename, newline="") as f:
            rows = csv.reader(f)
            headers = next(rows, None)
            if headers is None:
                return []
            return [self.make_record(headers, row) for row in rows]

    @abstractmethod
    def make_record(self, headers, row):
        """Build one record from *row*, given the file's *headers*."""
class DictCSVParser(CSVParser):
    """CSV parser that turns each row into a dict keyed by header name."""

    def __init__(self, types):
        """Remember *types*, the per-column conversion callables."""
        self.types = types

    def make_record(self, headers, row):
        """Convert *row* column by column and key the results by header."""
        record = {}
        for column, convert, raw in zip(headers, self.types, row):
            record[column] = convert(raw)
        return record
class InstanceCSVParser(CSVParser):
    """CSV parser that builds one object per row through ``from_row``."""

    def __init__(self, cl):
        """Store *cl*, the class whose ``from_row`` creates each record."""
        self.cls = cl

    def make_record(self, headers, row):
        """Hand *row* to ``from_row``; *headers* is not used here."""
        build = self.cls.from_row
        return build(row)
def read_csv_as_dicts(filename, conversions):
    """Parse *filename* into a list of dicts via ``DictCSVParser``.

    *conversions* is passed to the parser as its per-column converters.
    """
    return DictCSVParser(conversions).parse(filename)
def read_csv_as_instances(filename, cls):
    """Parse *filename* into a list of *cls* objects via ``InstanceCSVParser``."""
    return InstanceCSVParser(cls).parse(filename)
def read_csv_as_columns(filename, conversions):
    """Read a CSV file into a column-oriented ``DataCollection``.

    The first row supplies the headers. Each later row is converted column
    by column with the callables in *conversions* and appended as a dict.
    A file without a header row makes ``next`` raise ``StopIteration``.
    """
    # newline="" leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields come through unchanged.
    with open(filename, newline="") as f:
        rows = csv.reader(f)
        headers = next(rows)
        value = DataCollection(headers)
        for row in rows:
            value.append(
                {name: func(val) for name, func, val in zip(headers, conversions, row)}
            )
    return value