diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 89ff4e6..cbe097b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,14 @@ jobs: pip install poetry poetry install + - name: Lint check + run: poetry run ruff check + - name: Check formatting run: poetry run black --check . - - name: Test with pytest + - name: Check types + run: poetry run mypy . + + - name: Run tests run: poetry run pytest -v diff --git a/marctable/__init__.py b/marctable/__init__.py index 94144da..483a929 100644 --- a/marctable/__init__.py +++ b/marctable/__init__.py @@ -1,4 +1,6 @@ from collections.abc import Callable +from io import IOBase +from typing import BinaryIO, TextIO import click @@ -38,7 +40,7 @@ def rule_params(f: Callable) -> Callable: @cli.command() @io_params @rule_params -def csv(infile: click.File, outfile: click.File, rules: list, batch: int) -> None: +def csv(infile: BinaryIO, outfile: TextIO, rules: list, batch: int) -> None: """ Convert MARC to CSV. """ @@ -48,7 +50,7 @@ def csv(infile: click.File, outfile: click.File, rules: list, batch: int) -> Non @cli.command() @io_params @rule_params -def parquet(infile: click.File, outfile: click.File, rules: list, batch: int) -> None: +def parquet(infile: BinaryIO, outfile: IOBase, rules: list, batch: int) -> None: """ Convert MARC to Parquet. 
""" @@ -58,7 +60,7 @@ def parquet(infile: click.File, outfile: click.File, rules: list, batch: int) -> @cli.command() @io_params @rule_params -def jsonl(infile: click.File, outfile: click.File, rules: list, batch: int) -> None: +def jsonl(infile: BinaryIO, outfile: BinaryIO, rules: list, batch: int) -> None: """ Convert MARC to JSON Lines (JSONL) """ @@ -67,9 +69,10 @@ def jsonl(infile: click.File, outfile: click.File, rules: list, batch: int) -> N @cli.command() @click.argument("outfile", type=click.File("w"), default="-") -def avram(outfile: click.File) -> None: +def avram(outfile: TextIO) -> None: """ - Generate Avram (YAML) from scraping the Library of Congress MARC bibliographic website. + Generate Avram (YAML) from scraping the Library of Congress MARC + bibliographic web. """ marctable.marc.crawl(outfile=outfile) diff --git a/marctable/marc.py b/marctable/marc.py index 42fd48b..e5c9c00 100644 --- a/marctable/marc.py +++ b/marctable/marc.py @@ -14,11 +14,11 @@ import re import sys from functools import cache -from typing import IO, Generator +from typing import IO, Generator, List, Optional, Type from urllib.parse import urljoin import requests -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag class Subfield: @@ -28,8 +28,8 @@ def __init__(self, code: str, label: str, repeatable: bool = False) -> None: self.repeatable = repeatable @classmethod - def from_dict(_, d: dict): - return Subfield(d.get("code"), d.get("label"), d.get("repeatable")) + def from_dict(cls: Type["Subfield"], d: dict) -> "Subfield": + return Subfield(d["code"], d["label"], d["repeatable"]) def to_dict(self) -> dict: return {"code": self.code, "label": self.label, "repeatable": self.repeatable} @@ -37,7 +37,12 @@ def to_dict(self) -> dict: class Field: def __init__( - self, tag: str, label: str, subfields: dict, repeatable: False, url: str = None + self, + tag: str, + label: str, + subfields: list[Subfield], + repeatable: bool = False, + url: Optional[str] = None, ) -> 
None: self.tag = tag self.label = label @@ -47,7 +52,7 @@ def __init__( def __str__(self) -> str: if len(self.subfields) > 0: - subfields = ": " + (",".join(self.subfields.keys())) + subfields = ": " + (",".join([sf.code for sf in self.subfields])) else: subfields = "" return ( @@ -55,63 +60,58 @@ def __str__(self) -> str: ) @classmethod - def from_dict(klass, d: dict): + def from_dict(cls: Type["Field"], d: dict) -> "Field": return Field( - tag=d.get("tag"), - label=d.get("label"), - repeatable=d.get("repeatable"), + tag=d["tag"], + label=d["label"], + repeatable=d["repeatable"], url=d.get("url"), subfields=[Subfield.from_dict(d) for d in d.get("subfields", {}).values()], ) def to_dict(self) -> dict: - return { + d = { "tag": self.tag, "label": self.label, "repeatable": self.repeatable, "url": self.url, - "subfields": {sf.code: sf.to_dict() for sf in self.subfields.values()}, } - def to_avram(self) -> dict: - d = self.to_dict() - if len(d["subfields"]) == 0: - del d["subfields"] + if self.subfields is not None: + d["subfields"] = {sf.code: sf.to_dict() for sf in self.subfields} + return d def get_subfield(self, code: str) -> Subfield: for sf in self.subfields: if sf.code == code: return sf - return None + raise SchemaSubfieldError(f"{code} is not a valid subfield in field {self.tag}") class MARC: def __init__(self) -> None: - self.fields = [] + self.fields: List[Field] = [] @cache def get_field(self, tag: str) -> Field: for field in self.fields: if field.tag == tag: return field - return None + raise SchemaFieldError(f"{tag} is not a defined field tag in Avram schema") @cache def get_subfield(self, tag: str, code: str) -> Subfield: field = self.get_field(tag) - if field: - return field.get_subfield(code) - else: - return None + return field.get_subfield(code) @property - def avram_file(self): + def avram_file(self) -> pathlib.Path: return pathlib.Path(__file__).parent / "marc.json" @classmethod @cache - def from_avram(cls, avram_file: IO = None) -> dict: + def 
from_avram(cls: Type["MARC"], avram_file: Optional[IO] = None) -> "MARC": marc = MARC() if avram_file is None: @@ -122,7 +122,7 @@ def from_avram(cls, avram_file: IO = None) -> dict: return marc - def write_avram(self, avram_file: IO = None) -> None: + def to_avram(self, avram_file: Optional[IO] = None) -> None: if avram_file is None: avram_file = self.avram_file.open("w") @@ -131,11 +131,19 @@ def write_avram(self, avram_file: IO = None) -> None: "url": "https://www.loc.gov/marc/bibliographic/", "family": "marc", "language": "en", - "fields": {f.tag: f.to_avram() for f in self.fields}, + "fields": {f.tag: f.to_dict() for f in self.fields}, } json.dump(d, avram_file, indent=2) +class SchemaFieldError(Exception): + pass + + +class SchemaSubfieldError(Exception): + pass + + def fields() -> Generator[Field, None, None]: toc_url = "https://www.loc.gov/marc/bibliographic/" toc_doc = _soup(toc_url) @@ -150,19 +158,21 @@ def fields() -> Generator[Field, None, None]: yield field -def make_field(url: str) -> Field: +def make_field(url: str) -> Optional[Field]: soup = _soup(url) - h1 = soup.select_one("h1", first=True).text.strip() - if m1 := re.match(r"^(\d+) - (.+) \((.+)\)$", h1): + h1: Optional[Tag] = soup.select_one("h1") + if h1 is None: + raise Exception(f"Expecting h1 element in {url}") + + h1_text: str = h1.text.strip() + if m1 := re.match(r"^(\d+) - (.+) \((.+)\)$", h1_text): tag, label, repeatable = m1.groups() # most pages put the subfield info in a list - subfields = {} + subfields = [] for el in soup.select("table.subfields li"): if m2 := re.match(r"^\$(.) - (.+) \((.+)\)$", el.text): - subfields[m2.group(1)] = Subfield( - m2.group(1), m2.group(2), m2.group(3) == "R" - ) + subfields.append(Subfield(m2.group(1), m2.group(2), m2.group(3) == "R")) # some pages use a different layout, of course if len(subfields) == 0: @@ -170,10 +180,12 @@ def make_field(url: str) -> Field: for text in el.text.split("$"): text = text.strip() if m2 := re.match(r"^(.)
- (.+) \((.+)\)$", text): - subfields[m2.group(1)] = Subfield( - code=m2.group(1), - label=m2.group(2), - repeatable=m2.group(3) == "R", + subfields.append( + Subfield( + code=m2.group(1), + label=m2.group(2), + repeatable=m2.group(3) == "R", + ) ) return Field( @@ -184,6 +196,8 @@ def make_field(url: str) -> Field: subfields=subfields, ) + return None + # scrape the loc website for the marc fields def crawl(n: int = 0, quiet: bool = False, outfile: IO = sys.stdout) -> None: @@ -194,7 +208,7 @@ def crawl(n: int = 0, quiet: bool = False, outfile: IO = sys.stdout) -> None: print(f) if n != 0 and len(marc.fields) >= n: break - marc.write_avram(outfile) + marc.to_avram(outfile) def _soup(url: str) -> BeautifulSoup: diff --git a/marctable/utils.py b/marctable/utils.py index 5991fbc..42a15c9 100644 --- a/marctable/utils.py +++ b/marctable/utils.py @@ -1,6 +1,6 @@ import json -import typing -from typing import Generator +from io import IOBase +from typing import BinaryIO, Dict, Generator, List, TextIO, Tuple, Union import pyarrow import pymarc @@ -9,8 +9,10 @@ from .marc import MARC +ListOrString = Union[str, List[str]] -def to_dataframe(marc_input: typing.BinaryIO, rules: list = []) -> DataFrame: + +def to_dataframe(marc_input: BinaryIO, rules: list = []) -> DataFrame: """ Return a single DataFrame for the entire dataset. 
""" @@ -18,8 +20,8 @@ def to_dataframe(marc_input: typing.BinaryIO, rules: list = []) -> DataFrame: def to_csv( - marc_input: typing.BinaryIO, - csv_output: typing.TextIO, + marc_input: BinaryIO, + csv_output: TextIO, rules: list = [], batch: int = 1000, ) -> None: @@ -32,8 +34,8 @@ def to_csv( def to_jsonl( - marc_input: typing.BinaryIO, - jsonl_output: typing.BinaryIO, + marc_input: BinaryIO, + jsonl_output: BinaryIO, rules: list = [], batch: int = 1000, ) -> None: @@ -46,8 +48,8 @@ def to_jsonl( def to_parquet( - marc_input: typing.BinaryIO, - parquet_output: typing.BinaryIO, + marc_input: BinaryIO, + parquet_output: IOBase, rules: list = [], batch: int = 1000, ) -> None: @@ -55,7 +57,7 @@ def to_parquet( Convert MARC to Parquet. """ schema = _make_parquet_schema(rules) - writer = ParquetWriter(parquet_output, schema, compression="gzip") + writer = ParquetWriter(parquet_output, schema, compression="SNAPPY") for records_batch in records_iter(marc_input, rules=rules, batch=batch): table = pyarrow.Table.from_pylist(records_batch, schema) writer.write_table(table) @@ -64,7 +66,7 @@ def to_parquet( def dataframe_iter( - marc_input: typing.BinaryIO, rules: list = [], batch: int = 1000 + marc_input: BinaryIO, rules: list = [], batch: int = 1000 ) -> Generator[DataFrame, None, None]: columns = _columns(_mapping(rules)) for records_batch in records_iter(marc_input, rules, batch): @@ -72,8 +74,8 @@ def dataframe_iter( def records_iter( - marc_input: typing.BinaryIO, rules: list = [], batch: int = 1000 -) -> Generator[DataFrame, None, None]: + marc_input: BinaryIO, rules: list = [], batch: int = 1000 +) -> Generator[List[Dict], None, None]: """ Read MARC input and generate a list of dictionaries, where each list element represents a MARC record. @@ -85,9 +87,10 @@ def records_iter( for record in pymarc.MARCReader(marc_input): # if pymarc can't make sense of a record it returns None if record is None: + # TODO: log this? 
continue - r = {} + r: Dict[str, ListOrString] = {} for field in record.fields: if field.tag not in mapping: continue @@ -98,12 +101,15 @@ def records_iter( if subfields is None: key = f"F{field.tag}" if marc.get_field(field.tag).repeatable: - value = r.get(key, []) - value.append(_stringify_field(field)) + lst = r.get(key, []) + assert isinstance( + lst, list + ), "Repeatable field contains a string instead of a list" + lst.append(_stringify_field(field)) + r[key] = lst else: - value = _stringify_field(field) - - r[key] = value + s = _stringify_field(field) + r[key] = s # otherwise only add the subfields that were requested in the mapping else: @@ -113,7 +119,10 @@ def records_iter( key = f"F{field.tag}{sf.code}" if marc.get_subfield(field.tag, sf.code).repeatable: - value = r.get(key, []) + value: ListOrString = r.get(key, []) + assert isinstance( + value, list + ), "Repeatable field contains a string instead of list" value.append(sf.value) else: value = sf.value @@ -180,7 +189,7 @@ def _columns(mapping: dict) -> list: return cols -def _make_pandas_schema(rules: list) -> pyarrow.Schema: +def _make_pandas_schema(rules: list) -> Dict[str, str]: marc = MARC.from_avram() mapping = _mapping(rules) schema = {} @@ -200,19 +209,22 @@ def _make_pandas_schema(rules: list) -> pyarrow.Schema: def _make_parquet_schema(rules: list) -> pyarrow.Schema: marc = MARC.from_avram() mapping = _mapping(rules) - cols = [] + + pyarrow_str = pyarrow.string() + pyarrow_list_of_str = pyarrow.list_(pyarrow.string()) + + cols: List[Tuple[str, pyarrow.DataType]] = [] for field_tag, subfields in mapping.items(): if subfields is None: if marc.get_field(field_tag).repeatable: - typ = pyarrow.list_(pyarrow.string()) + cols.append((f"F{field_tag}", pyarrow_list_of_str)) else: - typ = pyarrow.string() - cols.append((f"F{field_tag}", typ)) + cols.append((f"F{field_tag}", pyarrow_str)) else: - for sf in subfields: - if marc.get_subfield(field_tag, sf).repeatable: - typ = 
pyarrow.list_(pyarrow.string()) + for sf_code in subfields: + sf = marc.get_subfield(field_tag, sf_code) + if sf is not None and sf.repeatable: + cols.append((f"F{field_tag}{sf_code}", pyarrow_list_of_str)) else: - typ = pyarrow.string() - cols.append((f"F{field_tag}{sf}", typ)) - return pyarrow.schema(cols) + cols.append((f"F{field_tag}{sf_code}", pyarrow_str)) + return pyarrow.schema(cols) # type: ignore[arg-type] diff --git a/pyproject.toml b/pyproject.toml index eb7e41b..e9c0b2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,11 @@ click = "^8.1.7" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" black = "^23.12.0" +types-requests = "^2.31.0.10" +types-beautifulsoup4 = "^4.12.0.7" +mypy = "^1.8.0" +pandas-stubs = "^2.1.4.231227" +pyarrow-stubs = "^10.0.1.7" [tool.poetry.scripts] marctable = "marctable:main" @@ -34,3 +39,6 @@ build-backend = "poetry.core.masonry.api" [project.urls] Homepage = "https://github.com/edsu/marctable" + +[tool.mypy] +warn_no_return = false diff --git a/test_marctable.py b/test_marctable.py index 3a1e1f6..016222a 100644 --- a/test_marctable.py +++ b/test_marctable.py @@ -1,10 +1,11 @@ +import json import pathlib from io import StringIO import pandas -import json -from marctable.marc import MARC, crawl +from marctable.marc import MARC, SchemaFieldError, SchemaSubfieldError, crawl from marctable.utils import _mapping, dataframe_iter, to_csv, to_dataframe, to_parquet +from pytest import raises marc = MARC.from_avram() @@ -41,12 +42,14 @@ def test_marc() -> None: def test_get_field() -> None: assert marc.get_field("245") - assert not marc.get_field("abc") + with raises(SchemaFieldError, match="abc is not a defined field tag in Avram"): + marc.get_field("abc") def test_get_subfield() -> None: assert marc.get_subfield("245", "a").label == "Title" - assert marc.get_subfield("245", "-") is None + with raises(SchemaSubfieldError, match="- is not a valid subfield in field 245"): + marc.get_subfield("245", "-") def 
test_non_repeatable_field() -> None: @@ -71,7 +74,8 @@ def test_df() -> None: # 245 is not repeatable assert ( df.iloc[0]["F245"] - == "Leak testing CD-ROM [computer file] / technical editors, Charles N. Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." + == "Leak testing CD-ROM [computer file] / technical editors, Charles N. " + "Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." ) # 650 is repeatable assert df.iloc[0]["F650"] == ["Leak detectors.", "Gas leakage."] @@ -86,7 +90,8 @@ def test_custom_fields_df() -> None: assert df.columns[1] == "F650" assert ( df.iloc[0]["F245"] - == "Leak testing CD-ROM [computer file] / technical editors, Charles N. Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." + == "Leak testing CD-ROM [computer file] / technical editors, Charles N. " + "Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." ) assert df.iloc[0]["F650"] == ["Leak detectors.", "Gas leakage."] @@ -132,7 +137,8 @@ def test_to_csv() -> None: assert len(df.columns) == 215 assert ( df.iloc[0]["F245"] - == "Leak testing CD-ROM [computer file] / technical editors, Charles N. Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." + == "Leak testing CD-ROM [computer file] / technical editors, Charles N. " + "Jackson, Jr., Charles N. Sherlock ; editor, Patrick O. Moore." )