diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 98fb135..5295f41 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,7 +22,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup pandoc uses: r-lib/actions/setup-pandoc@v2 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/docs.yaml similarity index 57% rename from .github/workflows/pkgdown.yaml rename to .github/workflows/docs.yaml index 5912d61..d1b8bfd 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/docs.yaml @@ -5,14 +5,18 @@ on: push: branches: [main, master] -name: pkgdown +name: docs + +env: + PYTHONUNBUFFERED: "1" + UV_SYSTEM_PYTHON: 1 jobs: - build-pkgdown-site: + build: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup pandoc uses: r-lib/actions/setup-pandoc@v2 @@ -28,21 +32,41 @@ jobs: extra-packages: any::pkgdown, local::. 
needs: website - - name: Build pkgdown site + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + cache-dependency-glob: python/pyproject.toml + cache-suffix: docs + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version-file: python/pyproject.toml + + - name: Install Python dependencies + working-directory: python + shell: bash + run: uv pip install .[docs] + + - name: Build R docs run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} + - name: Build Python docs + run: sphinx-build python/docs/source docs/python + - name: Configure pages - uses: actions/configure-pages@v3 + uses: actions/configure-pages@v5 - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: 'docs' deploy: if: contains(fromJSON('["main", "master"]'), github.ref_name) && github.event_name != 'pull_request' - needs: build-pkgdown-site + needs: build runs-on: ubuntu-latest environment: name: github-pages @@ -53,4 +77,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v2 + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 507fedb..2c8b81e 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup R uses: r-lib/actions/setup-r@v2 diff --git a/.github/workflows/pytest-coverage.yaml b/.github/workflows/pytest-coverage.yaml new file mode 100644 index 0000000..e758284 --- /dev/null +++ b/.github/workflows/pytest-coverage.yaml @@ -0,0 +1,52 @@ +on: + pull_request: + push: + branches: [main, master] + +name: pytest-coverage + +env: + PYTHONUNBUFFERED: "1" + UV_SYSTEM_PYTHON: 1 + +jobs: + pytest-coverage: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", 
"3.11", "3.12", "3.13"] + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + cache-dependency-glob: python/pyproject.toml + cache-suffix: test + + - name: Install Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + working-directory: python + shell: bash + run: uv pip install .[dev,docs] + + - name: Run pytest + working-directory: python + shell: bash + run: | + pytest -v --doctest-modules \ + --junitxml=junit/test-results-${{ matrix.python-version }}.xml + + - name: Upload artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: pytest-results-${{ matrix.python-version }} + path: python/junit/test-results-${{ matrix.python-version }}.xml diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2d30441..9f6749f 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup R uses: r-lib/actions/setup-r@v2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11a5038..f4e0385 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,7 @@ repos: - id: use-tidy-description - id: lintr - id: readme-rmd-rendered + exclude: ^python/ - id: parsable-R - id: no-browser-statement - id: no-debug-statement @@ -33,3 +34,15 @@ repos: entry: Cannot commit .Rhistory, .RData, .Rds or .rds. language: fail files: '\.(Rhistory|RData|Rds|rds)$' +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.7.4 + hooks: + # Python linter. 
Ruff recommends running this before the formatter to + # avoid conflicts when using the --fix flag + - id: ruff + args: + - --fix + files: ^python/ + # Formatter + - id: ruff-format + files: ^python/ diff --git a/data-raw/chars_sample.R b/data-raw/chars_sample.R index fa09860..52907c6 100644 --- a/data-raw/chars_sample.R +++ b/data-raw/chars_sample.R @@ -54,6 +54,9 @@ chars_sample_athena <- dbGetQuery( " ) usethis::use_data(chars_sample_athena, overwrite = TRUE) +# Write the data to CSV so that the Python version of the package can consume +# it as well +readr::write_csv(chars_sample_athena, "data-raw/chars_sample_athena.csv") # Get sample ADDCHARS data chars_sample_hie <- dbGetQuery( @@ -69,3 +72,4 @@ chars_sample_hie <- dbGetQuery( " ) usethis::use_data(chars_sample_hie, overwrite = TRUE) +readr::write_csv(chars_sample_hie, "data-raw/chars_sample_hie.csv") diff --git a/data-raw/chars_sample_athena.csv b/data-raw/chars_sample_athena.csv new file mode 100644 index 0000000..12447a8 --- /dev/null +++ b/data-raw/chars_sample_athena.csv @@ -0,0 +1,71 @@ +pin,year,class,char_yrblt,char_bldg_sf,char_land_sf,char_beds,char_rooms,char_fbath,char_hbath,char_frpl,char_type_resd,char_cnst_qlty,char_apts,char_tp_dsgn,char_attic_fnsh,char_gar1_att,char_gar1_area,char_gar1_size,char_gar1_cnst,char_attic_type,char_bsmt,char_ext_wall,char_heat,char_repair_cnd,char_bsmt_fin,char_roof_cnst,char_use,char_age,char_site,char_ncu,char_renovation,char_porch,char_air,char_tp_plan +10254170360000,2015,205,1948,1775,4340,4,7,2,1,2,2,2,6,2,3,2,2,7,0,3,1,2,1,2,1,1,1,67,2,0,0,0,1,2 +09363230550000,2019,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,95,2,0,0,1,2,2 +09363230550000,2016,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,92,2,0,0,1,2,2 +14321260280000,2018,211,1878,2850,3125,4,9,3,0,0,3,2,2,2,3,2,2,7,0,3,3,2,1,2,1,2,2,140,2,0,0,0,2,2 +10253190450000,2018,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,67,2,0,0,0,1,2 
+14321260280000,2016,211,1878,2850,3125,4,9,3,0,0,3,2,2,2,3,2,2,7,0,3,3,2,1,2,1,2,2,137,2,0,0,0,2,2 +09254040180000,2020,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,63,2,0,0,0,1,2 +13362270230000,2015,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,125,2,0,0,1,2,2 +13253160160000,2020,202,1904,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,117,2,0,0,0,2,2 +10254170360000,2018,205,1948,1829,4340,3,7,2,1,2,2,2,6,2,3,2,2,3,1,3,1,2,1,2,1,1,1,70,2,0,0,0,1,2 +13253160160000,2017,202,1901,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,114,2,0,0,0,2,2 +09363230550000,2017,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,92,2,0,0,1,2,2 +10253190450000,2017,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,64,2,0,0,0,1,2 +13253230040000,2019,203,1969,1040,3150,2,5,1,0,0,1,2,6,2,3,2,2,7,0,3,1,2,1,2,3,1,1,49,2,0,0,0,2,2 +17032010190000,2016,211,1887,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,128,2,0,0,0,1,2 +09361030150000,2016,203,1928,1200,5280,2,5,1,0,0,1,2,6,0,3,1,1,1,1,1,1,3,2,2,3,1,1,87,3,0,0,0,2,0 +09361030150000,2015,203,1928,1200,5280,2,5,1,0,0,1,2,6,0,3,1,1,1,1,1,1,3,2,2,3,1,1,87,3,0,0,0,2,0 +17032010190000,2015,211,1887,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,128,2,0,0,0,1,2 +10253190450000,2021,209,1954,6027,11160,5,12,4,1,1,2,2,6,2,3,1,2,3,2,3,3,4,1,2,3,1,1,70,2,0,0,0,1,2 +09254040180000,2018,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,63,2,0,0,0,1,2 +09254040180000,2017,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,60,2,0,0,0,1,2 +13362270230000,2016,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,125,2,0,0,1,2,2 +13253230040000,2021,278,1972,2210,3150,4,9,3,0,0,2,2,6,2,3,2,2,7,0,3,1,2,1,2,1,1,1,52,2,0,0,0,1,2 +09254040180000,2015,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,60,2,0,0,0,1,2 +13362270230000,2017,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,125,2,0,0,1,2,2 
+13253230040000,2015,203,1969,1040,3150,2,5,1,0,0,1,2,6,0,3,2,2,3,1,3,2,2,1,2,3,1,1,46,2,0,0,0,2,2 +13253160160000,2019,202,1901,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,117,2,0,0,0,2,2 +10254170360000,2017,205,1948,1829,4340,3,7,2,1,2,2,2,6,2,3,2,2,3,1,3,1,2,1,2,1,1,1,67,2,0,0,0,1,2 +10253190450000,2019,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,67,2,0,0,0,1,2 +14321260280000,2021,206,1880,3660,3125,4,11,5,0,0,3,2,6,2,3,2,2,4,2,3,3,2,1,2,1,2,1,143,2,0,1,0,2,2 +09361030150000,2018,203,1928,1357,5280,2,6,1,1,0,1,2,6,2,3,2,2,1,1,3,1,2,2,2,3,1,1,90,3,0,0,0,2,2 +09361030150000,2021,206,1931,2772,5280,5,9,3,1,0,2,2,6,2,3,2,2,1,1,3,1,3,2,2,3,1,1,93,3,0,0,0,1,2 +17032010190000,2021,210,1887,3299,1204,8,11,3,1,2,3,2,6,2,3,2,2,7,0,3,1,2,2,2,1,6,1,134,2,0,1,0,1,2 +13253160160000,2021,205,1904,1790,3150,3,7,3,0,0,2,2,6,2,3,2,2,3,1,1,1,2,2,2,1,1,1,120,2,0,1,0,1,2 +10254170360000,2021,206,1959,2312,4340,4,8,3,1,2,2,2,6,2,3,2,2,3,1,3,1,2,1,2,1,1,1,73,2,0,0,0,1,2 +13362270230000,2020,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,128,2,0,0,1,2,2 +09361030150000,2017,203,1928,1357,5280,2,6,1,1,0,1,2,6,2,3,2,2,1,1,3,1,2,2,2,3,1,1,87,3,0,0,0,2,2 +17032010190000,2017,211,1887,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,128,2,0,0,0,1,2 +13253230040000,2017,203,1969,1040,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,3,1,2,1,2,3,1,1,46,2,0,0,0,2,2 +17032010190000,2019,211,1888,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,131,2,0,0,0,1,2 +09361030150000,2019,203,1928,1357,5280,2,6,1,1,0,1,2,6,2,3,2,2,1,1,3,1,2,2,2,3,1,1,90,3,0,0,0,2,2 +09363230550000,2015,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,92,2,0,0,1,2,2 +09254040180000,2016,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,60,2,0,0,0,1,2 +13362270230000,2019,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,128,2,0,0,1,2,2 +13253160160000,2018,202,1901,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,117,2,0,0,0,2,2 
+09363230550000,2021,206,1926,2376,4375,4,7,3,1,0,2,2,6,2,3,2,2,3,1,3,1,3,2,2,3,1,1,98,2,0,0,1,1,2 +13253230040000,2016,203,1969,1040,3150,2,5,1,0,0,1,2,6,0,3,2,2,3,1,3,2,2,1,2,3,1,1,46,2,0,0,0,2,2 +10254170360000,2016,205,1948,1775,4340,4,7,2,1,2,2,2,6,2,3,2,2,7,0,3,1,2,1,2,1,1,1,67,2,0,0,0,1,2 +13362270230000,2021,205,1893,1476,2750,4,6,3,1,0,2,2,6,2,3,2,2,3,1,1,1,3,2,3,1,1,1,131,2,0,1,1,1,2 +17032010190000,2018,211,1887,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,131,2,0,0,0,1,2 +14321260280000,2015,211,1878,2850,3125,6,15,3,0,0,3,2,2,0,3,2,2,7,0,3,1,2,2,2,3,2,2,137,2,0,0,1,2,2 +17032010190000,2020,211,1888,3299,1204,8,11,3,0,2,3,2,2,2,3,2,2,7,0,3,1,2,2,2,1,6,2,131,2,0,0,0,1,2 +13253160160000,2015,202,1901,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,114,2,0,0,1,2,2 +09363230550000,2018,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,95,2,0,0,1,2,2 +09254040180000,2019,203,1955,1571,3750,3,6,1,0,0,5,2,6,2,1,2,2,7,0,1,1,2,1,2,3,1,1,63,2,0,0,0,1,2 +10253190450000,2020,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,67,2,0,0,0,1,2 +09361030150000,2020,203,1928,1357,5280,2,6,1,1,0,1,2,6,2,3,2,2,1,1,3,1,2,2,2,3,1,1,90,3,0,0,0,2,2 +13362270230000,2018,202,1890,756,2750,2,4,1,1,0,1,2,6,0,3,2,2,3,1,1,1,3,2,3,1,1,1,128,2,0,0,1,2,2 +10253190450000,2015,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,64,2,0,0,0,1,2 +13253230040000,2018,203,1969,1040,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,3,1,2,1,2,3,1,1,49,2,0,0,0,2,2 +10253190450000,2016,204,1951,2469,11160,3,8,2,1,1,1,2,6,0,3,1,2,3,2,3,3,2,1,2,3,1,1,64,2,0,0,0,1,2 +14321260280000,2019,211,1878,2850,3125,4,9,3,0,0,3,2,2,2,3,2,2,7,0,3,3,2,1,2,1,2,2,140,2,0,0,0,2,2 +13253160160000,2016,202,1901,706,3150,2,5,1,0,0,1,2,6,2,3,2,2,3,1,1,1,2,2,2,3,1,1,114,2,0,0,0,2,2 +09363230550000,2020,203,1923,1200,4375,3,5,1,1,0,1,2,6,2,3,2,2,7,0,3,1,3,2,2,3,1,1,95,2,0,0,1,2,2 +10254170360000,2019,205,1948,1829,4340,3,7,2,1,2,2,2,6,2,3,2,2,3,1,3,1,2,1,2,1,1,1,70,2,0,0,0,1,2 
+13253230040000,2020,203,1969,1040,3150,2,5,1,0,0,1,2,6,2,3,2,2,7,0,3,1,2,1,2,3,1,1,49,2,0,0,0,2,2 +10254170360000,2020,205,1948,1829,4340,3,7,2,1,2,2,2,6,2,3,2,2,3,1,3,1,2,1,2,1,1,1,70,2,0,0,0,1,2 +09254040180000,2021,203,1958,1568,3750,3,8,3,0,0,6,2,6,2,1,2,2,3,1,1,1,2,1,2,1,1,1,66,2,0,0,0,1,2 +14321260280000,2020,211,1878,2850,3125,4,9,3,0,0,3,2,2,2,3,2,2,7,0,3,3,2,1,2,1,2,2,140,2,0,0,0,2,2 +14321260280000,2017,211,1878,2850,3125,4,9,3,0,0,3,2,2,2,3,2,2,7,0,3,3,2,1,2,1,2,2,137,2,0,0,0,2,2 diff --git a/data-raw/chars_sample_hie.csv b/data-raw/chars_sample_hie.csv new file mode 100644 index 0000000..f8ed4cc --- /dev/null +++ b/data-raw/chars_sample_hie.csv @@ -0,0 +1,12 @@ +pin,qu_town,qu_mlt_cd,qu_home_improvement,qu_use,qu_exterior_wall,qu_roof,qu_basement_type,qu_basement_finish,qu_heat,qu_air,qu_attic_type,qu_attic_finish,qu_type_plan,qu_type_design,qu_construct_quality,qu_porch,qu_garage_size,qu_garage_const,qu_garage_attached,qu_garage_area,qu_num_apts,qu_sqft_bld,qu_lnd_sqft,qu_class,qu_rooms,qu_beds,qu_full_bath,qu_half_bath,qu_fire_place,qu_no_com_unit,qu_type_of_res,qu_upload_date,hie_last_year_active,year +13253230040000,77,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,2,2,0,0,0,203,0,0,0,0,0,0,0,2019-06-19,2023,2019 +10254170360000,75,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,483,0,206,1,1,1,0,0,0,0,2017-01-23,2020,2017 +13253230040000,77,3,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1170,0,278,4,2,2,0,0,0,2,2017-04-20,2020,2017 +09361030150000,71,3,1,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1415,0,206,3,3,2,0,0,0,2,2017-06-09,2020,2017 +13362270230000,77,3,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,720,0,205,2,2,2,0,0,0,2,2015-07-01,2020,2015 +09254040180000,71,3,1,0,0,0,0,0,0,1,0,0,0,0,0,0,3,1,2,2,0,845,0,207,2,2,2,0,0,0,2,2015-08-01,2020,2015 +09363230550000,71,3,1,0,0,0,0,0,0,1,0,0,0,0,0,0,3,1,2,2,0,1176,0,206,2,1,2,0,0,0,2,2015-08-01,2020,2015 +17032010190000,74,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,210,0,0,0,1,0,0,0,2015-09-02,2020,2015 
+10253190450000,75,3,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3558,0,209,4,2,2,0,0,0,2,2016-02-03,2020,2016 +13253160160000,77,3,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1084,0,205,2,1,2,0,0,0,2,2016-08-01,2020,2016 +14321260280000,74,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,4,2,2,2,6,810,0,206,2,0,2,0,0,0,0,2016-09-13,2020,2016 diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..c7bd47a --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,40 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +uv.lock + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Sphinx documentation +_build/ diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..550d607 --- /dev/null +++ b/python/README.md @@ -0,0 +1,51 @@ +# CCAO Python package + +This is a Python version of the [`ccao` R +package](https://ccao-data.github.io/ccao/), providing utilities for +managing, distributing, and version controlling *CCAO-specific* functions +used throughout CCAO applications, models, and diagnostics. For generalized +versions of assessment-related functions, see +[assesspy](https://github.com/ccao-data/assesspy). 
+ +## Installation + +Install the latest release of `ccao` from PyPI: + +```bash +pip install ccao +``` + +You can also install the most recent code directly from GitHub: + +```bash +pip install "git+https://github.com/ccao-data/ccao.git#egg=ccao&subdirectory=python" +``` + +## Development + +Create a development environment using [`uv`](https://docs.astral.sh/uv/): + +``` +uv venv +source .venv/bin/activate +uv python install +uv pip install .[dev,docs] +``` + +### Running tests + +Run tests with pytest: + +``` +pytest +``` + +### Building docs + +Build and serve the docs locally with sphinx: + +``` +sphinx-autobuild docs/source _build/html +``` + +Navigate to http://localhost:8000 to view the docs. diff --git a/python/ccao/__init__.py b/python/ccao/__init__.py new file mode 100644 index 0000000..f203ad4 --- /dev/null +++ b/python/ccao/__init__.py @@ -0,0 +1 @@ +from ccao.vars_funs import vars_dict, vars_rename diff --git a/python/ccao/data/__init__.py b/python/ccao/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/ccao/data/vars_dict.csv b/python/ccao/data/vars_dict.csv new file mode 120000 index 0000000..f61f4c0 --- /dev/null +++ b/python/ccao/data/vars_dict.csv @@ -0,0 +1 @@ +../../../data-raw/vars_dict.csv \ No newline at end of file diff --git a/python/ccao/vars_funs.py b/python/ccao/vars_funs.py new file mode 100644 index 0000000..2a4ddec --- /dev/null +++ b/python/ccao/vars_funs.py @@ -0,0 +1,128 @@ +# Functions for translating variables between different data sources +import importlib.resources + +import pandas as pd + +import ccao.data + +# Load the default variable dictionary +_data_path = importlib.resources.files(ccao.data) +vars_dict = pd.read_csv(str(_data_path / "vars_dict.csv")) + +# Prefix we use to identify variable name columns in the variable dictionary +VAR_NAME_PREFIX = "var_name" + + +def vars_rename( + data: list[str] | pd.DataFrame, + names_from: str, + names_to: str, + output_type: str = "inplace", + 
dictionary: pd.DataFrame | None = None, +) -> list[str] | pd.DataFrame: + """ + Rename variables from one naming convention to another. + + This function renames columns in a dataset based on a dictionary that maps + names from one convention to another. It can rename columns in-place or return + a list of renamed columns, behavior that is configurable using + the `output_type` argument. + + :param data: + DataFrame or list of column names to rename. + If a DataFrame, renames columns directly. + :type data: pandas.DataFrame or list[str] + + :param names_from: + The source naming convention to rename from. + Must match a key in the dictionary. + :type names_from: str + + :param names_to: + The target naming convention to rename to. + Must match a key in the dictionary. + :type names_to: str + + :param output_type: + Output type. Either ``"inplace"``, which mutates the input data frame, + or ``"vector"``, which returns a list of the renamed column names + without modifying the input. + :type output_type: str + + :param dictionary: + The dictionary for mapping column names. + Must contain keys like ``var_name_<names_from>`` and ``var_name_<names_to>``. + :type dictionary: pandas.DataFrame + + :raises ValueError: If required arguments are invalid or the dictionary does not meet format requirements. + :raises TypeError: If ``data`` is neither a DataFrame nor a list of column names. + + :return: + Either the input data with renamed columns if ``output_type`` is + ``"inplace"`` and the input data is a DataFrame, otherwise a list of + renamed columns. + :rtype: pandas.DataFrame or list[str] + + :example: + + .. 
code-block:: python + + import ccao + + ccao.vars_rename( + data=["char_yrblt"], + names_from="athena", + names_to="pretty", + output_type="vector" + ) + """ + # Validate the dictionary schema + dictionary = dictionary if dictionary is not None else vars_dict + if dictionary.empty: + raise ValueError("dictionary must be a non-empty pandas DataFrame") + + # Make sure the dictionary contains variable columns + dictionary_var_columns = [ + col + for col in list(dictionary.columns.values) + if col.startswith(VAR_NAME_PREFIX) + ] + if not len(dictionary_var_columns) >= 2: + raise ValueError( + f"dictionary must contain at least two columns starting with " + f"{VAR_NAME_PREFIX}" + ) + + # Get a list of possible names_from and names_to from dictionary + possible_names_args = [ + col.replace(f"{VAR_NAME_PREFIX}_", "") + for col in dictionary_var_columns + ] + + # If names arguments aren't possible, throw error and list possible names + for label, var in [("names_from", names_from), ("names_to", names_to)]: + if var not in possible_names_args: + raise ValueError( + f"{label} must be one of {possible_names_args} (got '{var}')" + ) + + # Validate output type + if output_type not in ["inplace", "vector"]: + raise ValueError("output_type must be one of 'inplace' or 'vector'") + + # Get a mapping from names_from to names_to + from_ = f"{VAR_NAME_PREFIX}_{names_from}" + to = f"{VAR_NAME_PREFIX}_{names_to}" + mapping = dict(zip(dictionary[from_], dictionary[to])) + + # Handle output differently depending on the input and output type args + if isinstance(data, pd.DataFrame): + if output_type == "inplace": + data.rename(columns=mapping, inplace=True) + return data + else: + return [mapping.get(col, col) for col in list(data.columns.values)] + else: + # If the input data is a list, it's not possible to update it inplace, + # so ignore that argument + return [mapping.get(col, col) for col in data] diff --git a/python/docs/images/logo.png b/python/docs/images/logo.png new file mode 
120000 index 0000000..1b1dd05 --- /dev/null +++ b/python/docs/images/logo.png @@ -0,0 +1 @@ +../../../man/figures/logo.png \ No newline at end of file diff --git a/python/docs/source/authors.rst b/python/docs/source/authors.rst new file mode 100644 index 0000000..c0c6217 --- /dev/null +++ b/python/docs/source/authors.rst @@ -0,0 +1,7 @@ +======= +Authors +======= + +**Jean Cochrane**. Author, maintainer. + +**Dan Snow**. Author, maintainer. diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py new file mode 100644 index 0000000..ceabeaf --- /dev/null +++ b/python/docs/source/conf.py @@ -0,0 +1,14 @@ +import pathlib +import sys + +from sphinx_pyproject import SphinxConfig + +# Add source path to sys path so that autodoc can load functions +rootdir = pathlib.Path(__file__).resolve().parent.parent.parent +sys.path.append(str(rootdir.resolve())) + +# Loads config from pyproject.toml +config = SphinxConfig(rootdir / "pyproject.toml", globalns=globals()) + +# Options that can't be parsed by sphinx-pyproject +html_sidebars = {"**": []} diff --git a/python/docs/source/index.rst b/python/docs/source/index.rst new file mode 100644 index 0000000..1df6361 --- /dev/null +++ b/python/docs/source/index.rst @@ -0,0 +1,42 @@ +CCAO Python package +=================== + +.. toctree:: + :hidden: + :caption: Contents: + + reference + +.. toctree:: + :hidden: + :caption: Appendix: + + authors + license + Source Code <https://github.com/ccao-data/ccao> + +This is a Python version of the ``ccao`` `R +package <https://ccao-data.github.io/ccao/>`_, providing utilities for +managing, distributing, and version controlling CCAO-specific functions +used throughout CCAO applications, models, and diagnostics. For generalized +versions of assessment-related functions, see +`assesspy <https://github.com/ccao-data/assesspy>`_. + +For detailed documentation on included functions and data, `visit the full reference +list <reference.html>`_. + + +Installation +------------ + +Install the latest release of ``ccao`` from PyPI: + +.. 
code-block:: bash + + pip install ccao + +You can also install the most recent code directly from GitHub: + +.. code-block:: bash + + pip install "git+https://github.com/ccao-data/ccao.git#egg=ccao&subdirectory=python" diff --git a/python/docs/source/license.rst b/python/docs/source/license.rst new file mode 100644 index 0000000..546489a --- /dev/null +++ b/python/docs/source/license.rst @@ -0,0 +1,5 @@ +======= +License +======= + +.. literalinclude:: ../../../LICENSE diff --git a/python/docs/source/reference.rst b/python/docs/source/reference.rst new file mode 100644 index 0000000..3dc531e --- /dev/null +++ b/python/docs/source/reference.rst @@ -0,0 +1,14 @@ +========= +Reference +========= + +Functions +--------- + +Manage characteristics +^^^^^^^^^^^^^^^^^^^^^^ + +Recode/rename characteristic columns, merge HIE data, and fix characteristic +errors. + +:doc:`vars_rename() <vars_rename>` diff --git a/python/docs/source/vars_rename.rst b/python/docs/source/vars_rename.rst new file mode 100644 index 0000000..db93a1d --- /dev/null +++ b/python/docs/source/vars_rename.rst @@ -0,0 +1,5 @@ +================================================================================== +Bulk rename variables from CCAO SQL to standardized or pretty names and vice versa +================================================================================== + +.. 
autofunction:: ccao.vars_rename diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..733619b --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,57 @@ +[project] +name = "ccao" +version = "1.3.0" +description = "Convenience Functions and Datasets for the Cook County Assessor's Office" +readme = "README.md" +requires-python = ">=3.10" +authors = [ + {name = "Jean Cochrane", email="jean.cochrane@cookcountyil.gov"}, + {name = "Dan Snow", email="daniel.snow@cookcountyil.gov"}, +] +dependencies = [ + "pandas>=2.2.3", +] + +[project.optional-dependencies] +dev = [ + "mypy>=1.13.0", + "pytest>=8.3.3", + "ruff>=0.7.4", +] +docs = [ + "Sphinx>=8.1.3", + "myst-parser>=4.0.0", + "pydata-sphinx-theme>=0.16.0", + "sphinx-pyproject>=0.3.0" +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.package-data] +"*" = ["*.csv"] + +[tool.ruff] +line-length = 79 + +[tool.ruff.lint] +extend-select = ["I"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.sphinx-pyproject] +github_username = "ccao-data" +github_repository = "ccao" +project = "ccao" +copyright = "2024, Cook County Assessor's Office" +language = "en" +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "myst_parser" +] +highlight_language = "none" +html_theme = "pydata_sphinx_theme" +html_logo = "../images/logo.png" +html_show_copyright = false diff --git a/python/tests/__init__.py b/python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 0000000..2d59d8b --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,18 @@ +import pathlib + +import pandas as pd +import pytest + +fixture_dir = pathlib.Path(__file__).parent / "fixtures" + + +@pytest.fixture(scope="module") +def chars_sample_athena() -> pd.DataFrame: + """Sample chars with Athena variable names""" + return pd.read_csv(fixture_dir / "chars_sample_athena.csv") + + 
+@pytest.fixture(scope="module") +def chars_sample_hie() -> pd.DataFrame: + """Sample chars with HIE variable names""" + return pd.read_csv(fixture_dir / "chars_sample_hie.csv") diff --git a/python/tests/fixtures/chars_sample_athena.csv b/python/tests/fixtures/chars_sample_athena.csv new file mode 120000 index 0000000..6ab79aa --- /dev/null +++ b/python/tests/fixtures/chars_sample_athena.csv @@ -0,0 +1 @@ +../../../data-raw/chars_sample_athena.csv \ No newline at end of file diff --git a/python/tests/fixtures/chars_sample_hie.csv b/python/tests/fixtures/chars_sample_hie.csv new file mode 120000 index 0000000..1340c3f --- /dev/null +++ b/python/tests/fixtures/chars_sample_hie.csv @@ -0,0 +1 @@ +../../../data-raw/chars_sample_hie.csv \ No newline at end of file diff --git a/python/tests/test_vars_funs.py b/python/tests/test_vars_funs.py new file mode 100644 index 0000000..63c7c93 --- /dev/null +++ b/python/tests/test_vars_funs.py @@ -0,0 +1,124 @@ +import pandas as pd +import pytest + +import ccao +import ccao.vars_funs + + +class TestVarsRename: + @pytest.mark.parametrize("output_type", ["inplace", "vector"]) + def test_vars_rename_input_data_is_dataframe( + self, output_type, chars_sample_athena + ): + data = chars_sample_athena.iloc[:, 13:19].copy() + result = ccao.vars_rename( + data=data, + names_from="athena", + names_to="pretty", + output_type=output_type, + ) + expected = [ + "Apartments", + "Cathedral Ceiling", + "Attic Finish", + "Garage 1 Attached", + "Garage 1 Area Included", + "Garage 1 Size", + ] + if output_type == "inplace": + assert list(result.columns) == expected + else: + assert result == expected + + @pytest.mark.parametrize("output_type", ["inplace", "vector"]) + def test_vars_rename_input_data_is_list(self, output_type): + result = ccao.vars_rename( + data=["Apartments", "Cathedral Ceiling"], + names_from="pretty", + names_to="model", + output_type=output_type, + ) + expected = ["char_apts", "char_tp_dsgn"] + # Output should be the same 
regardless of the value of `output_type` + assert result == expected + + def test_vars_rename_hie_to_athena(self, chars_sample_hie): + data = chars_sample_hie.iloc[:, 1:3].copy() + result = ccao.vars_rename( + data=data, + names_from="hie", + names_to="athena", + output_type="vector", + ) + expected = ["township_code", "card"] + assert result == expected + + def test_vars_rename_unmatched_cols_unchanged(self): + # If columns are not present in the dictionary, leave them as-is + unmatched_colnames = ["foo", "bar", "baz"] + result = ccao.vars_rename( + data=unmatched_colnames, names_from="hie", names_to="athena" + ) + assert result == unmatched_colnames + + def test_vars_rename_custom_dictionary(self): + result = ccao.vars_rename( + data=["1", "2", "3"], + names_from="foo", + names_to="bar", + dictionary=pd.DataFrame( + { + "var_name_foo": ["1", "2", "3"], + "var_name_bar": ["char_1", "char_2", "char_3"], + } + ), + ) + expected = ["char_1", "char_2", "char_3"] + assert result == expected + + def test_vars_rename_invalid_dictionary_empty(self): + with pytest.raises(ValueError) as exc: + ccao.vars_rename( + data=["1", "2", "3"], + names_from="sql", + names_to="char", + dictionary=pd.DataFrame(), + ) + assert "non-empty" in str(exc.value) + + def test_vars_rename_invalid_dictionary_missing_variable_columns(self): + with pytest.raises(ValueError) as exc: + ccao.vars_rename( + data=["1", "2", "3"], + names_from="foo", + names_to="bar", + dictionary=pd.DataFrame( + { + "foo": ["1", "2", "3"], + "bar": ["char_1", "char_2", "char_3"], + } + ), + ) + assert f"starting with {ccao.vars_funs.VAR_NAME_PREFIX}" in str( + exc.value + ) + + @pytest.mark.parametrize( + "names_from,names_to", [("1", "pretty"), ("pretty", "1")] + ) + def test_vars_rename_invalid_names(self, names_from, names_to): + with pytest.raises(ValueError) as exc: + ccao.vars_rename( + data=["1", "2", "3"], names_from=names_from, names_to=names_to + ) + assert "must be one of" in str(exc.value) + + def 
test_vars_rename_invalid_output_type(self): + with pytest.raises(ValueError) as exc: + ccao.vars_rename( + data=["Apartments", "Cathedral Ceiling"], + names_from="pretty", + names_to="model", + output_type="foo", + ) + assert "output_type must be one of" in str(exc.value)