# AdminAnalytics/tests/test_ipeds_download.py
# 2026-03-30 07:15:14 -04:00
#
# 65 lines
# 1.9 KiB
# Python

import io
import zipfile
import httpx
import respx
import admin_analytics.config as config
from admin_analytics.config import ipeds_filename
from admin_analytics.ipeds.download import download_component
def test_ipeds_filename_patterns():
    """Pin the file stem ``ipeds_filename`` returns for each component/year pair."""
    assert ipeds_filename("hd", 2023) == "HD2023"
    # The finance stems expected here carry a two-year span
    # (2023 -> "2223", 2005 -> "0405") rather than the single year.
    assert ipeds_filename("finance", 2023) == "F2223_F1A"
    assert ipeds_filename("finance", 2005) == "F0405_F1A"
    assert ipeds_filename("enrollment", 2023) == "EF2023A"
    assert ipeds_filename("staff", 2023) == "S2023_OC"
    assert ipeds_filename("salaries", 2023) == "SAL2023_IS"
def _make_zip_bytes(filename: str, content: str) -> bytes:
    """Build an in-memory ZIP archive with a single member and return its bytes.

    Args:
        filename: Name of the one member written into the archive.
        content: Text stored as that member's payload.

    Returns:
        The complete archive as raw bytes.
    """
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr(filename, content)
    # Read the buffer only after the ``with`` block has closed the archive;
    # the ZIP central directory is written on close.
    return buf.getvalue()
@respx.mock
def test_download_component(tmp_path):
    """Serve a mocked HD 2023 archive and check the CSV lands in the result dir.

    The NCES URL is answered by respx with an in-memory ZIP, so no real
    network traffic is involved.
    """
    zip_bytes = _make_zip_bytes(
        "HD2023.csv", "UNITID,INSTNM\n130943,University of Delaware\n"
    )
    respx.get("https://nces.ed.gov/ipeds/datacenter/data/HD2023.zip").mock(
        return_value=httpx.Response(200, content=zip_bytes)
    )

    # Redirect the module-level data directory into pytest's temp dir for the
    # duration of the call, and restore it even if the download raises.
    original = config.IPEDS_DATA_DIR
    config.IPEDS_DATA_DIR = tmp_path / "ipeds"
    try:
        dest = download_component("hd", 2023)
    finally:
        config.IPEDS_DATA_DIR = original

    # The returned path must exist and hold exactly one CSV with the served row.
    assert dest.exists()
    csvs = list(dest.glob("*.csv"))
    assert len(csvs) == 1
    assert "University of Delaware" in csvs[0].read_text()
@respx.mock
def test_download_component_skips_if_exists(tmp_path):
    """With a CSV already in place and ``force=False``, no HTTP request is made."""
    # Seed the destination BEFORE overriding config: if mkdir/write_text fails,
    # config.IPEDS_DATA_DIR has not been touched and nothing leaks into other
    # tests.
    data_dir = tmp_path / "ipeds"
    dest = data_dir / "hd" / "2023"
    dest.mkdir(parents=True)
    (dest / "HD2023.csv").write_text("UNITID\n130943\n")

    # Registered without a mocked response; used only to observe whether the
    # URL was requested.
    route = respx.get("https://nces.ed.gov/ipeds/datacenter/data/HD2023.zip")

    # The override is immediately followed by try/finally so it is always
    # undone.
    original = config.IPEDS_DATA_DIR
    config.IPEDS_DATA_DIR = data_dir
    try:
        result = download_component("hd", 2023, force=False)
    finally:
        config.IPEDS_DATA_DIR = original

    assert result == dest
    assert not route.called