Initial build out

This commit is contained in:
emfurst 2026-03-30 07:15:14 -04:00
commit 29215e2bd2
40 changed files with 2622 additions and 0 deletions

126
src/admin_analytics/cli.py Normal file
View file

@@ -0,0 +1,126 @@
from typing import Annotated
import typer
from admin_analytics.config import DEFAULT_YEAR_RANGE
from admin_analytics.db.connection import get_connection
from admin_analytics.db.schema import ensure_schema
# Root CLI application object; command groups are attached to it below.
app = typer.Typer(
    help="University of Delaware administrative analytics",
)

# Command group holding the data-ingestion commands, mounted on the root
# application under the name "ingest".
ingest_app = typer.Typer(
    help="Ingest data from external sources",
)
app.add_typer(ingest_app, name="ingest")
def _parse_year_range(year_range: str) -> range:
"""Parse '2005-2024' into a range object."""
parts = year_range.split("-")
if len(parts) != 2:
raise typer.BadParameter("Year range must be in format YYYY-YYYY")
start, end = int(parts[0]), int(parts[1])
return range(start, end + 1)
@ingest_app.command()
def ipeds(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    component: Annotated[
        str, typer.Option(help="Component: all, hd, finance, staff, enrollment")
    ] = "all",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IPEDS data for the University of Delaware."""
    # The docstring above is kept short because Typer shows it as the
    # command's help text; implementation notes live in comments.
    #
    # Parameters:
    #   year_range: inclusive "YYYY-YYYY" span of years to process.
    #   component:  "all", or one of hd / finance / staff / enrollment.
    #   force:      forwarded to download_all as its ``force`` argument.
    #
    # Raises typer.BadParameter for a malformed year range or an unknown
    # component name.
    years = _parse_year_range(year_range)

    # Reject unknown component names up front; otherwise nothing would be
    # loaded and the command would still report success.
    known_components = ["hd", "finance", "staff", "enrollment"]
    if component == "all":
        components = list(known_components)
    elif component in known_components:
        components = [component]
    else:
        raise typer.BadParameter(
            f"Unknown component {component!r}; expected one of: "
            f"all, {', '.join(known_components)}"
        )

    # Imported lazily inside the command, as in the original implementation.
    from admin_analytics.ipeds.download import download_all
    from admin_analytics.ipeds.institution import load_institutions
    from admin_analytics.ipeds.finance import load_finance
    from admin_analytics.ipeds.staff import load_staff
    from admin_analytics.ipeds.enrollment import load_enrollment

    # Finance needs both F1A and F2 downloads (UD reports under FASB/F2)
    download_components = list(components)
    if "finance" in download_components:
        download_components.append("finance_f2")

    # (component name, progress message, loader), in load order.
    loaders = (
        ("hd", "Loading institutional directory (HD)...", load_institutions),
        ("finance", "Loading finance data (F1A)...", load_finance),
        ("staff", "Loading staff data (S)...", load_staff),
        ("enrollment", "Loading enrollment data (EF)...", load_enrollment),
    )

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo(
            f"Downloading IPEDS data for {years.start}-{years.stop - 1}..."
        )
        download_all(years, download_components, force=force)

        for name, message, loader in loaders:
            if name in components:
                typer.echo(message)
                loader(conn, years)

        typer.echo("IPEDS ingestion complete.")
    finally:
        # Always release the connection, even when a download or load fails.
        conn.close()
@ingest_app.command()
def irs990(
    year_range: Annotated[
        str, typer.Option(help="Year range for index files, e.g. 2019-2024")
    ] = "2019-2024",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IRS 990 data for UD and UD Research Foundation."""
    # The docstring above is kept short because Typer shows it as the
    # command's help text; implementation notes live in comments.
    #
    # Parameters:
    #   year_range: inclusive "YYYY-YYYY" span of years to process.
    #   force:      forwarded to download_all_filings as its ``force`` argument.
    #
    # Raises typer.BadParameter for a malformed year range.
    years = _parse_year_range(year_range)

    # Imported lazily inside the command, as in the original implementation.
    from admin_analytics.irs990.download import download_all_filings
    from admin_analytics.irs990.loader import load_all

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo(
            f"Downloading 990 filings for {years.start}-{years.stop - 1}..."
        )
        download_all_filings(years, force=force)

        typer.echo("Loading 990 data into database...")
        # load_all's result is indexed below by 'filings', 'part_vii' and
        # 'schedule_j' to build the summary line.
        totals = load_all(conn, years)
        typer.echo(
            f"IRS 990 ingestion complete: {totals['filings']} filings, "
            f"{totals['part_vii']} Part VII rows, {totals['schedule_j']} Schedule J rows."
        )
    finally:
        # Always release the connection, even when a download or load fails.
        conn.close()
@ingest_app.command()
def cpi() -> None:
    """Ingest BLS CPI-U data. (Not yet implemented.)"""
    # Placeholder command: print a notice and terminate with exit code 1 so
    # callers can tell that nothing was ingested.
    notice = "CPI-U ingestion is not yet implemented (Sprint 3)."
    typer.echo(notice)
    raise typer.Exit(code=1)
@ingest_app.command(name="all")
def ingest_all(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest all data sources."""
    # Delegates to the ipeds command function as a plain Python call,
    # requesting every IPEDS component.
    # NOTE(review): only IPEDS is ingested here; the irs990 and cpi commands
    # are not invoked -- confirm whether that is intentional.
    ipeds(
        year_range=year_range,
        component="all",
        force=force,
    )