Initial build out
This commit is contained in:
parent
f037c50736
commit
29215e2bd2
40 changed files with 2622 additions and 0 deletions
126
src/admin_analytics/cli.py
Normal file
126
src/admin_analytics/cli.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
from typing import Annotated
|
||||
|
||||
import typer
|
||||
|
||||
from admin_analytics.config import DEFAULT_YEAR_RANGE
|
||||
from admin_analytics.db.connection import get_connection
|
||||
from admin_analytics.db.schema import ensure_schema
|
||||
|
||||
# Sub-application that groups the data-ingestion commands.
ingest_app = typer.Typer(help="Ingest data from external sources")

# Root CLI application; the ingestion group is mounted under the "ingest" name.
app = typer.Typer(help="University of Delaware administrative analytics")
app.add_typer(ingest_app, name="ingest")
|
||||
|
||||
|
||||
def _parse_year_range(year_range: str) -> range:
|
||||
"""Parse '2005-2024' into a range object."""
|
||||
parts = year_range.split("-")
|
||||
if len(parts) != 2:
|
||||
raise typer.BadParameter("Year range must be in format YYYY-YYYY")
|
||||
start, end = int(parts[0]), int(parts[1])
|
||||
return range(start, end + 1)
|
||||
|
||||
|
||||
@ingest_app.command()
def ipeds(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    component: Annotated[
        str, typer.Option(help="Component: all, hd, finance, staff, enrollment")
    ] = "all",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IPEDS data for the University of Delaware."""
    # The docstring is left as a single line because Typer shows it as the
    # command's help text.
    #
    # Parameters:
    #   year_range: inclusive 'YYYY-YYYY' string, parsed by _parse_year_range.
    #   component:  "all" selects hd, finance, staff and enrollment; any other
    #               value is used as the single component to process.
    #   force:      forwarded to download_all as its ``force`` keyword.
    #
    # NOTE(review): ``component`` is not validated here; a value outside the
    # documented set is passed to download_all unchanged and matches none of
    # the load steps below -- confirm whether it should be rejected.
    years = _parse_year_range(year_range)

    # Imported inside the command rather than at module level; presumably to
    # keep CLI start-up light -- TODO confirm.
    from admin_analytics.ipeds.download import download_all
    from admin_analytics.ipeds.institution import load_institutions
    from admin_analytics.ipeds.finance import load_finance
    from admin_analytics.ipeds.staff import load_staff
    from admin_analytics.ipeds.enrollment import load_enrollment

    components = (
        ["hd", "finance", "staff", "enrollment"]
        if component == "all"
        else [component]
    )

    # Finance needs both F1A and F2 downloads (UD reports under FASB/F2)
    download_components = list(components)
    if "finance" in download_components:
        download_components.append("finance_f2")

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo(f"Downloading IPEDS data for {years.start}-{years.stop - 1}...")
        download_all(years, download_components, force=force)

        if "hd" in components:
            typer.echo("Loading institutional directory (HD)...")
            load_institutions(conn, years)

        if "finance" in components:
            typer.echo("Loading finance data (F1A)...")
            load_finance(conn, years)

        if "staff" in components:
            typer.echo("Loading staff data (S)...")
            load_staff(conn, years)

        if "enrollment" in components:
            typer.echo("Loading enrollment data (EF)...")
            load_enrollment(conn, years)

        typer.echo("IPEDS ingestion complete.")
    finally:
        # Close the connection even when a download or load step raises.
        conn.close()
|
||||
|
||||
|
||||
@ingest_app.command()
def irs990(
    year_range: Annotated[
        str, typer.Option(help="Year range for index files, e.g. 2019-2024")
    ] = "2019-2024",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IRS 990 data for UD and UD Research Foundation."""
    # The docstring is left as a single line because Typer shows it as the
    # command's help text.
    #
    # Parameters:
    #   year_range: inclusive 'YYYY-YYYY' string, parsed by _parse_year_range.
    #   force:      forwarded to download_all_filings as its ``force`` keyword.
    years = _parse_year_range(year_range)

    # Imported inside the command rather than at module level; presumably to
    # keep CLI start-up light -- TODO confirm.
    from admin_analytics.irs990.download import download_all_filings
    from admin_analytics.irs990.loader import load_all

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo(f"Downloading 990 filings for {years.start}-{years.stop - 1}...")
        download_all_filings(years, force=force)

        typer.echo("Loading 990 data into database...")
        # load_all's result is indexed by the 'filings', 'part_vii' and
        # 'schedule_j' keys in the summary message below.
        totals = load_all(conn, years)
        typer.echo(
            f"IRS 990 ingestion complete: {totals['filings']} filings, "
            f"{totals['part_vii']} Part VII rows, {totals['schedule_j']} Schedule J rows."
        )
    finally:
        # Close the connection even when the download or load step raises.
        conn.close()
|
||||
|
||||
|
||||
@ingest_app.command()
def cpi() -> None:
    """Ingest BLS CPI-U data. (Not yet implemented.)"""
    # Placeholder command: print a notice, then exit with status code 1.
    not_implemented_notice = "CPI-U ingestion is not yet implemented (Sprint 3)."
    typer.echo(not_implemented_notice)
    raise typer.Exit(code=1)
|
||||
|
||||
|
||||
@ingest_app.command(name="all")
def ingest_all(
    year_range: Annotated[
        str,
        typer.Option(help="Year range, e.g. 2005-2024"),
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    force: Annotated[
        bool,
        typer.Option("--force", help="Re-download even if files exist"),
    ] = False,
) -> None:
    """Ingest all data sources."""
    # Delegates to the ipeds command function with every component selected,
    # forwarding the caller's year range and force flag unchanged.
    # NOTE(review): only the IPEDS ingestion is invoked here even though the
    # help text says "all data sources" -- confirm whether irs990 should run too.
    ipeds(
        year_range=year_range,
        component="all",
        force=force,
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue