# AdminAnalytics/src/admin_analytics/cli.py
# Snapshot metadata: 2026-03-30 20:42:08 -04:00 — 203 lines, 6.6 KiB, Python

from typing import Annotated
import typer
from admin_analytics.config import DEFAULT_YEAR_RANGE
from admin_analytics.db.connection import get_connection
from admin_analytics.db.schema import ensure_schema
# Root CLI application; sub-command groups are attached below.
app = typer.Typer(help="University of Delaware administrative analytics")
# "ingest" group: commands that pull data from external sources into the DB.
ingest_app = typer.Typer(help="Ingest data from external sources")
app.add_typer(ingest_app, name="ingest")
def _parse_year_range(year_range: str) -> range:
"""Parse '2005-2024' into a range object."""
parts = year_range.split("-")
if len(parts) != 2:
raise typer.BadParameter("Year range must be in format YYYY-YYYY")
start, end = int(parts[0]), int(parts[1])
return range(start, end + 1)
@ingest_app.command()
def ipeds(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    component: Annotated[
        str, typer.Option(help="Component: all, hd, finance, staff, enrollment")
    ] = "all",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IPEDS data for the University of Delaware.

    Downloads the requested survey components for the given year range,
    then loads each selected component into the local database.
    """
    years = _parse_year_range(year_range)
    conn = get_connection()
    try:
        ensure_schema(conn)
        # Deferred imports keep `--help` and unrelated commands fast.
        from admin_analytics.ipeds.download import download_all
        from admin_analytics.ipeds.institution import load_institutions
        from admin_analytics.ipeds.finance import load_finance
        from admin_analytics.ipeds.staff import load_staff
        from admin_analytics.ipeds.enrollment import load_enrollment

        components = (
            ["hd", "finance", "staff", "enrollment"]
            if component == "all"
            else [component]
        )
        # Finance needs both F1A and F2 downloads (UD reports under FASB/F2)
        download_components = list(components)
        if "finance" in download_components:
            download_components.append("finance_f2")
        typer.echo(f"Downloading IPEDS data for {years.start}-{years.stop - 1}...")
        download_all(years, download_components, force=force)
        if "hd" in components:
            typer.echo("Loading institutional directory (HD)...")
            load_institutions(conn, years)
        if "finance" in components:
            typer.echo("Loading finance data (F1A)...")
            load_finance(conn, years)
        if "staff" in components:
            typer.echo("Loading staff data (S)...")
            load_staff(conn, years)
        if "enrollment" in components:
            typer.echo("Loading enrollment data (EF)...")
            load_enrollment(conn, years)
        typer.echo("IPEDS ingestion complete.")
    finally:
        # Previously the connection leaked if any download/load step raised.
        conn.close()
@ingest_app.command()
def irs990(
    year_range: Annotated[
        str, typer.Option(help="Year range for index files, e.g. 2019-2025")
    ] = "2019-2025",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IRS 990 data for UD and UD Research Foundation."""
    years = _parse_year_range(year_range)
    conn = get_connection()
    try:
        ensure_schema(conn)
        # Deferred imports keep CLI startup fast.
        from admin_analytics.irs990.download import download_all_filings
        from admin_analytics.irs990.loader import load_all

        typer.echo(f"Downloading 990 filings for {years.start}-{years.stop - 1}...")
        download_all_filings(years, force=force)
        typer.echo("Loading 990 data into database...")
        totals = load_all(conn, years)
        typer.echo(
            f"IRS 990 ingestion complete: {totals['filings']} filings, "
            f"{totals['part_vii']} Part VII rows, {totals['schedule_j']} Schedule J rows."
        )
    finally:
        # Previously the connection leaked if a download/load step raised.
        conn.close()
@ingest_app.command()
def cpi(
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if file exists")
    ] = False,
) -> None:
    """Ingest BLS CPI-U data."""
    conn = get_connection()
    try:
        ensure_schema(conn)
        # Deferred imports keep CLI startup fast.
        from admin_analytics.bls.download import download_cpi_file
        from admin_analytics.bls.loader import load_cpi

        typer.echo("Downloading BLS CPI-U data...")
        file_path = download_cpi_file(force=force)
        typer.echo("Loading CPI-U data into database...")
        count = load_cpi(conn, file_path)
        typer.echo(f"CPI-U ingestion complete: {count} monthly observations loaded.")
    finally:
        # Previously the connection leaked if the download or load raised.
        conn.close()
@ingest_app.command()
def scrape() -> None:
    """Scrape UD staff directory pages for admin headcounts."""
    conn = get_connection()
    try:
        ensure_schema(conn)
        # Deferred imports keep CLI startup fast. NON_OVERHEAD_CATEGORIES was
        # imported but never used, so it is no longer pulled in.
        from admin_analytics.scraper.directory import scrape_all
        from admin_analytics.scraper.loader import load_scrape
        from admin_analytics.scraper.classify import OVERHEAD_CATEGORIES

        typer.echo("Scraping UD staff directory pages...")
        entries = scrape_all()
        typer.echo("Loading scraped data into database...")
        count = load_scrape(conn, entries)
        # Summary by unit and category
        typer.echo(f"\nLoaded {count} staff entries.\n")
        unit_counts: dict[str, dict[str, int]] = {}
        for e in entries:
            unit_counts.setdefault(e.unit, {})
            unit_counts[e.unit][e.category] = unit_counts[e.unit].get(e.category, 0) + 1
        for unit, cats in sorted(unit_counts.items()):
            total = sum(cats.values())
            overhead = sum(v for k, v in cats.items() if k in OVERHEAD_CATEGORIES)
            typer.echo(f" {unit}: {total} staff ({overhead} overhead)")
            for cat, n in sorted(cats.items(), key=lambda x: -x[1]):
                typer.echo(f" {cat}: {n}")
    finally:
        # Previously the connection leaked if scraping or loading raised.
        conn.close()
@app.command()
def dashboard(
    host: Annotated[str, typer.Option(help="Host to bind to (0.0.0.0 for network access)")] = "127.0.0.1",
    port: Annotated[int, typer.Option(help="Port to serve on")] = 8050,
    debug: Annotated[bool, typer.Option(help="Enable Dash debug mode")] = True,
) -> None:
    """Launch the analytics dashboard."""
    # Imported lazily so the dashboard stack loads only for this command.
    from admin_analytics.dashboard.app import create_app

    application = create_app()
    typer.echo(f"Starting dashboard at http://{host}:{port}/")
    # Blocks until the server is stopped.
    application.run(host=host, port=port, debug=debug)
@app.command()
def validate() -> None:
    """Run data validation checks and print a report."""
    conn = get_connection()
    try:
        ensure_schema(conn)
        # Deferred import keeps CLI startup fast.
        from admin_analytics.validation import format_report

        typer.echo(format_report(conn))
    finally:
        # Previously the connection leaked if report generation raised.
        conn.close()
@ingest_app.command(name="all")
def ingest_all(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest all data sources."""
    # Delegate to the individual ingest commands, in the same order they are
    # defined in this module: IPEDS, IRS 990, CPI, then the directory scrape.
    steps = (
        lambda: ipeds(year_range=year_range, component="all", force=force),
        lambda: irs990(year_range=year_range, force=force),
        lambda: cpi(force=force),
        scrape,
    )
    for run_step in steps:
        run_step()