from collections import Counter, defaultdict
from collections.abc import Iterator
from contextlib import contextmanager
from typing import Annotated, Any

import typer

from admin_analytics.config import DEFAULT_YEAR_RANGE
from admin_analytics.db.connection import get_connection
from admin_analytics.db.schema import ensure_schema

app = typer.Typer(help="University of Delaware administrative analytics")
ingest_app = typer.Typer(help="Ingest data from external sources")
app.add_typer(ingest_app, name="ingest")

# NOTE: Typer renders each command's docstring as its --help text, so the
# command docstrings below are user-facing output. Implementation notes for
# commands therefore live in `#` comments rather than in the docstrings.


@contextmanager
def _open_db() -> Iterator[Any]:
    """Yield a schema-ready database connection and always close it.

    Calls ``get_connection()`` and ``ensure_schema(conn)``, yields the
    connection, and closes it on exit even when the body (or the schema
    setup) raises, so a failed command does not leak the connection.
    """
    conn = get_connection()
    try:
        ensure_schema(conn)
        yield conn
    finally:
        conn.close()


def _parse_year_range(year_range: str) -> range:
    """Parse '2005-2024' into a range object.

    Both endpoints are inclusive: ``'2005-2024'`` becomes
    ``range(2005, 2025)``.

    Raises:
        typer.BadParameter: If the text is not two integers separated by a
            single ``-``, or if the start year is after the end year.
    """
    parts = year_range.split("-")
    if len(parts) != 2:
        raise typer.BadParameter("Year range must be in format YYYY-YYYY")
    try:
        start, end = int(parts[0]), int(parts[1])
    except ValueError as err:
        # Report non-numeric input as a CLI usage error, not a traceback.
        raise typer.BadParameter("Year range must be in format YYYY-YYYY") from err
    if start > end:
        # A reversed range would be empty and the command would silently
        # do nothing; reject it explicitly instead.
        raise typer.BadParameter("Year range start must not be after end")
    return range(start, end + 1)


@ingest_app.command()
def ipeds(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    component: Annotated[
        str, typer.Option(help="Component: all, hd, finance, staff, enrollment")
    ] = "all",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IPEDS data for the University of Delaware."""
    # Downloads the selected components for the parsed year range, then runs
    # the loader for each selected component in a fixed order.
    years = _parse_year_range(year_range)
    with _open_db() as conn:
        from admin_analytics.ipeds.download import download_all
        from admin_analytics.ipeds.institution import load_institutions
        from admin_analytics.ipeds.finance import load_finance
        from admin_analytics.ipeds.staff import load_staff
        from admin_analytics.ipeds.enrollment import load_enrollment

        components = (
            ["hd", "finance", "staff", "enrollment"]
            if component == "all"
            else [component]
        )

        # Finance needs both F1A and F2 downloads (UD reports under FASB/F2)
        download_components = list(components)
        if "finance" in download_components:
            download_components.append("finance_f2")

        typer.echo(f"Downloading IPEDS data for {years.start}-{years.stop - 1}...")
        download_all(years, download_components, force=force)

        # (component key, progress message, loader), in load order.
        load_steps = (
            ("hd", "Loading institutional directory (HD)...", load_institutions),
            ("finance", "Loading finance data (F1A)...", load_finance),
            ("staff", "Loading staff data (S)...", load_staff),
            ("enrollment", "Loading enrollment data (EF)...", load_enrollment),
        )
        for key, message, loader in load_steps:
            if key in components:
                typer.echo(message)
                loader(conn, years)

        typer.echo("IPEDS ingestion complete.")


@ingest_app.command()
def irs990(
    year_range: Annotated[
        str, typer.Option(help="Year range for index files, e.g. 2019-2025")
    ] = "2019-2025",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest IRS 990 data for UD and UD Research Foundation."""
    # Downloads filings for the parsed year range, loads them, and prints the
    # row totals returned by the loader.
    years = _parse_year_range(year_range)
    with _open_db() as conn:
        from admin_analytics.irs990.download import download_all_filings
        from admin_analytics.irs990.loader import load_all

        typer.echo(f"Downloading 990 filings for {years.start}-{years.stop - 1}...")
        download_all_filings(years, force=force)

        typer.echo("Loading 990 data into database...")
        totals = load_all(conn, years)
        typer.echo(
            f"IRS 990 ingestion complete: {totals['filings']} filings, "
            f"{totals['part_vii']} Part VII rows, {totals['schedule_j']} Schedule J rows."
        )


@ingest_app.command()
def cpi(
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if file exists")
    ] = False,
) -> None:
    """Ingest BLS CPI-U data."""
    # Downloads the CPI file, loads it, and prints the count the loader returns.
    with _open_db() as conn:
        from admin_analytics.bls.download import download_cpi_file
        from admin_analytics.bls.loader import load_cpi

        typer.echo("Downloading BLS CPI-U data...")
        file_path = download_cpi_file(force=force)

        typer.echo("Loading CPI-U data into database...")
        count = load_cpi(conn, file_path)
        typer.echo(f"CPI-U ingestion complete: {count} monthly observations loaded.")


@ingest_app.command()
def scrape() -> None:
    """Scrape UD staff directory pages for admin headcounts."""
    # Scrapes entries, loads them, then prints a per-unit summary with a
    # per-category breakdown ordered by descending count.
    with _open_db() as conn:
        from admin_analytics.scraper.directory import scrape_all
        from admin_analytics.scraper.loader import load_scrape
        from admin_analytics.scraper.classify import OVERHEAD_CATEGORIES

        typer.echo("Scraping UD staff directory pages...")
        entries = scrape_all()

        typer.echo("Loading scraped data into database...")
        count = load_scrape(conn, entries)

        # Summary by unit and category
        typer.echo(f"\nLoaded {count} staff entries.\n")
        unit_counts: defaultdict[str, Counter] = defaultdict(Counter)
        for entry in entries:
            unit_counts[entry.unit][entry.category] += 1

        for unit, cats in sorted(unit_counts.items()):
            total = sum(cats.values())
            overhead = sum(
                n for cat, n in cats.items() if cat in OVERHEAD_CATEGORIES
            )
            typer.echo(f" {unit}: {total} staff ({overhead} overhead)")
            # most_common() orders by descending count and keeps first-seen
            # order among ties.
            for cat, n in cats.most_common():
                typer.echo(f" {cat}: {n}")


@app.command()
def dashboard(
    host: Annotated[
        str, typer.Option(help="Host to bind to (0.0.0.0 for network access)")
    ] = "127.0.0.1",
    port: Annotated[int, typer.Option(help="Port to serve on")] = 8050,
    debug: Annotated[bool, typer.Option(help="Enable Dash debug mode")] = True,
) -> None:
    """Launch the analytics dashboard."""
    # NOTE(review): debug defaults to True; consider whether that is wanted
    # when --host is set to a non-loopback address.
    from admin_analytics.dashboard.app import create_app

    dash_app = create_app()
    typer.echo(f"Starting dashboard at http://{host}:{port}/")
    dash_app.run(host=host, debug=debug, port=port)


@app.command()
def validate() -> None:
    """Run data validation checks and print a report."""
    with _open_db() as conn:
        from admin_analytics.validation import format_report

        typer.echo(format_report(conn))


@ingest_app.command(name="all")
def ingest_all(
    year_range: Annotated[
        str, typer.Option(help="Year range, e.g. 2005-2024")
    ] = f"{DEFAULT_YEAR_RANGE.start}-{DEFAULT_YEAR_RANGE.stop - 1}",
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if files exist")
    ] = False,
) -> None:
    """Ingest all data sources."""
    # Runs each ingest command in sequence; each one opens and closes its own
    # database connection.
    # NOTE(review): the same year_range is forwarded to irs990, whose own
    # default is "2019-2025" - confirm that sharing the range is intended.
    ipeds(year_range=year_range, component="all", force=force)
    irs990(year_range=year_range, force=force)
    cpi(force=force)
    scrape()