Phase 1 project prototype

This commit is contained in:
emfurst 2026-03-30 19:29:33 -04:00
commit 2c9ae1c312
29 changed files with 2967 additions and 22 deletions

View file

@@ -107,10 +107,83 @@ def irs990(
@ingest_app.command()
def cpi() -> None:
    """Ingest BLS CPI-U data. (Not yet implemented.)"""
    # Placeholder command: announce that the feature is pending, then stop
    # with exit status 1 so scripts calling this command see a failure.
    message = "CPI-U ingestion is not yet implemented (Sprint 3)."
    typer.echo(message)
    raise typer.Exit(code=1)
def cpi(
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if file exists")
    ] = False,
) -> None:
    """Ingest BLS CPI-U data."""
    # The docstring above is kept verbatim: Typer normally surfaces command
    # docstrings as CLI help text, so extra detail lives in comments instead.
    #
    # force: forwarded unchanged to download_cpi_file(force=...); exposed on
    #        the command line as --force.
    #
    # Steps: open a connection, ensure the schema, download the CPI file,
    # load it with load_cpi(), and echo the count that load_cpi() returns.

    # Project imports are deferred to call time, as in the rest of this CLI.
    from admin_analytics.bls.download import download_cpi_file
    from admin_analytics.bls.loader import load_cpi

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo("Downloading BLS CPI-U data...")
        file_path = download_cpi_file(force=force)

        typer.echo("Loading CPI-U data into database...")
        count = load_cpi(conn, file_path)

        typer.echo(f"CPI-U ingestion complete: {count} monthly observations loaded.")
    finally:
        # Always release the connection, including when the schema setup,
        # download, or load step raises.
        conn.close()
@ingest_app.command()
def scrape() -> None:
    """Scrape UD staff directory pages for admin headcounts."""
    # The docstring above is kept verbatim: Typer normally surfaces command
    # docstrings as CLI help text, so extra detail lives in comments instead.
    #
    # Steps: scrape entries with scrape_all(), persist them with
    # load_scrape(), then print a per-unit summary broken down by category.

    # Project imports are deferred to call time, as in the rest of this CLI.
    from admin_analytics.scraper.directory import scrape_all
    from admin_analytics.scraper.loader import load_scrape
    from admin_analytics.scraper.classify import OVERHEAD_CATEGORIES

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo("Scraping UD staff directory pages...")
        entries = scrape_all()

        typer.echo("Loading scraped data into database...")
        count = load_scrape(conn, entries)
    finally:
        # Always release the connection, including when scraping or loading
        # raises. The summary below only needs `entries` and `count`.
        conn.close()

    # Summary by unit and category
    typer.echo(f"\nLoaded {count} staff entries.\n")

    # unit -> category -> number of entries carrying that (unit, category).
    unit_counts: dict[str, dict[str, int]] = {}
    for entry in entries:
        per_category = unit_counts.setdefault(entry.unit, {})
        per_category[entry.category] = per_category.get(entry.category, 0) + 1

    for unit, cats in sorted(unit_counts.items()):
        total = sum(cats.values())
        # Only categories listed in OVERHEAD_CATEGORIES count as overhead.
        overhead = sum(n for cat, n in cats.items() if cat in OVERHEAD_CATEGORIES)
        typer.echo(f" {unit}: {total} staff ({overhead} overhead)")
        # Largest categories first; ties keep their first-seen order.
        for cat, n in sorted(cats.items(), key=lambda item: -item[1]):
            typer.echo(f" {cat}: {n}")
@app.command()
def dashboard(
    port: Annotated[int, typer.Option(help="Port to serve on")] = 8050,
    debug: Annotated[bool, typer.Option(help="Enable Dash debug mode")] = True,
) -> None:
    """Launch the analytics dashboard."""
    # The docstring above is kept verbatim: Typer normally surfaces command
    # docstrings as CLI help text, so extra detail lives in comments instead.
    #
    # port:  passed straight to the app's run() call and shown in the
    #        startup message.
    # debug: passed straight to the app's run() call.
    # NOTE(review): debug defaults to True; debug mode is generally meant for
    # development only -- confirm this default is intended before serving the
    # dashboard to others.

    # Deferred import so the dashboard package is only loaded for this command.
    from admin_analytics.dashboard.app import create_app

    dashboard_app = create_app()
    typer.echo(f"Starting dashboard at http://localhost:{port}/")
    dashboard_app.run(port=port, debug=debug)
@app.command()
def validate() -> None:
    """Run data validation checks and print a report."""
    # The docstring above is kept verbatim: Typer normally surfaces command
    # docstrings as CLI help text, so extra detail lives in comments instead.
    #
    # Steps: open a connection, ensure the schema, and echo whatever
    # format_report(conn) returns.

    # Deferred import so the validation module is only loaded for this command.
    from admin_analytics.validation import format_report

    conn = get_connection()
    try:
        ensure_schema(conn)
        typer.echo(format_report(conn))
    finally:
        # Always release the connection, including when a check raises.
        conn.close()
@ingest_app.command(name="all")
@@ -124,3 +197,6 @@ def ingest_all(
) -> None:
"""Ingest all data sources."""
ipeds(year_range=year_range, component="all", force=force)
irs990(year_range=year_range, force=force)
cpi(force=force)
scrape()