Phase 1 project prototype
This commit is contained in:
parent
29215e2bd2
commit
2c9ae1c312
29 changed files with 2967 additions and 22 deletions
|
|
@ -107,10 +107,83 @@ def irs990(
|
|||
|
||||
|
||||
@ingest_app.command()
def cpi() -> None:
    """Ingest BLS CPI-U data. (Not yet implemented.)"""
    # Placeholder command: tell the user the feature is missing, then exit
    # with status 1 so callers and scripts see a failure.
    message = "CPI-U ingestion is not yet implemented (Sprint 3)."
    typer.echo(message)
    raise typer.Exit(1)
|
||||
def cpi(
    force: Annotated[
        bool, typer.Option("--force", help="Re-download even if file exists")
    ] = False,
) -> None:
    """Ingest BLS CPI-U data."""
    # Parameters:
    #   force: forwarded to download_cpi_file(force=...); exposed on the CLI
    #          as the --force flag.
    #
    # Steps: open a connection, ensure the schema exists, download the CPI
    # file, load it, and report how many observations the loader returned.

    # Function-scope imports, matching the other commands in this module.
    from admin_analytics.bls.download import download_cpi_file
    from admin_analytics.bls.loader import load_cpi

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo("Downloading BLS CPI-U data...")
        file_path = download_cpi_file(force=force)

        typer.echo("Loading CPI-U data into database...")
        count = load_cpi(conn, file_path)

        typer.echo(f"CPI-U ingestion complete: {count} monthly observations loaded.")
    finally:
        # Always release the connection, even when the download or the load
        # raises; previously it was only closed on the success path.
        conn.close()
|
||||
|
||||
|
||||
@ingest_app.command()
def scrape() -> None:
    """Scrape UD staff directory pages for admin headcounts."""
    # Steps: open a connection, ensure the schema exists, scrape the
    # directory, load the entries, then print a per-unit / per-category
    # summary of what was scraped.

    # Function-scope imports, matching the other commands in this module.
    from admin_analytics.scraper.classify import OVERHEAD_CATEGORIES
    from admin_analytics.scraper.directory import scrape_all
    from admin_analytics.scraper.loader import load_scrape

    conn = get_connection()
    try:
        ensure_schema(conn)

        typer.echo("Scraping UD staff directory pages...")
        entries = scrape_all()

        typer.echo("Loading scraped data into database...")
        count = load_scrape(conn, entries)
    finally:
        # Always release the connection, even when scraping or loading
        # raises; the summary below does not need the database.
        conn.close()

    # Summary by unit and category
    typer.echo(f"\nLoaded {count} staff entries.\n")

    # NOTE(review): `entries` is iterated here after load_scrape() consumed
    # it; this assumes scrape_all() returns a re-iterable collection rather
    # than a one-shot generator -- TODO confirm.
    unit_counts: dict[str, dict[str, int]] = {}
    for e in entries:
        per_category = unit_counts.setdefault(e.unit, {})
        per_category[e.category] = per_category.get(e.category, 0) + 1

    for unit, cats in sorted(unit_counts.items()):
        total = sum(cats.values())
        overhead = sum(v for k, v in cats.items() if k in OVERHEAD_CATEGORIES)
        typer.echo(f" {unit}: {total} staff ({overhead} overhead)")
        # Largest categories first.
        for cat, n in sorted(cats.items(), key=lambda x: -x[1]):
            typer.echo(f" {cat}: {n}")
|
||||
|
||||
|
||||
@app.command()
def dashboard(
    port: Annotated[int, typer.Option(help="Port to serve on")] = 8050,
    debug: Annotated[bool, typer.Option(help="Enable Dash debug mode")] = True,
) -> None:
    """Launch the analytics dashboard."""
    # Parameters:
    #   port:  port passed to the dashboard app's run().
    #   debug: passed to the dashboard app's run() as its debug flag.
    #
    # NOTE(review): debug defaults to True -- confirm that is intended
    # outside local development.

    # Function-scope import, matching the other commands in this module.
    from admin_analytics.dashboard.app import create_app

    server = create_app()
    url = f"http://localhost:{port}/"
    typer.echo(f"Starting dashboard at {url}")
    server.run(port=port, debug=debug)
|
||||
|
||||
|
||||
@app.command()
def validate() -> None:
    """Run data validation checks and print a report."""
    # Steps: open a connection, ensure the schema exists, then print
    # whatever format_report(conn) returns.

    # Function-scope import, matching the other commands in this module.
    from admin_analytics.validation import format_report

    conn = get_connection()
    try:
        ensure_schema(conn)
        typer.echo(format_report(conn))
    finally:
        # Always release the connection, even when a validation check
        # raises; previously it was only closed on the success path.
        conn.close()
|
||||
|
||||
|
||||
@ingest_app.command(name="all")
|
||||
|
|
@ -124,3 +197,6 @@ def ingest_all(
|
|||
) -> None:
|
||||
"""Ingest all data sources."""
|
||||
ipeds(year_range=year_range, component="all", force=force)
|
||||
irs990(year_range=year_range, force=force)
|
||||
cpi(force=force)
|
||||
scrape()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue