Phase 1 project prototype

2026-03-30 19:29:33 -04:00 · 2026-03-30 19:29:33 -04:00 · 2c9ae1c312
commit 2c9ae1c312
parent 29215e2bd2
29 changed files with 2967 additions and 22 deletions
--- a/src/admin_analytics/dashboard/init.py
+++ b/src/admin_analytics/dashboard/init.py
--- a/src/admin_analytics/dashboard/app.py
+++ b/src/admin_analytics/dashboard/app.py
@ -0,0 +1,53 @@
+"""Dash application factory."""
+
+import dash
+from dash import dcc, html, Input, Output
+
+from admin_analytics.db.connection import get_connection
+from admin_analytics.db.schema import ensure_schema
+from admin_analytics.dashboard.pages import overview, compensation, staffing, headcount
+
+
+def create_app() -> dash.Dash:
+    """Build the Dash app: prepare the database, lay out the tabs, wire callbacks.
+
+    A connection is obtained from ``get_connection`` and passed through
+    ``ensure_schema`` before anything is rendered. That same connection object
+    is captured by the tab-switching callback and handed to
+    ``compensation.register_callbacks``.
+    """
+    app = dash.Dash(__name__, suppress_callback_exceptions=True)
+    conn = get_connection()
+    ensure_schema(conn)
+
+    # (label, value) of every tab, in display order.
+    tab_specs = [
+        ("Admin Cost Overview", "overview"),
+        ("Executive Compensation", "compensation"),
+        ("Staffing & Enrollment", "staffing"),
+        ("Current Headcount", "headcount"),
+    ]
+    # Tab value -> page module whose ``layout(conn)`` renders that tab.
+    pages = {
+        "overview": overview,
+        "compensation": compensation,
+        "staffing": staffing,
+        "headcount": headcount,
+    }
+
+    header = html.H1(
+        "University of Delaware — Administrative Analytics",
+        style={"textAlign": "center", "padding": "20px", "color": "#00539F"},
+    )
+    tab_bar = dcc.Tabs(
+        id="tabs",
+        value="overview",
+        children=[dcc.Tab(label=label, value=value) for label, value in tab_specs],
+        style={"marginBottom": "20px"},
+    )
+    content = html.Div(id="tab-content", style={"padding": "0 20px 20px 20px"})
+    app.layout = html.Div(
+        [header, tab_bar, content],
+        style={"fontFamily": "system-ui, -apple-system, sans-serif", "maxWidth": "1400px", "margin": "0 auto"},
+    )
+
+    @app.callback(Output("tab-content", "children"), Input("tabs", "value"))
+    def render_tab(tab: str):
+        # Any value without a registered page falls through to the placeholder.
+        page = pages.get(tab)
+        if page is None:
+            return html.Div("Unknown tab")
+        return page.layout(conn)
+
+    compensation.register_callbacks(app, conn)
+
+    return app
--- a/src/admin_analytics/dashboard/pages/init.py
+++ b/src/admin_analytics/dashboard/pages/init.py
--- a/src/admin_analytics/dashboard/pages/compensation.py
+++ b/src/admin_analytics/dashboard/pages/compensation.py
@ -0,0 +1,162 @@
+"""Page 2: Executive Compensation."""
+
+import dash
+import duckdb
+from dash import html, dcc, Input, Output, dash_table
+import plotly.express as px
+import plotly.graph_objects as go
+
+from admin_analytics.dashboard.queries import (
+    query_top_earners,
+    query_comp_by_role,
+    query_comp_vs_cpi,
+)
+
+# Placeholder returned by layout() instead of the page when query_top_earners
+# yields no rows; the message names the ingest command to run.
+_NO_DATA = html.Div(
+    "No IRS 990 data loaded. Run: admin-analytics ingest irs990",
+    style={"textAlign": "center", "padding": "40px", "color": "#888"},
+)
+
+# Roles to highlight in trend chart. Each entry is compared against the
+# "canonical_role" column returned by query_comp_by_role; a role with no
+# matching rows gets no trace.
+_KEY_ROLES = ["PRESIDENT", "PROVOST", "VP_FINANCE", "VP_RESEARCH", "VP_ADVANCEMENT", "CFO"]
+
+
+def layout(conn: duckdb.DuckDBPyConnection):
+    """Render the Executive Compensation tab.
+
+    Returns the ``_NO_DATA`` placeholder when ``query_top_earners`` yields no
+    rows. Otherwise returns a ``html.Div`` containing, top to bottom: the
+    tax-year dropdown, the earners table, a row with the breakdown chart
+    (an empty ``dcc.Graph`` here, addressed by id ``comp-breakdown-chart``) next
+    to the compensation-vs-CPI chart, and the per-role trend chart.
+    """
+    earners = query_top_earners(conn)
+    if earners.height == 0:
+        return _NO_DATA
+
+    # Dropdown choices: the "all" sentinel first, then every distinct tax year.
+    year_options = [{"label": "All Years", "value": "all"}]
+    for tax_year in sorted(earners["tax_year"].unique().to_list()):
+        year_options.append({"label": str(tax_year), "value": tax_year})
+
+    # Trend chart: one line per role in _KEY_ROLES that has at least one row.
+    by_role = query_comp_by_role(conn)
+    trend_fig = go.Figure()
+    if by_role.height > 0:
+        by_role_pd = by_role.to_pandas()
+        for role_key in _KEY_ROLES:
+            role_rows = by_role_pd[by_role_pd["canonical_role"] == role_key]
+            if len(role_rows) == 0:
+                continue
+            trend_fig.add_trace(go.Scatter(
+                x=role_rows["tax_year"],
+                y=role_rows["total_compensation"],
+                mode="lines+markers",
+                # "VP_FINANCE" -> "Vp Finance" for the legend.
+                name=role_key.replace("_", " ").title(),
+            ))
+    trend_fig.update_layout(
+        title="Compensation Trends by Role",
+        xaxis_title="Tax Year",
+        yaxis_title="Total Compensation ($)",
+        template="plotly_white",
+        height=420,
+    )
+
+    # Indexed comparison: compensation index and CPI index on one axis.
+    indexed = query_comp_vs_cpi(conn)
+    index_fig = go.Figure()
+    if indexed.height > 0:
+        indexed_pd = indexed.to_pandas()
+        for column, trace_name, line_style in [
+            ("comp_index", "Top Compensation", {"color": "#00539F"}),
+            ("cpi_index", "CPI-U", {"color": "#FFD200", "dash": "dash"}),
+        ]:
+            index_fig.add_trace(go.Scatter(
+                x=indexed_pd["year"],
+                y=indexed_pd[column],
+                mode="lines+markers",
+                name=trace_name,
+                line=line_style,
+            ))
+    index_fig.update_layout(
+        title="Top Compensation vs CPI-U (Indexed, Base Year = 100)",
+        xaxis_title="Year",
+        yaxis_title="Index",
+        template="plotly_white",
+        height=380,
+    )
+
+    def money_column(header, column_id):
+        # Numeric column with thousands grouping; a fresh Format per column.
+        return {
+            "name": header,
+            "id": column_id,
+            "type": "numeric",
+            "format": dash_table.Format.Format().group(True),
+        }
+
+    year_filter = html.Div(
+        [
+            html.Label("Filter by Tax Year: ", style={"fontWeight": "bold"}),
+            dcc.Dropdown(
+                id="comp-year-dropdown",
+                options=year_options,
+                value="all",
+                style={"width": "200px", "display": "inline-block"},
+            ),
+        ],
+        style={"marginBottom": "16px"},
+    )
+
+    earners_table = dash_table.DataTable(
+        id="comp-table",
+        columns=[
+            {"name": "Year", "id": "tax_year"},
+            {"name": "Name", "id": "person_name"},
+            {"name": "Title", "id": "title"},
+            {"name": "Role", "id": "canonical_role"},
+            money_column("Base", "base_compensation"),
+            money_column("Bonus", "bonus_compensation"),
+            money_column("Total", "total_compensation"),
+        ],
+        data=earners.to_pandas().to_dict("records"),
+        page_size=15,
+        sort_action="native",
+        filter_action="native",
+        style_table={"overflowX": "auto"},
+        style_cell={"textAlign": "left", "padding": "8px", "fontSize": "13px"},
+        style_header={"fontWeight": "bold", "backgroundColor": "#f0f0f0"},
+    )
+
+    chart_row = html.Div(
+        [
+            html.Div(dcc.Graph(id="comp-breakdown-chart"), style={"flex": "1"}),
+            html.Div(dcc.Graph(figure=index_fig), style={"flex": "1"}),
+        ],
+        style={"display": "flex", "gap": "16px", "marginTop": "16px"},
+    )
+
+    return html.Div([
+        year_filter,
+        earners_table,
+        chart_row,
+        dcc.Graph(figure=trend_fig),
+    ])
+
+
+def register_callbacks(app: dash.Dash, conn: duckdb.DuckDBPyConnection) -> None:
+    """Register interactive callbacks for the compensation page.
+
+    Wires the ``comp-year-dropdown`` value to the rows of ``comp-table`` and the
+    figure of ``comp-breakdown-chart``.
+
+    Args:
+        app: Dash application the callback is attached to.
+        conn: DuckDB connection passed to ``query_top_earners`` on every update.
+    """
+
+    def _short_name(name) -> str:
+        """Return a bar label of at most 20 characters for *name*.
+
+        Names containing a comma keep the part before it; other names keep
+        their last whitespace-separated word. A missing or blank name yields a
+        placeholder instead of raising.
+        """
+        if not isinstance(name, str) or not name.strip():
+            return "(unknown)"
+        if "," in name:
+            return name.split(",")[0][:20]
+        return name.split()[-1][:20]
+
+    @app.callback(
+        [Output("comp-table", "data"), Output("comp-breakdown-chart", "figure")],
+        Input("comp-year-dropdown", "value"),
+    )
+    def update_compensation(year_value):
+        # A cleared dropdown reports None; treat it like "all" rather than
+        # letting int(None) raise inside the callback.
+        year = None if year_value in (None, "all") else int(year_value)
+        earners = query_top_earners(conn, year=year)
+
+        table_data = []
+        breakdown_fig = go.Figure()
+        if earners.height > 0:
+            # Convert once and reuse for both the table and the chart.
+            earners_pd = earners.to_pandas()
+            table_data = earners_pd.to_dict("records")
+
+            # First 10 rows in the order query_top_earners returns them.
+            # NOTE(review): that order is tax_year DESC then total DESC, so for
+            # "All Years" this is the most recent year's top 10 — confirm that
+            # is the intended meaning of the chart title.
+            top = earners_pd.head(10)
+            short_names = [_short_name(n) for n in top["person_name"]]
+
+            # Breakdown chart — stacked bar of comp components
+            for comp_type, label, color in [
+                ("base_compensation", "Base", "#00539F"),
+                ("bonus_compensation", "Bonus", "#FFD200"),
+                ("deferred_compensation", "Deferred", "#7FB069"),
+                ("nontaxable_benefits", "Benefits", "#E07A5F"),
+                ("other_compensation", "Other", "#999"),
+            ]:
+                if comp_type in top.columns:
+                    breakdown_fig.add_trace(go.Bar(
+                        x=short_names, y=top[comp_type],
+                        name=label, marker_color=color,
+                    ))
+            breakdown_fig.update_layout(barmode="stack")
+
+        title_suffix = f" ({year})" if year is not None else " (All Years)"
+        breakdown_fig.update_layout(
+            title=f"Compensation Breakdown — Top 10{title_suffix}",
+            xaxis_title="", yaxis_title="$",
+            template="plotly_white", height=380,
+        )
+
+        return table_data, breakdown_fig
--- a/src/admin_analytics/dashboard/pages/headcount.py
+++ b/src/admin_analytics/dashboard/pages/headcount.py
@ -0,0 +1,118 @@
+"""Page 4: Current Admin Headcount (from scraper)."""
+
+import duckdb
+from dash import html, dcc, dash_table
+import plotly.express as px
+import plotly.graph_objects as go
+
+from admin_analytics.dashboard.queries import (
+    query_admin_headcount,
+    query_headcount_summary,
+)
+
+# Placeholder returned by layout() instead of the page when
+# query_admin_headcount yields no rows; the message names the ingest command.
+_NO_DATA = html.Div(
+    "No headcount data loaded. Run: admin-analytics ingest scrape",
+    style={"textAlign": "center", "padding": "40px", "color": "#888"},
+)
+
+
+def _kpi_card(title: str, value: str) -> html.Div:
+    """Return a KPI tile: a small grey caption above a large blue value."""
+    caption = html.H4(title, style={"margin": "0", "color": "#666", "fontSize": "14px"})
+    figure = html.H2(value, style={"margin": "5px 0", "color": "#00539F"})
+    # "flex": "1" lets sibling cards share the row width equally.
+    card_style = {
+        "flex": "1",
+        "padding": "20px",
+        "backgroundColor": "#f8f9fa",
+        "borderRadius": "8px",
+        "textAlign": "center",
+        "margin": "0 8px",
+    }
+    return html.Div([caption, figure], style=card_style)
+
+
+def layout(conn: duckdb.DuckDBPyConnection):
+    """Render the Current Headcount tab.
+
+    Returns the ``_NO_DATA`` placeholder when ``query_admin_headcount`` yields
+    no rows. Otherwise returns a ``html.Div`` with a KPI row, a staff-by-unit
+    bar chart beside an overhead pie chart, a category-by-unit bar chart, and a
+    detail table of every row.
+    """
+    detail_df = query_admin_headcount(conn)
+    if detail_df.height == 0:
+        return _NO_DATA
+
+    summary_df = query_headcount_summary(conn)
+    detail_pd = detail_df.to_pandas()
+    summary_pd = summary_df.to_pandas()
+
+    total = len(detail_pd)
+    overhead_count = int(detail_pd["is_overhead"].sum()) if "is_overhead" in detail_pd.columns else 0
+    overhead_pct = round(overhead_count * 100 / total, 1) if total > 0 else 0
+
+    # KPI cards
+    kpi_row = html.Div(
+        [
+            _kpi_card("Total Staff Scraped", str(total)),
+            _kpi_card("Overhead Staff", str(overhead_count)),
+            _kpi_card("Overhead %", f"{overhead_pct}%"),
+        ],
+        style={"display": "flex", "marginBottom": "24px"},
+    )
+
+    # Staff by unit bar chart (ascending sort puts the largest unit at the top
+    # of a horizontal bar chart).
+    unit_counts = summary_pd.groupby("unit")["count"].sum().reset_index().sort_values("count")
+    unit_fig = px.bar(
+        unit_counts, x="count", y="unit", orientation="h",
+        title="Staff Count by Unit",
+        labels={"count": "Staff", "unit": ""},
+        color_discrete_sequence=["#00539F"],
+    )
+    # Grow the chart with the number of units so bars stay readable.
+    chart_height = max(300, len(unit_counts) * 30 + 100)
+    unit_fig.update_layout(template="plotly_white", height=chart_height)
+
+    # Overhead pie. dropna=False keeps rows whose is_overhead is null so they
+    # show up as the "Debatable" slice instead of being silently dropped.
+    oh_data = detail_pd["is_overhead"].value_counts(dropna=False)
+    oh_labels = {True: "Overhead", False: "Non-Overhead"}
+    oh_names = [oh_labels.get(k, "Debatable") for k in oh_data.index]
+    pie_fig = px.pie(
+        names=oh_names,
+        values=oh_data.values,
+        title="Overhead vs Non-Overhead",
+        # Pin each label to its colour; a plain colour sequence would follow
+        # slice order (most frequent first) and swap colours between datasets.
+        color=oh_names,
+        color_discrete_map={
+            "Overhead": "#E07A5F",
+            "Non-Overhead": "#7FB069",
+            "Debatable": "#999",
+        },
+    )
+    pie_fig.update_layout(template="plotly_white", height=350)
+
+    # Category distribution per unit
+    cat_fig = px.bar(
+        summary_pd, x="count", y="unit", color="category", orientation="h",
+        title="Category Distribution by Unit",
+        labels={"count": "Staff", "unit": "", "category": "Category"},
+    )
+    cat_fig.update_layout(template="plotly_white", height=chart_height)
+
+    # Detail table
+    table = dash_table.DataTable(
+        columns=[
+            {"name": "Unit", "id": "unit"},
+            {"name": "Name", "id": "person_name"},
+            {"name": "Title", "id": "title"},
+            {"name": "Category", "id": "category"},
+            {"name": "Overhead", "id": "is_overhead"},
+        ],
+        data=detail_pd.to_dict("records"),
+        page_size=20,
+        sort_action="native",
+        filter_action="native",
+        style_table={"overflowX": "auto"},
+        style_cell={"textAlign": "left", "padding": "8px", "fontSize": "13px"},
+        style_header={"fontWeight": "bold", "backgroundColor": "#f0f0f0"},
+    )
+
+    return html.Div([
+        kpi_row,
+        html.Div(
+            [
+                html.Div(dcc.Graph(figure=unit_fig), style={"flex": "1"}),
+                html.Div(dcc.Graph(figure=pie_fig), style={"flex": "1"}),
+            ],
+            style={"display": "flex", "gap": "16px"},
+        ),
+        dcc.Graph(figure=cat_fig),
+        html.H3("Staff Directory Detail", style={"marginTop": "24px"}),
+        table,
+    ])
--- a/src/admin_analytics/dashboard/pages/overview.py
+++ b/src/admin_analytics/dashboard/pages/overview.py
@ -0,0 +1,168 @@
+"""Page 1: Administrative Cost Overview."""
+
+import duckdb
+from dash import html, dcc
+import plotly.express as px
+import plotly.graph_objects as go
+
+from admin_analytics.dashboard.queries import (
+    query_admin_cost_ratio,
+    query_expense_breakdown,
+    query_admin_per_student,
+    query_admin_faculty_ratio,
+)
+
+# Placeholder returned by layout() instead of the page when
+# query_admin_cost_ratio yields no rows; the message names the ingest command.
+_NO_DATA = html.Div(
+    "No data loaded. Run: admin-analytics ingest all",
+    style={"textAlign": "center", "padding": "40px", "color": "#888"},
+)
+
+
+def _kpi_card(title: str, value: str, subtitle: str = "") -> html.Div:
+    """Return a KPI tile: grey caption, large blue value, small grey subtitle.
+
+    The subtitle paragraph is always emitted, even when *subtitle* is empty.
+    """
+    caption = html.H4(title, style={"margin": "0", "color": "#666", "fontSize": "14px"})
+    figure = html.H2(value, style={"margin": "5px 0", "color": "#00539F"})
+    footnote = html.P(subtitle, style={"margin": "0", "color": "#999", "fontSize": "12px"})
+    # "flex": "1" lets sibling cards share the row width equally.
+    card_style = {
+        "flex": "1",
+        "padding": "20px",
+        "backgroundColor": "#f8f9fa",
+        "borderRadius": "8px",
+        "textAlign": "center",
+        "margin": "0 8px",
+    }
+    return html.Div([caption, figure, footnote], style=card_style)
+
+
+def layout(conn: duckdb.DuckDBPyConnection):
+    """Render the Administrative Cost Overview tab.
+
+    Returns the ``_NO_DATA`` placeholder when ``query_admin_cost_ratio`` yields
+    no rows. Otherwise returns a ``html.Div`` with three KPI cards (taken from
+    the last row of each query), the admin-cost-ratio trend, the stacked expense
+    breakdown, and a row with the per-student and admin-to-faculty trends.
+
+    KPI values that are missing in the latest row (the queries can produce
+    NULLs through ``NULLIF`` and the LEFT JOIN on CPI) are shown as "N/A", or
+    their subtitle is omitted, instead of raising on ``int(NaN)``.
+    """
+
+    def _missing(value) -> bool:
+        # NaN is the only value that compares unequal to itself.
+        return value is None or value != value
+
+    def _dollars(value) -> str | None:
+        # "$12,345" for a real number, None when the value is missing.
+        return None if _missing(value) else f"${int(value):,}"
+
+    # Admin cost ratio
+    ratio_df = query_admin_cost_ratio(conn)
+    if ratio_df.height == 0:
+        return _NO_DATA
+
+    ratio_pd = ratio_df.to_pandas()
+    latest = ratio_pd.iloc[-1]
+
+    # Admin per student
+    aps_df = query_admin_per_student(conn)
+    aps_pd = aps_df.to_pandas() if aps_df.height > 0 else None
+    latest_aps = aps_pd.iloc[-1] if aps_pd is not None else None
+
+    # Admin-to-faculty ratio
+    afr_df = query_admin_faculty_ratio(conn)
+    afr_pd = afr_df.to_pandas() if afr_df.height > 0 else None
+    latest_afr = afr_pd.iloc[-1] if afr_pd is not None else None
+
+    # KPI text, each guarded against a missing latest value.
+    ratio_value = (
+        "N/A" if _missing(latest["admin_cost_pct"])
+        else f"{latest['admin_cost_pct']:.1f}%"
+    )
+
+    aps_value, aps_subtitle = "N/A", ""
+    if latest_aps is not None:
+        nominal = _dollars(latest_aps["admin_per_student"])
+        adjusted = _dollars(latest_aps["admin_per_student_cpi"])
+        if nominal is not None:
+            aps_value = nominal
+        if adjusted is not None:
+            aps_subtitle = f"CPI-adjusted: {adjusted}"
+
+    afr_value, afr_subtitle = "N/A", ""
+    if latest_afr is not None:
+        if not _missing(latest_afr["admin_faculty_ratio"]):
+            afr_value = f"{latest_afr['admin_faculty_ratio']:.2f}"
+        afr_subtitle = f"Management / Faculty ({int(latest_afr['year'])})"
+
+    # KPI cards
+    kpi_row = html.Div(
+        [
+            _kpi_card(
+                "Admin Cost Ratio",
+                ratio_value,
+                f"Institutional Support / Total Expenses ({int(latest['year'])})",
+            ),
+            _kpi_card("Admin Cost per Student", aps_value, aps_subtitle),
+            _kpi_card("Admin-to-Faculty Ratio", afr_value, afr_subtitle),
+        ],
+        style={"display": "flex", "marginBottom": "24px"},
+    )
+
+    # Admin cost ratio trend
+    ratio_fig = go.Figure()
+    ratio_fig.add_trace(go.Scatter(
+        x=ratio_pd["year"], y=ratio_pd["admin_cost_pct"],
+        mode="lines+markers", name="Admin Cost %",
+        line={"color": "#00539F"},
+    ))
+    ratio_fig.update_layout(
+        title="Administrative Cost Ratio Over Time",
+        xaxis_title="Year", yaxis_title="Institutional Support / Total Expenses (%)",
+        template="plotly_white", height=400,
+    )
+
+    # Expense breakdown stacked area (left as an empty figure without data)
+    breakdown_fig = go.Figure()
+    breakdown_df = query_expense_breakdown(conn)
+    if breakdown_df.height > 0:
+        bk_pd = breakdown_df.to_pandas()
+        expense_cols = [c for c in bk_pd.columns if c != "year"]
+        labels = {
+            "instruction_expenses": "Instruction",
+            "research_expenses": "Research",
+            "public_service_expenses": "Public Service",
+            "academic_support_expenses": "Academic Support",
+            "student_services_expenses": "Student Services",
+            "institutional_support_expenses": "Institutional Support",
+            "auxiliary_expenses": "Auxiliary",
+            "hospital_expenses": "Hospital",
+            "other_expenses": "Other",
+        }
+        for col in expense_cols:
+            breakdown_fig.add_trace(go.Scatter(
+                # Plot in millions to match the axis title.
+                x=bk_pd["year"], y=bk_pd[col] / 1e6,
+                mode="lines", name=labels.get(col, col),
+                stackgroup="one",
+            ))
+        breakdown_fig.update_layout(
+            title="Expenses by Function (Millions $)",
+            xaxis_title="Year", yaxis_title="Millions $",
+            template="plotly_white", height=450,
+        )
+
+    # Admin per student trend
+    aps_fig = go.Figure()
+    if aps_pd is not None:
+        aps_fig.add_trace(go.Scatter(
+            x=aps_pd["year"], y=aps_pd["admin_per_student"],
+            mode="lines+markers", name="Nominal",
+            line={"color": "#00539F"},
+        ))
+        aps_fig.add_trace(go.Scatter(
+            x=aps_pd["year"], y=aps_pd["admin_per_student_cpi"],
+            mode="lines+markers", name="CPI-Adjusted",
+            line={"color": "#FFD200", "dash": "dash"},
+        ))
+    aps_fig.update_layout(
+        title="Admin Cost per Student",
+        xaxis_title="Year", yaxis_title="$ per Student",
+        template="plotly_white", height=380,
+    )
+
+    # Admin-to-faculty ratio trend
+    afr_fig = go.Figure()
+    if afr_pd is not None:
+        afr_fig.add_trace(go.Scatter(
+            x=afr_pd["year"], y=afr_pd["admin_faculty_ratio"],
+            mode="lines+markers", name="Ratio",
+            line={"color": "#00539F"},
+        ))
+    afr_fig.update_layout(
+        title="Admin-to-Faculty Ratio",
+        xaxis_title="Year", yaxis_title="Management / Faculty",
+        template="plotly_white", height=380,
+    )
+
+    return html.Div([
+        kpi_row,
+        dcc.Graph(figure=ratio_fig),
+        dcc.Graph(figure=breakdown_fig),
+        html.Div(
+            [
+                html.Div(dcc.Graph(figure=aps_fig), style={"flex": "1"}),
+                html.Div(dcc.Graph(figure=afr_fig), style={"flex": "1"}),
+            ],
+            style={"display": "flex", "gap": "16px"},
+        ),
+    ])
--- a/src/admin_analytics/dashboard/pages/staffing.py
+++ b/src/admin_analytics/dashboard/pages/staffing.py
@ -0,0 +1,97 @@
+"""Page 3: Staffing & Enrollment."""
+
+import duckdb
+from dash import html, dcc
+import plotly.graph_objects as go
+
+from admin_analytics.dashboard.queries import (
+    query_staff_composition,
+    query_student_staff_ratios,
+    query_growth_index,
+)
+
+# Placeholder returned by layout() instead of the page when
+# query_staff_composition yields no rows; the message names the ingest command.
+_NO_DATA = html.Div(
+    "No IPEDS staff data loaded. Run: admin-analytics ingest ipeds",
+    style={"textAlign": "center", "padding": "40px", "color": "#888"},
+)
+
+
+def layout(conn: duckdb.DuckDBPyConnection):
+    """Render the Staffing & Enrollment tab.
+
+    Returns the ``_NO_DATA`` placeholder when ``query_staff_composition`` yields
+    no rows. Otherwise returns a ``html.Div`` with the stacked staff-composition
+    chart on top and, side by side below it, the student-to-staff ratio chart
+    and the management-vs-enrollment growth index chart. The two lower charts
+    are left without traces when their query returns no rows.
+    """
+    composition = query_staff_composition(conn)
+    if composition.height == 0:
+        return _NO_DATA
+    composition_pd = composition.to_pandas()
+
+    def year_trace(frame, column, label, color, mode="lines+markers", **extra):
+        # One Scatter of *column* against "year" with a solid coloured line.
+        return go.Scatter(
+            x=frame["year"], y=frame[column],
+            mode=mode, name=label,
+            line={"color": color},
+            **extra,
+        )
+
+    # Staff composition stacked area
+    composition_fig = go.Figure()
+    stacked_series = [
+        ("faculty_total", "Faculty", "#00539F"),
+        ("management_total", "Management", "#E07A5F"),
+        ("other_staff", "Other Staff", "#7FB069"),
+    ]
+    for column, label, color in stacked_series:
+        composition_fig.add_trace(
+            year_trace(composition_pd, column, label, color, mode="lines", stackgroup="one")
+        )
+    composition_fig.update_layout(
+        title="Staff Composition Over Time",
+        xaxis_title="Year",
+        yaxis_title="Headcount",
+        template="plotly_white",
+        height=420,
+    )
+
+    # Student-to-staff ratios
+    ratios = query_student_staff_ratios(conn)
+    ratios_fig = go.Figure()
+    if ratios.height > 0:
+        ratios_pd = ratios.to_pandas()
+        for column, label, color in [
+            ("students_per_staff", "Students per Staff", "#00539F"),
+            ("students_per_faculty", "Students per Faculty", "#FFD200"),
+        ]:
+            ratios_fig.add_trace(year_trace(ratios_pd, column, label, color))
+    ratios_fig.update_layout(
+        title="Student-to-Staff Ratios",
+        xaxis_title="Year",
+        yaxis_title="Ratio",
+        template="plotly_white",
+        height=380,
+    )
+
+    # Growth index
+    growth = query_growth_index(conn)
+    index_fig = go.Figure()
+    if growth.height > 0:
+        growth_pd = growth.to_pandas()
+        for column, label, color in [
+            ("mgmt_index", "Management Growth", "#E07A5F"),
+            ("enrollment_index", "Enrollment Growth", "#00539F"),
+        ]:
+            index_fig.add_trace(year_trace(growth_pd, column, label, color))
+        # Dotted reference line at the base-year level of the index.
+        index_fig.add_hline(y=100, line_dash="dot", line_color="#ccc")
+    index_fig.update_layout(
+        title="Management vs Enrollment Growth (Indexed, Base Year = 100)",
+        xaxis_title="Year",
+        yaxis_title="Index",
+        template="plotly_white",
+        height=380,
+    )
+
+    lower_row = html.Div(
+        [
+            html.Div(dcc.Graph(figure=ratios_fig), style={"flex": "1"}),
+            html.Div(dcc.Graph(figure=index_fig), style={"flex": "1"}),
+        ],
+        style={"display": "flex", "gap": "16px"},
+    )
+    return html.Div([dcc.Graph(figure=composition_fig), lower_row])
--- a/src/admin_analytics/dashboard/queries.py
+++ b/src/admin_analytics/dashboard/queries.py
@ -0,0 +1,263 @@
+"""Dashboard query layer — all DuckDB queries returning polars DataFrames."""
+
+from typing import Any
+
+import duckdb
+import polars as pl
+
+from admin_analytics.config import UD_UNITID
+from admin_analytics.irs990.titles import normalize_title
+
+# Shared CTE for CPI adjustment.
+# Interpolated at the top of queries that scale dollar amounts to the latest
+# CPI year. Defines:
+#   annual_cpi - one row per year with the average of raw_cpi_u.value
+#   latest_cpi - the avg_cpi of the most recent year present in annual_cpi
+# The text opens with WITH, so a query using it must not add its own WITH.
+_CPI_CTE = """
+WITH annual_cpi AS (
+    SELECT year, AVG(value) AS avg_cpi
+    FROM raw_cpi_u
+    GROUP BY year
+),
+latest_cpi AS (
+    SELECT avg_cpi FROM annual_cpi
+    WHERE year = (SELECT MAX(year) FROM annual_cpi)
+)
+"""
+
+
+def query_admin_cost_ratio(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return the yearly administrative cost ratio for ``UD_UNITID``.
+
+    One row per ``raw_ipeds_finance`` year, ordered by year, with
+    ``admin_cost_pct`` (institutional support as a percentage of total
+    expenses; NULL when total expenses are 0) and both amounts rescaled to the
+    latest CPI year. CPI is LEFT JOINed, so years without CPI data keep their
+    row but get NULL adjusted values.
+    """
+    sql = f"""
+        {_CPI_CTE}
+        SELECT
+            f.year,
+            f.institutional_support_expenses,
+            f.total_expenses,
+            ROUND(f.institutional_support_expenses * 100.0
+                  / NULLIF(f.total_expenses, 0), 2) AS admin_cost_pct,
+            ROUND(f.institutional_support_expenses
+                  * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi, 0)
+                AS inst_support_cpi_adjusted,
+            ROUND(f.total_expenses
+                  * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi, 0)
+                AS total_expenses_cpi_adjusted
+        FROM raw_ipeds_finance f
+        LEFT JOIN annual_cpi ac ON ac.year = f.year
+        WHERE f.unitid = ?
+        ORDER BY f.year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_expense_breakdown(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return the per-function expense columns for ``UD_UNITID`` by year.
+
+    One row per ``raw_ipeds_finance`` year, ordered by year; every column other
+    than ``year`` is one expense function.
+    """
+    sql = """
+        SELECT year,
+            instruction_expenses, research_expenses, public_service_expenses,
+            academic_support_expenses, student_services_expenses,
+            institutional_support_expenses, auxiliary_expenses,
+            hospital_expenses, other_expenses
+        FROM raw_ipeds_finance
+        WHERE unitid = ?
+        ORDER BY year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_admin_per_student(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return institutional-support spending per enrolled student by year.
+
+    Only years present in both ``raw_ipeds_finance`` and
+    ``raw_ipeds_enrollment`` for ``UD_UNITID`` are returned (inner join),
+    ordered by year. ``admin_per_student`` is nominal;
+    ``admin_per_student_cpi`` is rescaled to the latest CPI year and is NULL
+    for years without CPI data (LEFT JOIN). Both are NULL when enrollment is 0.
+    """
+    sql = f"""
+        {_CPI_CTE}
+        SELECT
+            f.year,
+            f.institutional_support_expenses,
+            e.total_enrollment,
+            ROUND(f.institutional_support_expenses * 1.0
+                  / NULLIF(e.total_enrollment, 0), 0) AS admin_per_student,
+            ROUND(
+                (f.institutional_support_expenses
+                 * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi)
+                / NULLIF(e.total_enrollment, 0), 0
+            ) AS admin_per_student_cpi
+        FROM raw_ipeds_finance f
+        JOIN raw_ipeds_enrollment e ON e.unitid = f.unitid AND e.year = f.year
+        LEFT JOIN annual_cpi ac ON ac.year = f.year
+        WHERE f.unitid = ?
+        ORDER BY f.year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_admin_faculty_ratio(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return management headcount divided by faculty headcount, by year.
+
+    One row per ``raw_ipeds_staff`` year for ``UD_UNITID``, ordered by year.
+    ``admin_faculty_ratio`` is rounded to three decimals and is NULL when
+    ``faculty_total`` is 0.
+    """
+    sql = """
+        SELECT year,
+            management_total,
+            faculty_total,
+            ROUND(management_total * 1.0 / NULLIF(faculty_total, 0), 3)
+                AS admin_faculty_ratio
+        FROM raw_ipeds_staff
+        WHERE unitid = ?
+        ORDER BY year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_top_earners(
+    conn: duckdb.DuckDBPyConnection, year: int | None = None
+) -> pl.DataFrame:
+    """Return Schedule J rows with positive total compensation.
+
+    Rows come from ``raw_990_schedule_j`` joined to ``raw_990_filing`` and are
+    ordered by tax year (newest first), then total compensation (highest
+    first). When *year* is given, only that tax year is returned.
+
+    A non-empty result gains a ``canonical_role`` column produced by applying
+    ``normalize_title`` to ``title``; an empty result is returned without it.
+    """
+    conditions = ["j.total_compensation > 0"]
+    params: list[Any] = []
+    if year is not None:
+        conditions.append("j.tax_year = ?")
+        params.append(year)
+    where = "WHERE " + " AND ".join(conditions)
+
+    sql = f"""
+        SELECT
+            j.tax_year,
+            j.person_name,
+            j.title,
+            j.base_compensation,
+            j.bonus_compensation,
+            j.other_compensation,
+            j.deferred_compensation,
+            j.nontaxable_benefits,
+            j.total_compensation,
+            f.organization_name
+        FROM raw_990_schedule_j j
+        JOIN raw_990_filing f ON f.object_id = j.object_id
+        {where}
+        ORDER BY j.tax_year DESC, j.total_compensation DESC
+    """
+    earners = conn.execute(sql, params).pl()
+    if earners.height == 0:
+        return earners
+
+    role = pl.col("title").map_elements(normalize_title, return_dtype=pl.Utf8)
+    return earners.with_columns(role.alias("canonical_role"))
+
+
+def query_comp_by_role(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return the highest-paid Schedule J row per canonical role and tax year.
+
+    Rows with positive total compensation are labelled with ``canonical_role``
+    (``normalize_title`` applied to ``title``); for each (tax_year, role) pair
+    only the row with the largest ``total_compensation`` is kept. The result is
+    sorted by tax year, then role. An empty query result is returned as-is,
+    without the ``canonical_role`` column.
+    """
+    sql = """
+        SELECT j.tax_year, j.person_name, j.title, j.total_compensation
+        FROM raw_990_schedule_j j
+        JOIN raw_990_filing f ON f.object_id = j.object_id
+        WHERE j.total_compensation > 0
+        ORDER BY j.tax_year, j.total_compensation DESC
+    """
+    rows = conn.execute(sql).pl()
+    if rows.height == 0:
+        return rows
+
+    role = pl.col("title").map_elements(normalize_title, return_dtype=pl.Utf8)
+    labelled = rows.with_columns(role.alias("canonical_role"))
+
+    # Sort highest-paid first so that first() within each group keeps the top
+    # earner for that role and year.
+    ranked = labelled.sort("total_compensation", descending=True)
+    top_per_role = ranked.group_by(["tax_year", "canonical_role"]).first()
+    return top_per_role.sort(["tax_year", "canonical_role"])
+
+
+def query_comp_vs_cpi(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return top compensation and CPI per year, each indexed to 100.
+
+    For every tax year that also has CPI data (inner join), returns the maximum
+    ``total_compensation`` in ``raw_990_schedule_j`` and the average CPI value,
+    plus ``comp_index`` / ``cpi_index``: each series divided by its value in
+    the earliest such year and multiplied by 100. Ordered by year.
+    """
+    sql = """
+        WITH yearly_max_comp AS (
+            SELECT tax_year, MAX(total_compensation) AS top_comp
+            FROM raw_990_schedule_j
+            GROUP BY tax_year
+        ),
+        annual_cpi AS (
+            SELECT year, AVG(value) AS avg_cpi
+            FROM raw_cpi_u GROUP BY year
+        ),
+        base AS (
+            SELECT c.top_comp AS base_comp, ac.avg_cpi AS base_cpi
+            FROM yearly_max_comp c
+            JOIN annual_cpi ac ON ac.year = c.tax_year
+            ORDER BY c.tax_year LIMIT 1
+        )
+        SELECT
+            c.tax_year AS year,
+            c.top_comp,
+            ac.avg_cpi,
+            ROUND(c.top_comp * 100.0 / NULLIF((SELECT base_comp FROM base), 0), 1)
+                AS comp_index,
+            ROUND(ac.avg_cpi * 100.0 / NULLIF((SELECT base_cpi FROM base), 0), 1)
+                AS cpi_index
+        FROM yearly_max_comp c
+        JOIN annual_cpi ac ON ac.year = c.tax_year
+        ORDER BY year
+    """
+    result = conn.execute(sql)
+    return result.pl()
+
+
+def query_staff_composition(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return yearly staff totals split into faculty, management and other.
+
+    One row per ``raw_ipeds_staff`` year for ``UD_UNITID``, ordered by year.
+    ``other_staff`` is ``total_staff`` minus faculty and management, with a
+    NULL faculty or management count treated as 0.
+    """
+    sql = """
+        SELECT year, total_staff, faculty_total, management_total,
+               total_staff - COALESCE(faculty_total, 0) - COALESCE(management_total, 0)
+                   AS other_staff
+        FROM raw_ipeds_staff
+        WHERE unitid = ?
+        ORDER BY year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_student_staff_ratios(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return students per staff member and per faculty member, by year.
+
+    Only years present in both ``raw_ipeds_staff`` and ``raw_ipeds_enrollment``
+    for ``UD_UNITID`` are returned (inner join), ordered by year. Each ratio is
+    rounded to one decimal and is NULL when its denominator is 0.
+    """
+    sql = """
+        SELECT s.year, e.total_enrollment, s.total_staff, s.faculty_total,
+            ROUND(e.total_enrollment * 1.0 / NULLIF(s.total_staff, 0), 1)
+                AS students_per_staff,
+            ROUND(e.total_enrollment * 1.0 / NULLIF(s.faculty_total, 0), 1)
+                AS students_per_faculty
+        FROM raw_ipeds_staff s
+        JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
+        WHERE s.unitid = ?
+        ORDER BY s.year
+    """
+    result = conn.execute(sql, [UD_UNITID])
+    return result.pl()
+
+
+def query_growth_index(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return management and enrollment counts indexed to the first year = 100.
+
+    The base is the earliest year that has both staff and enrollment rows for
+    ``UD_UNITID``. Every such year is returned, ordered by year, with
+    ``mgmt_index`` and ``enrollment_index`` rounded to one decimal; an index is
+    NULL when its base value is 0.
+    """
+    sql = """
+        WITH base AS (
+            SELECT s.management_total AS base_mgmt, e.total_enrollment AS base_enrl
+            FROM raw_ipeds_staff s
+            JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
+            WHERE s.unitid = ?
+            ORDER BY s.year LIMIT 1
+        )
+        SELECT s.year,
+            s.management_total,
+            e.total_enrollment,
+            ROUND(s.management_total * 100.0
+                  / NULLIF((SELECT base_mgmt FROM base), 0), 1) AS mgmt_index,
+            ROUND(e.total_enrollment * 100.0
+                  / NULLIF((SELECT base_enrl FROM base), 0), 1) AS enrollment_index
+        FROM raw_ipeds_staff s
+        JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
+        WHERE s.unitid = ?
+        ORDER BY s.year
+    """
+    # The unit id is bound twice: once in the base CTE, once in the main query.
+    params = [UD_UNITID, UD_UNITID]
+    return conn.execute(sql, params).pl()
+
+
+def query_admin_headcount(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return every ``raw_admin_headcount`` row.
+
+    Rows are ordered by unit, then category, then person name. No unit-id
+    filter is applied.
+    """
+    sql = """
+        SELECT unit, person_name, title, category, is_overhead, scrape_date
+        FROM raw_admin_headcount
+        ORDER BY unit, category, person_name
+    """
+    result = conn.execute(sql)
+    return result.pl()
+
+
+def query_headcount_summary(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
+    """Return row counts of ``raw_admin_headcount`` per unit/category/overhead flag.
+
+    One row per distinct (unit, category, is_overhead) combination with its
+    ``count``, ordered by unit and then by count, largest first.
+    """
+    sql = """
+        SELECT unit, category, is_overhead, COUNT(*) AS count
+        FROM raw_admin_headcount
+        GROUP BY unit, category, is_overhead
+        ORDER BY unit, count DESC
+    """
+    result = conn.execute(sql)
+    return result.pl()