Phase 1 project prototype
This commit is contained in:
parent
29215e2bd2
commit
2c9ae1c312
29 changed files with 2967 additions and 22 deletions
0
src/admin_analytics/dashboard/__init__.py
Normal file
0
src/admin_analytics/dashboard/__init__.py
Normal file
53
src/admin_analytics/dashboard/app.py
Normal file
53
src/admin_analytics/dashboard/app.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
"""Dash application factory."""
|
||||
|
||||
import dash
|
||||
from dash import dcc, html, Input, Output
|
||||
|
||||
from admin_analytics.db.connection import get_connection
|
||||
from admin_analytics.db.schema import ensure_schema
|
||||
from admin_analytics.dashboard.pages import overview, compensation, staffing, headcount
|
||||
|
||||
|
||||
def create_app() -> dash.Dash:
    """Build the Dash application with its tabbed shell and page callbacks.

    A single database connection is opened, the schema is ensured on it, and
    that same connection is handed to every page renderer and to the
    compensation page's interactive callbacks.

    Returns:
        The configured ``dash.Dash`` instance.
    """
    app = dash.Dash(__name__, suppress_callback_exceptions=True)
    conn = get_connection()
    ensure_schema(conn)

    # (tab value, tab label, page module) — one row per dashboard tab, in display order.
    tab_table = (
        ("overview", "Admin Cost Overview", overview),
        ("compensation", "Executive Compensation", compensation),
        ("staffing", "Staffing & Enrollment", staffing),
        ("headcount", "Current Headcount", headcount),
    )

    banner = html.H1(
        "University of Delaware — Administrative Analytics",
        style={"textAlign": "center", "padding": "20px", "color": "#00539F"},
    )
    tab_bar = dcc.Tabs(
        id="tabs",
        value="overview",
        children=[dcc.Tab(label=label, value=value) for value, label, _ in tab_table],
        style={"marginBottom": "20px"},
    )
    content_slot = html.Div(id="tab-content", style={"padding": "0 20px 20px 20px"})

    app.layout = html.Div(
        [banner, tab_bar, content_slot],
        style={"fontFamily": "system-ui, -apple-system, sans-serif", "maxWidth": "1400px", "margin": "0 auto"},
    )

    @app.callback(Output("tab-content", "children"), Input("tabs", "value"))
    def render_tab(tab: str):
        # Render the page whose tab value matches; anything else gets a placeholder.
        for value, _, page in tab_table:
            if tab == value:
                return page.layout(conn)
        return html.Div("Unknown tab")

    # Only the compensation page registers its own interactive callbacks.
    compensation.register_callbacks(app, conn)

    return app
|
||||
0
src/admin_analytics/dashboard/pages/__init__.py
Normal file
0
src/admin_analytics/dashboard/pages/__init__.py
Normal file
162
src/admin_analytics/dashboard/pages/compensation.py
Normal file
162
src/admin_analytics/dashboard/pages/compensation.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
"""Page 2: Executive Compensation."""
|
||||
|
||||
import dash
|
||||
import duckdb
|
||||
from dash import html, dcc, Input, Output, dash_table
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from admin_analytics.dashboard.queries import (
|
||||
query_top_earners,
|
||||
query_comp_by_role,
|
||||
query_comp_vs_cpi,
|
||||
)
|
||||
|
||||
# Placeholder returned by layout() when the top-earners query yields no rows.
_NO_DATA = html.Div(
    "No IRS 990 data loaded. Run: admin-analytics ingest irs990",
    style={"textAlign": "center", "padding": "40px", "color": "#888"},
)
|
||||
|
||||
# Roles to highlight in trend chart.
# Values are compared against the "canonical_role" column; one line is drawn per
# role that has data, in this order.
_KEY_ROLES = ["PRESIDENT", "PROVOST", "VP_FINANCE", "VP_RESEARCH", "VP_ADVANCEMENT", "CFO"]
|
||||
|
||||
|
||||
def layout(conn: duckdb.DuckDBPyConnection):
    """Assemble the Executive Compensation page.

    The page contains a tax-year dropdown, a sortable/filterable earners
    table, a breakdown chart placeholder (filled by the registered callback),
    a compensation-vs-CPI index chart and a per-role trend chart.

    Args:
        conn: Database connection passed through to the query helpers.

    Returns:
        The page component tree, or the ``_NO_DATA`` placeholder when the
        top-earners query returns no rows.
    """
    earners = query_top_earners(conn)
    if earners.height == 0:
        return _NO_DATA

    # Dropdown: "All Years" first, then each distinct tax year ascending.
    tax_years = sorted(earners["tax_year"].unique().to_list())
    dropdown_options = [{"label": "All Years", "value": "all"}]
    dropdown_options.extend({"label": str(y), "value": y} for y in tax_years)

    # Per-role trend: one line for each highlighted role that has rows.
    by_role = query_comp_by_role(conn)
    trend_fig = go.Figure()
    if by_role.height > 0:
        by_role_pd = by_role.to_pandas()
        for role_key in _KEY_ROLES:
            rows = by_role_pd[by_role_pd["canonical_role"] == role_key]
            if len(rows) == 0:
                continue
            trend_fig.add_trace(go.Scatter(
                x=rows["tax_year"],
                y=rows["total_compensation"],
                mode="lines+markers",
                name=role_key.replace("_", " ").title(),
            ))
    trend_fig.update_layout(
        title="Compensation Trends by Role",
        xaxis_title="Tax Year", yaxis_title="Total Compensation ($)",
        template="plotly_white", height=420,
    )

    # Top compensation and CPI-U, both as index series.
    indexed = query_comp_vs_cpi(conn)
    index_fig = go.Figure()
    if indexed.height > 0:
        indexed_pd = indexed.to_pandas()
        index_series = (
            ("comp_index", "Top Compensation", {"color": "#00539F"}),
            ("cpi_index", "CPI-U", {"color": "#FFD200", "dash": "dash"}),
        )
        for column, series_name, line_style in index_series:
            index_fig.add_trace(go.Scatter(
                x=indexed_pd["year"], y=indexed_pd[column],
                mode="lines+markers", name=series_name,
                line=line_style,
            ))
    index_fig.update_layout(
        title="Top Compensation vs CPI-U (Indexed, Base Year = 100)",
        xaxis_title="Year", yaxis_title="Index",
        template="plotly_white", height=380,
    )

    # Table columns: plain text columns followed by thousands-grouped numeric ones.
    table_columns = [
        {"name": "Year", "id": "tax_year"},
        {"name": "Name", "id": "person_name"},
        {"name": "Title", "id": "title"},
        {"name": "Role", "id": "canonical_role"},
    ]
    money_columns = (
        ("Base", "base_compensation"),
        ("Bonus", "bonus_compensation"),
        ("Total", "total_compensation"),
    )
    table_columns += [
        {"name": header, "id": column_id, "type": "numeric",
         "format": dash_table.Format.Format().group(True)}
        for header, column_id in money_columns
    ]

    year_filter = html.Div(
        [
            html.Label("Filter by Tax Year: ", style={"fontWeight": "bold"}),
            dcc.Dropdown(
                id="comp-year-dropdown",
                options=dropdown_options,
                value="all",
                style={"width": "200px", "display": "inline-block"},
            ),
        ],
        style={"marginBottom": "16px"},
    )
    earners_table = dash_table.DataTable(
        id="comp-table",
        columns=table_columns,
        data=earners.to_pandas().to_dict("records"),
        page_size=15,
        sort_action="native",
        filter_action="native",
        style_table={"overflowX": "auto"},
        style_cell={"textAlign": "left", "padding": "8px", "fontSize": "13px"},
        style_header={"fontWeight": "bold", "backgroundColor": "#f0f0f0"},
    )
    chart_row = html.Div(
        [
            # No figure here: the registered callback supplies it.
            html.Div(dcc.Graph(id="comp-breakdown-chart"), style={"flex": "1"}),
            html.Div(dcc.Graph(figure=index_fig), style={"flex": "1"}),
        ],
        style={"display": "flex", "gap": "16px", "marginTop": "16px"},
    )

    return html.Div([
        year_filter,
        earners_table,
        chart_row,
        dcc.Graph(figure=trend_fig),
    ])
|
||||
|
||||
|
||||
def _short_name(name) -> str:
    """Return a compact (at most 20 characters) chart label for a person name.

    "Last, First" yields the part before the comma; otherwise the last
    whitespace-separated word is used. Missing, non-string or blank names
    yield "Unknown" instead of raising.
    """
    if not isinstance(name, str) or not name.strip():
        return "Unknown"
    if "," in name:
        return name.split(",")[0][:20]
    return name.split()[-1][:20]


def register_callbacks(app: dash.Dash, conn: duckdb.DuckDBPyConnection) -> None:
    """Register interactive callbacks for the compensation page.

    Args:
        app: Dash application the callback is attached to.
        conn: Database connection passed to ``query_top_earners``.
    """

    @app.callback(
        [Output("comp-table", "data"), Output("comp-breakdown-chart", "figure")],
        Input("comp-year-dropdown", "value"),
    )
    def update_compensation(year_value):
        # A cleared dropdown sends None; treat it like "all" instead of
        # letting int(None) raise.
        year = None if year_value in (None, "all") else int(year_value)
        earners = query_top_earners(conn, year=year)
        has_rows = earners.height > 0

        # Convert once and reuse for both the table and the chart.
        earners_pd = earners.to_pandas() if has_rows else None

        # Table data
        table_data = earners_pd.to_dict("records") if has_rows else []

        # Breakdown chart — stacked bar of comp components
        breakdown_fig = go.Figure()
        if has_rows:
            top = earners_pd
            if year is None:
                # Rows arrive ordered by year first, so re-rank by total
                # compensation to get a real top 10 across all years.
                top = top.sort_values("total_compensation", ascending=False, kind="stable")
            top = top.head(10)  # top 10 by total comp

            if year is None:
                # The same person can appear in several years; add the year so
                # their bars do not collapse onto one x category.
                bar_labels = [
                    f"{_short_name(person)} ({tax_year})"
                    for person, tax_year in zip(top["person_name"], top["tax_year"])
                ]
            else:
                bar_labels = [_short_name(person) for person in top["person_name"]]

            for comp_type, label, color in [
                ("base_compensation", "Base", "#00539F"),
                ("bonus_compensation", "Bonus", "#FFD200"),
                ("deferred_compensation", "Deferred", "#7FB069"),
                ("nontaxable_benefits", "Benefits", "#E07A5F"),
                ("other_compensation", "Other", "#999"),
            ]:
                if comp_type in top.columns:
                    breakdown_fig.add_trace(go.Bar(
                        x=bar_labels, y=top[comp_type],
                        name=label, marker_color=color,
                    ))
            breakdown_fig.update_layout(barmode="stack")

        title_suffix = f" ({year})" if year is not None else " (All Years)"
        breakdown_fig.update_layout(
            title=f"Compensation Breakdown — Top 10{title_suffix}",
            xaxis_title="", yaxis_title="$",
            template="plotly_white", height=380,
        )

        return table_data, breakdown_fig
|
||||
118
src/admin_analytics/dashboard/pages/headcount.py
Normal file
118
src/admin_analytics/dashboard/pages/headcount.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
"""Page 4: Current Admin Headcount (from scraper)."""
|
||||
|
||||
import duckdb
|
||||
from dash import html, dcc, dash_table
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from admin_analytics.dashboard.queries import (
|
||||
query_admin_headcount,
|
||||
query_headcount_summary,
|
||||
)
|
||||
|
||||
# Placeholder returned by layout() when the headcount detail query yields no rows.
_NO_DATA = html.Div(
    "No headcount data loaded. Run: admin-analytics ingest scrape",
    style={"textAlign": "center", "padding": "40px", "color": "#888"},
)
|
||||
|
||||
|
||||
def _kpi_card(title: str, value: str) -> html.Div:
    """Build one KPI tile: a small grey caption above a large blue value.

    Args:
        title: Caption text shown in the ``H4``.
        value: Pre-formatted figure shown in the ``H2``.

    Returns:
        A flex-item ``html.Div`` containing the two headings.
    """
    caption = html.H4(title, style={"margin": "0", "color": "#666", "fontSize": "14px"})
    figure_text = html.H2(value, style={"margin": "5px 0", "color": "#00539F"})
    tile_style = {
        "flex": "1",
        "padding": "20px",
        "backgroundColor": "#f8f9fa",
        "borderRadius": "8px",
        "textAlign": "center",
        "margin": "0 8px",
    }
    return html.Div([caption, figure_text], style=tile_style)
|
||||
|
||||
|
||||
def layout(conn: duckdb.DuckDBPyConnection):
    """Assemble the Current Headcount page from the scraped staff rows.

    The page shows three KPI tiles, a staff-per-unit bar chart, an overhead
    pie chart, a per-unit category chart and a detail table.

    Args:
        conn: Database connection passed through to the query helpers.

    Returns:
        The page component tree, or the ``_NO_DATA`` placeholder when the
        detail query returns no rows.
    """
    detail_df = query_admin_headcount(conn)
    if detail_df.height == 0:
        return _NO_DATA

    summary_df = query_headcount_summary(conn)
    detail_pd = detail_df.to_pandas()
    summary_pd = summary_df.to_pandas()

    total = len(detail_pd)
    has_overhead_flag = "is_overhead" in detail_pd.columns
    # Count only rows explicitly flagged True, so NULL flags are not counted.
    overhead_count = int(detail_pd["is_overhead"].eq(True).sum()) if has_overhead_flag else 0
    overhead_pct = round(overhead_count * 100 / total, 1) if total > 0 else 0

    # KPI cards
    kpi_row = html.Div(
        [
            _kpi_card("Total Staff Scraped", str(total)),
            _kpi_card("Overhead Staff", str(overhead_count)),
            _kpi_card("Overhead %", f"{overhead_pct}%"),
        ],
        style={"display": "flex", "marginBottom": "24px"},
    )

    # Staff by unit bar chart
    unit_counts = summary_pd.groupby("unit")["count"].sum().reset_index().sort_values("count")
    chart_height = max(300, len(unit_counts) * 30 + 100)
    unit_fig = px.bar(
        unit_counts, x="count", y="unit", orientation="h",
        title="Staff Count by Unit",
        labels={"count": "Staff", "unit": ""},
        color_discrete_sequence=["#00539F"],
    )
    unit_fig.update_layout(template="plotly_white", height=chart_height)

    # Overhead pie
    # dropna=False keeps rows with a NULL flag; value_counts() drops them by
    # default, which would make the "Debatable" slice unreachable.
    oh_data = detail_pd["is_overhead"].value_counts(dropna=False)
    oh_labels = {True: "Overhead", False: "Non-Overhead"}
    slice_names = [oh_labels.get(k, "Debatable") for k in oh_data.index]
    pie_fig = px.pie(
        names=slice_names,
        values=oh_data.values,
        title="Overhead vs Non-Overhead",
        # Pin colours to labels; a positional palette follows the frequency
        # order of value_counts() and so varies with the data.
        color=slice_names,
        color_discrete_map={
            "Overhead": "#E07A5F",
            "Non-Overhead": "#7FB069",
            "Debatable": "#999",
        },
    )
    pie_fig.update_layout(template="plotly_white", height=350)

    # Category distribution per unit
    cat_fig = px.bar(
        summary_pd, x="count", y="unit", color="category", orientation="h",
        title="Category Distribution by Unit",
        labels={"count": "Staff", "unit": "", "category": "Category"},
    )
    cat_fig.update_layout(template="plotly_white", height=chart_height)

    # Detail table
    table = dash_table.DataTable(
        columns=[
            {"name": "Unit", "id": "unit"},
            {"name": "Name", "id": "person_name"},
            {"name": "Title", "id": "title"},
            {"name": "Category", "id": "category"},
            {"name": "Overhead", "id": "is_overhead"},
        ],
        data=detail_pd.to_dict("records"),
        page_size=20,
        sort_action="native",
        filter_action="native",
        style_table={"overflowX": "auto"},
        style_cell={"textAlign": "left", "padding": "8px", "fontSize": "13px"},
        style_header={"fontWeight": "bold", "backgroundColor": "#f0f0f0"},
    )

    return html.Div([
        kpi_row,
        html.Div(
            [
                html.Div(dcc.Graph(figure=unit_fig), style={"flex": "1"}),
                html.Div(dcc.Graph(figure=pie_fig), style={"flex": "1"}),
            ],
            style={"display": "flex", "gap": "16px"},
        ),
        dcc.Graph(figure=cat_fig),
        html.H3("Staff Directory Detail", style={"marginTop": "24px"}),
        table,
    ])
|
||||
168
src/admin_analytics/dashboard/pages/overview.py
Normal file
168
src/admin_analytics/dashboard/pages/overview.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
"""Page 1: Administrative Cost Overview."""
|
||||
|
||||
import duckdb
|
||||
from dash import html, dcc
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from admin_analytics.dashboard.queries import (
|
||||
query_admin_cost_ratio,
|
||||
query_expense_breakdown,
|
||||
query_admin_per_student,
|
||||
query_admin_faculty_ratio,
|
||||
)
|
||||
|
||||
# Placeholder returned by layout() when the admin cost ratio query yields no rows.
_NO_DATA = html.Div(
    "No data loaded. Run: admin-analytics ingest all",
    style={"textAlign": "center", "padding": "40px", "color": "#888"},
)
|
||||
|
||||
|
||||
def _kpi_card(title: str, value: str, subtitle: str = "") -> html.Div:
    """Build one KPI tile: caption, large value, and a small footnote line.

    Args:
        title: Caption text shown in the ``H4``.
        value: Pre-formatted figure shown in the ``H2``.
        subtitle: Footnote shown in the ``P``; empty by default.

    Returns:
        A flex-item ``html.Div`` containing the three elements.
    """
    caption = html.H4(title, style={"margin": "0", "color": "#666", "fontSize": "14px"})
    figure_text = html.H2(value, style={"margin": "5px 0", "color": "#00539F"})
    footnote = html.P(subtitle, style={"margin": "0", "color": "#999", "fontSize": "12px"})
    tile_style = {
        "flex": "1",
        "padding": "20px",
        "backgroundColor": "#f8f9fa",
        "borderRadius": "8px",
        "textAlign": "center",
        "margin": "0 8px",
    }
    return html.Div([caption, figure_text, footnote], style=tile_style)
|
||||
|
||||
|
||||
def _is_missing(value) -> bool:
    """Return True for None or NaN, i.e. a SQL NULL after the pandas conversion."""
    try:
        return value is None or bool(value != value)
    except (TypeError, ValueError):
        # Comparison without a truth value (e.g. pandas.NA): treat as missing.
        return True


def _fmt_or_na(value, template: str, cast=float) -> str:
    """Render ``cast(value)`` through *template*, or "N/A" when value is missing."""
    if _is_missing(value):
        return "N/A"
    return template.format(cast(value))


def layout(conn: duckdb.DuckDBPyConnection):
    """Assemble the Administrative Cost Overview page.

    The page shows three KPI tiles for the latest available year, then the
    admin cost ratio trend, the expense breakdown, and side-by-side
    per-student and admin-to-faculty trends.

    KPI values that are NULL in the latest row (for example when CPI data is
    missing for that year or the denominator is zero) are shown as "N/A"
    instead of raising on ``int(NaN)`` or rendering "nan%".

    Args:
        conn: Database connection passed through to the query helpers.

    Returns:
        The page component tree, or the ``_NO_DATA`` placeholder when the
        admin cost ratio query returns no rows.
    """
    # Admin cost ratio
    ratio_df = query_admin_cost_ratio(conn)
    if ratio_df.height == 0:
        return _NO_DATA

    ratio_pd = ratio_df.to_pandas()
    latest = ratio_pd.iloc[-1]  # queries are ordered by year, so last row = latest year

    # Admin per student
    aps_df = query_admin_per_student(conn)
    aps_pd = aps_df.to_pandas() if aps_df.height > 0 else None
    latest_aps = aps_pd.iloc[-1] if aps_pd is not None else None

    # Admin-to-faculty ratio
    afr_df = query_admin_faculty_ratio(conn)
    afr_pd = afr_df.to_pandas() if afr_df.height > 0 else None
    latest_afr = afr_pd.iloc[-1] if afr_pd is not None else None

    # KPI text, with missing values guarded
    if latest_aps is not None:
        aps_value = _fmt_or_na(latest_aps["admin_per_student"], "${:,}", cast=int)
        aps_subtitle = "CPI-adjusted: " + _fmt_or_na(
            latest_aps["admin_per_student_cpi"], "${:,}", cast=int
        )
    else:
        aps_value, aps_subtitle = "N/A", ""

    if latest_afr is not None:
        afr_value = _fmt_or_na(latest_afr["admin_faculty_ratio"], "{:.2f}")
        afr_subtitle = f"Management / Faculty ({int(latest_afr['year'])})"
    else:
        afr_value, afr_subtitle = "N/A", ""

    # KPI cards
    kpi_row = html.Div(
        [
            _kpi_card(
                "Admin Cost Ratio",
                _fmt_or_na(latest["admin_cost_pct"], "{:.1f}%"),
                f"Institutional Support / Total Expenses ({int(latest['year'])})",
            ),
            _kpi_card("Admin Cost per Student", aps_value, aps_subtitle),
            _kpi_card("Admin-to-Faculty Ratio", afr_value, afr_subtitle),
        ],
        style={"display": "flex", "marginBottom": "24px"},
    )

    # Admin cost ratio trend
    ratio_fig = go.Figure()
    ratio_fig.add_trace(go.Scatter(
        x=ratio_pd["year"], y=ratio_pd["admin_cost_pct"],
        mode="lines+markers", name="Admin Cost %",
        line={"color": "#00539F"},
    ))
    ratio_fig.update_layout(
        title="Administrative Cost Ratio Over Time",
        xaxis_title="Year", yaxis_title="Institutional Support / Total Expenses (%)",
        template="plotly_white", height=400,
    )

    # Expense breakdown stacked area
    breakdown_df = query_expense_breakdown(conn)
    if breakdown_df.height > 0:
        bk_pd = breakdown_df.to_pandas()
        expense_cols = [c for c in bk_pd.columns if c != "year"]
        labels = {
            "instruction_expenses": "Instruction",
            "research_expenses": "Research",
            "public_service_expenses": "Public Service",
            "academic_support_expenses": "Academic Support",
            "student_services_expenses": "Student Services",
            "institutional_support_expenses": "Institutional Support",
            "auxiliary_expenses": "Auxiliary",
            "hospital_expenses": "Hospital",
            "other_expenses": "Other",
        }
        breakdown_fig = go.Figure()
        for col in expense_cols:
            breakdown_fig.add_trace(go.Scatter(
                x=bk_pd["year"], y=bk_pd[col] / 1e6,  # plotted in millions
                mode="lines", name=labels.get(col, col),
                stackgroup="one",
            ))
        breakdown_fig.update_layout(
            title="Expenses by Function (Millions $)",
            xaxis_title="Year", yaxis_title="Millions $",
            template="plotly_white", height=450,
        )
    else:
        breakdown_fig = go.Figure()

    # Admin per student trend
    aps_fig = go.Figure()
    if aps_pd is not None:
        aps_fig.add_trace(go.Scatter(
            x=aps_pd["year"], y=aps_pd["admin_per_student"],
            mode="lines+markers", name="Nominal",
            line={"color": "#00539F"},
        ))
        aps_fig.add_trace(go.Scatter(
            x=aps_pd["year"], y=aps_pd["admin_per_student_cpi"],
            mode="lines+markers", name="CPI-Adjusted",
            line={"color": "#FFD200", "dash": "dash"},
        ))
    aps_fig.update_layout(
        title="Admin Cost per Student",
        xaxis_title="Year", yaxis_title="$ per Student",
        template="plotly_white", height=380,
    )

    # Admin-to-faculty ratio trend
    afr_fig = go.Figure()
    if afr_pd is not None:
        afr_fig.add_trace(go.Scatter(
            x=afr_pd["year"], y=afr_pd["admin_faculty_ratio"],
            mode="lines+markers", name="Ratio",
            line={"color": "#00539F"},
        ))
    afr_fig.update_layout(
        title="Admin-to-Faculty Ratio",
        xaxis_title="Year", yaxis_title="Management / Faculty",
        template="plotly_white", height=380,
    )

    return html.Div([
        kpi_row,
        dcc.Graph(figure=ratio_fig),
        dcc.Graph(figure=breakdown_fig),
        html.Div(
            [
                html.Div(dcc.Graph(figure=aps_fig), style={"flex": "1"}),
                html.Div(dcc.Graph(figure=afr_fig), style={"flex": "1"}),
            ],
            style={"display": "flex", "gap": "16px"},
        ),
    ])
|
||||
97
src/admin_analytics/dashboard/pages/staffing.py
Normal file
97
src/admin_analytics/dashboard/pages/staffing.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""Page 3: Staffing & Enrollment."""
|
||||
|
||||
import duckdb
|
||||
from dash import html, dcc
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from admin_analytics.dashboard.queries import (
|
||||
query_staff_composition,
|
||||
query_student_staff_ratios,
|
||||
query_growth_index,
|
||||
)
|
||||
|
||||
# Placeholder returned by layout() when the staff composition query yields no rows.
_NO_DATA = html.Div(
    "No IPEDS staff data loaded. Run: admin-analytics ingest ipeds",
    style={"textAlign": "center", "padding": "40px", "color": "#888"},
)
|
||||
|
||||
|
||||
def layout(conn: duckdb.DuckDBPyConnection):
    """Assemble the Staffing & Enrollment page.

    The page shows a stacked staff composition chart on top, with
    student-to-staff ratios and an indexed management-vs-enrollment chart
    side by side below.

    Args:
        conn: Database connection passed through to the query helpers.

    Returns:
        The page component tree, or the ``_NO_DATA`` placeholder when the
        staff composition query returns no rows.
    """
    composition = query_staff_composition(conn)
    if composition.height == 0:
        return _NO_DATA
    composition_pd = composition.to_pandas()

    # Stacked area: one band per staff group.
    stacked_fig = go.Figure()
    staff_bands = (
        ("faculty_total", "Faculty", "#00539F"),
        ("management_total", "Management", "#E07A5F"),
        ("other_staff", "Other Staff", "#7FB069"),
    )
    for column, band_name, band_color in staff_bands:
        stacked_fig.add_trace(go.Scatter(
            x=composition_pd["year"], y=composition_pd[column],
            mode="lines", name=band_name,
            stackgroup="one",
            line={"color": band_color},
        ))
    stacked_fig.update_layout(
        title="Staff Composition Over Time",
        xaxis_title="Year", yaxis_title="Headcount",
        template="plotly_white", height=420,
    )

    # Students per staff member and per faculty member.
    ratios = query_student_staff_ratios(conn)
    ratios_fig = go.Figure()
    if ratios.height > 0:
        ratios_pd = ratios.to_pandas()
        ratio_lines = (
            ("students_per_staff", "Students per Staff", "#00539F"),
            ("students_per_faculty", "Students per Faculty", "#FFD200"),
        )
        for column, line_name, line_color in ratio_lines:
            ratios_fig.add_trace(go.Scatter(
                x=ratios_pd["year"], y=ratios_pd[column],
                mode="lines+markers", name=line_name,
                line={"color": line_color},
            ))
    ratios_fig.update_layout(
        title="Student-to-Staff Ratios",
        xaxis_title="Year", yaxis_title="Ratio",
        template="plotly_white", height=380,
    )

    # Indexed growth, with a dotted reference line at 100.
    indexed = query_growth_index(conn)
    indexed_fig = go.Figure()
    if indexed.height > 0:
        indexed_pd = indexed.to_pandas()
        index_lines = (
            ("mgmt_index", "Management Growth", "#E07A5F"),
            ("enrollment_index", "Enrollment Growth", "#00539F"),
        )
        for column, line_name, line_color in index_lines:
            indexed_fig.add_trace(go.Scatter(
                x=indexed_pd["year"], y=indexed_pd[column],
                mode="lines+markers", name=line_name,
                line={"color": line_color},
            ))
        indexed_fig.add_hline(y=100, line_dash="dot", line_color="#ccc")
    indexed_fig.update_layout(
        title="Management vs Enrollment Growth (Indexed, Base Year = 100)",
        xaxis_title="Year", yaxis_title="Index",
        template="plotly_white", height=380,
    )

    side_by_side = html.Div(
        [
            html.Div(dcc.Graph(figure=ratios_fig), style={"flex": "1"}),
            html.Div(dcc.Graph(figure=indexed_fig), style={"flex": "1"}),
        ],
        style={"display": "flex", "gap": "16px"},
    )
    return html.Div([dcc.Graph(figure=stacked_fig), side_by_side])
|
||||
263
src/admin_analytics/dashboard/queries.py
Normal file
263
src/admin_analytics/dashboard/queries.py
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
"""Dashboard query layer — all DuckDB queries returning polars DataFrames."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import duckdb
|
||||
import polars as pl
|
||||
|
||||
from admin_analytics.config import UD_UNITID
|
||||
from admin_analytics.irs990.titles import normalize_title
|
||||
|
||||
# Shared CTE for CPI adjustment.
# annual_cpi: average CPI value per year from raw_cpi_u.
# latest_cpi: the average for the most recent year present in annual_cpi.
# Interpolated at the start of a query, so the query text that follows must
# begin with its main SELECT.
_CPI_CTE = """
    WITH annual_cpi AS (
        SELECT year, AVG(value) AS avg_cpi
        FROM raw_cpi_u
        GROUP BY year
    ),
    latest_cpi AS (
        SELECT avg_cpi FROM annual_cpi
        WHERE year = (SELECT MAX(year) FROM annual_cpi)
    )
"""
|
||||
|
||||
|
||||
def query_admin_cost_ratio(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return yearly institutional-support share of total expenses.

    Each row carries the raw expense figures, ``admin_cost_pct`` (NULL when
    total expenses are zero) and both figures rescaled by latest-year CPI
    over that year's CPI. Rows are limited to ``UD_UNITID`` and ordered by year.
    """
    sql = f"""
        {_CPI_CTE}
        SELECT
            f.year,
            f.institutional_support_expenses,
            f.total_expenses,
            ROUND(f.institutional_support_expenses * 100.0
                / NULLIF(f.total_expenses, 0), 2) AS admin_cost_pct,
            ROUND(f.institutional_support_expenses
                * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi, 0)
                AS inst_support_cpi_adjusted,
            ROUND(f.total_expenses
                * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi, 0)
                AS total_expenses_cpi_adjusted
        FROM raw_ipeds_finance f
        LEFT JOIN annual_cpi ac ON ac.year = f.year
        WHERE f.unitid = ?
        ORDER BY f.year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_expense_breakdown(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return the per-function expense columns for each year.

    Rows come from ``raw_ipeds_finance`` for ``UD_UNITID``, ordered by year.
    """
    sql = """
        SELECT year,
            instruction_expenses, research_expenses, public_service_expenses,
            academic_support_expenses, student_services_expenses,
            institutional_support_expenses, auxiliary_expenses,
            hospital_expenses, other_expenses
        FROM raw_ipeds_finance
        WHERE unitid = ?
        ORDER BY year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_admin_per_student(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return institutional-support spend per enrolled student, by year.

    Finance rows are inner-joined to enrollment on unit and year. CPI is
    left-joined, so ``admin_per_student_cpi`` is NULL for years without CPI
    data. Both ratios are NULL when enrollment is zero.
    """
    sql = f"""
        {_CPI_CTE}
        SELECT
            f.year,
            f.institutional_support_expenses,
            e.total_enrollment,
            ROUND(f.institutional_support_expenses * 1.0
                / NULLIF(e.total_enrollment, 0), 0) AS admin_per_student,
            ROUND(
                (f.institutional_support_expenses
                    * (SELECT avg_cpi FROM latest_cpi) / ac.avg_cpi)
                / NULLIF(e.total_enrollment, 0), 0
            ) AS admin_per_student_cpi
        FROM raw_ipeds_finance f
        JOIN raw_ipeds_enrollment e ON e.unitid = f.unitid AND e.year = f.year
        LEFT JOIN annual_cpi ac ON ac.year = f.year
        WHERE f.unitid = ?
        ORDER BY f.year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_admin_faculty_ratio(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return management headcount divided by faculty headcount, by year.

    The ratio is rounded to three decimals and is NULL when the faculty
    total is zero. Rows are limited to ``UD_UNITID`` and ordered by year.
    """
    sql = """
        SELECT year,
            management_total,
            faculty_total,
            ROUND(management_total * 1.0 / NULLIF(faculty_total, 0), 3)
                AS admin_faculty_ratio
        FROM raw_ipeds_staff
        WHERE unitid = ?
        ORDER BY year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_top_earners(
    conn: duckdb.DuckDBPyConnection, year: int | None = None
) -> pl.DataFrame:
    """Return Schedule J rows with positive total compensation.

    Args:
        conn: Database connection to query.
        year: When given, restrict rows to this tax year.

    Returns:
        Rows ordered by tax year (newest first), then total compensation
        (highest first). A ``canonical_role`` column derived from ``title``
        via ``normalize_title`` is added only when at least one row exists.
    """
    conditions = ["j.total_compensation > 0"]
    params: list[Any] = []
    if year is not None:
        conditions.append("j.tax_year = ?")
        params.append(year)
    where = "WHERE " + " AND ".join(conditions)

    sql = f"""
        SELECT
            j.tax_year,
            j.person_name,
            j.title,
            j.base_compensation,
            j.bonus_compensation,
            j.other_compensation,
            j.deferred_compensation,
            j.nontaxable_benefits,
            j.total_compensation,
            f.organization_name
        FROM raw_990_schedule_j j
        JOIN raw_990_filing f ON f.object_id = j.object_id
        {where}
        ORDER BY j.tax_year DESC, j.total_compensation DESC
    """
    rows = conn.execute(sql, params).pl()

    if rows.height == 0:
        return rows

    role_expr = pl.col("title").map_elements(normalize_title, return_dtype=pl.Utf8)
    return rows.with_columns(role_expr.alias("canonical_role"))
|
||||
|
||||
|
||||
def query_comp_by_role(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return the highest-paid Schedule J entry per canonical role per tax year.

    Titles are mapped to roles with ``normalize_title``. An empty query
    result is returned as-is, without the ``canonical_role`` column.
    """
    sql = """
        SELECT j.tax_year, j.person_name, j.title, j.total_compensation
        FROM raw_990_schedule_j j
        JOIN raw_990_filing f ON f.object_id = j.object_id
        WHERE j.total_compensation > 0
        ORDER BY j.tax_year, j.total_compensation DESC
    """
    rows = conn.execute(sql).pl()
    if rows.height == 0:
        return rows

    role_expr = pl.col("title").map_elements(normalize_title, return_dtype=pl.Utf8)
    with_roles = rows.with_columns(role_expr.alias("canonical_role"))

    # Sort highest pay first so the first row of each (year, role) group is the top earner.
    ranked = with_roles.sort("total_compensation", descending=True)
    top_per_role = ranked.group_by(["tax_year", "canonical_role"]).first()
    return top_per_role.sort(["tax_year", "canonical_role"])
|
||||
|
||||
|
||||
def query_comp_vs_cpi(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return yearly top compensation and average CPI as index series.

    The base row is the earliest tax year that has both a compensation
    maximum and CPI data. Each index is the year's value times 100 divided
    by the base value, rounded to one decimal.
    """
    sql = """
        WITH yearly_max_comp AS (
            SELECT tax_year, MAX(total_compensation) AS top_comp
            FROM raw_990_schedule_j
            GROUP BY tax_year
        ),
        annual_cpi AS (
            SELECT year, AVG(value) AS avg_cpi
            FROM raw_cpi_u GROUP BY year
        ),
        base AS (
            SELECT c.top_comp AS base_comp, ac.avg_cpi AS base_cpi
            FROM yearly_max_comp c
            JOIN annual_cpi ac ON ac.year = c.tax_year
            ORDER BY c.tax_year LIMIT 1
        )
        SELECT
            c.tax_year AS year,
            c.top_comp,
            ac.avg_cpi,
            ROUND(c.top_comp * 100.0 / NULLIF((SELECT base_comp FROM base), 0), 1)
                AS comp_index,
            ROUND(ac.avg_cpi * 100.0 / NULLIF((SELECT base_cpi FROM base), 0), 1)
                AS cpi_index
        FROM yearly_max_comp c
        JOIN annual_cpi ac ON ac.year = c.tax_year
        ORDER BY year
    """
    return conn.execute(sql).pl()
|
||||
|
||||
|
||||
def query_staff_composition(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return yearly total, faculty, management and remaining staff counts.

    ``other_staff`` is total staff minus faculty and management, with NULL
    faculty or management counted as zero. Rows are limited to
    ``UD_UNITID`` and ordered by year.
    """
    sql = """
        SELECT year, total_staff, faculty_total, management_total,
            total_staff - COALESCE(faculty_total, 0) - COALESCE(management_total, 0)
                AS other_staff
        FROM raw_ipeds_staff
        WHERE unitid = ?
        ORDER BY year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_student_staff_ratios(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return enrollment per staff member and per faculty member, by year.

    Staff rows are inner-joined to enrollment on unit and year. Each ratio
    is rounded to one decimal and is NULL when its denominator is zero.
    """
    sql = """
        SELECT s.year, e.total_enrollment, s.total_staff, s.faculty_total,
            ROUND(e.total_enrollment * 1.0 / NULLIF(s.total_staff, 0), 1)
                AS students_per_staff,
            ROUND(e.total_enrollment * 1.0 / NULLIF(s.faculty_total, 0), 1)
                AS students_per_faculty
        FROM raw_ipeds_staff s
        JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
        WHERE s.unitid = ?
        ORDER BY s.year
    """
    bound = [UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_growth_index(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return management and enrollment counts as index series.

    The base row is the earliest year that has both staff and enrollment
    data for ``UD_UNITID``. Each index is the year's count times 100 divided
    by the base count, and is NULL when the base count is zero.
    """
    sql = """
        WITH base AS (
            SELECT s.management_total AS base_mgmt, e.total_enrollment AS base_enrl
            FROM raw_ipeds_staff s
            JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
            WHERE s.unitid = ?
            ORDER BY s.year LIMIT 1
        )
        SELECT s.year,
            s.management_total,
            e.total_enrollment,
            ROUND(s.management_total * 100.0
                / NULLIF((SELECT base_mgmt FROM base), 0), 1) AS mgmt_index,
            ROUND(e.total_enrollment * 100.0
                / NULLIF((SELECT base_enrl FROM base), 0), 1) AS enrollment_index
        FROM raw_ipeds_staff s
        JOIN raw_ipeds_enrollment e ON e.unitid = s.unitid AND e.year = s.year
        WHERE s.unitid = ?
        ORDER BY s.year
    """
    # The unit id is bound twice: once in the base CTE, once in the main query.
    bound = [UD_UNITID, UD_UNITID]
    return conn.execute(sql, bound).pl()
|
||||
|
||||
|
||||
def query_admin_headcount(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return every row of ``raw_admin_headcount``.

    Rows are ordered by unit, then category, then person name.
    """
    sql = """
        SELECT unit, person_name, title, category, is_overhead, scrape_date
        FROM raw_admin_headcount
        ORDER BY unit, category, person_name
    """
    return conn.execute(sql).pl()
|
||||
|
||||
|
||||
def query_headcount_summary(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Return row counts of ``raw_admin_headcount`` per unit, category and overhead flag.

    Rows are ordered by unit, then by count descending.
    """
    sql = """
        SELECT unit, category, is_overhead, COUNT(*) AS count
        FROM raw_admin_headcount
        GROUP BY unit, category, is_overhead
        ORDER BY unit, count DESC
    """
    return conn.execute(sql).pl()
|
||||
Loading…
Add table
Add a link
Reference in a new issue