Compensation, endowment tweaks. Added About.
This commit is contained in:
parent
a41f78545b
commit
13fb4b8418
13 changed files with 914 additions and 17 deletions
|
|
@ -96,6 +96,116 @@ def query_admin_faculty_ratio(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
|||
""", [UD_UNITID]).pl()
|
||||
|
||||
|
||||
def query_aggregate_comp(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Aggregate the ten highest Schedule J pay packages for each tax year.

    Rows with positive ``total_compensation`` are ranked within their tax
    year by ``total_compensation`` (descending); only ranks 1-10 are kept.

    Returns:
        One row per ``tax_year`` (ascending) with ``headcount`` (number of
        rows kept, at most 10), ``total_comp``, ``avg_comp`` (rounded to a
        whole number) and the per-component sums ``total_base``,
        ``total_bonus``, ``total_deferred``, ``total_benefits`` and
        ``total_other``.
    """
    sql = """
        WITH ranked AS (
            SELECT j.tax_year, j.total_compensation,
                   j.base_compensation, j.bonus_compensation,
                   j.deferred_compensation, j.nontaxable_benefits,
                   j.other_compensation,
                   ROW_NUMBER() OVER (PARTITION BY j.tax_year
                                      ORDER BY j.total_compensation DESC) AS rn
            FROM raw_990_schedule_j j
            WHERE j.total_compensation > 0
        )
        SELECT tax_year,
               COUNT(*) AS headcount,
               SUM(total_compensation) AS total_comp,
               ROUND(AVG(total_compensation), 0) AS avg_comp,
               SUM(base_compensation) AS total_base,
               SUM(bonus_compensation) AS total_bonus,
               SUM(deferred_compensation) AS total_deferred,
               SUM(nontaxable_benefits) AS total_benefits,
               SUM(other_compensation) AS total_other
        FROM ranked
        WHERE rn <= 10
        GROUP BY tax_year
        ORDER BY tax_year
    """
    result = conn.execute(sql)
    return result.pl()
|
||||
|
||||
|
||||
def query_aggregate_comp_cagr(conn: duckdb.DuckDBPyConnection) -> dict | None:
    """CAGR of aggregate Schedule J compensation over the most recent data.

    Takes the per-year totals produced by ``query_aggregate_comp``, keeps at
    most the last five rows, and measures the compound annual growth of
    ``total_comp`` between the first and last kept year.

    Returns:
        A dict with ``cagr_pct`` (percent, rounded to one decimal),
        ``start_year``, ``end_year``, ``start_comp`` and ``end_comp`` (both
        truncated to ``int``), or ``None`` when fewer than two years are
        available, the year span is not positive, or the starting total is
        not positive.
    """
    df = query_aggregate_comp(conn)
    if df.height < 2:
        return None

    # Use last 5 years of available data
    df = df.tail(min(5, df.height))

    start_year = df["tax_year"][0]
    end_year = df["tax_year"][-1]
    start_comp = float(df["total_comp"][0])
    end_comp = float(df["total_comp"][-1])
    # Elapsed years, not row count: gaps in the data lengthen the span.
    n_years = end_year - start_year

    if n_years <= 0 or start_comp <= 0:
        return None

    cagr = ((end_comp / start_comp) ** (1.0 / n_years) - 1) * 100
    return {
        "cagr_pct": round(cagr, 1),
        "start_year": start_year,
        "end_year": end_year,
        # Report the actual starting total; this previously echoed end_comp.
        "start_comp": int(start_comp),
        "end_comp": int(end_comp),
    }
|
||||
|
||||
|
||||
def query_comp_cagr(conn: duckdb.DuckDBPyConnection) -> dict | None:
    """Compound annual growth rate of the President's total compensation.

    All Schedule J rows with positive compensation are loaded, each title is
    passed through ``normalize_title``, and only rows whose normalized role
    equals ``"PRESIDENT"`` are kept. The highest compensation per tax year
    forms the series; growth is measured from its first year to its last.

    Returns:
        A dict with ``cagr_pct`` (percent, rounded to one decimal),
        ``start_year``, ``end_year``, ``start_comp`` and ``end_comp``, or
        ``None`` when there are no rows, fewer than two President years, a
        non-positive year span, or a non-positive starting compensation.
    """
    sql = """
        SELECT j.tax_year, j.title, j.total_compensation
        FROM raw_990_schedule_j j
        WHERE j.total_compensation > 0
        ORDER BY j.tax_year
    """
    rows = conn.execute(sql).pl()
    if rows.height == 0:
        return None

    role_expr = pl.col("title").map_elements(
        normalize_title, return_dtype=pl.Utf8
    ).alias("role")

    # One row per tax year: the largest package reported for a President.
    yearly = (
        rows.with_columns(role_expr)
        .filter(pl.col("role") == "PRESIDENT")
        .group_by("tax_year")
        .agg(pl.col("total_compensation").max().alias("top_comp"))
        .sort("tax_year")
    )
    if yearly.height < 2:
        return None

    years = yearly["tax_year"]
    comps = yearly["top_comp"]
    first_year, last_year = years[0], years[-1]
    first_comp, last_comp = comps[0], comps[-1]

    span = last_year - first_year
    if span <= 0 or first_comp <= 0:
        return None

    growth_pct = ((last_comp / first_comp) ** (1.0 / span) - 1) * 100

    return {
        "cagr_pct": round(growth_pct, 1),
        "start_year": first_year,
        "end_year": last_year,
        "start_comp": first_comp,
        "end_comp": last_comp,
    }
|
||||
|
||||
|
||||
def query_top_earners(
|
||||
conn: duckdb.DuckDBPyConnection, year: int | None = None
|
||||
) -> pl.DataFrame:
|
||||
|
|
@ -162,11 +272,23 @@ def query_comp_by_role(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
|||
|
||||
|
||||
def query_comp_vs_cpi(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
||||
"""Compensation growth vs CPI growth, indexed to first available year = 100."""
|
||||
"""Compensation growth vs CPI growth, indexed to first available year = 100.
|
||||
|
||||
Includes top earner, top-10 aggregate, and CPI-U.
|
||||
"""
|
||||
return conn.execute("""
|
||||
WITH yearly_max_comp AS (
|
||||
SELECT tax_year, MAX(total_compensation) AS top_comp
|
||||
WITH ranked AS (
|
||||
SELECT tax_year, total_compensation,
|
||||
ROW_NUMBER() OVER (PARTITION BY tax_year
|
||||
ORDER BY total_compensation DESC) AS rn
|
||||
FROM raw_990_schedule_j
|
||||
WHERE total_compensation > 0
|
||||
),
|
||||
yearly_comp AS (
|
||||
SELECT tax_year,
|
||||
MAX(total_compensation) AS top_comp,
|
||||
SUM(CASE WHEN rn <= 10 THEN total_compensation END) AS agg_comp
|
||||
FROM ranked
|
||||
GROUP BY tax_year
|
||||
),
|
||||
annual_cpi AS (
|
||||
|
|
@ -174,20 +296,24 @@ def query_comp_vs_cpi(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
|||
FROM raw_cpi_u GROUP BY year
|
||||
),
|
||||
base AS (
|
||||
SELECT c.top_comp AS base_comp, ac.avg_cpi AS base_cpi
|
||||
FROM yearly_max_comp c
|
||||
SELECT c.top_comp AS base_top, c.agg_comp AS base_agg,
|
||||
ac.avg_cpi AS base_cpi
|
||||
FROM yearly_comp c
|
||||
JOIN annual_cpi ac ON ac.year = c.tax_year
|
||||
ORDER BY c.tax_year LIMIT 1
|
||||
)
|
||||
SELECT
|
||||
c.tax_year AS year,
|
||||
c.top_comp,
|
||||
c.agg_comp,
|
||||
ac.avg_cpi,
|
||||
ROUND(c.top_comp * 100.0 / NULLIF((SELECT base_comp FROM base), 0), 1)
|
||||
ROUND(c.top_comp * 100.0 / NULLIF((SELECT base_top FROM base), 0), 1)
|
||||
AS comp_index,
|
||||
ROUND(c.agg_comp * 100.0 / NULLIF((SELECT base_agg FROM base), 0), 1)
|
||||
AS agg_index,
|
||||
ROUND(ac.avg_cpi * 100.0 / NULLIF((SELECT base_cpi FROM base), 0), 1)
|
||||
AS cpi_index
|
||||
FROM yearly_max_comp c
|
||||
FROM yearly_comp c
|
||||
JOIN annual_cpi ac ON ac.year = c.tax_year
|
||||
ORDER BY year
|
||||
""").pl()
|
||||
|
|
@ -249,6 +375,166 @@ def query_growth_index(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
|||
""", [UD_UNITID, UD_UNITID]).pl()
|
||||
|
||||
|
||||
def query_endowment(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Yearly endowment figures for the institution identified by ``UD_UNITID``.

    Returns:
        Rows from ``raw_ipeds_endowment`` ordered by ``year`` with the
        columns ``year``, ``endowment_boy``, ``endowment_eoy``, ``new_gifts``,
        ``net_investment_return``, ``other_changes`` and
        ``long_term_investments``.
    """
    sql = """
        SELECT year, endowment_boy, endowment_eoy, new_gifts,
               net_investment_return, other_changes, long_term_investments
        FROM raw_ipeds_endowment
        WHERE unitid = ?
        ORDER BY year
    """
    params = [UD_UNITID]
    result = conn.execute(sql, params)
    return result.pl()
|
||||
|
||||
|
||||
def query_endowment_per_student(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """End-of-year endowment divided by total enrollment, per year.

    Endowment rows are inner-joined to enrollment rows on ``unitid`` and
    ``year``, so only years present in both tables appear.

    Returns:
        Rows ordered by ``year`` with ``year``, ``endowment_eoy``,
        ``total_enrollment`` and ``endowment_per_student`` (rounded to a
        whole number; NULL when enrollment is zero because of ``NULLIF``).
    """
    sql = """
        SELECT e.year, e.endowment_eoy, en.total_enrollment,
               ROUND(e.endowment_eoy * 1.0 / NULLIF(en.total_enrollment, 0), 0)
                   AS endowment_per_student
        FROM raw_ipeds_endowment e
        JOIN raw_ipeds_enrollment en ON en.unitid = e.unitid AND en.year = e.year
        WHERE e.unitid = ?
        ORDER BY e.year
    """
    params = [UD_UNITID]
    result = conn.execute(sql, params)
    return result.pl()
|
||||
|
||||
|
||||
def query_cio_vs_endowment(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Index Chief Investment Officer pay against end-of-year endowment value.

    Schedule J titles are normalized with ``normalize_title``; the highest
    compensation per tax year among rows with role
    ``"CHIEF_INVESTMENT_OFFICER"`` is inner-joined to the endowment series
    for ``UD_UNITID`` on year.

    Returns:
        An empty ``pl.DataFrame()`` when there are no Schedule J rows or no
        CIO rows. Otherwise the joined frame sorted by ``tax_year``; when it
        has at least two rows it also carries ``cio_index`` and
        ``endowment_index``, each scaled so the first row equals 100 and
        rounded to one decimal.
    """
    comp_rows = conn.execute("""
        SELECT j.tax_year, j.title, j.total_compensation
        FROM raw_990_schedule_j j
        WHERE j.total_compensation > 0
    """).pl()
    if comp_rows.height == 0:
        return pl.DataFrame()

    role_expr = pl.col("title").map_elements(
        normalize_title, return_dtype=pl.Utf8
    ).alias("role")

    cio_by_year = (
        comp_rows.with_columns(role_expr)
        .filter(pl.col("role") == "CHIEF_INVESTMENT_OFFICER")
        .group_by("tax_year")
        .agg(pl.col("total_compensation").max().alias("cio_comp"))
        .sort("tax_year")
    )
    if cio_by_year.height == 0:
        return pl.DataFrame()

    endowment = conn.execute("""
        SELECT year, endowment_eoy
        FROM raw_ipeds_endowment
        WHERE unitid = ?
        ORDER BY year
    """, [UD_UNITID]).pl()

    joined = cio_by_year.join(
        endowment, left_on="tax_year", right_on="year", how="inner"
    )
    joined = joined.drop_nulls(subset=["cio_comp", "endowment_eoy"])
    joined = joined.sort("tax_year")

    # With fewer than two points there is nothing to index against.
    if joined.height < 2:
        return joined

    def _indexed(column: str, alias: str) -> pl.Expr:
        # Scale the column so that its first (earliest) value becomes 100.
        base = float(joined[column][0])
        return (pl.col(column).cast(pl.Float64) * 100.0 / base).round(1).alias(alias)

    return joined.with_columns(
        _indexed("cio_comp", "cio_index"),
        _indexed("endowment_eoy", "endowment_index"),
    )
|
||||
|
||||
|
||||
def query_philanthropy(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """Private gifts per year, in nominal and CPI-adjusted terms.

    The query is prefixed with ``_CPI_CTE``, which presumably defines the
    ``annual_cpi`` and ``latest_cpi`` relations referenced below (defined
    elsewhere in the module; verify there).

    Returns:
        Rows for ``UD_UNITID`` ordered by ``year`` with ``year``,
        ``total_private_gifts``, ``endowment_gifts`` (the ``new_gifts``
        column) and ``gifts_cpi_adjusted`` (gifts scaled by latest CPI over
        that year's CPI, rounded to a whole number). CPI is LEFT JOINed, so
        years without a CPI row are still returned.
    """
    sql = f"""
        {_CPI_CTE}
        SELECT e.year, e.total_private_gifts, e.new_gifts AS endowment_gifts,
               ROUND(e.total_private_gifts * (SELECT avg_cpi FROM latest_cpi)
                     / ac.avg_cpi, 0) AS gifts_cpi_adjusted
        FROM raw_ipeds_endowment e
        LEFT JOIN annual_cpi ac ON ac.year = e.year
        WHERE e.unitid = ?
        ORDER BY e.year
    """
    params = [UD_UNITID]
    result = conn.execute(sql, params)
    return result.pl()
|
||||
|
||||
|
||||
def query_comp_vs_philanthropy(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
    """VP Advancement and President comp vs philanthropic gifts, indexed.

    Schedule J titles are normalized with ``normalize_title``. For the roles
    ``"PRESIDENT"`` and ``"VP_ADVANCEMENT"`` the highest compensation per tax
    year is taken, the two series are outer-joined on ``tax_year``, and the
    result is inner-joined to ``total_private_gifts`` for ``UD_UNITID``.

    Returns:
        An empty ``pl.DataFrame()`` when there are no Schedule J rows or no
        rows for either role. Otherwise a frame sorted by ``tax_year`` with
        ``president_comp``, ``vp_adv_comp`` and ``total_private_gifts``; when
        it has at least two rows it also carries ``president_index``,
        ``vp_adv_index`` and ``gifts_index``. Each index is scaled so the
        first non-null value of its series equals 100 and is rounded to one
        decimal. A series with no non-null values gets an all-null index
        column.
    """
    raw = conn.execute("""
        SELECT j.tax_year, j.title, j.total_compensation
        FROM raw_990_schedule_j j
        WHERE j.total_compensation > 0
    """).pl()

    if raw.height == 0:
        return pl.DataFrame()

    raw = raw.with_columns(
        pl.col("title").map_elements(
            normalize_title, return_dtype=pl.Utf8
        ).alias("role")
    )

    # Get max comp per role per year for President and VP Advancement
    roles = raw.filter(pl.col("role").is_in(["PRESIDENT", "VP_ADVANCEMENT"]))
    if roles.height == 0:
        return pl.DataFrame()

    pivoted = (
        roles.group_by(["tax_year", "role"])
        .agg(pl.col("total_compensation").max().alias("comp"))
        .sort("tax_year")
    )

    pres = (
        pivoted.filter(pl.col("role") == "PRESIDENT")
        .select(pl.col("tax_year"), pl.col("comp").alias("president_comp"))
    )
    vp = (
        pivoted.filter(pl.col("role") == "VP_ADVANCEMENT")
        .select(pl.col("tax_year"), pl.col("comp").alias("vp_adv_comp"))
    )

    gifts = conn.execute("""
        SELECT year, total_private_gifts
        FROM raw_ipeds_endowment
        WHERE unitid = ?
        ORDER BY year
    """, [UD_UNITID]).pl()

    # Join all three on year
    merged = (
        pres.join(vp, on="tax_year", how="outer_coalesce")
        .join(gifts, left_on="tax_year", right_on="year", how="inner")
        .drop_nulls(subset=["total_private_gifts"])
        .sort("tax_year")
    )

    if merged.height < 2:
        return merged

    def _index_expr(column: str, alias: str) -> pl.Expr:
        # The outer join can leave one role entirely null (only one of the
        # two roles was reported). Indexing an empty series with [0] raises,
        # so fall back to an all-null index column in that case.
        non_null = merged[column].drop_nulls()
        if non_null.len() == 0:
            return pl.lit(None, dtype=pl.Float64).alias(alias)
        base = float(non_null[0])
        return (pl.col(column).cast(pl.Float64) * 100.0 / base).round(1).alias(alias)

    merged = merged.with_columns(
        _index_expr("president_comp", "president_index"),
        _index_expr("vp_adv_comp", "vp_adv_index"),
        _index_expr("total_private_gifts", "gifts_index"),
    )

    return merged
|
||||
|
||||
|
||||
def query_admin_headcount(conn: duckdb.DuckDBPyConnection) -> pl.DataFrame:
|
||||
"""All scraped admin headcount entries."""
|
||||
return conn.execute("""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue