Compensation, endowment tweaks. Added About.

2026-03-31 08:03:58 -04:00 · 2026-03-31 08:03:58 -04:00 · 13fb4b8418
commit 13fb4b8418
parent a41f78545b
13 changed files with 914 additions and 17 deletions
--- a/src/admin_analytics/ipeds/finance.py
+++ b/src/admin_analytics/ipeds/finance.py
@ -40,6 +40,25 @@ F2_COLUMN_VARIANTS = {
    "benefits": ["F2E133"],
 }

+# F2 endowment / philanthropy fields: canonical name -> candidate source column names
+F2_ENDOWMENT_VARIANTS = {
+    "unitid": ["UNITID"],
+    "endowment_boy": ["F2H01"],
+    "endowment_eoy": ["F2H02"],
+    "new_gifts": ["F2H03A"],
+    "net_investment_return": ["F2H03B"],
+    "other_changes": ["F2H03D"],
+    "total_private_gifts": ["F2D08"],
+    "total_investment_return": ["F2D10"],
+    "long_term_investments": ["F2A01"],
+}
+
+ENDOWMENT_COLUMNS = [  # column order used by load_endowment's final select before the INSERT
+    "unitid", "year", "endowment_boy", "endowment_eoy", "new_gifts",
+    "net_investment_return", "other_changes", "total_private_gifts",
+    "total_investment_return", "long_term_investments",
+]
+
 CANONICAL_COLUMNS = [
    "unitid", "year", "reporting_standard", "total_expenses",
    "instruction_expenses", "research_expenses", "public_service_expenses",
@ -56,7 +75,7 @@ def _find_csv(component_dir: Path) -> Path | None:

 def _resolve_columns(df: pl.DataFrame, variants: dict) -> dict[str, str]:
    """For each canonical name, find the first matching column."""
-    upper_cols = {c.upper(): c for c in df.columns}
+    upper_cols = {c.strip().upper(): c for c in df.columns}
    resolved = {}
    for canonical, candidates in variants.items():
        for var in candidates:
@ -140,3 +159,49 @@ def load_finance(
            print(f"  No finance CSV found for {year}, skipping")

    return total
+
+
+def load_endowment(
+    conn: duckdb.DuckDBPyConnection,
+    year_range: range,
+    unitid_filter: int | None = UD_UNITID,  # None disables the per-institution filter
+) -> int:  # returns the number of rows inserted, summed over all years
+    """Load IPEDS F2 endowment and philanthropy data into raw_ipeds_endowment."""
+    total = 0
+    for year in year_range:
+        f2_dir = config.IPEDS_DATA_DIR / "finance_f2" / str(year)
+        csv_path = _find_csv(f2_dir)
+        if csv_path is None:
+            continue  # no CSV for this year: skipped without a message
+
+        df = pl.read_csv(csv_path, infer_schema_length=0, encoding="utf8-lossy")  # infer_schema_length=0: all columns read as strings
+        col_map = _resolve_columns(df, F2_ENDOWMENT_VARIANTS)
+
+        if "unitid" not in col_map:
+            continue  # no UNITID column resolved: rows cannot be attributed, skip the year
+
+        result = pl.DataFrame({
+            canonical: df[actual] for canonical, actual in col_map.items()
+        })
+        result = result.with_columns(pl.lit(year).alias("year"))  # tag every row with the year being loaded
+
+        for col in ENDOWMENT_COLUMNS:
+            if col not in result.columns:
+                result = result.with_columns(pl.lit(None).alias(col))  # source column absent this year: all-null column
+            elif col not in ("year",):
+                result = result.with_columns(pl.col(col).cast(pl.Int64, strict=False))  # strict=False: unparseable values become null
+
+        if unitid_filter is not None:
+            result = result.filter(pl.col("unitid") == unitid_filter)
+
+        if result.height == 0:
+            continue  # nothing to insert: existing rows for this year are left untouched
+
+        result = result.select(ENDOWMENT_COLUMNS)  # fix column order; the INSERT below uses SELECT *
+        conn.execute("DELETE FROM raw_ipeds_endowment WHERE year = ?", [year])  # NOTE(review): removes every row for the year, not only unitid_filter's
+        conn.register("_tmp_endow", result.to_arrow())  # expose the Arrow table to SQL under a temporary name
+        conn.execute("INSERT INTO raw_ipeds_endowment SELECT * FROM _tmp_endow")
+        conn.unregister("_tmp_endow")
+        total += result.height
+
+    return total