AdminAnalytics/src/admin_analytics/db/schema.py
emfurst a766f6ff0d Add endowment spending distribution, move planning docs to private
- Add IPEDS F2H03C (spending distribution for current use) to endowment schema, loader, queries, and dashboard
- Endowment tab now shows spend rate alongside investment return rate
- Move planning docs to planning/ directory (gitignored)
- Update data dictionary

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 07:27:57 -04:00

151 lines
4.9 KiB
Python

import duckdb
# Registry of raw-layer tables: maps each table name to the DDL script that
# creates it. Every statement uses IF NOT EXISTS, so a script can be run
# against a database that already has the object.
#
# A script may hold more than one statement separated by ";" (a CREATE SEQUENCE
# followed by the CREATE TABLE that uses it). ensure_schema() splits scripts on
# ";" with plain str.split, so no statement here may contain a literal
# semicolon (e.g. inside a quoted default or a SQL comment).
TABLES = {
# One row per (unitid, year): descriptive attributes of an institution.
"raw_institution": """
CREATE TABLE IF NOT EXISTS raw_institution (
unitid INTEGER NOT NULL,
year INTEGER NOT NULL,
ein VARCHAR,
institution_name VARCHAR,
city VARCHAR,
state VARCHAR,
sector INTEGER,
control INTEGER,
carnegie_class INTEGER,
enrollment_total INTEGER,
PRIMARY KEY (unitid, year)
)
""",
# One row per (unitid, year): expense totals by category, plus salaries/wages
# and benefits, all stored as BIGINT.
# NOTE(review): currency and scale of the amounts are not stated here — confirm
# against the loader.
"raw_ipeds_finance": """
CREATE TABLE IF NOT EXISTS raw_ipeds_finance (
unitid INTEGER NOT NULL,
year INTEGER NOT NULL,
reporting_standard VARCHAR,
total_expenses BIGINT,
instruction_expenses BIGINT,
research_expenses BIGINT,
public_service_expenses BIGINT,
academic_support_expenses BIGINT,
student_services_expenses BIGINT,
institutional_support_expenses BIGINT,
auxiliary_expenses BIGINT,
hospital_expenses BIGINT,
other_expenses BIGINT,
salaries_wages BIGINT,
benefits BIGINT,
PRIMARY KEY (unitid, year)
)
""",
# One row per (unitid, year): staff headcounts (total, faculty, management).
"raw_ipeds_staff": """
CREATE TABLE IF NOT EXISTS raw_ipeds_staff (
unitid INTEGER NOT NULL,
year INTEGER NOT NULL,
total_staff INTEGER,
faculty_total INTEGER,
management_total INTEGER,
PRIMARY KEY (unitid, year)
)
""",
# One row per (unitid, year): a single total enrollment figure.
"raw_ipeds_enrollment": """
CREATE TABLE IF NOT EXISTS raw_ipeds_enrollment (
unitid INTEGER NOT NULL,
year INTEGER NOT NULL,
total_enrollment INTEGER,
PRIMARY KEY (unitid, year)
)
""",
# One row per filing, keyed by object_id. ein and tax_year are nullable and
# carry no uniqueness constraint.
"raw_990_filing": """
CREATE TABLE IF NOT EXISTS raw_990_filing (
object_id VARCHAR PRIMARY KEY,
ein VARCHAR,
tax_year INTEGER,
organization_name VARCHAR,
return_type VARCHAR,
filing_date DATE,
total_revenue BIGINT,
total_expenses BIGINT,
total_assets BIGINT
)
""",
# Per-person compensation rows. The primary key is a surrogate id drawn from
# seq_990_schedule_j; object_id is a plain column with no declared foreign key
# and there is no uniqueness constraint on the person/filing columns.
"raw_990_schedule_j": """
CREATE SEQUENCE IF NOT EXISTS seq_990_schedule_j START 1;
CREATE TABLE IF NOT EXISTS raw_990_schedule_j (
id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_schedule_j'),
object_id VARCHAR,
ein VARCHAR,
tax_year INTEGER,
person_name VARCHAR,
title VARCHAR,
base_compensation BIGINT,
bonus_compensation BIGINT,
other_compensation BIGINT,
deferred_compensation BIGINT,
nontaxable_benefits BIGINT,
total_compensation BIGINT,
compensation_from_related BIGINT
)
""",
# Per-person rows with hours and reportable compensation. Same shape of key as
# raw_990_schedule_j: surrogate id from seq_990_part_vii, no declared foreign
# key or uniqueness constraint.
"raw_990_part_vii": """
CREATE SEQUENCE IF NOT EXISTS seq_990_part_vii START 1;
CREATE TABLE IF NOT EXISTS raw_990_part_vii (
id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_part_vii'),
object_id VARCHAR,
ein VARCHAR,
tax_year INTEGER,
person_name VARCHAR,
title VARCHAR,
avg_hours_per_week DOUBLE,
reportable_comp_from_org BIGINT,
reportable_comp_from_related BIGINT,
other_compensation BIGINT
)
""",
# One row per (unitid, year): endowment balances and the flows between them.
# NOTE(review): CREATE TABLE IF NOT EXISTS does not add columns to a table that
# already exists, so a database created before a column was introduced here
# will not gain it from this script — confirm how existing databases are
# migrated.
"raw_ipeds_endowment": """
CREATE TABLE IF NOT EXISTS raw_ipeds_endowment (
unitid INTEGER NOT NULL,
year INTEGER NOT NULL,
endowment_boy BIGINT,
endowment_eoy BIGINT,
new_gifts BIGINT,
net_investment_return BIGINT,
spending_distribution BIGINT,
other_changes BIGINT,
total_private_gifts BIGINT,
total_investment_return BIGINT,
long_term_investments BIGINT,
PRIMARY KEY (unitid, year)
)
""",
# One row per (year, month). series_id is not part of the primary key, so the
# table can hold only one series' value for any given month.
"raw_cpi_u": """
CREATE TABLE IF NOT EXISTS raw_cpi_u (
year INTEGER NOT NULL,
month INTEGER NOT NULL,
value DOUBLE,
series_id VARCHAR,
PRIMARY KEY (year, month)
)
""",
# One row per person listed for a unit on a given scrape_date. The primary key
# is a surrogate id from seq_admin_headcount; only scrape_date and unit are
# required.
"raw_admin_headcount": """
CREATE SEQUENCE IF NOT EXISTS seq_admin_headcount START 1;
CREATE TABLE IF NOT EXISTS raw_admin_headcount (
id INTEGER PRIMARY KEY DEFAULT nextval('seq_admin_headcount'),
scrape_date DATE NOT NULL,
unit VARCHAR NOT NULL,
person_name VARCHAR,
title VARCHAR,
email VARCHAR,
category VARCHAR,
is_overhead BOOLEAN
)
""",
}
def ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Run every DDL script in ``TABLES`` against ``conn``.

    Scripts are processed in the order they appear in ``TABLES``. Each script
    is split on ``;`` and its non-blank pieces are executed one at a time,
    since a single script may bundle several statements (for example a
    ``CREATE SEQUENCE`` followed by the ``CREATE TABLE`` that uses it).
    """
    for script in TABLES.values():
        # Lazily trim each ";"-separated piece; filter(None, ...) then drops
        # the empty strings left by trailing semicolons and blank lines.
        pieces = (piece.strip() for piece in script.split(";"))
        for statement in filter(None, pieces):
            conn.execute(statement)