- Add IPEDS F2H03C (spending distribution for current use) to endowment schema, loader, queries, and dashboard - Endowment tab now shows spend rate alongside investment return rate - Move planning docs to planning/ directory (gitignored) - Update data dictionary Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
151 lines
4.9 KiB
Python
151 lines
4.9 KiB
Python
import duckdb
|
|
|
|
# DDL scripts for every raw table, keyed by table name.
#
# Each value is one or more ";"-separated SQL statements. Tables that use a
# surrogate ``id`` key create the sequence that feeds it before the table.
# Every statement is written with IF NOT EXISTS so the scripts can be re-run
# against a database that already contains the objects.
# NOTE(review): IF NOT EXISTS leaves an existing table untouched, so a column
# added to a script here will not appear in a database created before the
# change -- confirm how existing databases are migrated.
TABLES = {
    # Institution attributes, one row per (unitid, year).
    "raw_institution": """
        CREATE TABLE IF NOT EXISTS raw_institution (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            ein VARCHAR,
            institution_name VARCHAR,
            city VARCHAR,
            state VARCHAR,
            sector INTEGER,
            control INTEGER,
            carnegie_class INTEGER,
            enrollment_total INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    # Expense totals by category plus salaries/benefits, one row per
    # (unitid, year).
    "raw_ipeds_finance": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_finance (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            reporting_standard VARCHAR,
            total_expenses BIGINT,
            instruction_expenses BIGINT,
            research_expenses BIGINT,
            public_service_expenses BIGINT,
            academic_support_expenses BIGINT,
            student_services_expenses BIGINT,
            institutional_support_expenses BIGINT,
            auxiliary_expenses BIGINT,
            hospital_expenses BIGINT,
            other_expenses BIGINT,
            salaries_wages BIGINT,
            benefits BIGINT,
            PRIMARY KEY (unitid, year)
        )
    """,
    # Staff totals (all staff, faculty, management), one row per
    # (unitid, year).
    "raw_ipeds_staff": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_staff (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            total_staff INTEGER,
            faculty_total INTEGER,
            management_total INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    # Total enrollment, one row per (unitid, year).
    "raw_ipeds_enrollment": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_enrollment (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            total_enrollment INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    # One row per 990 filing, keyed by the filing's object_id.
    "raw_990_filing": """
        CREATE TABLE IF NOT EXISTS raw_990_filing (
            object_id VARCHAR PRIMARY KEY,
            ein VARCHAR,
            tax_year INTEGER,
            organization_name VARCHAR,
            return_type VARCHAR,
            filing_date DATE,
            total_revenue BIGINT,
            total_expenses BIGINT,
            total_assets BIGINT
        )
    """,
    # Per-person compensation rows; ``id`` is drawn from
    # seq_990_schedule_j, which is created first.
    "raw_990_schedule_j": """
        CREATE SEQUENCE IF NOT EXISTS seq_990_schedule_j START 1;
        CREATE TABLE IF NOT EXISTS raw_990_schedule_j (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_schedule_j'),
            object_id VARCHAR,
            ein VARCHAR,
            tax_year INTEGER,
            person_name VARCHAR,
            title VARCHAR,
            base_compensation BIGINT,
            bonus_compensation BIGINT,
            other_compensation BIGINT,
            deferred_compensation BIGINT,
            nontaxable_benefits BIGINT,
            total_compensation BIGINT,
            compensation_from_related BIGINT
        )
    """,
    # Per-person rows with weekly hours and reportable compensation;
    # ``id`` is drawn from seq_990_part_vii, which is created first.
    "raw_990_part_vii": """
        CREATE SEQUENCE IF NOT EXISTS seq_990_part_vii START 1;
        CREATE TABLE IF NOT EXISTS raw_990_part_vii (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_part_vii'),
            object_id VARCHAR,
            ein VARCHAR,
            tax_year INTEGER,
            person_name VARCHAR,
            title VARCHAR,
            avg_hours_per_week DOUBLE,
            reportable_comp_from_org BIGINT,
            reportable_comp_from_related BIGINT,
            other_compensation BIGINT
        )
    """,
    # Endowment balances and flows, one row per (unitid, year).
    # spending_distribution holds IPEDS F2H03C (spending distribution for
    # current use).
    "raw_ipeds_endowment": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_endowment (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            endowment_boy BIGINT,
            endowment_eoy BIGINT,
            new_gifts BIGINT,
            net_investment_return BIGINT,
            spending_distribution BIGINT,
            other_changes BIGINT,
            total_private_gifts BIGINT,
            total_investment_return BIGINT,
            long_term_investments BIGINT,
            PRIMARY KEY (unitid, year)
        )
    """,
    # One index value per (year, month), tagged with its series_id.
    "raw_cpi_u": """
        CREATE TABLE IF NOT EXISTS raw_cpi_u (
            year INTEGER NOT NULL,
            month INTEGER NOT NULL,
            value DOUBLE,
            series_id VARCHAR,
            PRIMARY KEY (year, month)
        )
    """,
    # Per-person rows recorded per scrape_date and unit; ``id`` is drawn
    # from seq_admin_headcount, which is created first.
    "raw_admin_headcount": """
        CREATE SEQUENCE IF NOT EXISTS seq_admin_headcount START 1;
        CREATE TABLE IF NOT EXISTS raw_admin_headcount (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_admin_headcount'),
            scrape_date DATE NOT NULL,
            unit VARCHAR NOT NULL,
            person_name VARCHAR,
            title VARCHAR,
            email VARCHAR,
            category VARCHAR,
            is_overhead BOOLEAN
        )
    """,
}
|
|
|
|
|
|
def ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Run every DDL script in ``TABLES`` on *conn*.

    Scripts are processed in the dict's iteration order. Each one is split on
    ``;`` and the resulting pieces are executed one at a time, in order, with
    surrounding whitespace removed and empty pieces skipped.

    Args:
        conn: Connection whose ``execute`` method receives each statement.
    """
    for script in TABLES.values():
        # A script may bundle several statements (e.g. CREATE SEQUENCE
        # followed by CREATE TABLE), so each piece is sent separately.
        pieces = (piece.strip() for piece in script.split(";"))
        # filter(None, ...) drops the empty strings left by a trailing ";"
        # or by whitespace-only fragments.
        for statement in filter(None, pieces):
            conn.execute(statement)
|