Initial build out
This commit is contained in:
parent
f037c50736
commit
29215e2bd2
40 changed files with 2622 additions and 0 deletions
122
src/admin_analytics/db/schema.py
Normal file
122
src/admin_analytics/db/schema.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import duckdb
|
||||
|
||||
# Raw-layer DDL, keyed by table name.
#
# Each value holds one or more SQL statements separated by ";". Blocks for
# tables with a surrogate key contain a CREATE SEQUENCE followed by the
# CREATE TABLE that uses it. ensure_schema() splits each block on ";" before
# executing, so statements must not contain a literal semicolon anywhere else.
# Every statement uses IF NOT EXISTS.
TABLES = {
    "raw_institution": """
        CREATE TABLE IF NOT EXISTS raw_institution (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            ein VARCHAR,
            institution_name VARCHAR,
            city VARCHAR,
            state VARCHAR,
            sector INTEGER,
            control INTEGER,
            carnegie_class INTEGER,
            enrollment_total INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    "raw_ipeds_finance": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_finance (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            reporting_standard VARCHAR,
            total_expenses BIGINT,
            instruction_expenses BIGINT,
            research_expenses BIGINT,
            public_service_expenses BIGINT,
            academic_support_expenses BIGINT,
            student_services_expenses BIGINT,
            institutional_support_expenses BIGINT,
            auxiliary_expenses BIGINT,
            hospital_expenses BIGINT,
            other_expenses BIGINT,
            salaries_wages BIGINT,
            benefits BIGINT,
            PRIMARY KEY (unitid, year)
        )
    """,
    "raw_ipeds_staff": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_staff (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            total_staff INTEGER,
            faculty_total INTEGER,
            management_total INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    "raw_ipeds_enrollment": """
        CREATE TABLE IF NOT EXISTS raw_ipeds_enrollment (
            unitid INTEGER NOT NULL,
            year INTEGER NOT NULL,
            total_enrollment INTEGER,
            PRIMARY KEY (unitid, year)
        )
    """,
    "raw_990_filing": """
        CREATE TABLE IF NOT EXISTS raw_990_filing (
            object_id VARCHAR PRIMARY KEY,
            ein VARCHAR,
            tax_year INTEGER,
            organization_name VARCHAR,
            return_type VARCHAR,
            filing_date DATE,
            total_revenue BIGINT,
            total_expenses BIGINT,
            total_assets BIGINT
        )
    """,
    # Surrogate integer key fed by a sequence; the sequence must be created
    # before the table whose DEFAULT references it.
    "raw_990_schedule_j": """
        CREATE SEQUENCE IF NOT EXISTS seq_990_schedule_j START 1;
        CREATE TABLE IF NOT EXISTS raw_990_schedule_j (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_schedule_j'),
            object_id VARCHAR,
            ein VARCHAR,
            tax_year INTEGER,
            person_name VARCHAR,
            title VARCHAR,
            base_compensation BIGINT,
            bonus_compensation BIGINT,
            other_compensation BIGINT,
            deferred_compensation BIGINT,
            nontaxable_benefits BIGINT,
            total_compensation BIGINT,
            compensation_from_related BIGINT
        )
    """,
    # Same sequence-backed surrogate key pattern as raw_990_schedule_j.
    "raw_990_part_vii": """
        CREATE SEQUENCE IF NOT EXISTS seq_990_part_vii START 1;
        CREATE TABLE IF NOT EXISTS raw_990_part_vii (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_990_part_vii'),
            object_id VARCHAR,
            ein VARCHAR,
            tax_year INTEGER,
            person_name VARCHAR,
            title VARCHAR,
            avg_hours_per_week DOUBLE,
            reportable_comp_from_org BIGINT,
            reportable_comp_from_related BIGINT,
            other_compensation BIGINT
        )
    """,
    "raw_cpi_u": """
        CREATE TABLE IF NOT EXISTS raw_cpi_u (
            year INTEGER NOT NULL,
            month INTEGER NOT NULL,
            value DOUBLE,
            series_id VARCHAR,
            PRIMARY KEY (year, month)
        )
    """,
}
|
||||
|
||||
|
||||
def ensure_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Execute every DDL block in ``TABLES`` against *conn*.

    Each block is split on ``;`` and its non-empty statements are executed
    one at a time, in the order they appear.

    Args:
        conn: Open DuckDB connection on which the DDL is executed.
    """
    for block in TABLES.values():
        # A block may bundle several statements (e.g. CREATE SEQUENCE followed
        # by CREATE TABLE), so break it apart before handing it to DuckDB.
        pieces = (piece.strip() for piece in block.split(";"))
        # filter(None, ...) drops the empty fragments left by trailing
        # semicolons or surrounding whitespace.
        for sql in filter(None, pieces):
            conn.execute(sql)
|
||||
Loading…
Add table
Add a link
Reference in a new issue