AdminAnalytics/tests/test_dashboard_queries.py

"""Tests for dashboard query functions."""

from admin_analytics.config import UD_UNITID
from admin_analytics.dashboard.queries import (
    query_admin_cost_ratio,
    query_expense_breakdown,
    query_admin_per_student,
    query_admin_faculty_ratio,
    query_top_earners,
    query_comp_by_role,
    query_comp_vs_cpi,
    query_staff_composition,
    query_student_staff_ratios,
    query_growth_index,
    query_admin_headcount,
    query_headcount_summary,
)


def _seed_ipeds(conn):
    """Insert minimal IPEDS data for 2 years."""
    for year, inst_support, total in [(2020, 100_000, 1_000_000), (2021, 120_000, 1_100_000)]:
        conn.execute(
            "INSERT INTO raw_ipeds_finance (unitid, year, institutional_support_expenses, total_expenses) VALUES (?, ?, ?, ?)",
            [UD_UNITID, year, inst_support, total],
        )
    for year, enrollment in [(2020, 20000), (2021, 21000)]:
        conn.execute(
            "INSERT INTO raw_ipeds_enrollment (unitid, year, total_enrollment) VALUES (?, ?, ?)",
            [UD_UNITID, year, enrollment],
        )
    for year, total, faculty, mgmt in [(2020, 3000, 1500, 500), (2021, 3100, 1550, 520)]:
        conn.execute(
            "INSERT INTO raw_ipeds_staff (unitid, year, total_staff, faculty_total, management_total) VALUES (?, ?, ?, ?, ?)",
            [UD_UNITID, year, total, faculty, mgmt],
        )


def _seed_cpi(conn):
    """Insert CPI data for 2020-2021."""
    for year, value in [(2020, 258.8), (2021, 270.9)]:
        for month in range(1, 13):
            conn.execute(
                "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (?, ?, ?, ?)",
                [year, month, value, "CUUR0000SA0"],
            )


def _seed_990(conn):
    """Insert minimal 990 filing and Schedule J data."""
    conn.execute(
        "INSERT INTO raw_990_filing (object_id, ein, tax_year, organization_name, total_revenue, total_expenses) "
        "VALUES ('obj1', '516000297', 2021, 'UD Foundation', 50000000, 40000000)"
    )
    conn.execute(
        "INSERT INTO raw_990_schedule_j (object_id, ein, tax_year, person_name, title, "
        "base_compensation, bonus_compensation, other_compensation, deferred_compensation, "
        "nontaxable_benefits, total_compensation) "
        "VALUES ('obj1', '516000297', 2021, 'JOHN DOE', 'PRESIDENT', "
        "500000, 100000, 10000, 20000, 15000, 645000)"
    )
    conn.execute(
        "INSERT INTO raw_990_schedule_j (object_id, ein, tax_year, person_name, title, "
        "base_compensation, bonus_compensation, other_compensation, deferred_compensation, "
        "nontaxable_benefits, total_compensation) "
        "VALUES ('obj1', '516000297', 2021, 'JANE SMITH', 'PROVOST', "
        "400000, 50000, 5000, 15000, 10000, 480000)"
    )


class TestEmptyDatabase:
    def test_admin_cost_ratio_empty(self, db_conn):
        df = query_admin_cost_ratio(db_conn)
        assert df.height == 0

    def test_top_earners_empty(self, db_conn):
        df = query_top_earners(db_conn)
        assert df.height == 0

    def test_headcount_empty(self, db_conn):
        df = query_admin_headcount(db_conn)
        assert df.height == 0


class TestAdminCostRatio:
    def test_returns_correct_ratio(self, db_conn):
        _seed_ipeds(db_conn)
        _seed_cpi(db_conn)
        df = query_admin_cost_ratio(db_conn)
        assert df.height == 2
        # 2020: 100000 / 1000000 = 10%
        row_2020 = df.filter(df["year"] == 2020)
        assert row_2020["admin_cost_pct"][0] == 10.0


class TestAdminPerStudent:
    def test_returns_per_student(self, db_conn):
        _seed_ipeds(db_conn)
        _seed_cpi(db_conn)
        df = query_admin_per_student(db_conn)
        assert df.height == 2
        # 2020: 100000 / 20000 = 5
        row_2020 = df.filter(df["year"] == 2020)
        assert row_2020["admin_per_student"][0] == 5.0


class TestAdminFacultyRatio:
    def test_returns_ratio(self, db_conn):
        _seed_ipeds(db_conn)
        df = query_admin_faculty_ratio(db_conn)
        assert df.height == 2
        # 2020: 500 / 1500 = 0.333
        row_2020 = df.filter(df["year"] == 2020)
        assert row_2020["admin_faculty_ratio"][0] == 0.333


class TestTopEarners:
    def test_returns_all(self, db_conn):
        _seed_990(db_conn)
        df = query_top_earners(db_conn)
        assert df.height == 2
        assert "canonical_role" in df.columns

    def test_filter_by_year(self, db_conn):
        _seed_990(db_conn)
        df = query_top_earners(db_conn, year=2021)
        assert df.height == 2
        df_empty = query_top_earners(db_conn, year=2019)
        assert df_empty.height == 0


class TestCompByRole:
    def test_groups_by_role(self, db_conn):
        _seed_990(db_conn)
        df = query_comp_by_role(db_conn)
        roles = df["canonical_role"].to_list()
        assert "PRESIDENT" in roles
        assert "PROVOST" in roles


class TestCompVsCpi:
    def test_returns_indexed(self, db_conn):
        _seed_990(db_conn)
        _seed_cpi(db_conn)
        df = query_comp_vs_cpi(db_conn)
        assert df.height > 0
        assert "comp_index" in df.columns
        assert "cpi_index" in df.columns


class TestStaffComposition:
    def test_computes_other(self, db_conn):
        _seed_ipeds(db_conn)
        df = query_staff_composition(db_conn)
        assert df.height == 2
        # 2020: 3000 - 1500 - 500 = 1000
        row = df.filter(df["year"] == 2020)
        assert row["other_staff"][0] == 1000


class TestGrowthIndex:
    def test_base_year_100(self, db_conn):
        _seed_ipeds(db_conn)
        df = query_growth_index(db_conn)
        assert df.height == 2
        first = df.filter(df["year"] == 2020)
        assert first["mgmt_index"][0] == 100.0
        assert first["enrollment_index"][0] == 100.0