AdminAnalytics/tests/test_validation.py
2026-03-30 19:29:33 -04:00

102 lines
3.5 KiB
Python

"""Tests for data validation module."""
from admin_analytics.validation import (
validate_row_counts,
validate_null_rates,
validate_year_coverage,
validate_cross_source_consistency,
format_report,
)
class TestRowCounts:
    """Exercise ``validate_row_counts``, which maps table names to row counts.

    Every test receives ``db_conn`` from a fixture defined outside this file;
    it is presumed to supply a connection whose tables start out empty.
    """

    def test_empty_tables(self, db_conn):
        """Tables that received no inserts are reported with a count of zero."""
        row_counts = validate_row_counts(db_conn)
        for table_name in ("raw_cpi_u", "raw_institution"):
            assert row_counts[table_name] == 0

    def test_with_data(self, db_conn):
        """Two rows inserted into ``raw_cpi_u`` yield a count of two."""
        january_row = "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')"
        february_row = "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 2, 300.84, 'CUUR0000SA0')"
        for statement in (january_row, february_row):
            db_conn.execute(statement)

        row_counts = validate_row_counts(db_conn)
        assert row_counts["raw_cpi_u"] == 2
class TestNullRates:
    """Exercise ``validate_null_rates``, keyed by table and then by column.

    The expected values are on a 0-100 scale: one missing value across two
    rows is asserted to be ``50.0``.
    """

    def test_empty_tables_excluded(self, db_conn):
        """A table with no rows does not appear in the result at all."""
        null_rates = validate_null_rates(db_conn)
        assert "raw_cpi_u" not in null_rates

    def test_no_nulls(self, db_conn):
        """Columns that are populated in every row report a rate of 0.0."""
        complete_row = "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')"
        db_conn.execute(complete_row)

        cpi_rates = validate_null_rates(db_conn)["raw_cpi_u"]
        assert cpi_rates["year"] == 0.0
        assert cpi_rates["value"] == 0.0

    def test_with_nulls(self, db_conn):
        """A column supplied in only one of two rows reports a rate of 50.0."""
        # The first filing leaves total_revenue out of the column list; the
        # second provides it, so half of the rows lack a value.
        filing_without_revenue = "INSERT INTO raw_990_filing (object_id, ein, tax_year) VALUES ('f1', '123', 2023)"
        filing_with_revenue = "INSERT INTO raw_990_filing (object_id, ein, tax_year, total_revenue) VALUES ('f2', '123', 2023, 100)"
        for statement in (filing_without_revenue, filing_with_revenue):
            db_conn.execute(statement)

        filing_rates = validate_null_rates(db_conn)["raw_990_filing"]
        assert filing_rates["total_revenue"] == 50.0
        assert filing_rates["ein"] == 0.0
class TestYearCoverage:
    """Exercise ``validate_year_coverage``.

    Each table's entry is read through its ``"years"`` and ``"gaps"`` keys.
    """

    def test_empty_tables(self, db_conn):
        """A table with no rows reports an empty list of covered years."""
        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == []

    def test_with_data(self, db_conn):
        """Inserted years are listed, and the skipped year shows up as a gap."""
        insert_sql = "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (?, 1, 100.0, 'CUUR0000SA0')"
        # 2022 is deliberately left out so that it must be reported as a gap.
        for inserted_year in (2020, 2021, 2023):
            db_conn.execute(insert_sql, [inserted_year])

        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == [2020, 2021, 2023]
        assert 2022 in cpi_coverage["gaps"]
class TestCrossSource:
    """Exercise ``validate_cross_source_consistency``.

    Only the ``"years_in_all_sources"`` entry of the result is inspected.
    """

    def test_empty(self, db_conn):
        """With no rows inserted, no year is reported as shared by all sources."""
        consistency = validate_cross_source_consistency(db_conn)
        assert consistency["years_in_all_sources"] == []

    def test_overlap(self, db_conn):
        """Only the year present in every seeded table counts as shared."""
        seed_statements = (
            # IPEDS finance: rows for both 2022 and 2023
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2022)",
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2023)",
            # 990 filing: 2023 only
            "INSERT INTO raw_990_filing (object_id, tax_year) VALUES ('f1', 2023)",
            # CPI: 2023 only
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.0, 'X')",
        )
        for statement in seed_statements:
            db_conn.execute(statement)

        shared_years = validate_cross_source_consistency(db_conn)["years_in_all_sources"]
        assert 2023 in shared_years
        # 2022 was inserted into the IPEDS finance table only.
        assert 2022 not in shared_years
class TestFormatReport:
    """Smoke-test ``format_report``."""

    def test_runs_on_empty_db(self, db_conn):
        """The report renders without inserted data and contains each heading."""
        report_text = format_report(db_conn)
        expected_headings = (
            "Row Counts",
            "NULL Rates",
            "Year Coverage",
            "Cross-Source",
        )
        for heading in expected_headings:
            assert heading in report_text