102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
"""Tests for data validation module."""
|
|
|
|
from admin_analytics.validation import (
|
|
validate_row_counts,
|
|
validate_null_rates,
|
|
validate_year_coverage,
|
|
validate_cross_source_consistency,
|
|
format_report,
|
|
)
|
|
|
|
|
|
class TestRowCounts:
    """Checks on the per-table counts returned by ``validate_row_counts``."""

    def test_empty_tables(self, db_conn):
        """With nothing inserted by the test, both looked-up tables count zero."""
        row_totals = validate_row_counts(db_conn)
        for table_name in ("raw_cpi_u", "raw_institution"):
            assert row_totals[table_name] == 0

    def test_with_data(self, db_conn):
        """Two rows inserted into ``raw_cpi_u`` yield a count of two."""
        seed_statements = (
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')",
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 2, 300.84, 'CUUR0000SA0')",
        )
        for statement in seed_statements:
            db_conn.execute(statement)

        row_totals = validate_row_counts(db_conn)
        assert row_totals["raw_cpi_u"] == 2
|
|
|
|
|
|
class TestNullRates:
    """Checks on the per-table, per-column values from ``validate_null_rates``."""

    def test_empty_tables_excluded(self, db_conn):
        """With nothing inserted by the test, ``raw_cpi_u`` is not a result key."""
        null_rates = validate_null_rates(db_conn)
        assert "raw_cpi_u" not in null_rates

    def test_no_nulls(self, db_conn):
        """A fully populated CPI row gives 0.0 for ``year`` and ``value``."""
        seed_statement = (
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')"
        )
        db_conn.execute(seed_statement)

        cpi_rates = validate_null_rates(db_conn)["raw_cpi_u"]
        assert cpi_rates["year"] == 0.0
        assert cpi_rates["value"] == 0.0

    def test_with_nulls(self, db_conn):
        """One of two filings omits ``total_revenue``; both supply ``ein``."""
        seed_statements = (
            # First filing leaves total_revenue out of the column list.
            "INSERT INTO raw_990_filing (object_id, ein, tax_year) VALUES ('f1', '123', 2023)",
            # Second filing supplies total_revenue.
            "INSERT INTO raw_990_filing (object_id, ein, tax_year, total_revenue) VALUES ('f2', '123', 2023, 100)",
        )
        for statement in seed_statements:
            db_conn.execute(statement)

        filing_rates = validate_null_rates(db_conn)["raw_990_filing"]
        assert filing_rates["total_revenue"] == 50.0
        assert filing_rates["ein"] == 0.0
|
|
|
|
|
|
class TestYearCoverage:
    """Checks on the ``years`` / ``gaps`` entries from ``validate_year_coverage``."""

    def test_empty_tables(self, db_conn):
        """With nothing inserted by the test, ``raw_cpi_u`` has an empty year list."""
        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == []

    def test_with_data(self, db_conn):
        """Seeding 2020, 2021 and 2023 lists those years and flags 2022 as a gap."""
        insert_sql = (
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (?, 1, 100.0, 'CUUR0000SA0')"
        )
        seeded_years = [2020, 2021, 2023]
        for seeded_year in seeded_years:
            db_conn.execute(insert_sql, [seeded_year])

        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == [2020, 2021, 2023]
        assert 2022 in cpi_coverage["gaps"]
|
|
|
|
|
|
class TestCrossSource:
    """Checks on ``years_in_all_sources`` from ``validate_cross_source_consistency``."""

    def test_empty(self, db_conn):
        """With nothing inserted by the test, no year is reported for all sources."""
        consistency = validate_cross_source_consistency(db_conn)
        assert consistency["years_in_all_sources"] == []

    def test_overlap(self, db_conn):
        """Only 2023 is seeded in every table; 2022 is seeded in IPEDS finance alone."""
        seed_statements = (
            # IPEDS finance: 2022 and 2023
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2022)",
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2023)",
            # 990 filing: 2023 only
            "INSERT INTO raw_990_filing (object_id, tax_year) VALUES ('f1', 2023)",
            # CPI: 2023 only
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.0, 'X')",
        )
        for statement in seed_statements:
            db_conn.execute(statement)

        shared_years = validate_cross_source_consistency(db_conn)["years_in_all_sources"]
        assert 2023 in shared_years
        assert 2022 not in shared_years
|
|
|
|
|
|
class TestFormatReport:
    """Smoke test for ``format_report``."""

    def test_runs_on_empty_db(self, db_conn):
        """The report text contains each of the four expected substrings."""
        rendered = format_report(db_conn)
        expected_fragments = (
            "Row Counts",
            "NULL Rates",
            "Year Coverage",
            "Cross-Source",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
|