Phase 1 project prototype
This commit is contained in:
parent
29215e2bd2
commit
2c9ae1c312
29 changed files with 2967 additions and 22 deletions
102
tests/test_validation.py
Normal file
102
tests/test_validation.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""Tests for data validation module."""
|
||||
|
||||
from admin_analytics.validation import (
|
||||
validate_row_counts,
|
||||
validate_null_rates,
|
||||
validate_year_coverage,
|
||||
validate_cross_source_consistency,
|
||||
format_report,
|
||||
)
|
||||
|
||||
|
||||
class TestRowCounts:
    """Checks on the per-table counts returned by ``validate_row_counts``."""

    def test_empty_tables(self, db_conn):
        """With no rows inserted, both checked tables report a count of 0."""
        row_counts = validate_row_counts(db_conn)
        for table in ("raw_cpi_u", "raw_institution"):
            assert row_counts[table] == 0

    def test_with_data(self, db_conn):
        """Two inserted CPI rows are reflected as a count of 2."""
        insert_statements = (
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')",
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 2, 300.84, 'CUUR0000SA0')",
        )
        for statement in insert_statements:
            db_conn.execute(statement)

        row_counts = validate_row_counts(db_conn)
        assert row_counts["raw_cpi_u"] == 2
|
||||
|
||||
|
||||
class TestNullRates:
    """Checks on the per-column NULL percentages from ``validate_null_rates``."""

    def test_empty_tables_excluded(self, db_conn):
        """A table with no rows does not appear in the result at all."""
        null_rates = validate_null_rates(db_conn)
        assert "raw_cpi_u" not in null_rates

    def test_no_nulls(self, db_conn):
        """A fully populated row yields a 0.0 rate for the checked columns."""
        db_conn.execute(
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')"
        )

        cpi_rates = validate_null_rates(db_conn)["raw_cpi_u"]
        assert cpi_rates["year"] == 0.0
        assert cpi_rates["value"] == 0.0

    def test_with_nulls(self, db_conn):
        """One of two filings omits total_revenue, so its rate is 50.0."""
        # First filing leaves total_revenue unset; the second supplies it.
        filing_without_revenue = (
            "INSERT INTO raw_990_filing (object_id, ein, tax_year) VALUES ('f1', '123', 2023)"
        )
        filing_with_revenue = (
            "INSERT INTO raw_990_filing (object_id, ein, tax_year, total_revenue) VALUES ('f2', '123', 2023, 100)"
        )
        db_conn.execute(filing_without_revenue)
        db_conn.execute(filing_with_revenue)

        filing_rates = validate_null_rates(db_conn)["raw_990_filing"]
        assert filing_rates["total_revenue"] == 50.0
        assert filing_rates["ein"] == 0.0
|
||||
|
||||
|
||||
class TestYearCoverage:
    """Checks on the years/gaps summary from ``validate_year_coverage``."""

    def test_empty_tables(self, db_conn):
        """With no rows inserted, the CPI table reports an empty year list."""
        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == []

    def test_with_data(self, db_conn):
        """Inserting 2020, 2021 and 2023 lists those years and flags 2022 as a gap."""
        insert_sql = "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (?, 1, 100.0, 'CUUR0000SA0')"
        # 2022 is deliberately left out so it shows up as a gap.
        for present_year in (2020, 2021, 2023):
            db_conn.execute(insert_sql, [present_year])

        cpi_coverage = validate_year_coverage(db_conn)["raw_cpi_u"]
        assert cpi_coverage["years"] == [2020, 2021, 2023]
        assert 2022 in cpi_coverage["gaps"]
|
||||
|
||||
|
||||
class TestCrossSource:
    """Checks on ``validate_cross_source_consistency`` year overlap."""

    def test_empty(self, db_conn):
        """With no rows inserted, no year is reported as present in all sources."""
        summary = validate_cross_source_consistency(db_conn)
        assert summary["years_in_all_sources"] == []

    def test_overlap(self, db_conn):
        """Only 2023 is inserted into every source; 2022 exists in IPEDS alone."""
        seed_statements = (
            # IPEDS finance: 2022 and 2023
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2022)",
            "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2023)",
            # 990 filing: 2023 only
            "INSERT INTO raw_990_filing (object_id, tax_year) VALUES ('f1', 2023)",
            # CPI: 2023 only
            "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.0, 'X')",
        )
        for statement in seed_statements:
            db_conn.execute(statement)

        shared_years = validate_cross_source_consistency(db_conn)["years_in_all_sources"]
        assert 2023 in shared_years
        assert 2022 not in shared_years
|
||||
|
||||
|
||||
class TestFormatReport:
    """Smoke test for the text produced by ``format_report``."""

    def test_runs_on_empty_db(self, db_conn):
        """The report built from the fixture connection contains each section heading."""
        report_text = format_report(db_conn)
        expected_headings = (
            "Row Counts",
            "NULL Rates",
            "Year Coverage",
            "Cross-Source",
        )
        for heading in expected_headings:
            assert heading in report_text
|
||||
Loading…
Add table
Add a link
Reference in a new issue