"""Tests for data validation module.""" from admin_analytics.validation import ( validate_row_counts, validate_null_rates, validate_year_coverage, validate_cross_source_consistency, format_report, ) class TestRowCounts: def test_empty_tables(self, db_conn): counts = validate_row_counts(db_conn) assert counts["raw_cpi_u"] == 0 assert counts["raw_institution"] == 0 def test_with_data(self, db_conn): db_conn.execute( "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')" ) db_conn.execute( "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 2, 300.84, 'CUUR0000SA0')" ) counts = validate_row_counts(db_conn) assert counts["raw_cpi_u"] == 2 class TestNullRates: def test_empty_tables_excluded(self, db_conn): results = validate_null_rates(db_conn) assert "raw_cpi_u" not in results def test_no_nulls(self, db_conn): db_conn.execute( "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.17, 'CUUR0000SA0')" ) results = validate_null_rates(db_conn) assert results["raw_cpi_u"]["year"] == 0.0 assert results["raw_cpi_u"]["value"] == 0.0 def test_with_nulls(self, db_conn): db_conn.execute( "INSERT INTO raw_990_filing (object_id, ein, tax_year) VALUES ('f1', '123', 2023)" ) db_conn.execute( "INSERT INTO raw_990_filing (object_id, ein, tax_year, total_revenue) VALUES ('f2', '123', 2023, 100)" ) results = validate_null_rates(db_conn) assert results["raw_990_filing"]["total_revenue"] == 50.0 assert results["raw_990_filing"]["ein"] == 0.0 class TestYearCoverage: def test_empty_tables(self, db_conn): coverage = validate_year_coverage(db_conn) assert coverage["raw_cpi_u"]["years"] == [] def test_with_data(self, db_conn): for year in [2020, 2021, 2023]: db_conn.execute( "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (?, 1, 100.0, 'CUUR0000SA0')", [year], ) coverage = validate_year_coverage(db_conn) assert coverage["raw_cpi_u"]["years"] == [2020, 2021, 2023] assert 2022 in coverage["raw_cpi_u"]["gaps"] class TestCrossSource: def test_empty(self, db_conn): result = validate_cross_source_consistency(db_conn) assert result["years_in_all_sources"] == [] def test_overlap(self, db_conn): # Add IPEDS finance db_conn.execute( "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2022)" ) db_conn.execute( "INSERT INTO raw_ipeds_finance (unitid, year) VALUES (130943, 2023)" ) # Add 990 filing db_conn.execute( "INSERT INTO raw_990_filing (object_id, tax_year) VALUES ('f1', 2023)" ) # Add CPI db_conn.execute( "INSERT INTO raw_cpi_u (year, month, value, series_id) VALUES (2023, 1, 299.0, 'X')" ) result = validate_cross_source_consistency(db_conn) assert 2023 in result["years_in_all_sources"] assert 2022 not in result["years_in_all_sources"] class TestFormatReport: def test_runs_on_empty_db(self, db_conn): report = format_report(db_conn) assert "Row Counts" in report assert "NULL Rates" in report assert "Year Coverage" in report assert "Cross-Source" in report