from datetime import date

from admin_analytics.scraper.directory import StaffEntry
from admin_analytics.scraper.loader import load_scrape


def test_load_scrape(db_conn):
    """Load three staff entries and verify the rows written to raw_admin_headcount.

    Checks the count returned by ``load_scrape`` and, for each stored row,
    the unit, person name, assigned category and overhead flag.
    """
    staff = [
        StaffEntry(
            name="John Doe",
            title="Financial Specialist",
            email="jdoe@udel.edu",
            unit="COE Central",
        ),
        StaffEntry(
            name="Jane Smith",
            title="Research Associate",
            email="jsmith@udel.edu",
            unit="CBE",
        ),
        StaffEntry(
            name="Bob Jones",
            title="Academic Advisor",
            email="bjones@udel.edu",
            unit="ME",
        ),
    ]

    loaded = load_scrape(db_conn, staff, scrape_date=date(2026, 3, 30))
    assert loaded == 3

    query = "SELECT unit, person_name, category, is_overhead FROM raw_admin_headcount ORDER BY person_name"
    stored = db_conn.execute(query).fetchall()
    assert len(stored) == 3

    # The query orders by person_name, so the expected rows are listed in
    # that order rather than in insertion order.
    expected = [
        # Academic Advisor -> ACADEMIC_SUPPORT -> not overhead
        ("ME", "Bob Jones", "ACADEMIC_SUPPORT", False),
        # Research Associate -> RESEARCH -> not overhead
        ("CBE", "Jane Smith", "RESEARCH", False),
        # Financial Specialist -> FINANCE -> overhead
        ("COE Central", "John Doe", "FINANCE", True),
    ]
    for index, wanted in enumerate(expected):
        assert stored[index] == wanted


def test_load_scrape_idempotent(db_conn):
    """Loading the same entries twice for one scrape date leaves a single row."""
    staff = [
        StaffEntry(
            name="John Doe",
            title="Financial Specialist",
            email="jdoe@udel.edu",
            unit="COE Central",
        ),
    ]
    scrape_day = date(2026, 3, 30)

    # Run the load twice: the second run should replace the first run's
    # rows for this date instead of adding duplicates.
    for _ in range(2):
        load_scrape(db_conn, staff, scrape_date=scrape_day)

    result = db_conn.execute(
        "SELECT COUNT(*) FROM raw_admin_headcount WHERE scrape_date = ?", [scrape_day]
    ).fetchone()
    row_count = result[0]
    assert row_count == 1