AdminAnalytics/tests/test_scraper_loader.py
2026-03-30 19:29:33 -04:00

41 lines
1.6 KiB
Python

from datetime import date
from admin_analytics.scraper.directory import StaffEntry
from admin_analytics.scraper.loader import load_scrape
def test_load_scrape(db_conn):
    """Load three staff entries and check the rows in raw_admin_headcount.

    Asserts that ``load_scrape`` returns 3 for the three entries passed in,
    and that the rows read back (ordered by person_name) carry the expected
    unit, category and is_overhead values.
    """
    scraped_on = date(2026, 3, 30)
    people = (
        ("John Doe", "Financial Specialist", "jdoe@udel.edu", "COE Central"),
        ("Jane Smith", "Research Associate", "jsmith@udel.edu", "CBE"),
        ("Bob Jones", "Academic Advisor", "bjones@udel.edu", "ME"),
    )
    entries = [
        StaffEntry(name=full_name, title=job_title, email=address, unit=dept)
        for full_name, job_title, address, dept in people
    ]

    loaded = load_scrape(db_conn, entries, scrape_date=scraped_on)
    assert loaded == 3

    query = (
        "SELECT unit, person_name, category, is_overhead FROM raw_admin_headcount ORDER BY person_name"
    )
    stored = db_conn.execute(query).fetchall()

    # Expected rows in person_name order: Bob, Jane, John.
    expected = [
        # Academic Advisor → ACADEMIC_SUPPORT → not overhead
        ("ME", "Bob Jones", "ACADEMIC_SUPPORT", False),
        # Research Associate → RESEARCH → not overhead
        ("CBE", "Jane Smith", "RESEARCH", False),
        # Financial Specialist → FINANCE → overhead
        ("COE Central", "John Doe", "FINANCE", True),
    ]
    assert len(stored) == 3
    for actual, wanted in zip(stored, expected):
        assert actual == wanted
def test_load_scrape_idempotent(db_conn):
    """Loading the same entry twice for one scrape date leaves a single row.

    Calls ``load_scrape`` twice with identical arguments and asserts that
    raw_admin_headcount holds exactly one row for that scrape_date.
    """
    scrape_day = date(2026, 3, 30)
    entries = [
        StaffEntry(
            name="John Doe",
            title="Financial Specialist",
            email="jdoe@udel.edu",
            unit="COE Central",
        ),
    ]

    # The second pass should replace the first run's rows, not add to them.
    for _ in range(2):
        load_scrape(db_conn, entries, scrape_date=scrape_day)

    cursor = db_conn.execute(
        "SELECT COUNT(*) FROM raw_admin_headcount WHERE scrape_date = ?", [scrape_day]
    )
    first_row = cursor.fetchone()
    row_total = first_row[0]
    assert row_total == 1