Phase 1 project prototype

This commit is contained in:
emfurst 2026-03-30 19:29:33 -04:00
commit 2c9ae1c312
29 changed files with 2967 additions and 22 deletions

View file

@ -0,0 +1,121 @@
"""Classify staff by title into functional categories.
Categories distinguish admin overhead from grant-funded, student-facing,
and technical roles. This is critical because IPEDS/IRE "staff" counts
lump everyone who isn't tenure-track faculty — including postdocs,
research scientists, and lab technicians who are soft-funded through
extramural research and are NOT administrative overhead.
"""
import re
# Ordered (category, compiled pattern) pairs used to classify a title.
# Order matters — the first pattern that matches wins, so an entry must be
# placed ahead of any broader entry that would otherwise swallow its titles.
# All patterns are case-insensitive and are meant to be used with ``search``.
CATEGORY_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    # Leadership / structural overhead
    ("LEADERSHIP", re.compile(
        r"\b(dean|chief of staff|associate dean|assistant dean)\b", re.I)),
    # Faculty listed on staff page — not admin (must come before RESEARCH
    # to catch "Adjunct Professor NIST" as faculty, not research)
    ("FACULTY", re.compile(
        r"\b(professor|lecturer|instructor|faculty|adjunct|affiliated)", re.I)),
    # Grants administration — debatable; supports extramural funding
    ("GRANTS_ADMIN", re.compile(
        r"\b(grants?\s+anal|pre-?award|post-?award|closeout\s+coord"
        r"|sponsored\s+program|grants?\s+admin|grants?\s+manag|grants?\s+coord)",
        re.I)),
    # Research staff — soft-funded, NOT admin bloat
    ("RESEARCH", re.compile(
        r"\b(research\s+(?:associate|assistant|scientist|scholar|fellow)"
        r"|postdoc|post-?doctoral|(?:associate\s+)?scientist\b)", re.I)),
    # Academic / student-facing support
    ("ACADEMIC_SUPPORT", re.compile(
        r"\b(academic\s+(?:advisor|analyst|program)"
        r"|undergrad\w*\s+(?:recruit|advisor|affairs)"
        r"|graduate\s+(?:services|advisor)|student\s+(?:develop|support|services)"
        r"|program\s+(?:coordinator|manager))", re.I)),
    # Advancement / development — revenue-generating (fundraising)
    # NOTE(review): the bare "development" alternative also captures titles
    # such as "Software Development Engineer" or "Web Development Specialist"
    # because this entry precedes IT and COMMUNICATIONS — confirm that is
    # intended before relying on those two categories for such titles.
    ("ADVANCEMENT", re.compile(
        r"\b(development|fundrais|advancement|alumni\s+relation|donor|giving)",
        re.I)),
    # Finance / procurement
    ("FINANCE", re.compile(
        r"\b(financial|fiscal|budget|procurement|business\s+(?:officer|admin)"
        r"|sr\.?\s+business)", re.I)),
    # IT / computing
    # "it\b" (rather than "it" followed by whitespace) so that the acronym is
    # recognised at the end of a title or before punctuation, e.g.
    # "Manager, IT" or "Director of IT"; the leading \b still keeps words
    # like "item" from matching.
    ("IT", re.compile(
        r"\b(computing|systems?\s+(?:prog|admin)|it\b|information\s+tech"
        r"|support\s+specialist|service\s+desk|digital\s+tech)", re.I)),
    # Communications / marketing (must come before DIRECTOR)
    ("COMMUNICATIONS", re.compile(
        r"\b(communicat\w+|marketing|media\s+(?:specialist|coord|director)"
        r"|web\s+(?:develop|design|content)|event\s+(?:coord|plan|manag))", re.I)),
    # Human resources
    ("HR", re.compile(
        r"\b(human\s+resource|hr\s+analyst|talent|workforce)", re.I)),
    # Facilities / space management
    ("FACILITIES", re.compile(
        r"\b(facilit|building|space\s+(?:plan|manag)|safety|engineer\w+\s+facilit)",
        re.I)),
    # Technical / lab operations — not admin bloat
    # "lab(?:oratory)?" accepts both the abbreviated and the spelled-out form
    # ("Lab Manager" and "Laboratory Manager").
    ("TECHNICAL", re.compile(
        r"\b(machinist|lab(?:oratory)?\s+(?:manager|coord|tech)|equipment|technician"
        r"|instrument)", re.I)),
    # Administrative support
    ("ADMIN_SUPPORT", re.compile(
        r"\b(admin\w*\s+(?:assistant|specialist|support|secretary|coord)"
        r"|secretary|receptionist|office\s+(?:manager|coord))", re.I)),
    # Director-level (catch remaining directors)
    ("DIRECTOR", re.compile(
        r"\b(director|associate\s+director|sr\.?\s+director)\b", re.I)),
]
# Categories counted as administrative overhead.
OVERHEAD_CATEGORIES: set[str] = {
    "LEADERSHIP",
    "FINANCE",
    "IT",
    "COMMUNICATIONS",
    "HR",
    "FACILITIES",
    "ADMIN_SUPPORT",
    "DIRECTOR",
}

# Categories whose overhead status is debatable — could go either way
# depending on the analysis.
DEBATABLE_CATEGORIES: set[str] = {
    "GRANTS_ADMIN",
}

# Categories that are NOT overhead — mission-aligned or revenue-generating.
NON_OVERHEAD_CATEGORIES: set[str] = {
    "RESEARCH",
    "ACADEMIC_SUPPORT",
    "ADVANCEMENT",
    "TECHNICAL",
    "FACULTY",
}
def classify_title(title: str | None) -> str:
    """Map a staff title to its functional category.

    The entries of CATEGORY_PATTERNS are tried in list order using
    ``search``, so a pattern may match anywhere in the title and the
    first entry that matches decides the result.

    Args:
        title: The raw title text. ``None``, an empty string, or a
            whitespace-only string is treated as having no title.

    Returns:
        The name of the first matching category, or "UNKNOWN" when the
        title is missing/blank or no pattern matches.
    """
    # Nothing to classify for a missing or blank title.
    if not (title and title.strip()):
        return "UNKNOWN"
    matches = (name for name, regex in CATEGORY_PATTERNS if regex.search(title))
    # The generator is lazy, so scanning stops at the first hit.
    return next(matches, "UNKNOWN")
def is_overhead(category: str) -> bool | None:
    """Report whether a category counts as administrative overhead.

    Args:
        category: A category name, such as one returned by
            ``classify_title``.

    Returns:
        True if the category is in OVERHEAD_CATEGORIES, False if it is in
        NON_OVERHEAD_CATEGORIES, and None otherwise — that is, for the
        debatable categories and for any name in none of the sets
        (for example "UNKNOWN").
    """
    counts_as_overhead = category in OVERHEAD_CATEGORIES
    if counts_as_overhead or category in NON_OVERHEAD_CATEGORIES:
        return counts_as_overhead
    # Debatable and unrecognised categories have no definite answer.
    return None