"""Classify staff by title into functional categories.

Categories distinguish admin overhead from grant-funded, student-facing,
and technical roles. This is critical because IPEDS/IRE "staff" counts
lump together everyone who isn't tenure-track faculty — including postdocs,
research scientists, and lab technicians who are soft-funded through
extramural research and are NOT administrative overhead.
"""
|
|
|
|
import re
|
|
|
|
# Ordered (category, compiled pattern) pairs. Order matters — the list is
# scanned top to bottom and the first pattern found in a title wins, so a
# specific category must be listed before any generic one that would also
# match. Every pattern is case-insensitive and starts on a word boundary;
# most are deliberately prefix matches (e.g. "facilit" covers "facility" and
# "facilities").
CATEGORY_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    # Leadership / structural overhead
    ("LEADERSHIP", re.compile(
        r"\b(dean|chief of staff|associate dean|assistant dean)\b", re.I)),

    # Faculty listed on staff page — not admin (must come before RESEARCH
    # to catch "Adjunct Professor NIST" as faculty, not research)
    ("FACULTY", re.compile(
        r"\b(professor|lecturer|instructor|faculty|adjunct|affiliated)", re.I)),

    # Grants administration — debatable; supports extramural funding
    ("GRANTS_ADMIN", re.compile(
        r"\b(grants?\s+anal|pre-?award|post-?award|closeout\s+coord"
        r"|sponsored\s+program|grants?\s+admin|grants?\s+manag|grants?\s+coord)", re.I)),

    # Research staff — soft-funded, NOT admin bloat
    ("RESEARCH", re.compile(
        r"\b(research\s+(?:associate|assistant|scientist|scholar|fellow)"
        r"|postdoc|post-?doctoral|(?:associate\s+)?scientist\b)", re.I)),

    # Academic / student-facing support
    ("ACADEMIC_SUPPORT", re.compile(
        r"\b(academic\s+(?:advisor|analyst|program)|undergrad\w*\s+(?:recruit|advisor|affairs)"
        r"|graduate\s+(?:services|advisor)|student\s+(?:develop|support|services)"
        r"|program\s+(?:coordinator|manager))", re.I)),

    # Advancement / development — revenue-generating (fundraising)
    # NOTE(review): the bare word "development" also matches titles such as
    # "Web Development ..." before COMMUNICATIONS is tried — confirm that
    # this ordering is intended.
    ("ADVANCEMENT", re.compile(
        r"\b(development|fundrais|advancement|alumni\s+relation|donor|giving)", re.I)),

    # Finance / procurement
    ("FINANCE", re.compile(
        r"\b(financial|fiscal|budget|procurement|business\s+(?:officer|admin)"
        r"|sr\.?\s+business)", re.I)),

    # IT / computing
    # "it\b" matches the standalone word "IT" wherever it appears, including
    # at the very end of a title ("Director of IT"); requiring trailing
    # whitespace would miss that case.
    # NOTE(review): "support specialist" here is tried before ADMIN_SUPPORT,
    # so "Administrative Support Specialist" lands in IT — confirm intent.
    ("IT", re.compile(
        r"\b(computing|systems?\s+(?:prog|admin)|it\b|information\s+tech"
        r"|support\s+specialist|service\s+desk|digital\s+tech)", re.I)),

    # Communications / marketing (must come before DIRECTOR)
    ("COMMUNICATIONS", re.compile(
        r"\b(communicat\w+|marketing|media\s+(?:specialist|coord|director)"
        r"|web\s+(?:develop|design|content)|event\s+(?:coord|plan|manag))", re.I)),

    # Human resources
    ("HR", re.compile(
        r"\b(human\s+resource|hr\s+analyst|talent|workforce)", re.I)),

    # Facilities / space management
    ("FACILITIES", re.compile(
        r"\b(facilit|building|space\s+(?:plan|manag)|safety|engineer\w+\s+facilit)", re.I)),

    # Technical / lab operations — not admin bloat
    ("TECHNICAL", re.compile(
        r"\b(machinist|lab\s+(?:manager|coord|tech)|equipment|technician"
        r"|instrument)", re.I)),

    # Administrative support
    ("ADMIN_SUPPORT", re.compile(
        r"\b(admin\w*\s+(?:assistant|specialist|support|secretary|coord)"
        r"|secretary|receptionist|office\s+(?:manager|coord))", re.I)),

    # Director-level (catch remaining directors); kept last so any more
    # specific category above claims the title first.
    ("DIRECTOR", re.compile(
        r"\b(director|associate\s+director|sr\.?\s+director)\b", re.I)),
]
|
|
|
|
# Which categories count as administrative overhead
OVERHEAD_CATEGORIES: set[str] = {
    "LEADERSHIP", "FINANCE", "IT", "COMMUNICATIONS", "HR",
    "FACILITIES", "ADMIN_SUPPORT", "DIRECTOR",
}

# Debatable — could go either way depending on analysis
DEBATABLE_CATEGORIES: set[str] = {"GRANTS_ADMIN"}

# NOT overhead — these are mission-aligned or revenue-generating
NON_OVERHEAD_CATEGORIES: set[str] = {
    "RESEARCH", "ACADEMIC_SUPPORT", "ADVANCEMENT", "TECHNICAL", "FACULTY",
}
|
|
|
|
|
|
def classify_title(title: str | None) -> str:
    """Classify a staff title into a functional category.

    Args:
        title: Raw job title; may be ``None``, empty, or whitespace-only.

    Returns:
        The name of the first entry in ``CATEGORY_PATTERNS`` whose pattern
        is found anywhere in ``title``, or ``"UNKNOWN"`` when the title is
        missing/blank or no pattern matches.
    """
    # Nothing to classify: None, "", or whitespace only.
    if not title or not title.strip():
        return "UNKNOWN"

    # CATEGORY_PATTERNS is ordered by priority, so stop at the first hit.
    for category, pattern in CATEGORY_PATTERNS:
        if pattern.search(title):
            return category

    return "UNKNOWN"
|
|
|
|
|
|
def is_overhead(category: str) -> bool | None:
    """Tell whether a category counts as administrative overhead.

    Args:
        category: A category name such as those returned by
            ``classify_title``.

    Returns:
        ``True`` if the category is in ``OVERHEAD_CATEGORIES``, ``False`` if
        it is in ``NON_OVERHEAD_CATEGORIES``, and ``None`` otherwise — that
        is, for the debatable categories and for any name found in neither
        set (e.g. ``"UNKNOWN"``).
    """
    if category in OVERHEAD_CATEGORIES:
        return True
    if category in NON_OVERHEAD_CATEGORIES:
        return False
    # Debatable and unrecognised categories are both undetermined, so no
    # separate DEBATABLE_CATEGORIES lookup is needed here.
    return None
|