Initial commit: RAG pipeline for semantic search over personal journal archive

Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
This commit is contained in:
Eric 2026-02-20 06:02:28 -05:00
commit e9fc99ddc6
43 changed files with 7349 additions and 0 deletions

90
archived/query_catalog.py Normal file
View file

@ -0,0 +1,90 @@
# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for nd search
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate
import logging
# NOTE(review): DEBUG is very verbose — llama_index logs every retrieval and
# embedding step. Raise to logging.INFO to quiet the console for normal use.
logging.basicConfig(level=logging.DEBUG)
# Prompt that turns retrieved snippets into a catalog-style listing:
# one theme-summary line plus a bulleted list of matching files.
# Fixes garbled punctuation in the original text — most importantly
# "a 12 sentence rationale", which literally asked for twelve sentences
# instead of the intended "1-2 sentence" rationale.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You're given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1-2 sentence rationale.
Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** - brief rationale. If available in the snippet, include date or section hints.
CONTEXT:
{context_str}
QUERY: {query_str}
Now produce the summary line and the bulleted list of matching files."""
)
# Configure the generation LLM once at module level; llama_index reads the
# global Settings object when the query engine is built in main().
# Earlier experiments: "llama3.1:8B" (first model tested) and
# "deepseek-r1:8B" (shows its reasoning); currently a small local model.
Settings.llm = Ollama(
    model="gemma3:1b",
    request_timeout=360.0,  # generous timeout for slow local generation
    context_window=8000,
)
def main():
    """Interactive query loop over the persisted vector store.

    Loads the embedding model and the index persisted in ./storage, builds a
    query engine with the CATALOG prompt, then repeatedly prompts the user,
    printing the synthesized answer followed by each source node's file name,
    path, and similarity score. Exits on 'exit'/'quit', EOF, or Ctrl-C.
    """
    # Embedding model must match the one used to build the vector store,
    # otherwise query vectors live in a different space than the index.
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata.
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(
        similarity_top_k=10,              # pull wide (tune to taste)
        #response_mode="compact",         # concise synthesis
        text_qa_template=CATALOG_PROMPT,  # <- custom prompt
        # node_postprocessors=[
        #     SimilarityPostprocessor(similarity_cutoff=0.75)  # keep strong hits; makes result count flexible
        # ],
    )

    # Query REPL.
    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: end the session cleanly instead of a traceback.
            print()
            break
        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Don't send a blank query to the LLM; just re-prompt.
            continue
        print()
        response = query_engine.query(q)

        # Print the query response and source documents.
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            # Metadata may live on the NodeWithScore itself or on the wrapped node.
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(sn, "score", None))


if __name__ == "__main__":
    main()