Initial commit: RAG pipeline for semantic search over personal journal archive

Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
This commit is contained in:
Eric 2026-02-20 06:02:28 -05:00
commit e9fc99ddc6
43 changed files with 7349 additions and 0 deletions

90
archived/query_catalog.py Normal file
View file

@ -0,0 +1,90 @@
# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for nd search
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate
import logging
# NOTE(review): DEBUG is very verbose — llama_index logs every retrieval and
# embedding step. Raise to logging.INFO to quiet the console for normal use.
logging.basicConfig(level=logging.DEBUG)
# Prompt that turns retrieved snippets into a catalog-style listing:
# one theme-summary line plus a bulleted list of matching files.
# Fixes garbled punctuation in the original text — most importantly
# "a 12 sentence rationale", which literally asked for twelve sentences
# instead of the intended "1-2 sentence" rationale.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You're given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1-2 sentence rationale.
Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** - brief rationale. If available in the snippet, include date or section hints.
CONTEXT:
{context_str}
QUERY: {query_str}
Now produce the summary line and the bulleted list of matching files."""
)
# Configure the generation LLM once at module level; llama_index reads the
# global Settings object when the query engine is built in main().
# Earlier experiments: "llama3.1:8B" (first model tested) and
# "deepseek-r1:8B" (shows its reasoning); currently a small local model.
Settings.llm = Ollama(
    model="gemma3:1b",
    request_timeout=360.0,  # generous timeout for slow local generation
    context_window=8000,
)
def main():
    """Interactive query loop over the persisted vector store.

    Loads the embedding model and the index persisted in ./storage, builds a
    query engine with the CATALOG prompt, then repeatedly prompts the user,
    printing the synthesized answer followed by each source node's file name,
    path, and similarity score. Exits on 'exit'/'quit', EOF, or Ctrl-C.
    """
    # Embedding model must match the one used to build the vector store,
    # otherwise query vectors live in a different space than the index.
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata.
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(
        similarity_top_k=10,              # pull wide (tune to taste)
        #response_mode="compact",         # concise synthesis
        text_qa_template=CATALOG_PROMPT,  # <- custom prompt
        # node_postprocessors=[
        #     SimilarityPostprocessor(similarity_cutoff=0.75)  # keep strong hits; makes result count flexible
        # ],
    )

    # Query REPL.
    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: end the session cleanly instead of a traceback.
            print()
            break
        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Don't send a blank query to the LLM; just re-prompt.
            continue
        print()
        response = query_engine.query(q)

        # Print the query response and source documents.
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            # Metadata may live on the NodeWithScore itself or on the wrapped node.
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(sn, "score", None))


if __name__ == "__main__":
    main()