Initial commit: RAG pipeline for semantic search over personal journal archive
Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval, incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
This commit is contained in:
commit
e9fc99ddc6
43 changed files with 7349 additions and 0 deletions
223
archived/query_claude_sonnet.py
Normal file
223
archived/query_claude_sonnet.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
query_claude_sonnet.py
|
||||
|
||||
Query a vector store with a custom prompt for research assistance.
|
||||
Uses BAAI/bge-large-en-v1.5 embeddings and Ollama for generation.
|
||||
|
||||
E.M.F. January 2026
|
||||
Using Claude Sonnet 4.5 to suggest changes
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from llama_index.core import (
|
||||
Settings,
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
)
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.postprocessor import SimilarityPostprocessor
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
|
||||
# Suppress tokenizer parallelism warnings emitted by the HuggingFace
# tokenizers library when the process forks after parallel tokenization.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Configuration defaults
DEFAULT_LLM = "command-r7b"  # Ollama model tag used for generation
DEFAULT_EMBED_MODEL = "BAAI/bge-large-en-v1.5"  # HuggingFace embedding model id
DEFAULT_STORAGE_DIR = "./storage_exp"  # persisted llama-index storage directory
DEFAULT_TOP_K = 15  # number of similar nodes retrieved per query
DEFAULT_SIMILARITY_CUTOFF = 0.7  # Set to None to disable
|
||||
|
||||
|
||||
def get_prompt_template(max_files: int = 10) -> PromptTemplate:
    """Build the custom research-assistant QA prompt.

    Args:
        max_files: Upper bound on the number of distinct files the model
            is asked to cite in its "Matching Files" section.

    Returns:
        A PromptTemplate whose ``context_str``/``query_str`` slots are left
        open for the query engine to fill in at answer time.
    """
    # Plain (non-f) template: str.format substitutes {max_files} now, while
    # the double-braced {{context_str}}/{{query_str}} collapse to the
    # single-braced placeholders that llama-index resolves per query.
    text = """You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.

Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to {max_files}).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.

Format your answer in two parts:

1. **Summary Theme**
   Summarize the dominant theme from the relevant context in a few sentences.

2. **Matching Files**
   List up to {max_files} matching files. Format each as:
   <filename> - <rationale tied to content. Include date or section hints if available.>

CONTEXT:
{{context_str}}

QUERY:
{{query_str}}

Now provide the theme and list of matching files.""".format(max_files=max_files)
    return PromptTemplate(text)
|
||||
|
||||
|
||||
def load_models(
    llm_name: str = DEFAULT_LLM,
    embed_model_name: str = DEFAULT_EMBED_MODEL,
    cache_folder: str = "./models",
    request_timeout: float = 360.0,
    context_window: int = 8000,
):
    """Install the generation and embedding models into the global Settings.

    Args:
        llm_name: Ollama model tag used for answer generation.
        embed_model_name: HuggingFace embedding model identifier.
        cache_folder: Local directory holding cached embedding weights.
        request_timeout: Seconds to wait for an Ollama response.
        context_window: Token context size requested from the LLM.
    """
    llm = Ollama(
        model=llm_name,
        request_timeout=request_timeout,
        context_window=context_window,
    )
    # local_files_only: weights must already be cached under cache_folder;
    # no network download is attempted at query time.
    embedder = HuggingFaceEmbedding(
        model_name=embed_model_name,
        cache_folder=cache_folder,
        local_files_only=True,
    )
    Settings.llm = llm
    Settings.embed_model = embedder
|
||||
|
||||
|
||||
def load_query_engine(
    storage_dir: str = DEFAULT_STORAGE_DIR,
    top_k: int = DEFAULT_TOP_K,
    similarity_cutoff: float | None = DEFAULT_SIMILARITY_CUTOFF,
    max_files: int = 10,
):
    """Load the persisted vector index and wrap it in a query engine.

    Args:
        storage_dir: Directory containing the persisted llama-index store.
        top_k: Number of similar nodes to retrieve per query.
        similarity_cutoff: Minimum retrieval score; None disables filtering.
        max_files: Forwarded to the prompt template's file-listing cap.

    Returns:
        A query engine configured with the custom research prompt.

    Raises:
        FileNotFoundError: If ``storage_dir`` does not exist.
    """
    storage_path = Path(storage_dir)
    if not storage_path.exists():
        raise FileNotFoundError(f"Storage directory not found: {storage_dir}")

    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=str(storage_path))
    )

    # Only attach a postprocessor when filtering is requested; otherwise
    # pass None so the engine uses its defaults.
    node_postprocessors = None
    if similarity_cutoff is not None:
        node_postprocessors = [
            SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
        ]

    return index.as_query_engine(
        similarity_top_k=top_k,
        text_qa_template=get_prompt_template(max_files),
        node_postprocessors=node_postprocessors,
    )
|
||||
|
||||
|
||||
def get_node_metadata(node) -> dict:
    """Best-effort metadata lookup across llama-index node shapes.

    Retrieved results may expose metadata directly (``node.metadata``) or on
    a wrapped inner node (``node.node.metadata``); return whichever is found
    first, or an empty dict when neither is present.
    """
    direct = getattr(node, "metadata", None)
    if direct:
        return direct
    inner = getattr(node, "node", None)
    if inner is not None and hasattr(inner, "metadata"):
        return inner.metadata
    return {}
|
||||
|
||||
|
||||
def print_results(response):
    """Print the generated answer, then a ranked list of source documents.

    Args:
        response: llama-index response object exposing ``.response`` (the
            generated text) and ``.source_nodes`` (retrieved nodes).
    """
    banner = "=" * 60

    print("\n" + banner)
    print("RESPONSE")
    print(banner + "\n")
    print(response.response)

    print("\n" + banner)
    print("SOURCE DOCUMENTS")
    print(banner + "\n")

    for rank, node in enumerate(response.source_nodes, start=1):
        meta = get_node_metadata(node)
        score = getattr(node, "score", None)
        # Nodes may lack a retrieval score (e.g. after postprocessing);
        # show N/A instead of failing on the float format.
        label = f"{score:.3f}" if score is not None else "N/A"
        print(f"{rank:2}. [{label}] {meta.get('file_name', 'Unknown')}")
        print(f"    Path: {meta.get('file_path', 'Unknown')}")
||||
|
||||
|
||||
def parse_args():
    """Define and evaluate the command-line interface for this script."""
    cli = argparse.ArgumentParser(
        description="Query a vector store with a custom research assistant prompt.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python query_topk_prompt_engine.py "What themes appear in the documents?"
  python query_topk_prompt_engine.py --top-k 20 --llm llama3.1:8B "Find references to machine learning"
""",
    )
    # Positional query words; main() joins them back into a single string.
    cli.add_argument("query", nargs="+", help="The query text")
    cli.add_argument(
        "--llm",
        default=DEFAULT_LLM,
        help=f"Ollama model to use for generation (default: {DEFAULT_LLM})",
    )
    cli.add_argument(
        "--storage-dir",
        default=DEFAULT_STORAGE_DIR,
        help=f"Path to the vector store (default: {DEFAULT_STORAGE_DIR})",
    )
    cli.add_argument(
        "--top-k",
        type=int,
        default=DEFAULT_TOP_K,
        help=f"Number of similar documents to retrieve (default: {DEFAULT_TOP_K})",
    )
    cli.add_argument(
        "--similarity-cutoff",
        type=float,
        default=DEFAULT_SIMILARITY_CUTOFF,
        help=f"Minimum similarity score (default: {DEFAULT_SIMILARITY_CUTOFF}, use 0 to disable)",
    )
    cli.add_argument(
        "--max-files",
        type=int,
        default=10,
        help="Maximum files to list in response (default: 10)",
    )
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: load models and index, run the query, print results."""
    args = parse_args()

    # A CLI cutoff of 0 (or below) means "no similarity filtering".
    cutoff = args.similarity_cutoff if args.similarity_cutoff > 0 else None

    try:
        print(f"Loading models (LLM: {args.llm})...")
        load_models(llm_name=args.llm)

        print(f"Loading index from {args.storage_dir}...")
        engine = load_query_engine(
            storage_dir=args.storage_dir,
            top_k=args.top_k,
            similarity_cutoff=cutoff,
            max_files=args.max_files,
        )

        query_text = " ".join(args.query)
        suffix = "..." if len(query_text) > 100 else ""
        print(f"Querying: {query_text[:100]}{suffix}")

        print_results(engine.query(query_text))

    except FileNotFoundError as e:
        # Missing storage directory is an expected user error: report and exit.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Unexpected failures: report, then re-raise with full traceback.
        print(f"Error during query: {e}", file=sys.stderr)
        raise
|
||||
|
||||
|
||||
# Allow importing this module without side effects; run only as a script.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue