ssearch/archived/query_catalog.py
Eric e9fc99ddc6 Initial commit: RAG pipeline for semantic search over personal journal archive
Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
2026-02-20 06:02:28 -05:00

90 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for nd ssearch
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate
import logging
# Configure the root logger at DEBUG level so that library log records
# emitted while the script runs are shown.
logging.basicConfig(
    level=logging.DEBUG,
)
# Prompt template passed to the query engine as ``text_qa_template`` in
# main(). Rather than asking for an essay, it instructs the LLM to return a
# one-line theme summary followed by a bulleted catalog of matching journal
# files. It exposes two placeholders: {context_str} and {query_str}.
#
# NOTE: the wording below is runtime text seen by the model. "You're" and
# "1-2 sentence" restore characters that had been lost ("Youre", "12"), which
# otherwise asked the model for a twelve-sentence rationale per file.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You're given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1-2 sentence rationale.
Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** — brief rationale. If available in the snippet, include date or section hints.
CONTEXT:
{context_str}
QUERY: {query_str}
Now produce the summary line and the bulleted list of matching files."""
)
# Generation backend: a local model reached through Ollama.
# Earlier candidates, kept for reference:
#   model="llama3.1:8B"     (first model tested)
#   model="deepseek-r1:8B"  (this model shows its reasoning)
Settings.llm = Ollama(model="gemma3:1b", request_timeout=360.0, context_window=8000)
def main():
    """Run an interactive catalog-style query loop over the persisted index.

    Sets the embedding model on ``Settings``, loads the index persisted under
    ``./storage``, builds a query engine that uses ``CATALOG_PROMPT`` as its
    ``text_qa_template``, and then repeatedly reads a question from stdin.
    For each question it prints the response text followed by the file name,
    file path and score of every source node.

    The loop ends when the user enters ``exit`` or ``quit`` (case-insensitive)
    or when the prompt is interrupted (EOF or Ctrl-C). Blank input is skipped
    instead of being sent to the query engine.
    """
    # Load embedding model (same as used for vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(
        similarity_top_k=10,  # pull wide (tune to taste)
        # response_mode="compact",  # concise synthesis
        text_qa_template=CATALOG_PROMPT,  # custom prompt
        # node_postprocessors=[
        #     SimilarityPostprocessor(similarity_cutoff=0.75)  # keep strong hits; makes result count flexible
        # ],
    )

    # Query loop
    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave quietly rather than
            # dumping a traceback on the user.
            print()
            break

        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Nothing to search for; ask again instead of running an empty query.
            continue

        print()
        response = query_engine.query(q)

        # Show the synthesized answer, then the documents it was drawn from.
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            # Prefer metadata exposed on the scored node itself; fall back to
            # the wrapped node's metadata when that is missing or empty.
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(sn, "score", None))
# Script entry point: start the interactive session only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()