# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for nd ssearch

import logging

from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    ServiceContext,
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Embedding model name; must be the same model that was used to build the
# persisted vector store, otherwise query vectors will not be comparable.
EMBED_MODEL_NAME = "all-mpnet-base-v2"

# Directory holding the persisted vector store + metadata.
PERSIST_DIR = "./storage"

# Number of nodes retrieved per query: pull wide (tune to taste).
SIMILARITY_TOP_K = 10

# Inputs (compared case-insensitively) that end the interactive session.
EXIT_COMMANDS = ("exit", "quit")

# Prompt that asks the LLM for a catalog of matching files rather than an
# essay. {context_str} and {query_str} are the template placeholders.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You’re given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1–2 sentence rationale.

Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** — brief rationale. If available in the snippet, include date or section hints.

CONTEXT:
{context_str}

QUERY:
{query_str}

Now produce the summary line and the bulleted list of matching files."""
)

# Use a local model to generate.
# Other models tried previously:
#   model="llama3.1:8B"     - first model tested
#   model="deepseek-r1:8B"  - this model shows its reasoning
Settings.llm = Ollama(
    model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000,
)


def _build_query_engine():
    """Load the persisted index and return a query engine using CATALOG_PROMPT.

    Sets ``Settings.embed_model`` as a side effect so that queries are
    embedded with the same model as the stored vectors.
    """
    # Load embedding model (same as used for vector store)
    Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    # Optional settings that were tried and are currently disabled:
    #   response_mode="compact"  - concise synthesis
    #   node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.75)]
    #       - keep strong hits; makes result count flexible
    return index.as_query_engine(
        similarity_top_k=SIMILARITY_TOP_K,
        text_qa_template=CATALOG_PROMPT,  # <- custom prompt
    )


def _print_response(response):
    """Print the response text followed by one line per source node.

    ``response`` is the object returned by ``query_engine.query``; only its
    ``response`` and ``source_nodes`` attributes are read here.
    """
    print(response.response)

    print("\nSource documents:")
    for sn in response.source_nodes:
        # Prefer metadata exposed on the scored node itself and fall back to
        # the wrapped node when it is missing or empty.
        meta = getattr(sn, "metadata", None) or sn.node.metadata
        print(
            meta.get("file_name"),
            "---",
            meta.get("file_path"),
            getattr(sn, "score", None),
        )


def main():
    """Run an interactive loop that answers questions from the vector store.

    The loop ends when the user types 'exit' or 'quit', or when input is
    closed or interrupted (Ctrl-D / Ctrl-C at the prompt). Blank input is
    ignored. A query that raises is logged and the loop continues, so one
    failed request does not discard the loaded model and index.
    """
    query_engine = _build_query_engine()

    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or an interrupt at the prompt: leave quietly
            # instead of dumping a traceback.
            print()
            break

        if q.lower() in EXIT_COMMANDS:
            break
        if not q:
            # Nothing to ask; avoid a pointless retrieval + LLM round trip.
            continue
        print()

        try:
            response = query_engine.query(q)
        except Exception:
            # Top-level boundary of the interactive session: report the
            # failure (e.g. LLM server unreachable or timed out) with its
            # traceback and keep the session alive.
            logger.exception("Query failed")
            continue

        # Return the query response and source documents
        _print_response(response)


if __name__ == "__main__":
    main()