# query_topk.py
# Run a query on a vector store
#
# E.M.F. July 2025
# August 2025 - updated for 2nd search
# this version uses top-k similarity

from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    ServiceContext,  # NOTE(review): unused here, and removed in llama-index >= 0.10 — confirm version or drop
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

# Use a local Ollama model to generate answers.
Settings.llm = Ollama(
    model="llama3.1:8B",  # First model tested
    # model="deepseek-r1:8B",  # This model shows its reasoning
    # model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000,
)


def main():
    """Interactive loop: load the persisted index and answer questions via top-k retrieval.

    Loads the vector store persisted under ./storage, builds a query engine
    with similarity_top_k=5, and prompts the user for questions until
    'exit'/'quit' (or Ctrl-D / Ctrl-C) is entered.
    """
    # Load embedding model (must be the same one used to build the vector store,
    # otherwise query vectors live in a different space than the stored ones).
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata from disk.
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine(similarity_top_k=5)

    # Query loop
    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly instead of a traceback.
            break
        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Don't send an empty question to the query engine.
            continue
        print()
        response = query_engine.query(q)

        # Print the generated answer, then the retrieved source documents.
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            # Newer llama-index exposes metadata on NodeWithScore directly;
            # fall back to the wrapped node for older versions.
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(
                meta.get("file_name"),
                "---",
                meta.get("file_path"),
                getattr(node, "score", None),
            )


if __name__ == "__main__":
    main()