# query_tree.py
#
# Run a query on a vector store.
# This is to test summarization using the tree_summarize response mode.
# It doesn't work very well, perhaps because of the structure of the data.
#
# E.M.F. August 2025

from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

# Use a local model served by Ollama to generate answers
Settings.llm = Ollama(
    model="llama3.1:8B",        # First model tested
    # model="deepseek-r1:8B",   # This model shows its reasoning
    # model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000,
)


def main():
    # Load the embedding model (must be the same one used to build the vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load the persisted vector store + metadata from disk
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    # tree_summarize builds the answer by recursively summarizing retrieved chunks
    query_engine = index.as_query_engine(response_mode="tree_summarize")

    # Interactive query loop
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        response = query_engine.query(q)

        # Print the query response and the source documents it drew from
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(
                meta.get("file_name"),
                "---",
                meta.get("file_path"),
                getattr(node, "score", None),
            )


if __name__ == "__main__":
    main()
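
# Usage (a sketch; assumes ./storage was built earlier by a separate indexing
# script that used the same all-mpnet-base-v2 embedding model, and that a local
# Ollama server has the selected model available):
#
#   python query_tree.py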