# query_tree.py
#
# Run a query on a vector store.
# This is to test summarization using the tree_summarize response mode.
# It doesn't work very well, perhaps because of the structure of the data.
#
# E.M.F. August 2025
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
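
# Requires the HuggingFace and Ollama integration packages:
#   pip install llama-index-embeddings-huggingface llama-index-llms-ollama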

# Use a local model to generate
Settings.llm = Ollama(
    model="llama3.1:8B",       # First model tested
    # model="deepseek-r1:8B",  # This model shows its reasoning
    # model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000,
)
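
# Settings is LlamaIndex's global configuration object; the LLM assigned here
# is what the query engine in main() uses to synthesize responses.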


def main():
    # Load embedding model (same as used for vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine(response_mode="tree_summarize")
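
    # Note: tree_summarize (above) answers bottom-up; it summarizes the
    # retrieved chunks in groups and recursively merges the partial summaries
    # until a single response remains. If it fits this data poorly, a flatter
    # mode such as "compact" may behave better, e.g.:
    #   query_engine = index.as_query_engine(response_mode="compact")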

    # Query loop
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        response = query_engine.query(q)

        # Print the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()