ssearch/archived/query_exp.py
Eric e9fc99ddc6 Initial commit: RAG pipeline for semantic search over personal journal archive
Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
2026-02-20 06:02:28 -05:00

106 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# query_exp.py
# Run a query on a vector store
#
# This version implements a prompt and uses the build_exp.py vector store
# It is based on query_topk.py
# It uses 10 top-k results and a custom prompt
# The next version after this is query_rewrite.py
# build_exp.py modifies the chunk size and overlap from the original build.py
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
# Ollama model tag that main() installs as the LLM for answer generation.
# Alternatives tried previously: "deepseek-r1:8B", "gemma3:1b".
llm = "llama3.1:8B"
# Text of the custom question-answering prompt. The {context_str} and
# {query_str} placeholders mark where the retrieved excerpts and the user's
# question go; main() passes the resulting template to the query engine as
# its text_qa_template.
_QA_TEMPLATE = """You are an expert research assistant. You are given top-ranked journal excerpts (CONTEXT) and a users QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context.
2. **Matching Files**
Make a bullet list of 10. The format for each should be:
**<filename>** — <rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""

# Prompt object consumed by the query engine.
PROMPT = PromptTemplate(_QA_TEMPLATE)
#
# Main program routine
#
def main():
    """Run an interactive question-answering loop over the persisted index.

    Installs the Ollama model named by the module-level ``llm`` and the
    ``all-mpnet-base-v2`` embedding model on the global ``Settings``, loads
    the index persisted under ``./storage_exp``, and builds a query engine
    with ``similarity_top_k=10`` and the custom ``PROMPT`` template.

    For every question the answer text is printed, followed by one line per
    source node giving its ``file_name``, ``file_path`` and score.

    The loop ends when the user types ``exit`` or ``quit`` (any case), or
    when input is closed or interrupted at the prompt (Ctrl-D / Ctrl-C).
    Blank input is ignored instead of being sent to the query engine.
    """
    # Use a local model served by Ollama to generate answers.
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000,
    )

    # Load embedding model (same as used for vector store).
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata.
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Regular query engine with the custom prompt. Options tried earlier and
    # left disabled: response_mode="compact" for more concise synthesis, and a
    # SimilarityPostprocessor(similarity_cutoff=0.75) node postprocessor to
    # keep only strong hits (which makes the result count flexible).
    query_engine = index.as_query_engine(
        similarity_top_k=10,  # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave quietly instead of
            # dumping a traceback.
            print()
            break

        if not q:
            # Nothing typed; do not spend a retrieval + LLM call on it.
            continue
        if q.lower() in ("exit", "quit"):
            break

        print()
        response = query_engine.query(q)

        # Show the answer, then the documents it was drawn from.
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            # Prefer metadata exposed on the scored wrapper; otherwise read
            # it from the wrapped node.
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(
                meta.get("file_name"),
                "---",
                meta.get("file_path"),
                getattr(node, "score", None),
            )
# Script entry point: start the interactive loop only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()