# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for nd ssearch

from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    ServiceContext,
    Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate

import logging
# Configure the root logger to emit DEBUG-level messages and above.
# This is verbose; raise the level (e.g. logging.INFO) for quieter sessions.
logging.basicConfig(level=logging.DEBUG)


# Raw text of the "catalog" prompt. It asks the model for a one-line theme
# summary followed by a bulleted list of matching journal files, and carries
# the {context_str} and {query_str} placeholders.
_CATALOG_PROMPT_TEXT = """You are a research assistant. You’re given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1–2 sentence rationale.

Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
  1) A brief one-line summary of the overall theme you detect.
  2) A bulleted list: **filename** — brief rationale. If available in the snippet, include date or section hints.

CONTEXT:
{context_str}

QUERY: {query_str}

Now produce the summary line and the bulleted list of matching files."""

# Prompt template object handed to the query engine in main().
CATALOG_PROMPT = PromptTemplate(_CATALOG_PROMPT_TEXT)

# Generate answers with a local model served through Ollama.
# Alternatives tried earlier:
#   "llama3.1:8B"    - first model tested
#   "deepseek-r1:8B" - this model shows its reasoning
Settings.llm = Ollama(
    model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000,
)

def main():
    """Run an interactive query loop against the persisted vector store.

    Sets the embedding model, loads the index persisted under ``./storage``
    and builds a query engine that uses ``CATALOG_PROMPT`` as its QA
    template. It then repeatedly reads a question from stdin, prints the
    response text, and lists the file name, file path and score of every
    source node.

    The loop ends when the user types ``exit`` or ``quit``, or when stdin is
    closed or interrupted (Ctrl-D / Ctrl-C) at the prompt. Blank input is
    ignored rather than sent to the query engine.
    """
    # Load embedding model (same as used for vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(
        similarity_top_k=10,                      # pull wide (tune to taste)
        #response_mode="compact",                  # concise synthesis
        text_qa_template=CATALOG_PROMPT,         # <- custom prompt
        # node_postprocessors=[
        #     SimilarityPostprocessor(similarity_cutoff=0.75)  # keep strong hits; makes result count flexible
        # ],
    )

    # Query
    while True:
        try:
            q = input("\nEnter your question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave the loop quietly
            # instead of ending the session with a traceback.
            print()
            break

        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Nothing to search for; don't spend a retrieval + LLM call on it.
            continue

        print()
        response = query_engine.query(q)

        # Return the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            # Use the metadata on the scored node if present and non-empty,
            # otherwise fall back to the wrapped node's metadata.
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(
                meta.get("file_name"),
                "---",
                meta.get("file_path"),
                getattr(sn, "score", None),
            )


# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()