Initial commit: RAG pipeline for semantic search over personal journal archive

Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
This commit is contained in:
Eric 2026-02-20 06:02:28 -05:00
commit e9fc99ddc6
43 changed files with 7349 additions and 0 deletions

51
archived/build.py Normal file
View file

@ -0,0 +1,51 @@
# build.py
#
# Import documents from data, generate embedded vector store
# and save to disk in directory ./storage
#
# August 2025
# E. M. Furst
from llama_index.core import (
SimpleDirectoryReader,
VectorStoreIndex,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
def main():
    """Build a vector index from ./data and persist it to ./storage.

    Embeds with BAAI/bge-large-en-v1.5 and chunks documents with a
    sentence-aware splitter (256-token chunks, 25-token overlap).
    """
    # Register the embedding model globally so the index build uses it.
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

    # Sentence-aware chunking; double newlines mark paragraph boundaries.
    # see https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/sentence_splitter/#llama_index.core.node_parser.SentenceSplitter
    splitter = SentenceSplitter(
        chunk_size=256,
        chunk_overlap=25,
        paragraph_separator="\n\n",
    )
    Settings.text_splitter = splitter

    # Read every file under ./data and embed it into a vector index.
    docs = SimpleDirectoryReader("./data").load_data()
    index = VectorStoreIndex.from_documents(
        docs,
        transformations=[splitter],
        show_progress=True,
    )

    # Persist both vector store and index metadata for later querying.
    index.storage_context.persist(persist_dir="./storage")
    print("Index built and saved to ./storage")


if __name__ == "__main__":
    main()

68
archived/build_exp.py Normal file
View file

@ -0,0 +1,68 @@
# build_exp.py
#
# Import document from data, generate embedded vector store
# and save to disk
#
# Experiment to include text chunking with a textsplitter
#
# August 2025
# E. M. Furst
from llama_index.core import (
SimpleDirectoryReader,
VectorStoreIndex,
Settings,
)
from pathlib import Path
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
def main():
    """Experimental index build.

    Same pipeline as build.py, but persists to ./storage_exp and passes the
    splitter only via ``transformations`` (Settings.text_splitter is
    deliberately left unset -- see the note below).
    """
    # Choose your embedding model
    #embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    # embedding is slower with BAAI/bge-large-en-v1.5
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

    # Configure global settings for LlamaIndex
    Settings.embed_model = embed_model

    # Load documents (capabilities?)
    documents = SimpleDirectoryReader(
        "./data",
        # # p is a string path
        # file_metadata=lambda p: {
        #     "filename": Path(p).name,  # just the file name
        #     "filepath": str(Path(p).resolve()),  # absolute path (handy for tracing)
        # },
    ).load_data()

    # Create the custom textsplitter
    # Set chunk size and overlap (256 tokens, 25 tokens overlap)
    # see https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/sentence_splitter/#llama_index.core.node_parser.SentenceSplitter
    text_splitter = SentenceSplitter(
        chunk_size=256,
        chunk_overlap=25,
        paragraph_separator="\n\n",  # use double newlines to separate paragraphs
    )
    # b/c passing text_splitter in the index build, this may cause problems
    # test with it commented out...
    # Settings.text_splitter = text_splitter

    # Build the index
    index = VectorStoreIndex.from_documents(
        documents, transformations=[text_splitter],
        show_progress=True,
    )

    # Persist both vector store and index metadata
    index.storage_context.persist(persist_dir="./storage_exp")
    # storage_context = StorageContext.from_defaults(vector_store=index.vector_store)
    # storage_context.persist(persist_dir="./storage")
    print("Index built and saved to ./storage_exp")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,164 @@
# Better HyDE debugging with targeted tests
#
# Stand-alone diagnostic script: probes why HyDEQueryTransform may return
# the original query unchanged.  Every numbered section talks to a live
# local Ollama server, so the script performs network I/O at import time
# and the sections must run in order.
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core import PromptTemplate
from llama_index.core import Settings
from llama_index.core.base.base_query_engine import BaseQueryEngine  # unused here; kept from earlier iterations
from llama_index.llms.ollama import Ollama

# Ollama model tag used for every probe below.
llm="llama3.1:8B"

# Use a local model to generate
Settings.llm = Ollama(
    model=llm,  # First model tested
    request_timeout=360.0,
    context_window=8000,
    temperature=0.7,
)

# Test queries that should produce very different hypothetical documents
test_queries = [
    "What is the capital of France?",
    "How do you make chocolate chip cookies?",
    "Explain quantum physics",
    "Write a love letter",
    "Describe symptoms of the common cold"
]

print("=== DEBUGGING HYDE STEP BY STEP ===\n")

# 1. Test the LLM with HyDE-style prompts directly
print("1. Testing LLM directly with HyDE-style prompts:")
print("-" * 50)
for query in test_queries[:2]:  # Just test 2 to keep output manageable
    direct_prompt = f"""Generate a hypothetical document that would contain the answer to this query.
Query: {query}
Hypothetical document:"""
    response = Settings.llm.complete(direct_prompt)
    print(f"Query: {query}")
    print(f"Direct LLM Response: {response.text[:100]}...")
    print()

# 2. Check HyDE internals - let's see what's actually happening
print("\n2. Examining HyDE internal behavior:")
print("-" * 50)

# Create a custom HyDE that shows us everything
class VerboseHyDETransform(HyDEQueryTransform):
    # NOTE(review): these overrides assume HyDEQueryTransform dispatches
    # through _get_prompts / _run_component -- verify the hook names against
    # the installed llama_index version; if they differ the overrides are
    # silently ignored.
    def _get_prompts(self):
        """Show what prompts are being used"""
        prompts = super()._get_prompts()
        print(f"HyDE prompts: {prompts}")
        return prompts

    def _run_component(self, **kwargs):
        """Show what's being passed to the LLM"""
        print(f"HyDE _run_component kwargs: {kwargs}")
        result = super()._run_component(**kwargs)
        print(f"HyDE _run_component result: {result}")
        return result

# Test with verbose HyDE
verbose_hyde = VerboseHyDETransform(llm=Settings.llm)
test_result = verbose_hyde.run("What is machine learning?")
print(f"Final verbose result: {test_result}")

# 3. Try the most basic possible test
print("\n3. Most basic HyDE test:")
print("-" * 50)
basic_hyde = HyDEQueryTransform(llm=Settings.llm)
basic_result = basic_hyde.run("Paris")
print(f"Input: 'Paris'")
print(f"Output: '{basic_result}'")
print(f"Same as input? {basic_result.strip() == 'Paris'}")

# 4. Check if it's a version issue - try alternative approach
print("\n4. Alternative HyDE approach:")
print("-" * 50)
try:
    # Some versions might need different initialization
    from llama_index.core.query_engine import TransformQueryEngine
    from llama_index.core.indices.query.query_transform import HyDEQueryTransform

    # Try with explicit prompt template
    hyde_prompt_template = PromptTemplate(
        "Please write a passage to answer the question\n"
        "Try to include as many key details as possible\n"
        "\n"
        "\n"
        "Passage:{query_str}\n"
        "\n"
        "\n"
        "Passage:"
    )
    alt_hyde = HyDEQueryTransform(
        llm=Settings.llm,
        hyde_prompt=hyde_prompt_template
    )
    alt_result = alt_hyde.run("What causes rain?")
    print(f"Alternative approach result: {alt_result}")
except Exception as e:
    print(f"Alternative approach failed: {e}")

# 5. Check what happens with different query formats
print("\n5. Testing different input formats:")
print("-" * 50)
from llama_index.core.schema import QueryBundle

# Test with QueryBundle vs string
hyde_test = HyDEQueryTransform(llm=Settings.llm)
string_result = hyde_test.run("test query")
print(f"String input result: '{string_result}'")
query_bundle = QueryBundle(query_str="test query")
bundle_result = hyde_test.run(query_bundle)
print(f"QueryBundle input result: '{bundle_result}'")

# 6. Version and import check
print("\n6. Environment check:")
print("-" * 50)
import llama_index
print(f"LlamaIndex version: {llama_index.__version__}")
# Check what LLM you're actually using
print(f"LLM type: {type(Settings.llm)}")
print(f"LLM model name: {getattr(Settings.llm, 'model', 'Unknown')}")

# 7. Try the nuclear option - completely manual implementation
print("\n7. Manual HyDE implementation:")
print("-" * 50)
def manual_hyde(query: str, llm):
    """Completely manual HyDE: ask *llm* to draft a hypothetical document
    containing the answer, and return the raw generated text."""
    hyde_prompt = f"""You are an expert writer. Generate a realistic document excerpt that would contain the answer to this question.
Question: {query}
Document excerpt:"""
    completion = llm.complete(hyde_prompt)
    return completion.text
# Run the manual implementation against the live model; if this produces a
# real passage while HyDEQueryTransform does not, the problem is in the
# transform wiring, not in the LLM.
manual_result = manual_hyde("What is photosynthesis?", Settings.llm)
print(f"Manual HyDE result: {manual_result[:150]}...")

# 8. Final diagnostic
print("\n8. Final diagnostic questions:")
print("-" * 50)
print("If all the above show the LLM generating proper responses but HyDE still returns original:")
print("- What LLM are you using? (OpenAI, Anthropic, local model, etc.)")
print("- What's your LlamaIndex version?")
print("- Are there any error messages in the logs?")
print("- Does the LLM have any special configuration or wrappers?")

BIN
archived/output.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 785 KiB

110
archived/query.py Normal file
View file

@ -0,0 +1,110 @@
# query_topk_prompt.py
# Run a query on a vector store
#
# E. M. Furst August 2025
from llama_index.core import (
load_index_from_storage,
StorageContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
import os

#
# Globals
#
# Silence HuggingFace tokenizer fork warnings in the interactive loop.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Embedding model used in vector store (this should match the one in build.py or equivalent)
embed_model = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5")

# LLM model to use in query transform and generation
llm = "command-r7b"

#
# Custom prompt for the query engine
#
# FIX: the em dash after "relevant" was lost in an encoding round-trip
# ("relevantprioritize"); restored below so the instruction reads correctly.
PROMPT = PromptTemplate(
    """You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant — prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> -
<rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """Interactive query loop over the persisted ./storage_exp index.

    Configures the global LLM and embedding model, builds the query engine
    with the custom PROMPT, then answers questions until the user types
    'exit' or 'quit'.
    """
    # Generation model served by the local Ollama instance.
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000,
    )
    # Must be the same embedding model the store was built with.
    Settings.embed_model = embed_model

    # Rehydrate the persisted vector store + index metadata.
    ctx = StorageContext.from_defaults(persist_dir="./storage_exp")
    idx = load_index_from_storage(ctx)

    # Wide top-k retrieval, synthesized through the custom prompt.
    engine = idx.as_query_engine(
        similarity_top_k=15,
        text_qa_template=PROMPT,
    )

    while True:
        question = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if question.lower() in ("exit", "quit"):
            break
        print()
        # Similarity search + prompt-based synthesis in one call.
        result = engine.query(question)
        print(result.response)
        print("\nSource documents:")
        for hit in result.source_nodes:
            info = getattr(hit, "metadata", None) or hit.node.metadata
            print(f"{info.get('file_name')} {info.get('file_path')} {getattr(hit, 'score', None)}")


if __name__ == "__main__":
    main()

90
archived/query_catalog.py Normal file
View file

@ -0,0 +1,90 @@
# query.py
# Run a query on a vector store
# This version implements a CATALOG prompt
#
# E.M.F. July 2025
# August 2025 - updated for 2nd search
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.prompts import PromptTemplate
import logging
logging.basicConfig(level=logging.DEBUG)
# Catalog-style prompt: ask for a file listing with rationales instead of
# an essay.
# FIX: punctuation lost to an encoding round-trip is restored below:
# "You're", "1–2 sentence", and the dash after **filename**.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You're given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1–2 sentence rationale.
Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** — brief rationale. If available in the snippet, include date or section hints.
CONTEXT:
{context_str}
QUERY: {query_str}
Now produce the summary line and the bulleted list of matching files."""
)

# Use a local model to generate (set at module level, before main() runs)
Settings.llm = Ollama(
    # model="llama3.1:8B",      # First model tested
    # model="deepseek-r1:8B",   # This model shows its reasoning
    model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000
)
def main():
    """Interactive catalog-style query loop over the persisted ./storage index."""
    # Load embedding model (same as used for vector store)
    # NOTE(review): build.py in this archive embeds ./storage with
    # BAAI/bge-large-en-v1.5; querying it with all-mpnet-base-v2 would give
    # meaningless similarities -- confirm which model actually built ./storage.
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(
        similarity_top_k=10,  # pull wide (tune to taste)
        #response_mode="compact",  # concise synthesis
        text_qa_template=CATALOG_PROMPT,  # <- custom prompt
        # node_postprocessors=[
        #     SimilarityPostprocessor(similarity_cutoff=0.75)  # keep strong hits; makes result count flexible
        # ],
    )

    # Query loop: similarity search + prompt-based synthesis per question.
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        response = query_engine.query(q)
        # Return the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            # Score-bearing wrappers expose .metadata directly; fall back to
            # the wrapped node's metadata otherwise.
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(sn, "score", None))


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""
query_topk_prompt_engine.py
Query a vector store with a custom prompt for research assistance.
Uses BAAI/bge-large-en-v1.5 embeddings and Ollama for generation.
E.M.F. January 2026
Using Claude Sonnet 4.5 to suggest changes
"""
import argparse
import os
import sys
from pathlib import Path
from llama_index.core import (
Settings,
StorageContext,
load_index_from_storage,
)
from llama_index.core.prompts import PromptTemplate
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
# Suppress tokenizer parallelism warnings
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Configuration defaults
DEFAULT_LLM = "command-r7b"
DEFAULT_EMBED_MODEL = "BAAI/bge-large-en-v1.5"
DEFAULT_STORAGE_DIR = "./storage_exp"
DEFAULT_TOP_K = 15
DEFAULT_SIMILARITY_CUTOFF = 0.7 # Set to None to disable
def get_prompt_template(max_files: int = 10) -> PromptTemplate:
    """Return the custom research-assistant prompt template.

    Args:
        max_files: Maximum number of matching files the model is asked to list.

    Returns:
        PromptTemplate with ``{context_str}`` / ``{query_str}`` placeholders.
    """
    # The f-string substitutes max_files now; the doubled braces survive as
    # template placeholders.
    # FIX: the em dash after "relevant" was lost in an encoding round-trip
    # ("relevantprioritize"); restored here.
    return PromptTemplate(
        f"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant — prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to {max_files}).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
List up to {max_files} matching files. Format each as:
<filename> - <rationale tied to content. Include date or section hints if available.>
CONTEXT:
{{context_str}}
QUERY:
{{query_str}}
Now provide the theme and list of matching files."""
    )
def load_models(
    llm_name: str = DEFAULT_LLM,
    embed_model_name: str = DEFAULT_EMBED_MODEL,
    cache_folder: str = "./models",
    request_timeout: float = 360.0,
    context_window: int = 8000,
) -> None:
    """Initialize and configure the LLM and embedding models.

    Mutates the global llama_index ``Settings`` as a side effect; call once
    before building a query engine.

    Args:
        llm_name: Ollama model tag used for generation.
        embed_model_name: HuggingFace embedding model; must match the model
            that built the vector store being queried.
        cache_folder: Local directory holding the embedding model weights.
        request_timeout: Seconds to wait on each Ollama request.
        context_window: Token context window passed to Ollama.
    """
    Settings.llm = Ollama(
        model=llm_name,
        request_timeout=request_timeout,
        context_window=context_window,
    )
    # local_files_only: never hit the network -- the embedding model must
    # already be downloaded into cache_folder, otherwise this raises.
    Settings.embed_model = HuggingFaceEmbedding(
        cache_folder=cache_folder,
        model_name=embed_model_name,
        local_files_only=True,
    )
def load_query_engine(
    storage_dir: str = DEFAULT_STORAGE_DIR,
    top_k: int = DEFAULT_TOP_K,
    similarity_cutoff: float | None = DEFAULT_SIMILARITY_CUTOFF,
    max_files: int = 10,
):
    """Load the persisted vector store and wrap it in a query engine.

    Raises:
        FileNotFoundError: If *storage_dir* does not exist.
    """
    store_path = Path(storage_dir)
    if not store_path.exists():
        raise FileNotFoundError(f"Storage directory not found: {storage_dir}")

    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=str(store_path))
    )

    # Optionally drop weak hits before synthesis.
    filters = (
        [SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)]
        if similarity_cutoff is not None
        else []
    )

    return index.as_query_engine(
        similarity_top_k=top_k,
        text_qa_template=get_prompt_template(max_files),
        node_postprocessors=filters or None,
    )
def get_node_metadata(node) -> dict:
    """Safely extract metadata from a source node.

    Handles both wrapper objects that carry ``metadata`` directly and
    NodeWithScore-style objects that nest it under ``node.metadata``.
    Returns an empty dict when neither is present.
    """
    direct = getattr(node, "metadata", None)
    if direct:
        return direct
    inner = getattr(node, "node", None)
    if inner is not None and hasattr(inner, "metadata"):
        return inner.metadata
    return {}
def print_results(response) -> None:
    """Pretty-print the LLM response followed by its ranked source documents."""
    divider = "=" * 60

    print("\n" + divider)
    print("RESPONSE")
    print(divider + "\n")
    print(response.response)

    print("\n" + divider)
    print("SOURCE DOCUMENTS")
    print(divider + "\n")
    for rank, src in enumerate(response.source_nodes, 1):
        meta = get_node_metadata(src)
        score = getattr(src, "score", None)
        # Scores may be absent on some node types.
        shown = "N/A" if score is None else f"{score:.3f}"
        print(f"{rank:2}. [{shown}] {meta.get('file_name', 'Unknown')}")
        print(f" Path: {meta.get('file_path', 'Unknown')}")
def parse_args():
    """Build the CLI parser and parse sys.argv."""
    epilog_text = """
Examples:
python query_topk_prompt_engine.py "What themes appear in the documents?"
python query_topk_prompt_engine.py --top-k 20 --llm llama3.1:8B "Find references to machine learning"
"""
    parser = argparse.ArgumentParser(
        description="Query a vector store with a custom research assistant prompt.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )

    # Positional: the query itself (joined into one string by main()).
    parser.add_argument("query", nargs="+", help="The query text")

    # Model / storage selection.
    parser.add_argument(
        "--llm",
        default=DEFAULT_LLM,
        help=f"Ollama model to use for generation (default: {DEFAULT_LLM})",
    )
    parser.add_argument(
        "--storage-dir",
        default=DEFAULT_STORAGE_DIR,
        help=f"Path to the vector store (default: {DEFAULT_STORAGE_DIR})",
    )

    # Retrieval tuning.
    parser.add_argument(
        "--top-k",
        type=int,
        default=DEFAULT_TOP_K,
        help=f"Number of similar documents to retrieve (default: {DEFAULT_TOP_K})",
    )
    parser.add_argument(
        "--similarity-cutoff",
        type=float,
        default=DEFAULT_SIMILARITY_CUTOFF,
        help=f"Minimum similarity score (default: {DEFAULT_SIMILARITY_CUTOFF}, use 0 to disable)",
    )
    parser.add_argument(
        "--max-files",
        type=int,
        default=10,
        help="Maximum files to list in response (default: 10)",
    )

    return parser.parse_args()
def main():
    """CLI entry point: parse args, load models and index, run one query."""
    args = parse_args()

    # Handle similarity cutoff of 0 as "disabled"
    similarity_cutoff = args.similarity_cutoff if args.similarity_cutoff > 0 else None

    try:
        print(f"Loading models (LLM: {args.llm})...")
        load_models(llm_name=args.llm)

        print(f"Loading index from {args.storage_dir}...")
        query_engine = load_query_engine(
            storage_dir=args.storage_dir,
            top_k=args.top_k,
            similarity_cutoff=similarity_cutoff,
            max_files=args.max_files,
        )

        # Positional args are joined into a single query string.
        query_text = " ".join(args.query)
        print(f"Querying: {query_text[:100]}{'...' if len(query_text) > 100 else ''}")
        response = query_engine.query(query_text)
        print_results(response)
    except FileNotFoundError as e:
        # Missing storage directory: exit cleanly with a short message.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Anything else: report, then re-raise for the full traceback.
        print(f"Error during query: {e}", file=sys.stderr)
        raise


if __name__ == "__main__":
    main()

106
archived/query_exp.py Normal file
View file

@ -0,0 +1,106 @@
# query_topk.py
# Run a query on a vector store
#
# This version implements a prompt and uses the build_exp.py vector store
# It is based on query_topk.py
# It uses 10 top-k results and a custom prompt
# The next version after this is query_rewrite.py
# build_exp.py modifies the chunk size and overlap from the original build.py
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
# LLM model to use in query transform and generation
llm = "llama3.1:8B"
# Other models tried:
# llm="deepseek-r1:8B"
# llm="gemma3:1b"

# Custom prompt for the query engine.
# FIX: punctuation lost in an encoding round-trip is restored below --
# "user's", the em dash after "relevant", and the dash after **<filename>**.
PROMPT = PromptTemplate(
    """You are an expert research assistant. You are given top-ranked journal excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant — prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context.
2. **Matching Files**
Make a bullet list of 10. The format for each should be:
**<filename>** — <rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """Interactive custom-prompt query loop over the ./storage_exp index."""
    # Use a local model to generate
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Load embedding model -- it MUST match the model that built the store.
    # BUG FIX: ./storage_exp is produced by build_exp.py with
    # BAAI/bge-large-en-v1.5 (1024-dim); the previous all-mpnet-base-v2
    # (768-dim) embeddings cannot be meaningfully compared against it.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt
    query_engine = index.as_query_engine(
        similarity_top_k=10,  # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    # Query loop: similarity search + prompt-based synthesis per question.
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        response = query_engine.query(q)
        # Show the synthesized answer, then the retrieved source documents.
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()

106
archived/query_multitool.py Normal file
View file

@ -0,0 +1,106 @@
"""
This is output generated by ChatG to implement a new regex + vector search engine
"""
from __future__ import annotations
from typing import List, Iterable
import json, re
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import NodeWithScore, QueryBundle
from llama_index.core.retrievers import BaseRetriever, EnsembleRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import Document
# 0) Configure your LLM + embeddings up front
# Example: Settings.llm = <your Command-R wrapper> ; Settings.embed_model = <your embeddings>
# (You can also pass an llm explicitly into the retriever if you prefer.)
# Settings.llm.complete("hello") should work in v0.10+
# 1) Prepare nodes once (so regex + vector share the same chunks)
def build_nodes(docs: List[Document], chunk_size: int = 1024, overlap: int = 100):
    """Chunk *docs* into sentence-aware nodes shared by both retrievers."""
    parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
    return parser.get_nodes_from_documents(docs)
# 2) LLM-guided regex retriever
class RegexRetriever(BaseRetriever):
    """LLM-guided keyword/regex retriever.

    Asks the LLM to extract up to six distinctive terms from the query,
    compiles each into a word-bounded regex, and scores nodes by the number
    of distinct terms they contain.
    """

    def __init__(self, nodes: Iterable, llm=None, top_k: int = 5, flags=re.IGNORECASE):
        super().__init__()
        self._nodes = list(nodes)
        self._llm = llm or Settings.llm
        self._top_k = top_k
        self._flags = flags

    def _extract_terms(self, query: str) -> List[str]:
        """Ask the LLM for up to ~6 distinctive keywords/short phrases. Return a list of strings."""
        prompt = f"""
You extract search terms for a boolean/regex search.
Query: {query}
Rules:
- Return ONLY a JSON array of strings.
- Use up to 6 concise keywords/short phrases.
- Keep phrases short (<= 3 words).
- Avoid stopwords, punctuation, and generic terms.
- No explanations, no extra text.
"""
        raw = self._llm.complete(prompt).text.strip()
        try:
            terms = json.loads(raw)
            # basic sanitize: keep only non-empty strings
            terms = [t for t in terms if isinstance(t, str) and t.strip()]
        except Exception:
            # simple fall-back if JSON parse fails: longest words from the query
            terms = [w for w in re.findall(r"\w+", query) if len(w) > 2][:6]
        return terms[:6]

    def _compile_patterns(self, terms: List[str]) -> List[re.Pattern]:
        """Compile each term into a word-bounded, whitespace-tolerant pattern.

        BUG FIX: the previous version called ``re.escape(t)`` and then replaced
        the escaped-space sequence ``"\\ "`` with ``"\\s+"``.  Since Python 3.7,
        ``re.escape`` no longer escapes spaces, so that replacement never
        matched and multi-word phrases could only match a single literal
        space.  Escaping each word separately and joining with ``\\s+`` is
        correct on all versions.
        """
        pats = []
        for t in terms:
            body = r"\s+".join(re.escape(w) for w in t.split())
            pats.append(re.compile(rf"\b{body}\b", self._flags))
        return pats

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        terms = self._extract_terms(query_bundle.query_str)
        patterns = self._compile_patterns(terms)
        scored: List[tuple] = []
        for n in self._nodes:
            txt = n.get_content(metadata_mode="all")
            # score = number of distinct terms that appear at least once
            hits = sum(1 for p in patterns if p.search(txt))
            if hits:
                scored.append((n, float(hits)))
        scored.sort(key=lambda x: x[1], reverse=True)
        return [NodeWithScore(node=n, score=s) for n, s in scored[: self._top_k]]
# 3) Wire it all together
def build_query_engine(docs: List[Document], k_vec=5, k_regex=5, weights=(0.7, 0.3)):
    """Build a hybrid (vector + LLM-guided regex) query engine over *docs*.

    NOTE(review): ``EnsembleRetriever`` is imported above from
    ``llama_index.core.retrievers``, but that name is a LangChain API;
    llama_index exposes ``QueryFusionRetriever`` for rank-fusion of multiple
    retrievers -- confirm the import actually resolves before relying on
    this ChatGPT-generated module.
    """
    nodes = build_nodes(docs)
    # Vector index over the SAME nodes
    vindex = VectorStoreIndex(nodes)
    vector_ret = vindex.as_retriever(similarity_top_k=k_vec)
    regex_ret = RegexRetriever(nodes, top_k=k_regex)
    ensemble = EnsembleRetriever(
        retrievers=[vector_ret, regex_ret],
        weights=list(weights),  # tune this: more recall from regex? bump weight on regex
        # uses Reciprocal Rank Fusion by default
    )
    return RetrieverQueryEngine(retriever=ensemble)
# 4) Use it
# docs = SimpleDirectoryReader("data").load_data()
# qe = build_query_engine(docs)
# print(qe.query("Find entries with strong feelings of depression."))

View file

@ -0,0 +1,126 @@
# query_rewrite_hyde.py
# Run a query on a vector store
#
# Latest experiment to include query rewriting using HyDE (Hypothetical Document Embeddings)
# The goal is to reduce the semantic gap between the query and the indexed documents
# This version implements a prompt and uses the build_exp.py vector store
# Based on query_exp.py
#
# E.M.F. July 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine.transform_query_engine import TransformQueryEngine
import os
# Globals
#
# FIX: silence HuggingFace tokenizer fork warnings *before* the embedding
# model (and its tokenizer) is constructed; previously this was set after.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
embed_model = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5")

# LLM model to use in query transform and generation
llm = "llama3.1:8B"
# Other models tried:
# llm="deepseek-r1:8B"
# llm="gemma3:1b"

# Custom prompt for the query engine.
# FIX: dashes lost in an encoding round-trip are restored below.
PROMPT = PromptTemplate(
    """You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant — prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> — <rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """Interactive HyDE-augmented query loop over the ./storage_exp index."""
    # Use a local model to generate
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )
    # Load embedding model (same as used for vector store)
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt
    base_query_engine = index.as_query_engine(
        similarity_top_k=15,  # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    # HyDE ("Hypothetical Document Embeddings") drafts a hypothetical answer
    # document and embeds that instead of the raw query, narrowing the
    # semantic gap with the indexed text.  include_original=True keeps the
    # original query in the mix (empirically gives better similarity values).
    hyde_transform = HyDEQueryTransform(llm=Settings.llm, include_original=True)

    # FIX: the transform engine is loop-invariant; previously it was
    # re-created on every iteration of the input loop.  Build it once.
    query_engine = TransformQueryEngine(base_query_engine, query_transform=hyde_transform)

    # Query loop
    while True:
        q = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        # HyDE rewrite, similarity search, then prompt-based synthesis.
        response = query_engine.query(q)
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()

58
archived/query_topk.py Normal file
View file

@ -0,0 +1,58 @@
# query_topk.py
# Run a query on a vector store
#
# E.M.F. July 2025
# August 2025 - updated for 2nd search
# this version uses top-k similarity
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
# Configure the LLM used for answer synthesis. Generation runs locally
# through Ollama, so no API key or network access is required.
Settings.llm = Ollama(
    model="llama3.1:8B", # First model tested
    # model="deepseek-r1:8B", # This model shows its reasoning
    # model="gemma3:1b",
    request_timeout=360.0,  # generous timeout: local generation can be slow
    context_window=8000  # NOTE(review): assumes the model supports 8k context -- confirm
)
def main():
    """Interactive top-k retrieval loop over the persisted ./storage index."""
    # The embedding model must be the same one the index was built with.
    Settings.embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")

    # Rehydrate the persisted index and expose it as a 5-result query engine.
    ctx = StorageContext.from_defaults(persist_dir="./storage")
    engine = load_index_from_storage(ctx).as_query_engine(similarity_top_k=5)

    # Read questions until the user types 'exit' or 'quit'.
    while (question := input("\nEnter your question (or 'exit'): ").strip()).lower() not in ("exit", "quit"):
        print()
        answer = engine.query(question)
        # Show the synthesized answer followed by the supporting sources.
        print(answer.response)
        print("\nSource documents:")
        for hit in answer.source_nodes:
            info = getattr(hit, "metadata", None) or hit.node.metadata
            print(info.get("file_name"), "---", info.get("file_path"), getattr(hit, "score", None))


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,123 @@
# query_topk_prompt.py
# Run a query on a vector store
#
# This version from query_rewrite_hyde.py, but removing hyde and using a custom prompt
# This version implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
# Based on query_exp.py->query_topk.py->query_rewrite_hyde.py
# The results are as good as with HyDE.
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
import os
#
# Globals
#
# Silence the HuggingFace tokenizers fork warning when the process forks.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")
# LLM model to use in query transform and generation
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
# to the provided context
llm="command-r7b"
# Other models tried:
#llm="llama3.1:8B"
#llm="deepseek-r1:8B"
#llm="gemma3:1b"
#
# Custom prompt for the query engine
#
# {context_str} receives the retrieved snippets and {query_str} the user's
# question; both are filled in by LlamaIndex's text_qa_template mechanism.
# FIX: restored the separator lost in "relevantprioritize" (a dash was
# dropped by an encoding round-trip), which garbled the instruction text.
PROMPT = PromptTemplate(
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant - prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> -
<rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """Interactive search loop: custom-prompt RAG over the persisted index."""
    # Generation runs on a local Ollama model; the long timeout covers
    # slow first-token latency on CPU-only machines.
    Settings.llm = Ollama(model=llm, request_timeout=360.0, context_window=8000)
    # Embeddings must match the model the store was built with (build_exp.py).
    Settings.embed_model = embed_model

    # Rehydrate the persisted index and wrap it in a query engine that
    # retrieves a wide top-k and synthesizes with the custom PROMPT.
    store = StorageContext.from_defaults(persist_dir="./storage_exp")
    engine = load_index_from_storage(store).as_query_engine(
        similarity_top_k=15,
        text_qa_template=PROMPT,
    )

    # REPL: each question runs a similarity search plus prompt-based synthesis.
    while (q := input("\nEnter a search topic or question (or 'exit'): ").strip()).lower() not in ("exit", "quit"):
        print()
        answer = engine.query(q)
        print(answer.response)
        print("\nSource documents:")
        for hit in answer.source_nodes:
            info = getattr(hit, "metadata", None) or hit.node.metadata
            print(f"{info.get('file_name')} {info.get('file_path')} {getattr(hit, 'score', None)}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,134 @@
# query_topk_prompt_dw.py
# Run a query on a vector store
#
# This version from query_rewrite_hyde.py, but removing hyde and using a custom prompt
# This version implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
# Based on query_exp.py->query_topk.py->query_rewrite_hyde.py
# The results are as good as with HyDE.
# Modified for terminal output (132 columns)
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
import os
import sys
import textwrap
# Print wrapping for terminal output.
# Installed as sys.stdout so every print() is re-wrapped before reaching
# the real terminal. NOTE(review): the class name says 80 but the wrap
# width used is 131 (header mentions a 132-column terminal) -- confirm intent.
class Wrap80:
    # Wrap each line of the written text to 131 columns and forward it to
    # the real stdout. NOTE(review): a "\n" is appended per line regardless
    # of the caller's `end=` argument, so writes that carry no newline still
    # come out newline-terminated (callers below rely on this via end="").
    def write(self, text):
        for line in text.splitlines():
            sys.__stdout__.write(textwrap.fill(line, width=131) + "\n")
    # Delegate flushes straight to the real stdout.
    def flush(self):
        sys.__stdout__.flush()
sys.stdout = Wrap80()
#
# Globals
#
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")
# Silence the HuggingFace tokenizers fork warning.
# NOTE(review): set after the embedding model is constructed above -- the
# tokenizer may already be loaded by then; consider moving before it.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# LLM model to use in query transform and generation
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
# to the provided context
llm="command-r7b"
# Other models tried:
#llm="llama3.1:8B"
# llm="deepseek-r1:8B"
# llm="gemma3:1b"
# Custom prompt for the query engine.
# {context_str} receives the retrieved snippets and {query_str} the user's
# question; both are filled in by LlamaIndex's text_qa_template mechanism.
# FIX: restored the separator lost in "relevantprioritize" (a dash was
# dropped by an encoding round-trip), which garbled the instruction text.
PROMPT = PromptTemplate(
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant - prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> -
<rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """Interactive custom-prompt search with 131-column wrapped output."""
    # Local generation through Ollama; long timeout for slow hardware.
    Settings.llm = Ollama(model=llm, request_timeout=360.0, context_window=8000)
    # Must match the embedding model the store was built with (build_exp.py).
    Settings.embed_model = embed_model

    # Rehydrate the persisted index; retrieve wide (top 15) and synthesize
    # with the custom PROMPT template.
    store = StorageContext.from_defaults(persist_dir="./storage_exp")
    engine = load_index_from_storage(store).as_query_engine(
        similarity_top_k=15,
        text_qa_template=PROMPT,
    )

    # REPL until 'exit'/'quit'.
    while True:
        question = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if question.lower() in ("exit", "quit"):
            break
        print()
        answer = engine.query(question)
        print(answer.response)
        print("\nSource documents:")
        for hit in answer.source_nodes:
            info = getattr(hit, "metadata", None) or hit.node.metadata
            # end="" because the Wrap80 stdout shim appends the newline itself.
            print(f"{info.get('file_name')} {info.get('file_path')} {getattr(hit, 'score', None)}", end="")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,123 @@
# query_topk_prompt_engine.py
# Run a query on a vector store
#
# This version is query_topk_prompt.py but the query is passed though the command line.
#
# Implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
# Based on query_exp.py->query_topk.py
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate
import os
import sys
#
# Globals
#
# Silence the HuggingFace tokenizers fork warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
# local_files_only=True: never hit the network; the model must already be
# cached under ./models or construction will fail.
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5",local_files_only=True)
# LLM model to use in query transform and generation
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
# to the provided context
llm="command-r7b"
# Other models tried:
#llm="llama3.1:8B"
#llm="deepseek-r1:8B"
#llm="gemma3:1b"
#
# Custom prompt for the query engine
#
# {context_str} receives the retrieved snippets and {query_str} the user's
# question; both are filled in by LlamaIndex's text_qa_template mechanism.
# FIX: restored the separator lost in "relevantprioritize" (a dash was
# dropped by an encoding round-trip), which garbled the instruction text.
PROMPT = PromptTemplate(
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant - prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.
Format your answer in two parts:
1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.
2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> -
<rationale tied to content. Include date or section hints if available.>
CONTEXT:
{context_str}
QUERY:
{query_str}
Now provide the theme and list of matching files."""
)
#
# Main program routine
#
def main():
    """One-shot query: read the question from the command line, print the
    synthesized answer and the source documents that supported it."""
    # Use a local model to generate -- in this case using Ollama
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,  # generous timeout: local generation can be slow
        context_window=8000
    )
    # Load embedding model (same as used for vector store)
    Settings.embed_model = embed_model
    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)
    # Build regular query engine with custom prompt: retrieve wide (top 15)
    # and synthesize with the custom PROMPT template.
    query_engine = index.as_query_engine(
        similarity_top_k=15,      # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )
    # The query text is everything after the script name.
    if len(sys.argv) < 2:
        print("Usage: python query.py QUERY_TEXT")
        sys.exit(1)
    q = " ".join(sys.argv[1:])
    # Perform the similarity search and apply the prompt.
    response = query_engine.query(q)
    # Print the query response and source documents.
    print("\nResponse:\n")
    print(response.response)
    print("\nSource documents:")
    for node in response.source_nodes:
        meta = getattr(node, "metadata", None) or node.node.metadata
        # BUG FIX: a node without a score yields None, and formatting None
        # with ':.3f' raises TypeError; format only numeric scores.
        score = getattr(node, "score", None)
        score_str = f"{score:.3f}" if isinstance(score, (int, float)) else str(score)
        print(f"{meta.get('file_name')} {meta.get('file_path')} {score_str}")


if __name__ == "__main__":
    main()

60
archived/query_tree.py Normal file
View file

@ -0,0 +1,60 @@
# query_tree.py
#
# Run a query on a vector store
# This is to test summarization using a tree-summarize response mode
# It doesn't work very well, perhaps because of the structure of the data
#
# E.M.F. August 2025
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
# Configure the LLM used for summarization. Generation runs locally
# through Ollama, so no API key or network access is required.
Settings.llm = Ollama(
    model="llama3.1:8B", # First model tested
    # model="deepseek-r1:8B", # This model shows its reasoning
    # model="gemma3:1b",
    request_timeout=360.0,  # generous timeout: local generation can be slow
    context_window=8000  # NOTE(review): assumes the model supports 8k context -- confirm
)
def main():
    """Interactive loop that tests tree_summarize response synthesis."""
    # Load embedding model (same as used for vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model
    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
    # tree_summarize synthesizes the answer bottom-up over retrieved nodes
    query_engine = index.as_query_engine(response_mode="tree_summarize")
    # Query loop: 'exit' or 'quit' ends the session
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        # BUG FIX: previously queried the literal string
        # "<summarization_query>" instead of the user's input, so the
        # question typed at the prompt was silently ignored.
        response = query_engine.query(q)
        # Print the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()

27
archived/vs_metrics.py Normal file
View file

@ -0,0 +1,27 @@
# vs_metrics.py
# Quantify vector store properties and performance
#
# E.M.F. August 2025
# Read in vector store
# What are properties of the vector store?
# - number of vectors
# - distribution of distances
# - clustering?
from llama_index.core import (
StorageContext,
load_index_from_storage,
ServiceContext,
Settings,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Load embedding model (same as used for vector store)
# NOTE(review): must match the model the ./storage index was built with,
# or similarity numbers computed against it will be meaningless.
embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
Settings.embed_model = embed_model
# Load persisted vector store + metadata for inspection
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)