Initial commit: RAG pipeline for semantic search over personal journal archive
Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval, incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
This commit is contained in:
commit
e9fc99ddc6
43 changed files with 7349 additions and 0 deletions
51
archived/build.py
Normal file
51
archived/build.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# build.py
|
||||
#
|
||||
# Import documents from data, generate embedded vector store
|
||||
# and save to disk in directory ./storage
|
||||
#
|
||||
# August 2025
|
||||
# E. M. Furst
|
||||
|
||||
from llama_index.core import (
|
||||
SimpleDirectoryReader,
|
||||
VectorStoreIndex,
|
||||
Settings,
|
||||
)
|
||||
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
|
||||
def main():
    """Build a vector index from ./data and persist it to ./storage."""
    # Embedding model used to encode document chunks.
    embedder = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

    # Register the embedder globally for LlamaIndex.
    Settings.embed_model = embedder

    # Read every document found under ./data.
    docs = SimpleDirectoryReader("./data").load_data()

    # Sentence-aware chunking: 256-token chunks with 25-token overlap.
    # See https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/sentence_splitter/#llama_index.core.node_parser.SentenceSplitter
    splitter = SentenceSplitter(
        chunk_size=256,
        chunk_overlap=25,
        paragraph_separator="\n\n",  # double newline marks a paragraph break
    )
    Settings.text_splitter = splitter

    # Embed the chunks and assemble the index.
    index = VectorStoreIndex.from_documents(
        docs,
        transformations=[splitter],
        show_progress=True,
    )

    # Persist both vector store and index metadata to disk.
    index.storage_context.persist(persist_dir="./storage")

    print("Index built and saved to ./storage")


if __name__ == "__main__":
    main()
|
||||
68
archived/build_exp.py
Normal file
68
archived/build_exp.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# build_exp.py
|
||||
#
|
||||
# Import document from data, generate embedded vector store
|
||||
# and save to disk
|
||||
#
|
||||
# Experiment to include text chunking with a textsplitter
|
||||
#
|
||||
# August 2025
|
||||
# E. M. Furst
|
||||
|
||||
from llama_index.core import (
|
||||
SimpleDirectoryReader,
|
||||
VectorStoreIndex,
|
||||
Settings,
|
||||
)
|
||||
|
||||
from pathlib import Path
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
|
||||
def main():
    """Build an experimental vector index from ./data and persist to ./storage_exp."""
    # NOTE(review): all-mpnet-base-v2 was tried first; bge-large embeds more
    # slowly but was kept.
    embedder = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    Settings.embed_model = embedder

    # Ingest everything under ./data. A per-file metadata hook
    # (filename/filepath via file_metadata=) was experimented with here
    # and is currently disabled.
    docs = SimpleDirectoryReader(
        "./data",
    ).load_data()

    # Sentence-aware chunking (256-token chunks, 25-token overlap).
    # See https://docs.llamaindex.ai/en/stable/api_reference/node_parsers/sentence_splitter/#llama_index.core.node_parser.SentenceSplitter
    splitter = SentenceSplitter(
        chunk_size=256,
        chunk_overlap=25,
        paragraph_separator="\n\n",  # double newline marks a paragraph break
    )
    # The splitter is passed via `transformations` below; also setting
    # Settings.text_splitter was suspected of causing conflicts, so it is
    # intentionally left unset here.

    index = VectorStoreIndex.from_documents(
        docs,
        transformations=[splitter],
        show_progress=True,
    )

    # Persist both vector store and index metadata together.
    index.storage_context.persist(persist_dir="./storage_exp")

    print("Index built and saved to ./storage_exp")


if __name__ == "__main__":
    main()
|
||||
|
||||
164
archived/claude_diagnostic.py
Normal file
164
archived/claude_diagnostic.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
# Better HyDE debugging with targeted tests
|
||||
|
||||
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
|
||||
from llama_index.core import PromptTemplate
|
||||
from llama_index.core import Settings
|
||||
from llama_index.core.base.base_query_engine import BaseQueryEngine
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
# Name of the local Ollama model exercised by every diagnostic below.
llm="llama3.1:8B"

# Use a local model to generate
Settings.llm = Ollama(
    model=llm,  # First model tested
    request_timeout=360.0,
    context_window=8000,
    temperature=0.7,
)


# Test queries that should produce very different hypothetical documents
test_queries = [
    "What is the capital of France?",
    "How do you make chocolate chip cookies?",
    "Explain quantum physics",
    "Write a love letter",
    "Describe symptoms of the common cold"
]

print("=== DEBUGGING HYDE STEP BY STEP ===\n")

# 1. Test the LLM with HyDE-style prompts directly.
# If these responses look reasonable, the LLM itself is fine and any
# failure lies in the HyDE transform wiring.
print("1. Testing LLM directly with HyDE-style prompts:")
print("-" * 50)

for query in test_queries[:2]:  # Just test 2 to keep output manageable
    direct_prompt = f"""Generate a hypothetical document that would contain the answer to this query.

Query: {query}

Hypothetical document:"""

    response = Settings.llm.complete(direct_prompt)
    print(f"Query: {query}")
    print(f"Direct LLM Response: {response.text[:100]}...")
    print()

# 2. Check HyDE internals - let's see what's actually happening
print("\n2. Examining HyDE internal behavior:")
print("-" * 50)

# Create a custom HyDE that shows us everything
|
||||
class VerboseHyDETransform(HyDEQueryTransform):
    """HyDE transform that prints its prompts and component I/O for debugging."""

    def _get_prompts(self):
        """Fetch the parent prompts, printing them before returning."""
        fetched = super()._get_prompts()
        print(f"HyDE prompts: {fetched}")
        return fetched

    def _run_component(self, **kwargs):
        """Run the parent component, printing its inputs and its output."""
        print(f"HyDE _run_component kwargs: {kwargs}")
        outcome = super()._run_component(**kwargs)
        print(f"HyDE _run_component result: {outcome}")
        return outcome
|
||||
|
||||
# Test with verbose HyDE (prints prompts and component I/O as it runs)
verbose_hyde = VerboseHyDETransform(llm=Settings.llm)
test_result = verbose_hyde.run("What is machine learning?")
print(f"Final verbose result: {test_result}")

# 3. Try the most basic possible test: if the output equals the input,
# the transform is silently passing the query through unchanged.
print("\n3. Most basic HyDE test:")
print("-" * 50)

basic_hyde = HyDEQueryTransform(llm=Settings.llm)
basic_result = basic_hyde.run("Paris")
print(f"Input: 'Paris'")
print(f"Output: '{basic_result}'")
print(f"Same as input? {basic_result.strip() == 'Paris'}")

# 4. Check if it's a version issue - try alternative approach
print("\n4. Alternative HyDE approach:")
print("-" * 50)

try:
    # Some versions might need different initialization
    from llama_index.core.query_engine import TransformQueryEngine
    from llama_index.core.indices.query.query_transform import HyDEQueryTransform

    # Try with explicit prompt template
    hyde_prompt_template = PromptTemplate(
        "Please write a passage to answer the question\n"
        "Try to include as many key details as possible\n"
        "\n"
        "\n"
        "Passage:{query_str}\n"
        "\n"
        "\n"
        "Passage:"
    )

    alt_hyde = HyDEQueryTransform(
        llm=Settings.llm,
        hyde_prompt=hyde_prompt_template
    )

    alt_result = alt_hyde.run("What causes rain?")
    print(f"Alternative approach result: {alt_result}")

except Exception as e:
    # Best-effort experiment: report the failure and keep diagnosing.
    print(f"Alternative approach failed: {e}")

# 5. Check what happens with different query formats
print("\n5. Testing different input formats:")
print("-" * 50)

from llama_index.core.schema import QueryBundle

# Test with QueryBundle vs string
hyde_test = HyDEQueryTransform(llm=Settings.llm)

string_result = hyde_test.run("test query")
print(f"String input result: '{string_result}'")

query_bundle = QueryBundle(query_str="test query")
bundle_result = hyde_test.run(query_bundle)
print(f"QueryBundle input result: '{bundle_result}'")

# 6. Version and import check
print("\n6. Environment check:")
print("-" * 50)
import llama_index
print(f"LlamaIndex version: {llama_index.__version__}")

# Check what LLM you're actually using
print(f"LLM type: {type(Settings.llm)}")
print(f"LLM model name: {getattr(Settings.llm, 'model', 'Unknown')}")

# 7. Try the nuclear option - completely manual implementation
print("\n7. Manual HyDE implementation:")
print("-" * 50)
|
||||
|
||||
def manual_hyde(query: str, llm):
    """Hand-rolled HyDE: ask *llm* for a hypothetical document answering *query*.

    Returns the raw generated text so it can be compared against the
    library transform's output.
    """
    hyde_request = f"""You are an expert writer. Generate a realistic document excerpt that would contain the answer to this question.

Question: {query}

Document excerpt:"""

    return llm.complete(hyde_request).text
|
||||
|
||||
# Run the manual implementation; if this produces a proper document while
# HyDEQueryTransform does not, the problem is in the transform, not the LLM.
manual_result = manual_hyde("What is photosynthesis?", Settings.llm)
print(f"Manual HyDE result: {manual_result[:150]}...")

# 8. Final diagnostic
print("\n8. Final diagnostic questions:")
print("-" * 50)
print("If all the above show the LLM generating proper responses but HyDE still returns original:")
print("- What LLM are you using? (OpenAI, Anthropic, local model, etc.)")
print("- What's your LlamaIndex version?")
print("- Are there any error messages in the logs?")
print("- Does the LLM have any special configuration or wrappers?")
|
||||
BIN
archived/output.png
Normal file
BIN
archived/output.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 785 KiB |
110
archived/query.py
Normal file
110
archived/query.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
# query_topk_prompt.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# E. M. Furst August 2025
|
||||
|
||||
from llama_index.core import (
|
||||
load_index_from_storage,
|
||||
StorageContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
import os
|
||||
|
||||
#
# Globals
#
# Silence fork-related warnings from the HuggingFace tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Embedding model used in vector store (this should match the one in build.py or equivalent)
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")

# LLM model to use in query transform and generation (an Ollama model name)
llm="command-r7b"

#
# Custom prompt for the query engine
#
# {context_str} (retrieved snippets) and {query_str} (the user's question)
# are filled in by the query engine at answer time.
PROMPT = PromptTemplate(
    """You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.

Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.

Format your answer in two parts:

1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.

2. **Matching Files**
Make a list of 10 matching files. The format for each should be:
<filename> -
<rationale tied to content. Include date or section hints if available.>

CONTEXT:
{context_str}

QUERY:
{query_str}

Now provide the theme and list of matching files."""
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """Interactive query loop over the persisted ./storage_exp index."""
    # Generation backend: a local model served by Ollama.
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Query-side embedder (must match the model the store was built with).
    Settings.embed_model = embed_model

    # Rehydrate the persisted vector store and its metadata.
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Query engine: wide top-k retrieval plus the custom research prompt.
    # (A SimilarityPostprocessor cutoff was tried here and left disabled.)
    query_engine = index.as_query_engine(
        similarity_top_k=15,
        text_qa_template=PROMPT,
    )

    # REPL: keep answering until the user types exit/quit.
    while True:
        q = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()

        # query() performs the similarity search, then applies the prompt.
        response = query_engine.query(q)

        # Show the generated answer, then the supporting chunks.
        print(response.response)

        print("\nSource documents:")
        for hit in response.source_nodes:
            info = getattr(hit, "metadata", None) or hit.node.metadata
            print(f"{info.get('file_name')} {info.get('file_path')} {getattr(hit, 'score', None)}")


if __name__ == "__main__":
    main()
|
||||
90
archived/query_catalog.py
Normal file
90
archived/query_catalog.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# query.py
|
||||
# Run a query on a vector store
|
||||
# This version implements a CATALOG prompt
|
||||
#
|
||||
# E.M.F. July 2025
|
||||
# August 2025 - updated for 2nd search
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.postprocessor import SimilarityPostprocessor
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
|
||||
import logging
# DEBUG-level logging is very chatty; useful while tuning retrieval.
logging.basicConfig(level=logging.DEBUG)


# Catalog-style prompt: asks for one theme line plus a bulleted file list
# rather than a synthesized essay. {context_str}/{query_str} are filled in
# by the query engine.
CATALOG_PROMPT = PromptTemplate(
    """You are a research assistant. You’re given journal snippets (CONTEXT) and a user query.
Your job is NOT to write an essay but to list the best-matching journal files with a 1–2 sentence rationale.

Rules:
- Use only the CONTEXT; do not invent content.
- Prefer precise references to passages over generalities.
- Output exactly:
1) A brief one-line summary of the overall theme you detect.
2) A bulleted list: **filename** — brief rationale. If available in the snippet, include date or section hints.

CONTEXT:
{context_str}

QUERY: {query_str}

Now produce the summary line and the bulleted list of matching files."""
)

# Use a local model to generate
Settings.llm = Ollama(
    # model="llama3.1:8B",  # First model tested
    # model="deepseek-r1:8B",  # This model shows its reasoning
    model="gemma3:1b",
    request_timeout=360.0,
    context_window=8000
)
|
||||
|
||||
def main():
    """Interactive catalog-style query loop over the persisted ./storage index."""
    # Load embedding model — it MUST match the model the store was built
    # with. build.py creates ./storage with BAAI/bge-large-en-v1.5; the
    # previous "all-mpnet-base-v2" here did not match it, so query vectors
    # lived in a different embedding space and retrieval was degraded.
    # NOTE(review): if ./storage was actually built with mpnet, revert this.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    # Wide top-k retrieval with the catalog prompt. A similarity-cutoff
    # postprocessor was tried here and intentionally left disabled.
    query_engine = index.as_query_engine(
        similarity_top_k=10,
        text_qa_template=CATALOG_PROMPT,
    )

    # Query loop: answer until the user types exit/quit.
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        response = query_engine.query(q)

        # Return the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for sn in response.source_nodes:
            meta = getattr(sn, "metadata", None) or sn.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(sn, "score", None))


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
223
archived/query_claude_sonnet.py
Normal file
223
archived/query_claude_sonnet.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
query_topk_prompt_engine.py
|
||||
|
||||
Query a vector store with a custom prompt for research assistance.
|
||||
Uses BAAI/bge-large-en-v1.5 embeddings and Ollama for generation.
|
||||
|
||||
E.M.F. January 2026
|
||||
Using Claude Sonnet 4.5 to suggest changes
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from llama_index.core import (
|
||||
Settings,
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
)
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.postprocessor import SimilarityPostprocessor
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
|
||||
# Suppress tokenizer parallelism warnings
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# Configuration defaults
|
||||
DEFAULT_LLM = "command-r7b"
|
||||
DEFAULT_EMBED_MODEL = "BAAI/bge-large-en-v1.5"
|
||||
DEFAULT_STORAGE_DIR = "./storage_exp"
|
||||
DEFAULT_TOP_K = 15
|
||||
DEFAULT_SIMILARITY_CUTOFF = 0.7 # Set to None to disable
|
||||
|
||||
|
||||
def get_prompt_template(max_files: int = 10) -> PromptTemplate:
    """Build the research-assistant QA prompt, listing at most *max_files* files.

    ``{{context_str}}`` / ``{{query_str}}`` are left as literal placeholders
    for the query engine to fill; only *max_files* is interpolated here.
    """
    template_text = f"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.

Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to {max_files}).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.

Format your answer in two parts:

1. **Summary Theme**
Summarize the dominant theme from the relevant context in a few sentences.

2. **Matching Files**
List up to {max_files} matching files. Format each as:
<filename> - <rationale tied to content. Include date or section hints if available.>

CONTEXT:
{{context_str}}

QUERY:
{{query_str}}

Now provide the theme and list of matching files."""
    return PromptTemplate(template_text)
|
||||
|
||||
|
||||
def load_models(
    llm_name: str = DEFAULT_LLM,
    embed_model_name: str = DEFAULT_EMBED_MODEL,
    cache_folder: str = "./models",
    request_timeout: float = 360.0,
    context_window: int = 8000,
):
    """Point LlamaIndex's global Settings at an Ollama LLM and a local embedder."""
    # Generation backend: a model served by a local Ollama daemon.
    generator = Ollama(
        model=llm_name,
        request_timeout=request_timeout,
        context_window=context_window,
    )
    Settings.llm = generator

    # Embedder: local_files_only avoids network fetches, so the model must
    # already be present under cache_folder.
    embedder = HuggingFaceEmbedding(
        cache_folder=cache_folder,
        model_name=embed_model_name,
        local_files_only=True,
    )
    Settings.embed_model = embedder
|
||||
|
||||
|
||||
def load_query_engine(
    storage_dir: str = DEFAULT_STORAGE_DIR,
    top_k: int = DEFAULT_TOP_K,
    similarity_cutoff: float | None = DEFAULT_SIMILARITY_CUTOFF,
    max_files: int = 10,
):
    """Rehydrate the persisted index and wrap it in a prompt-customized engine.

    Raises FileNotFoundError when *storage_dir* does not exist.
    """
    store = Path(storage_dir)
    if not store.exists():
        raise FileNotFoundError(f"Storage directory not found: {storage_dir}")

    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=str(store))
    )

    # Optional similarity floor; None disables post-filtering entirely.
    filters = (
        [SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)]
        if similarity_cutoff is not None
        else []
    )

    return index.as_query_engine(
        similarity_top_k=top_k,
        text_qa_template=get_prompt_template(max_files),
        node_postprocessors=filters or None,
    )
|
||||
|
||||
|
||||
def get_node_metadata(node) -> dict:
    """Return the metadata dict from *node*, tolerating wrapped and bare nodes.

    Prefers a truthy ``node.metadata``, then falls back to the wrapped
    ``node.node.metadata``; returns an empty dict when neither is present.
    """
    direct = getattr(node, "metadata", None)
    if direct:
        return direct
    wrapped = getattr(node, "node", None)
    if wrapped is not None and hasattr(wrapped, "metadata"):
        return wrapped.metadata
    return {}
|
||||
|
||||
|
||||
def print_results(response):
    """Pretty-print the LLM answer followed by the ranked source documents."""
    banner = "=" * 60

    print("\n" + banner)
    print("RESPONSE")
    print(banner + "\n")
    print(response.response)

    print("\n" + banner)
    print("SOURCE DOCUMENTS")
    print(banner + "\n")

    for rank, src in enumerate(response.source_nodes, 1):
        info = get_node_metadata(src)
        similarity = getattr(src, "score", None)
        shown_score = f"{similarity:.3f}" if similarity is not None else "N/A"
        print(f"{rank:2}. [{shown_score}] {info.get('file_name', 'Unknown')}")
        print(f"    Path: {info.get('file_path', 'Unknown')}")
|
||||
|
||||
|
||||
def parse_args():
    """Define and parse the command-line interface."""
    cli = argparse.ArgumentParser(
        description="Query a vector store with a custom research assistant prompt.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python query_topk_prompt_engine.py "What themes appear in the documents?"
  python query_topk_prompt_engine.py --top-k 20 --llm llama3.1:8B "Find references to machine learning"
""",
    )
    # Positional: one or more words joined into the query by main().
    cli.add_argument("query", nargs="+", help="The query text")
    cli.add_argument(
        "--llm",
        default=DEFAULT_LLM,
        help=f"Ollama model to use for generation (default: {DEFAULT_LLM})",
    )
    cli.add_argument(
        "--storage-dir",
        default=DEFAULT_STORAGE_DIR,
        help=f"Path to the vector store (default: {DEFAULT_STORAGE_DIR})",
    )
    cli.add_argument(
        "--top-k",
        type=int,
        default=DEFAULT_TOP_K,
        help=f"Number of similar documents to retrieve (default: {DEFAULT_TOP_K})",
    )
    cli.add_argument(
        "--similarity-cutoff",
        type=float,
        default=DEFAULT_SIMILARITY_CUTOFF,
        help=f"Minimum similarity score (default: {DEFAULT_SIMILARITY_CUTOFF}, use 0 to disable)",
    )
    cli.add_argument(
        "--max-files",
        type=int,
        default=10,
        help="Maximum files to list in response (default: 10)",
    )
    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: load models and index, run one query, print results."""
    args = parse_args()

    # A cutoff of 0 (or below) means "no similarity filtering".
    cutoff = args.similarity_cutoff if args.similarity_cutoff > 0 else None

    try:
        print(f"Loading models (LLM: {args.llm})...")
        load_models(llm_name=args.llm)

        print(f"Loading index from {args.storage_dir}...")
        engine = load_query_engine(
            storage_dir=args.storage_dir,
            top_k=args.top_k,
            similarity_cutoff=cutoff,
            max_files=args.max_files,
        )

        question = " ".join(args.query)
        print(f"Querying: {question[:100]}{'...' if len(question) > 100 else ''}")

        print_results(engine.query(question))

    except FileNotFoundError as e:
        # Missing storage directory: report cleanly and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Anything else: report, then re-raise so the traceback is visible.
        print(f"Error during query: {e}", file=sys.stderr)
        raise


if __name__ == "__main__":
    main()
|
||||
106
archived/query_exp.py
Normal file
106
archived/query_exp.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# query_topk.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# This version implements a prompt and uses the build_exp.py vector store
|
||||
# It is based on query_topk.py
|
||||
# It uses 10 top-k results and a custom prompt
|
||||
# The next version after this is query_rewrite.py
|
||||
# build_exp.py modifies the chunk size and overlap from the original build.py
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
|
||||
# LLM model to use in query transform and generation (an Ollama model name)
llm="llama3.1:8B"
# Other models tried:
# llm="deepseek-r1:8B"
# llm="gemma3:1b"


# Custom prompt for the query engine.
# {context_str} (retrieved snippets) and {query_str} (the user's question)
# are filled in by the query engine at answer time.
PROMPT = PromptTemplate(
    """You are an expert research assistant. You are given top-ranked journal excerpts (CONTEXT) and a user’s QUERY.

Instructions:
- Base your response *only* on the CONTEXT.
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
- Aim to reference *as many distinct* relevant files as possible (up to 10).
- Do not invent or generalize; refer to specific passages or facts only.
- If a passage only loosely matches, deprioritize it.

Format your answer in two parts:

1. **Summary Theme**
Summarize the dominant theme from the relevant context.

2. **Matching Files**
Make a bullet list of 10. The format for each should be:
**<filename>** — <rationale tied to content. Include date or section hints if available.>

CONTEXT:
{context_str}

QUERY:
{query_str}

Now provide the theme and list of matching files."""
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """Interactive query loop over the ./storage_exp index built by build_exp.py."""
    # Use a local model to generate (served by Ollama).
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Load embedding model — it MUST match the model the store was built
    # with. build_exp.py creates ./storage_exp with BAAI/bge-large-en-v1.5;
    # the previous "all-mpnet-base-v2" here did not match it, so query
    # vectors lived in a different embedding space and retrieval was
    # degraded. NOTE(review): revert if storage_exp was rebuilt with mpnet.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt; a similarity-cutoff
    # postprocessor was tried here and intentionally left disabled.
    query_engine = index.as_query_engine(
        similarity_top_k=10,
        text_qa_template=PROMPT,
    )

    # Query loop: answer until the user types exit/quit.
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()

        response = query_engine.query(q)

        # Return the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()
|
||||
106
archived/query_multitool.py
Normal file
106
archived/query_multitool.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""
|
||||
This is output generated by ChatG to implement a new regex + vector search engine
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import List, Iterable
|
||||
import json, re
|
||||
|
||||
from llama_index.core import VectorStoreIndex, Settings
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
from llama_index.core.schema import NodeWithScore, QueryBundle
|
||||
from llama_index.core.retrievers import BaseRetriever, EnsembleRetriever
|
||||
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||
from llama_index.core import Document
|
||||
|
||||
# 0) Configure your LLM + embeddings up front
|
||||
# Example: Settings.llm = <your Command-R wrapper> ; Settings.embed_model = <your embeddings>
|
||||
# (You can also pass an llm explicitly into the retriever if you prefer.)
|
||||
# Settings.llm.complete("hello") should work in v0.10+
|
||||
|
||||
# 1) Prepare nodes once (so regex + vector share the same chunks)
|
||||
def build_nodes(docs: List[Document], chunk_size: int = 1024, overlap: int = 100):
    """Chunk *docs* once so the regex and vector retrievers share identical nodes."""
    sentence_splitter = SentenceSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return sentence_splitter.get_nodes_from_documents(docs)
|
||||
|
||||
# 2) LLM-guided regex retriever
|
||||
class RegexRetriever(BaseRetriever):
    """Retriever that greps a shared node set using LLM-suggested search terms."""

    def __init__(self, nodes: Iterable, llm=None, top_k: int = 5, flags=re.IGNORECASE):
        super().__init__()
        self._top_k = top_k
        self._flags = flags
        # Materialize once so the node set can be scanned repeatedly.
        self._nodes = list(nodes)
        # Fall back to the globally configured LLM when none is supplied.
        self._llm = llm or Settings.llm
||||
|
||||
def _extract_terms(self, query: str) -> List[str]:
|
||||
"""Ask the LLM for up to ~6 distinctive keywords/short phrases. Return a list of strings."""
|
||||
prompt = f"""
|
||||
You extract search terms for a boolean/regex search.
|
||||
Query: {query}
|
||||
|
||||
Rules:
|
||||
- Return ONLY a JSON array of strings.
|
||||
- Use up to 6 concise keywords/short phrases.
|
||||
- Keep phrases short (<= 3 words).
|
||||
- Avoid stopwords, punctuation, and generic terms.
|
||||
- No explanations, no extra text.
|
||||
"""
|
||||
raw = self._llm.complete(prompt).text.strip()
|
||||
try:
|
||||
terms = json.loads(raw)
|
||||
# basic sanitize
|
||||
terms = [t for t in terms if isinstance(t, str) and t.strip()]
|
||||
except Exception:
|
||||
# simple fall-back if JSON parse fails
|
||||
terms = [w for w in re.findall(r"\w+", query) if len(w) > 2][:6]
|
||||
return terms[:6]
|
||||
|
||||
def _compile_patterns(self, terms: List[str]) -> List[re.Pattern]:
|
||||
pats = []
|
||||
for t in terms:
|
||||
# Escape user/LLM output, add word boundaries; allow whitespace inside short phrases
|
||||
escaped = re.escape(t)
|
||||
# turn '\ ' (escaped space) back into '\s+' to match any whitespace in phrases
|
||||
escaped = escaped.replace(r"\ ", r"\s+")
|
||||
pats.append(re.compile(rf"\b{escaped}\b", self._flags))
|
||||
return pats
|
||||
|
||||
def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
|
||||
terms = self._extract_terms(query_bundle.query_str)
|
||||
patterns = self._compile_patterns(terms)
|
||||
|
||||
scored: List[tuple] = []
|
||||
for n in self._nodes:
|
||||
txt = n.get_content(metadata_mode="all")
|
||||
hits = 0
|
||||
for p in patterns:
|
||||
if p.search(txt):
|
||||
hits += 1
|
||||
if hits:
|
||||
# simple score = number of distinct term hits (you can weight phrase vs single word if you like)
|
||||
scored.append((n, float(hits)))
|
||||
|
||||
scored.sort(key=lambda x: x[1], reverse=True)
|
||||
return [NodeWithScore(node=n, score=s) for n, s in scored[: self._top_k]]
|
||||
|
||||
# 3) Wire it all together
|
||||
def build_query_engine(docs: List[Document], k_vec=5, k_regex=5, weights=(0.7, 0.3)):
    """Build a query engine that fuses vector and LLM-guided regex retrieval."""
    shared_nodes = build_nodes(docs)

    # Vector index over the SAME nodes the regex retriever scans.
    vector_index = VectorStoreIndex(shared_nodes)

    dense = vector_index.as_retriever(similarity_top_k=k_vec)
    sparse = RegexRetriever(shared_nodes, top_k=k_regex)

    # Fuse the two result lists (Reciprocal Rank Fusion by default).
    # Tune `weights`: for more recall from regex, bump its weight.
    fused = EnsembleRetriever(
        retrievers=[dense, sparse],
        weights=list(weights),
    )

    return RetrieverQueryEngine(retriever=fused)
|
||||
|
||||
# 4) Use it
|
||||
# docs = SimpleDirectoryReader("data").load_data()
|
||||
# qe = build_query_engine(docs)
|
||||
# print(qe.query("Find entries with strong feelings of depression."))
|
||||
126
archived/query_rewrite_hyde.py
Normal file
126
archived/query_rewrite_hyde.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# query_rewrite_hyde.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# Latest experiment to include query rewriting using HyDE (Hypothetical Document Embeddings)
|
||||
# The goal is to reduce the semantic gap between the query and the indexed documents
|
||||
# This version implements a prompt and uses the build_exp.py vector store
|
||||
# Based on query_exp.py
|
||||
#
|
||||
# E.M.F. July 2025
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
|
||||
from llama_index.core.query_engine.transform_query_engine import TransformQueryEngine
|
||||
import os
|
||||
|
||||
# Globals
|
||||
|
||||
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
|
||||
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
|
||||
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# LLM model to use in query transform and generation
|
||||
llm="llama3.1:8B"
|
||||
# Other models tried:
|
||||
# llm="deepseek-r1:8B"
|
||||
# llm="gemma3:1b"
|
||||
|
||||
# Custom prompt for the query engine
|
||||
PROMPT = PromptTemplate(
|
||||
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
|
||||
|
||||
Instructions:
|
||||
- Base your response *only* on the CONTEXT.
|
||||
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
|
||||
- Aim to reference *as many distinct* relevant files as possible (up to 10).
|
||||
- Do not invent or generalize; refer to specific passages or facts only.
|
||||
- If a passage only loosely matches, deprioritize it.
|
||||
|
||||
Format your answer in two parts:
|
||||
|
||||
1. **Summary Theme**
|
||||
Summarize the dominant theme from the relevant context in a few sentences.
|
||||
|
||||
2. **Matching Files**
|
||||
Make a list of 10 matching files. The format for each should be:
|
||||
<filename> — <rationale tied to content. Include date or section hints if available.>
|
||||
|
||||
CONTEXT:
|
||||
{context_str}
|
||||
|
||||
QUERY:
|
||||
{query_str}
|
||||
|
||||
Now provide the theme and list of matching files."""
|
||||
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """Interactive loop: HyDE-rewrite each query, then search the vector store."""
    # Use a local model (via Ollama) to generate
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Load embedding model (same as used for the vector store)
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt
    base_query_engine = index.as_query_engine(
        similarity_top_k=15,      # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    # HyDE is "Hypothetical Document Embeddings": a hypothetical answer
    # document is generated from the query and used to augment the search.
    # include_original=True keeps the original query as well -- similarity
    # values are better with it enabled.
    hyde_transform = HyDEQueryTransform(llm=Settings.llm, include_original=True)

    # The transform engine is loop-invariant, so build it once here rather
    # than on every question (the original rebuilt it inside the loop).
    query_engine = TransformQueryEngine(base_query_engine, query_transform=hyde_transform)

    # Query loop
    while True:
        q = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()

        # Performs the similarity search (on the HyDE-rewritten query)
        # and then applies the prompt.
        response = query_engine.query(q)

        # Print the query response and source documents
        print(response.response)

        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()
|
||||
58
archived/query_topk.py
Normal file
58
archived/query_topk.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# query_topk.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# E.M.F. July 2025
|
||||
# August 2025 - updated for nd ssearch
|
||||
# this version uses top-k similarity
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
# Use a local model to generate
|
||||
Settings.llm = Ollama(
|
||||
model="llama3.1:8B", # First model tested
|
||||
# model="deepseek-r1:8B", # This model shows its reasoning
|
||||
# model="gemma3:1b",
|
||||
request_timeout=360.0,
|
||||
context_window=8000
|
||||
)
|
||||
|
||||
def main():
    """Interactive top-k similarity search over the persisted index."""
    # Embedding model must match the one used to build the vector store.
    Settings.embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")

    # Re-open the persisted vector store and its metadata.
    ctx = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(ctx)

    engine = index.as_query_engine(similarity_top_k=5)

    # Interactive loop until the user exits.
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        result = engine.query(q)

        # Show the synthesized answer, then the supporting chunks.
        print(result.response)
        print("\nSource documents:")
        for node in result.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
123
archived/query_topk_prompt.py
Normal file
123
archived/query_topk_prompt.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
# query_topk_prompt.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# This version from query_rewrite_hyde.py, but removing hyde and using a custom prompt
|
||||
# This version implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
|
||||
# Based on query_exp.py->query_topk.py->query_rewrite_hyde.py
|
||||
# The results are as good as with HyDE.
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
import os
|
||||
|
||||
#
|
||||
# Globals
|
||||
#
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
|
||||
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
|
||||
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")
|
||||
|
||||
# LLM model to use in query transform and generation
|
||||
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
|
||||
# to the provided context
|
||||
llm="command-r7b"
|
||||
# Other models tried:
|
||||
#llm="llama3.1:8B"
|
||||
#llm="deepseek-r1:8B"
|
||||
#llm="gemma3:1b"
|
||||
|
||||
#
|
||||
# Custom prompt for the query engine
|
||||
#
|
||||
PROMPT = PromptTemplate(
|
||||
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
|
||||
|
||||
Instructions:
|
||||
- Base your response *only* on the CONTEXT.
|
||||
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
|
||||
- Aim to reference *as many distinct* relevant files as possible (up to 10).
|
||||
- Do not invent or generalize; refer to specific passages or facts only.
|
||||
- If a passage only loosely matches, deprioritize it.
|
||||
|
||||
Format your answer in two parts:
|
||||
|
||||
1. **Summary Theme**
|
||||
Summarize the dominant theme from the relevant context in a few sentences.
|
||||
|
||||
2. **Matching Files**
|
||||
Make a list of 10 matching files. The format for each should be:
|
||||
<filename> -
|
||||
<rationale tied to content. Include date or section hints if available.>
|
||||
|
||||
CONTEXT:
|
||||
{context_str}
|
||||
|
||||
QUERY:
|
||||
{query_str}
|
||||
|
||||
Now provide the theme and list of matching files."""
|
||||
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """Interactive custom-prompt search over the experimental vector store."""
    # Generate locally via Ollama.
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000,
    )

    # Embeddings must match the ones the store was built with.
    Settings.embed_model = embed_model

    # Re-open the persisted vector store and its metadata.
    storage = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage)

    # Query engine: wide top-k retrieval, answers shaped by PROMPT.
    engine = index.as_query_engine(
        similarity_top_k=15,
        text_qa_template=PROMPT,
    )

    # Interactive loop until the user exits.
    while True:
        q = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()

        # Similarity search followed by prompt-based synthesis.
        answer = engine.query(q)

        # Show the synthesized answer, then the supporting chunks.
        print(answer.response)

        print("\nSource documents:")
        for hit in answer.source_nodes:
            meta = getattr(hit, "metadata", None) or hit.node.metadata
            print(f"{meta.get('file_name')} {meta.get('file_path')} {getattr(hit, 'score', None)}")


if __name__ == "__main__":
    main()
|
||||
134
archived/query_topk_prompt_dw.py
Normal file
134
archived/query_topk_prompt_dw.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# query_topk_prompt_dw.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# This version from query_rewrite_hyde.py, but removing hyde and using a custom prompt
|
||||
# This version implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
|
||||
# Based on query_exp.py->query_topk.py->query_rewrite_hyde.py
|
||||
# The results are as good as with HyDE.
|
||||
# Modified for terminal output (132 columns)
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
import os
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
# Print wrapping for terminal output
|
||||
class Wrap80:
    """File-like stdout proxy that hard-wraps each line for terminal output.

    Despite the historical name, the wrap width defaults to 131 columns
    (for a 132-column terminal).  Installed with ``sys.stdout = Wrap80()``.
    """

    def __init__(self, stream=None, width=131):
        # Target stream defaults to the real stdout; both are overridable
        # (backward-compatible) so the wrapper can be tested or redirected.
        self._stream = stream if stream is not None else sys.__stdout__
        self._width = width

    def write(self, text):
        # Wrap line by line.  keepends=True preserves the caller's newline
        # semantics: the original appended "\n" to every fragment, which
        # broke partial writes such as print(..., end="").
        for line in text.splitlines(keepends=True):
            newline = "\n" if line.endswith("\n") else ""
            self._stream.write(textwrap.fill(line.rstrip("\r\n"), width=self._width) + newline)

    def flush(self):
        self._stream.flush()
|
||||
|
||||
sys.stdout = Wrap80()
|
||||
|
||||
#
|
||||
# Globals
|
||||
#
|
||||
|
||||
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
|
||||
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
|
||||
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5")
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# LLM model to use in query transform and generation
|
||||
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
|
||||
# to the provided context
|
||||
llm="command-r7b"
|
||||
# Other models tried:
|
||||
#llm="llama3.1:8B"
|
||||
# llm="deepseek-r1:8B"
|
||||
# llm="gemma3:1b"
|
||||
|
||||
# Custom prompt for the query engine
|
||||
PROMPT = PromptTemplate(
|
||||
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
|
||||
|
||||
Instructions:
|
||||
- Base your response *only* on the CONTEXT.
|
||||
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
|
||||
- Aim to reference *as many distinct* relevant files as possible (up to 10).
|
||||
- Do not invent or generalize; refer to specific passages or facts only.
|
||||
- If a passage only loosely matches, deprioritize it.
|
||||
|
||||
Format your answer in two parts:
|
||||
|
||||
1. **Summary Theme**
|
||||
Summarize the dominant theme from the relevant context in a few sentences.
|
||||
|
||||
2. **Matching Files**
|
||||
Make a list of 10 matching files. The format for each should be:
|
||||
<filename> -
|
||||
<rationale tied to content. Include date or section hints if available.>
|
||||
|
||||
CONTEXT:
|
||||
{context_str}
|
||||
|
||||
QUERY:
|
||||
{query_str}
|
||||
|
||||
Now provide the theme and list of matching files."""
|
||||
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """Interactive prompt-driven search; stdout is wrapped by Wrap80."""
    # Use a local model (via Ollama) to generate
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Load embedding model (same as used for vector store)
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt
    query_engine = index.as_query_engine(
        similarity_top_k=15,      # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    # Query loop
    while True:
        q = input("\nEnter a search topic or question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()

        # Performs the similarity search and then applies the prompt.
        response = query_engine.query(q)

        # Print the query response and source documents
        print(response.response)

        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            # One file per line.  The original used print(..., end=""),
            # which ran all entries together; its sibling scripts print one
            # entry per line, so drop end="" for consistent output.
            print(f"{meta.get('file_name')} {meta.get('file_path')} {getattr(node, 'score', None)}")


if __name__ == "__main__":
    main()
|
||||
123
archived/query_topk_prompt_engine.py
Normal file
123
archived/query_topk_prompt_engine.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
# query_topk_prompt_engine.py
|
||||
# Run a query on a vector store
|
||||
#
|
||||
# This version is query_topk_prompt.py but the query is passed though the command line.
|
||||
#
|
||||
# Implements a prompt and uses the build_exp.py vector store with BAAI/bge-large-en-v1.5
|
||||
# Based on query_exp.py->query_topk.py
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
import os
|
||||
import sys
|
||||
|
||||
#
|
||||
# Globals
|
||||
#
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# Embedding model used in vector store (this should match the one in build_exp.py or equivalent)
|
||||
# embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
|
||||
embed_model = HuggingFaceEmbedding(cache_folder="./models",model_name="BAAI/bge-large-en-v1.5",local_files_only=True)
|
||||
|
||||
# LLM model to use in query transform and generation
|
||||
# command-r7b generates about as quickly as llama3.1:8B, but provides results that stick better
|
||||
# to the provided context
|
||||
llm="command-r7b"
|
||||
# Other models tried:
|
||||
#llm="llama3.1:8B"
|
||||
#llm="deepseek-r1:8B"
|
||||
#llm="gemma3:1b"
|
||||
|
||||
#
|
||||
# Custom prompt for the query engine
|
||||
#
|
||||
PROMPT = PromptTemplate(
|
||||
"""You are an expert research assistant. You are given top-ranked writing excerpts (CONTEXT) and a user's QUERY.
|
||||
|
||||
Instructions:
|
||||
- Base your response *only* on the CONTEXT.
|
||||
- The snippets are ordered from most to least relevant—prioritize insights from earlier (higher-ranked) snippets.
|
||||
- Aim to reference *as many distinct* relevant files as possible (up to 10).
|
||||
- Do not invent or generalize; refer to specific passages or facts only.
|
||||
- If a passage only loosely matches, deprioritize it.
|
||||
|
||||
Format your answer in two parts:
|
||||
|
||||
1. **Summary Theme**
|
||||
Summarize the dominant theme from the relevant context in a few sentences.
|
||||
|
||||
2. **Matching Files**
|
||||
Make a list of 10 matching files. The format for each should be:
|
||||
<filename> -
|
||||
<rationale tied to content. Include date or section hints if available.>
|
||||
|
||||
CONTEXT:
|
||||
{context_str}
|
||||
|
||||
QUERY:
|
||||
{query_str}
|
||||
|
||||
Now provide the theme and list of matching files."""
|
||||
)
|
||||
|
||||
#
|
||||
# Main program routine
|
||||
#
|
||||
|
||||
def main():
    """One-shot command-line query against the persisted vector store."""
    # Use a local model to generate -- in this case using Ollama
    Settings.llm = Ollama(
        model=llm,
        request_timeout=360.0,
        context_window=8000
    )

    # Load embedding model (same as used for vector store)
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
    index = load_index_from_storage(storage_context)

    # Build regular query engine with custom prompt
    query_engine = index.as_query_engine(
        similarity_top_k=15,      # pull wide
        text_qa_template=PROMPT,  # custom prompt
    )

    # Query comes from the command line (all args joined into one string).
    if len(sys.argv) < 2:
        print("Usage: python query.py QUERY_TEXT")
        sys.exit(1)
    q = " ".join(sys.argv[1:])

    # Performs the similarity search and then applies the prompt.
    response = query_engine.query(q)

    # Print the query response and source documents
    print("\nResponse:\n")
    print(response.response)

    print("\nSource documents:")
    for node in response.source_nodes:
        meta = getattr(node, "metadata", None) or node.node.metadata
        score = getattr(node, "score", None)
        # Formatting None with ':.3f' raises TypeError, so only apply the
        # float format when a score is actually present.
        score_txt = f"{score:.3f}" if score is not None else "n/a"
        print(f"{meta.get('file_name')} {meta.get('file_path')} {score_txt}")


if __name__ == "__main__":
    main()
|
||||
60
archived/query_tree.py
Normal file
60
archived/query_tree.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# query_tree.py
|
||||
#
|
||||
# Run a query on a vector store
|
||||
# This is to test summarization using a tree-summarize response mode
|
||||
# It doesn't work very well, perhaps because of the structure of the data
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
from llama_index.llms.ollama import Ollama
|
||||
|
||||
# Use a local model to generate
|
||||
Settings.llm = Ollama(
|
||||
model="llama3.1:8B", # First model tested
|
||||
# model="deepseek-r1:8B", # This model shows its reasoning
|
||||
# model="gemma3:1b",
|
||||
request_timeout=360.0,
|
||||
context_window=8000
|
||||
)
|
||||
|
||||
def main():
    """Interactive tree-summarize query loop over the persisted index."""
    # Load embedding model (same as used for vector store)
    embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
    Settings.embed_model = embed_model

    # Load persisted vector store + metadata
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(response_mode="tree_summarize")

    # Query loop
    while True:
        q = input("\nEnter your question (or 'exit'): ").strip()
        if q.lower() in ("exit", "quit"):
            break
        print()
        # BUG FIX: the original queried the literal placeholder string
        # "<summarization_query>" and ignored the user's input entirely.
        response = query_engine.query(q)

        # Print the query response and source documents
        print(response.response)
        print("\nSource documents:")
        for node in response.source_nodes:
            meta = getattr(node, "metadata", None) or node.node.metadata
            print(meta.get("file_name"), "---", meta.get("file_path"), getattr(node, "score", None))


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
27
archived/vs_metrics.py
Normal file
27
archived/vs_metrics.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# vs_metrics.py
|
||||
# Quantify vector store properties and performance
|
||||
#
|
||||
# E.M.F. August 2025
|
||||
|
||||
# Read in vector store
|
||||
|
||||
# What are properties of the vector store?
|
||||
# - number of vectors
|
||||
# - distribution of distances
|
||||
# - clustering?
|
||||
|
||||
from llama_index.core import (
|
||||
StorageContext,
|
||||
load_index_from_storage,
|
||||
ServiceContext,
|
||||
Settings,
|
||||
)
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
|
||||
# Load embedding model (same as used for vector store)
|
||||
embed_model = HuggingFaceEmbedding(model_name="all-mpnet-base-v2")
|
||||
Settings.embed_model = embed_model
|
||||
|
||||
# Load persisted vector store + metadata
|
||||
storage_context = StorageContext.from_defaults(persist_dir="./storage")
|
||||
index = load_index_from_storage(storage_context)
|
||||
Loading…
Add table
Add a link
Reference in a new issue