ssearch/archived/claude_diagnostic.py
Eric e9fc99ddc6 Initial commit: RAG pipeline for semantic search over personal journal archive
Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval,
incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
2026-02-20 06:02:28 -05:00

164 lines
5.1 KiB
Python

# Better HyDE debugging with targeted tests
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core import PromptTemplate
from llama_index.core import Settings
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.llms.ollama import Ollama
# Model tag to diagnose with.
# NOTE(review): Ollama tags are conventionally lower-case ("llama3.1:8b") —
# confirm the upper-case "8B" tag resolves on your install.
llm = "llama3.1:8B"

# Route every LlamaIndex LLM call through the local Ollama server.
Settings.llm = Ollama(
    model=llm,               # first model tested
    request_timeout=360.0,   # generous timeout for slow local generation
    context_window=8000,
    temperature=0.7,
)
# Probe queries from deliberately unrelated domains: a working HyDE
# transform should generate a clearly different hypothetical document
# for each one.
test_queries = [
    "What is the capital of France?",           # factual lookup
    "How do you make chocolate chip cookies?",  # procedural
    "Explain quantum physics",                  # explanatory
    "Write a love letter",                      # creative
    "Describe symptoms of the common cold",     # descriptive
]
print("=== DEBUGGING HYDE STEP BY STEP ===\n")

# 1. Bypass HyDE entirely: hand HyDE-style prompts straight to the LLM to
#    confirm the model itself can produce hypothetical documents.
print("1. Testing LLM directly with HyDE-style prompts:")
print("-" * 50)
for q in test_queries[:2]:  # two queries keeps the output manageable
    direct_prompt = f"""Generate a hypothetical document that would contain the answer to this query.
Query: {q}
Hypothetical document:"""
    reply = Settings.llm.complete(direct_prompt)
    print(f"Query: {q}")
    print(f"Direct LLM Response: {reply.text[:100]}...")
    print()
# 2. Instrument HyDE internals to observe what actually happens per call.
print("\n2. Examining HyDE internal behavior:")
print("-" * 50)
# A HyDE subclass that logs everything flowing through it.
class VerboseHyDETransform(HyDEQueryTransform):
    """HyDEQueryTransform that prints its prompts and component I/O."""

    def _get_prompts(self):
        """Log the prompt dict the base transform is configured with."""
        prompt_dict = super()._get_prompts()
        print(f"HyDE prompts: {prompt_dict}")
        return prompt_dict

    def _run_component(self, **kwargs):
        """Log inputs and outputs of the component call.

        NOTE(review): `.run()` may not route through `_run_component` in
        every LlamaIndex version — confirm this hook actually fires.
        """
        print(f"HyDE _run_component kwargs: {kwargs}")
        outcome = super()._run_component(**kwargs)
        print(f"HyDE _run_component result: {outcome}")
        return outcome
# Run one query through the instrumented transform.
instrumented = VerboseHyDETransform(llm=Settings.llm)
test_result = instrumented.run("What is machine learning?")
print(f"Final verbose result: {test_result}")
# 3. Smallest possible HyDE invocation.
print("\n3. Most basic HyDE test:")
print("-" * 50)
basic_hyde = HyDEQueryTransform(llm=Settings.llm)
basic_result = basic_hyde.run("Paris")
# BUG FIX: `run()` returns a QueryBundle, not a str, so the original
# `basic_result.strip()` raised AttributeError. Compare on the query text.
# NOTE(review): HyDE normally leaves `query_str` unchanged and puts the
# hypothetical document in `custom_embedding_strs` — inspect that field too.
result_text = getattr(basic_result, "query_str", str(basic_result))
print(f"Input: 'Paris'")
print(f"Output: '{basic_result}'")
print(f"Same as input? {result_text.strip() == 'Paris'}")
# 4. Re-create HyDE with an explicit prompt template, in case the installed
#    version ships a default prompt this LLM ignores.
print("\n4. Alternative HyDE approach:")
print("-" * 50)
try:
    # Importability probe for older/newer package layouts.
    from llama_index.core.query_engine import TransformQueryEngine

    # BUG FIX: the original template labelled the *question* as "Passage:",
    # so the model received "Passage:<question> ... Passage:" and had no
    # question to answer. Insert the bare query, matching the stock HyDE
    # prompt's "question ... Passage:" structure.
    hyde_prompt_template = PromptTemplate(
        "Please write a passage to answer the question\n"
        "Try to include as many key details as possible\n"
        "\n"
        "\n"
        "{query_str}\n"
        "\n"
        "\n"
        "Passage:"
    )
    alt_hyde = HyDEQueryTransform(
        llm=Settings.llm,
        hyde_prompt=hyde_prompt_template,
    )
    alt_result = alt_hyde.run("What causes rain?")
    print(f"Alternative approach result: {alt_result}")
except Exception as e:
    print(f"Alternative approach failed: {e}")
# 5. Compare raw-string input against an explicit QueryBundle.
print("\n5. Testing different input formats:")
print("-" * 50)
from llama_index.core.schema import QueryBundle

hyde_test = HyDEQueryTransform(llm=Settings.llm)

string_result = hyde_test.run("test query")
print(f"String input result: '{string_result}'")

bundle = QueryBundle(query_str="test query")
bundle_result = hyde_test.run(bundle)
print(f"QueryBundle input result: '{bundle_result}'")
# 6. Report environment details that matter for HyDE behavior.
print("\n6. Environment check:")
print("-" * 50)
import llama_index

# ROBUSTNESS FIX: recent releases ship `llama_index` as a namespace package
# without `__version__`; the original attribute access could crash the
# diagnostic here. Fall back to installed-package metadata instead.
_li_version = getattr(llama_index, "__version__", None)
if _li_version is None:
    try:
        from importlib.metadata import version as _pkg_version
        _li_version = _pkg_version("llama-index-core")
    except Exception:
        _li_version = "unknown"
print(f"LlamaIndex version: {_li_version}")
# Confirm which LLM object is actually configured.
print(f"LLM type: {type(Settings.llm)}")
print(f"LLM model name: {getattr(Settings.llm, 'model', 'Unknown')}")
# 7. Nuclear option: hand-rolled HyDE with zero framework involvement.
print("\n7. Manual HyDE implementation:")
print("-" * 50)


def manual_hyde(query: str, llm) -> str:
    """Generate a hypothetical answer document for *query* using *llm*.

    *llm* only needs a ``.complete(prompt)`` method whose return value has
    a ``.text`` attribute (duck-typed, so any LlamaIndex LLM works).
    """
    prompt = f"""You are an expert writer. Generate a realistic document excerpt that would contain the answer to this question.
Question: {query}
Document excerpt:"""
    return llm.complete(prompt).text
# Drive the manual implementation with the configured LLM.
hypothetical = manual_hyde("What is photosynthesis?", Settings.llm)
print(f"Manual HyDE result: {hypothetical[:150]}...")
# 8. Checklist for interpreting the results above.
print("\n8. Final diagnostic questions:")
print("-" * 50)
print("If all the above show the LLM generating proper responses but HyDE still returns original:")
for _question in (
    "- What LLM are you using? (OpenAI, Anthropic, local model, etc.)",
    "- What's your LlamaIndex version?",
    "- Are there any error messages in the logs?",
    "- Does the LLM have any special configuration or wrappers?",
):
    print(_question)