Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval, incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
164 lines · 5.1 KiB · Python
# Better HyDE debugging with targeted tests
import importlib.metadata

from llama_index.core import PromptTemplate
from llama_index.core import Settings
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.llms.ollama import Ollama
|
llm = "llama3.1:8B"

# Use a local model to generate.
# NOTE(review): Ollama model tags are normally lowercase ("llama3.1:8b") —
# confirm this tag matches what `ollama list` reports on the host.
_ollama_config = {
    "model": llm,  # First model tested
    "request_timeout": 360.0,  # generous timeout for slow local generation
    "context_window": 8000,
    "temperature": 0.7,
}
Settings.llm = Ollama(**_ollama_config)
# Test queries that should produce very different hypothetical documents
test_queries = [
    "What is the capital of France?",           # factual lookup
    "How do you make chocolate chip cookies?",  # procedural
    "Explain quantum physics",                  # expository
    "Write a love letter",                      # creative
    "Describe symptoms of the common cold",     # descriptive/medical
]
print("=== DEBUGGING HYDE STEP BY STEP ===\n")

# 1. Feed the LLM HyDE-style prompts directly, bypassing the transform,
# to confirm the model itself can generate hypothetical documents.
print("1. Testing LLM directly with HyDE-style prompts:")
print("-" * 50)

# Hoisted template; only the first two queries to keep output manageable.
_direct_template = (
    "Generate a hypothetical document that would contain the answer to this query.\n"
    "\n"
    "Query: {}\n"
    "\n"
    "Hypothetical document:"
)
for q in test_queries[:2]:
    reply = Settings.llm.complete(_direct_template.format(q))
    print(f"Query: {q}")
    print(f"Direct LLM Response: {reply.text[:100]}...")
    print()
# 2. Check HyDE internals - let's see what's actually happening
print("\n2. Examining HyDE internal behavior:")
print("-" * 50)


# Create a custom HyDE that shows us everything
class VerboseHyDETransform(HyDEQueryTransform):
    """HyDEQueryTransform that echoes its prompts and component calls."""

    def _get_prompts(self):
        """Log the prompt dict the parent class is configured with."""
        parent_prompts = super()._get_prompts()
        print(f"HyDE prompts: {parent_prompts}")
        return parent_prompts

    def _run_component(self, **kwargs):
        """Log the inputs and outputs of a component invocation."""
        # NOTE(review): plain .run() dispatches through _run(), so this hook
        # may never fire outside a pipeline — confirm against your
        # llama_index version if no output appears.
        print(f"HyDE _run_component kwargs: {kwargs}")
        outcome = super()._run_component(**kwargs)
        print(f"HyDE _run_component result: {outcome}")
        return outcome
# Exercise the verbose subclass end-to-end on one sample question.
probe = VerboseHyDETransform(llm=Settings.llm)
test_result = probe.run("What is machine learning?")
print(f"Final verbose result: {test_result}")
# 3. Try the most basic possible test
print("\n3. Most basic HyDE test:")
print("-" * 50)

basic_hyde = HyDEQueryTransform(llm=Settings.llm)
basic_result = basic_hyde.run("Paris")
print("Input: 'Paris'")
print(f"Output: '{basic_result}'")
# BUG FIX: run() returns a QueryBundle, not a str, so the old
# `basic_result.strip()` raised AttributeError. The original query stays in
# query_str; the generated hypothetical document lives in
# custom_embedding_strs — print it so the check is actually informative.
print(f"Same as input? {basic_result.query_str.strip() == 'Paris'}")
print(f"Hypothetical docs: {basic_result.custom_embedding_strs}")
# 4. Check if it's a version issue - try alternative approach
print("\n4. Alternative HyDE approach:")
print("-" * 50)

try:
    # Some versions might need different initialization
    from llama_index.core.query_engine import TransformQueryEngine
    from llama_index.core.indices.query.query_transform import HyDEQueryTransform

    # Try with explicit prompt template.
    # BUG FIX: HyDEQueryTransform fills its prompt with context_str=<query>,
    # so a custom template must use the {context_str} placeholder. The old
    # template used {query_str} (never substituted) and mislabelled the
    # question itself as "Passage:", confusing the model.
    hyde_prompt_template = PromptTemplate(
        "Please write a passage to answer the question\n"
        "Try to include as many key details as possible\n"
        "\n"
        "\n"
        "{context_str}\n"
        "\n"
        "\n"
        "Passage:"
    )

    alt_hyde = HyDEQueryTransform(
        llm=Settings.llm,
        hyde_prompt=hyde_prompt_template,
    )

    alt_result = alt_hyde.run("What causes rain?")
    print(f"Alternative approach result: {alt_result}")

except Exception as e:
    print(f"Alternative approach failed: {e}")
# 5. Check what happens with different query formats
print("\n5. Testing different input formats:")
print("-" * 50)

from llama_index.core.schema import QueryBundle

# Run the same query through HyDE twice: once as a bare string...
format_probe = HyDEQueryTransform(llm=Settings.llm)

str_out = format_probe.run("test query")
print(f"String input result: '{str_out}'")

# ...and once pre-wrapped in a QueryBundle, to rule out input-coercion bugs.
wrapped = QueryBundle(query_str="test query")
bundle_out = format_probe.run(wrapped)
print(f"QueryBundle input result: '{bundle_out}'")
# 6. Version and import check
print("\n6. Environment check:")
print("-" * 50)

import llama_index

# ROBUSTNESS FIX: llama_index is a namespace package in recent releases and
# may not expose __version__; a bare attribute access would crash this
# diagnostic script. Fall back to the installed package metadata instead.
_li_version = getattr(llama_index, "__version__", None)
if _li_version is None:
    try:
        _li_version = importlib.metadata.version("llama-index-core")
    except importlib.metadata.PackageNotFoundError:
        _li_version = "unknown"
print(f"LlamaIndex version: {_li_version}")

# Check what LLM you're actually using
print(f"LLM type: {type(Settings.llm)}")
print(f"LLM model name: {getattr(Settings.llm, 'model', 'Unknown')}")
# 7. Try the nuclear option - completely manual implementation
print("\n7. Manual HyDE implementation:")
print("-" * 50)


def manual_hyde(query: str, llm):
    """Hand-rolled HyDE: ask *llm* for a hypothetical document answering *query*.

    Bypasses all library plumbing so we can tell whether the concept itself
    works with the configured model. Returns the raw generated text.
    """
    prompt = (
        "You are an expert writer. Generate a realistic document excerpt "
        "that would contain the answer to this question.\n"
        f"\nQuestion: {query}\n"
        "\nDocument excerpt:"
    )
    return llm.complete(prompt).text
|
|
|
# Sanity-check the manual implementation against the configured LLM.
manual_result = manual_hyde("What is photosynthesis?", Settings.llm)
preview = manual_result[:150]
print(f"Manual HyDE result: {preview}...")
# 8. Final diagnostic
print("\n8. Final diagnostic questions:")
print("-" * 50)

# Checklist to narrow things down when the LLM itself looks healthy.
_diagnostics = (
    "If all the above show the LLM generating proper responses but HyDE still returns original:",
    "- What LLM are you using? (OpenAI, Anthropic, local model, etc.)",
    "- What's your LlamaIndex version?",
    "- Are there any error messages in the logs?",
    "- Does the LLM have any special configuration or wrappers?",
)
for _line in _diagnostics:
    print(_line)