Vector search with cross-encoder re-ranking, hybrid BM25+vector retrieval, incremental index updates, and multiple LLM backends (Ollama local, OpenAI API).
164 lines · 5.1 KiB · Python
# Better HyDE debugging with targeted tests
import importlib.metadata

from llama_index.core import PromptTemplate
from llama_index.core import Settings
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.llms.ollama import Ollama
|
llm = "llama3.1:8B"

# Use a local model to generate.
# NOTE(review): Ollama model tags are normally lowercase ("llama3.1:8b") —
# confirm this tag matches what `ollama list` reports on the host.
_ollama_config = {
    "model": llm,  # First model tested
    "request_timeout": 360.0,  # generous timeout for slow local generation
    "context_window": 8000,
    "temperature": 0.7,
}
Settings.llm = Ollama(**_ollama_config)
# Test queries that should produce very different hypothetical documents
test_queries = [
    "What is the capital of France?",           # factual lookup
    "How do you make chocolate chip cookies?",  # procedural
    "Explain quantum physics",                  # expository
    "Write a love letter",                      # creative
    "Describe symptoms of the common cold",     # descriptive/medical
]
print("=== DEBUGGING HYDE STEP BY STEP ===\n")

# 1. Feed the LLM HyDE-style prompts directly, bypassing the transform,
# to confirm the model itself can generate hypothetical documents.
print("1. Testing LLM directly with HyDE-style prompts:")
print("-" * 50)

# Hoisted template; only the first two queries to keep output manageable.
_direct_template = (
    "Generate a hypothetical document that would contain the answer to this query.\n"
    "\n"
    "Query: {}\n"
    "\n"
    "Hypothetical document:"
)
for q in test_queries[:2]:
    reply = Settings.llm.complete(_direct_template.format(q))
    print(f"Query: {q}")
    print(f"Direct LLM Response: {reply.text[:100]}...")
    print()
# 2. Check HyDE internals - let's see what's actually happening
print("\n2. Examining HyDE internal behavior:")
print("-" * 50)


# Create a custom HyDE that shows us everything
class VerboseHyDETransform(HyDEQueryTransform):
    """HyDEQueryTransform that echoes its prompts and component calls."""

    def _get_prompts(self):
        """Log the prompt dict the parent class is configured with."""
        parent_prompts = super()._get_prompts()
        print(f"HyDE prompts: {parent_prompts}")
        return parent_prompts

    def _run_component(self, **kwargs):
        """Log the inputs and outputs of a component invocation."""
        # NOTE(review): plain .run() dispatches through _run(), so this hook
        # may never fire outside a pipeline — confirm against your
        # llama_index version if no output appears.
        print(f"HyDE _run_component kwargs: {kwargs}")
        outcome = super()._run_component(**kwargs)
        print(f"HyDE _run_component result: {outcome}")
        return outcome
# Exercise the verbose subclass end-to-end on one sample question.
probe = VerboseHyDETransform(llm=Settings.llm)
test_result = probe.run("What is machine learning?")
print(f"Final verbose result: {test_result}")
# 3. Try the most basic possible test
print("\n3. Most basic HyDE test:")
print("-" * 50)

basic_hyde = HyDEQueryTransform(llm=Settings.llm)
basic_result = basic_hyde.run("Paris")
print("Input: 'Paris'")
print(f"Output: '{basic_result}'")
# BUG FIX: run() returns a QueryBundle, not a str, so the old
# `basic_result.strip()` raised AttributeError. The original query stays in
# query_str; the generated hypothetical document lives in
# custom_embedding_strs — print it so the check is actually informative.
print(f"Same as input? {basic_result.query_str.strip() == 'Paris'}")
print(f"Hypothetical docs: {basic_result.custom_embedding_strs}")
# 4. Check if it's a version issue - try alternative approach
print("\n4. Alternative HyDE approach:")
print("-" * 50)

try:
    # Some versions might need different initialization
    from llama_index.core.query_engine import TransformQueryEngine
    from llama_index.core.indices.query.query_transform import HyDEQueryTransform

    # Try with explicit prompt template.
    # BUG FIX: HyDEQueryTransform fills its prompt with context_str=<query>,
    # so a custom template must use the {context_str} placeholder. The old
    # template used {query_str} (never substituted) and mislabelled the
    # question itself as "Passage:", confusing the model.
    hyde_prompt_template = PromptTemplate(
        "Please write a passage to answer the question\n"
        "Try to include as many key details as possible\n"
        "\n"
        "\n"
        "{context_str}\n"
        "\n"
        "\n"
        "Passage:"
    )

    alt_hyde = HyDEQueryTransform(
        llm=Settings.llm,
        hyde_prompt=hyde_prompt_template,
    )

    alt_result = alt_hyde.run("What causes rain?")
    print(f"Alternative approach result: {alt_result}")

except Exception as e:
    print(f"Alternative approach failed: {e}")
# 5. Check what happens with different query formats
print("\n5. Testing different input formats:")
print("-" * 50)

from llama_index.core.schema import QueryBundle

# Run the same query through HyDE twice: once as a bare string...
format_probe = HyDEQueryTransform(llm=Settings.llm)

str_out = format_probe.run("test query")
print(f"String input result: '{str_out}'")

# ...and once pre-wrapped in a QueryBundle, to rule out input-coercion bugs.
wrapped = QueryBundle(query_str="test query")
bundle_out = format_probe.run(wrapped)
print(f"QueryBundle input result: '{bundle_out}'")
# 6. Version and import check
print("\n6. Environment check:")
print("-" * 50)

import llama_index

# ROBUSTNESS FIX: llama_index is a namespace package in recent releases and
# may not expose __version__; a bare attribute access would crash this
# diagnostic script. Fall back to the installed package metadata instead.
_li_version = getattr(llama_index, "__version__", None)
if _li_version is None:
    try:
        _li_version = importlib.metadata.version("llama-index-core")
    except importlib.metadata.PackageNotFoundError:
        _li_version = "unknown"
print(f"LlamaIndex version: {_li_version}")

# Check what LLM you're actually using
print(f"LLM type: {type(Settings.llm)}")
print(f"LLM model name: {getattr(Settings.llm, 'model', 'Unknown')}")
# 7. Try the nuclear option - completely manual implementation
print("\n7. Manual HyDE implementation:")
print("-" * 50)


def manual_hyde(query: str, llm):
    """Hand-rolled HyDE: ask *llm* for a hypothetical document answering *query*.

    Bypasses all library plumbing so we can tell whether the concept itself
    works with the configured model. Returns the raw generated text.
    """
    prompt = (
        "You are an expert writer. Generate a realistic document excerpt "
        "that would contain the answer to this question.\n"
        f"\nQuestion: {query}\n"
        "\nDocument excerpt:"
    )
    return llm.complete(prompt).text
|
|
|
# Sanity-check the manual implementation against the configured LLM.
manual_result = manual_hyde("What is photosynthesis?", Settings.llm)
preview = manual_result[:150]
print(f"Manual HyDE result: {preview}...")
# 8. Final diagnostic
print("\n8. Final diagnostic questions:")
print("-" * 50)

# Checklist to narrow things down when the LLM itself looks healthy.
_diagnostics = (
    "If all the above show the LLM generating proper responses but HyDE still returns original:",
    "- What LLM are you using? (OpenAI, Anthropic, local model, etc.)",
    "- What's your LlamaIndex version?",
    "- Are there any error messages in the logs?",
    "- Does the LLM have any special configuration or wrappers?",
)
for _line in _diagnostics:
    print(_line)