Updated to Gemma4 models.

This commit is contained in:
Eric Furst 2026-04-09 10:35:48 -04:00
commit 3347a242ef
4 changed files with 46 additions and 10 deletions

View file

@@ -43,8 +43,10 @@ import sys
# Embedding model (must match build_store.py)
EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True)
# LLM model for generation
LLM_MODEL = "command-r7b"
# LLM model for generation. Use temp 0.3.
#LLM_MODEL = "command-r7b"
# testing gemma4:e4b. Recommendations are to use temp 1.0
LLM_MODEL = "gemma4:e4b"
# Cross-encoder model for re-ranking (cached in ./models/)
RERANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"
@@ -89,7 +91,8 @@ def main():
# Note: Ollama temperature defaults to 0.8
Settings.llm = Ollama(
model=LLM_MODEL,
temperature=0.3,
temperature=1.0,
thinking=True, # enable
request_timeout=360.0,
context_window=8000,
)
@@ -153,9 +156,20 @@ def main():
n_bm25_only = len([n for n in bm25_nodes if n.node.node_id not in {v.node.node_id for v in vector_nodes}])
n_both = len(vector_nodes) + len(bm25_nodes) - len(merged)
# Estimate context length (prompt + node text)
context_text = "\n\n".join(n.get_content() for n in reranked)
prompt_text = PROMPT.format(context_str=context_text, query_str=q)
n_context_tokens = len(prompt_text.split()) # rough word count; ~1.3 tokens/word
print(f"\nQuery: {q}")
print(f"Vector: {len(vector_nodes)}, BM25: {len(bm25_nodes)}, "
print(f"Vector: {len(vector_nodes)} ({n_vector_only} unique), "
f"BM25: {len(bm25_nodes)} ({n_bm25_only} unique), "
f"overlap: {n_both}, merged: {len(merged)}, re-ranked to: {len(reranked)}")
# The token estimate uses a ~1.3 tokens/word ratio, which is a rough approximation.
# For an exact count you'd need the model's tokenizer, but this gives a useful ballpark
# for gauging how much of the context window we use.
print(f"Context: ~{n_context_tokens} words (~{int(n_context_tokens * 1.3)} tokens)")
# Synthesize response with LLM
synthesizer = get_response_synthesizer(text_qa_template=PROMPT)
@@ -169,7 +183,7 @@ def main():
for node in response.source_nodes:
meta = getattr(node, "metadata", None) or node.node.metadata
score = getattr(node, "score", None)
print(f"{meta.get('file_name')} {meta.get('file_path')} {score:.3f}")
print(f"data/{meta.get('file_name')} {score:.3f}")
if __name__ == "__main__":