From 39f1f73e2ac3376212386f6facc63d7b0a7b49c5 Mon Sep 17 00:00:00 2001 From: Eric Furst Date: Sun, 22 Feb 2026 12:41:55 -0500 Subject: [PATCH] Initial commit: RAG demo with build and query scripts --- .gitignore | 15 +++++++ README.md | 36 ++++++++++++++++ build.py | 49 +++++++++++++++++++++ data/.gitkeep | 0 query.py | 110 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 ++ 6 files changed, 214 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 build.py create mode 100644 data/.gitkeep create mode 100644 query.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4af364b --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +# Python +.venv/ +__pycache__/ +*.pyc + +# Generated data +storage/ +models/ + +# macOS +.DS_Store + +# Data (users supply their own) +data/* +!data/.gitkeep diff --git a/README.md b/README.md new file mode 100644 index 0000000..b0a4f7e --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# RAG Demo + +Retrieval Augmented Generation using LlamaIndex with local models. + +This demo builds a semantic search system over a collection of text documents +using a HuggingFace embedding model and Ollama for generation. 
+ +## Tutorial + +See the full walkthrough at: +https://lem.che.udel.edu/wiki/index.php?n=Main.RAG + +## Quick Start + +```bash +# Create and activate virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# Install dependencies +pip install -r requirements.txt + +# Pull the generation model +ollama pull command-r7b + +# Place your .txt documents in ./data, then build the vector store +python build.py + +# Run interactive queries +python query.py +``` + +## Models + +- **Embedding:** BAAI/bge-large-en-v1.5 (downloaded automatically on first run) +- **Generation:** command-r7b via Ollama diff --git a/build.py b/build.py new file mode 100644 index 0000000..84ae988 --- /dev/null +++ b/build.py @@ -0,0 +1,49 @@ +# build.py +# +# Import documents from data, generate embedded vector store +# and save to disk in directory ./storage +# +# August 2025 +# E. M. Furst + +from llama_index.core import ( + SimpleDirectoryReader, + VectorStoreIndex, + Settings, +) + +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.core.node_parser import SentenceSplitter + +def main(): + # Choose your embedding model + embed_model = HuggingFaceEmbedding(cache_folder="./models", + model_name="BAAI/bge-large-en-v1.5") + + # Configure global settings for LlamaIndex + Settings.embed_model = embed_model + + # Load documents + documents = SimpleDirectoryReader("./data").load_data() + + # Create the custom text splitter + # Set chunk size and overlap (500 tokens, 50 tokens overlap) + text_splitter = SentenceSplitter( + chunk_size=500, + chunk_overlap=50, + ) + Settings.text_splitter = text_splitter + + # Build the index + index = VectorStoreIndex.from_documents( + documents, transformations=[text_splitter], + show_progress=True, + ) + + # Persist both vector store and index metadata + index.storage_context.persist(persist_dir="./storage") + + print("Index built and saved to ./storage") + +if __name__ == "__main__": + main() diff --git
a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/query.py b/query.py new file mode 100644 index 0000000..42eb740 --- /dev/null +++ b/query.py @@ -0,0 +1,110 @@ +# query.py +# +# Run a query on a vector store +# +# August 2025 +# E. M. Furst + +from llama_index.core import ( + load_index_from_storage, + StorageContext, + Settings, +) +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.llms.ollama import Ollama +from llama_index.core.prompts import PromptTemplate +import os, time + +# +# Globals +# +os.environ["TOKENIZERS_PARALLELISM"] = "false" + +# Embedding model used in vector store (this should match the one in build.py) +embed_model = HuggingFaceEmbedding(cache_folder="./models", + model_name="BAAI/bge-large-en-v1.5") + +# LLM model to use in query transform and generation +llm = "command-r7b" + +# +# Custom prompt for the query engine +# +PROMPT = PromptTemplate( +"""You are an expert research assistant. You are given top-ranked writing \ +excerpts (CONTEXT) and a user's QUERY. + +Instructions: +- Base your response *only* on the CONTEXT. +- The snippets are ordered from most to least relevant—prioritize insights \ +from earlier (higher-ranked) snippets. +- Aim to reference *as many distinct* relevant files as possible (up to 10). +- Do not invent or generalize; refer to specific passages or facts only. +- If a passage only loosely matches, deprioritize it. + +Format your answer in two parts: + +1. **Summary Theme** + Summarize the dominant theme from the relevant context in a few sentences. + +2. **Matching Files** + Make a list of 10 matching files. 
The format for each should be: - <file name>: <one-sentence justification drawn from the context> + +CONTEXT: +{context_str} + +QUERY: +{query_str} + +Now provide the theme and list of matching files.""" +) + +# +# Main program routine +# + +def main(): + # Use a local model to generate -- in this case using Ollama + Settings.llm = Ollama( + model=llm, + request_timeout=360.0, + ) + + # Load embedding model (same as used for vector store) + Settings.embed_model = embed_model + + # Load persisted vector store + metadata + storage_context = StorageContext.from_defaults(persist_dir="./storage") + index = load_index_from_storage(storage_context) + + # Build regular query engine with custom prompt + query_engine = index.as_query_engine( + similarity_top_k=15, + text_qa_template=PROMPT, + ) + + # Query + while True: + q = input("\nEnter a search topic or question (or 'exit'): ").strip() + if q.lower() in ("exit", "quit"): + break + print() + + # Generate the response by querying the engine + start_time = time.time() + response = query_engine.query(q) + end_time = time.time() + + # Print the query response and source documents + print(response.response) + + print("\nSource documents:") + for node in response.source_nodes: + meta = getattr(node, "metadata", None) or node.node.metadata + print(f"  {meta.get('file_name')} {getattr(node, 'score', None)}") + + print(f"\nElapsed time: {(end_time-start_time):.1f} seconds") + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..86436b7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +llama-index-core +llama-index-readers-file +llama-index-llms-ollama +llama-index-embeddings-huggingface