Rename storage_exp/ to store/, remove unused storage/

Update all active scripts, .gitignore, CLAUDE.md, and README.md.
Also fix stale filename references in script header comments.
This commit is contained in:
Eric Furst 2026-02-26 16:36:57 -05:00
commit 13785d667a
5 changed files with 12 additions and 13 deletions

3
.gitignore vendored
View file

@@ -7,8 +7,7 @@ __pycache__/
models/ models/
# Vector stores (large, rebuild with build scripts) # Vector stores (large, rebuild with build scripts)
storage_exp/ store/
storage/
storage_clippings/ storage_clippings/
# Data (symlinks to private files) # Data (symlinks to private files)

View file

@@ -27,7 +27,7 @@ ssearch/
│ └── retrieve_clippings.py # Verbatim clippings chunk retrieval │ └── retrieve_clippings.py # Verbatim clippings chunk retrieval
├── data/ # Symlink to journal .txt files ├── data/ # Symlink to journal .txt files
├── clippings/ # Symlink to clippings (PDFs, TXT, webarchive, RTF) ├── clippings/ # Symlink to clippings (PDFs, TXT, webarchive, RTF)
├── storage_exp/ # Persisted journal vector store (~242 MB) ├── store/ # Persisted journal vector store (~242 MB)
├── storage_clippings/ # Persisted clippings vector store (ChromaDB) ├── storage_clippings/ # Persisted clippings vector store (ChromaDB)
├── models/ # Cached HuggingFace models (offline) ├── models/ # Cached HuggingFace models (offline)
├── archived/ # Superseded script versions ├── archived/ # Superseded script versions

View file

@@ -1,4 +1,4 @@
# build_exp_claude.py # build_store.py
# #
# Build or update the vector store from journal entries in ./data. # Build or update the vector store from journal entries in ./data.
# #
@@ -26,7 +26,7 @@ import time
# Shared constants # Shared constants
DATA_DIR = Path("./data") DATA_DIR = Path("./data")
PERSIST_DIR = "./storage_exp" PERSIST_DIR = "./store"
EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5" EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5"
CHUNK_SIZE = 256 CHUNK_SIZE = 256
CHUNK_OVERLAP = 25 CHUNK_OVERLAP = 25

View file

@@ -1,4 +1,4 @@
# query_hybrid_bm25_v4.py # query_hybrid.py
# Hybrid retrieval: BM25 (sparse) + vector similarity (dense) + cross-encoder # Hybrid retrieval: BM25 (sparse) + vector similarity (dense) + cross-encoder
# #
# Combines two retrieval strategies to catch both exact term matches and # Combines two retrieval strategies to catch both exact term matches and
@@ -40,7 +40,7 @@ import sys
# Globals # Globals
# #
# Embedding model (must match build_exp_claude.py) # Embedding model (must match build_store.py)
EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True) EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True)
# LLM model for generation # LLM model for generation
@@ -105,7 +105,7 @@ def main():
# Load persisted vector store # Load persisted vector store
storage_context = StorageContext.from_defaults(persist_dir="./storage_exp") storage_context = StorageContext.from_defaults(persist_dir="./store")
index = load_index_from_storage(storage_context) index = load_index_from_storage(storage_context)
# --- Retrievers --- # --- Retrievers ---

View file

@@ -1,9 +1,9 @@
# retrieve_hybrid_raw.py # retrieve.py
# Hybrid verbatim chunk retrieval: BM25 + vector search + cross-encoder, no LLM. # Hybrid verbatim chunk retrieval: BM25 + vector search + cross-encoder, no LLM.
# #
# Same hybrid retrieval as query_hybrid_bm25_v4.py but outputs raw chunk text # Same hybrid retrieval as query_hybrid.py but outputs raw chunk text
# instead of LLM synthesis. Useful for inspecting what the hybrid pipeline # instead of LLM synthesis. Useful for inspecting what the hybrid pipeline
# retrieves and comparing against retrieve_raw.py (vector-only). # retrieves.
# #
# Each chunk is annotated with its source (vector, BM25, or both) so you can # Each chunk is annotated with its source (vector, BM25, or both) so you can
# see which retriever nominated it. # see which retriever nominated it.
@@ -33,7 +33,7 @@ import textwrap
# Globals # Globals
# #
# Embedding model (must match build_exp_claude.py) # Embedding model (must match build_store.py)
EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True) EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True)
# Cross-encoder model for re-ranking (cached in ./models/) # Cross-encoder model for re-ranking (cached in ./models/)
@@ -53,7 +53,7 @@ def main():
Settings.embed_model = EMBED_MODEL Settings.embed_model = EMBED_MODEL
# Load persisted vector store # Load persisted vector store
storage_context = StorageContext.from_defaults(persist_dir="./storage_exp") storage_context = StorageContext.from_defaults(persist_dir="./store")
index = load_index_from_storage(storage_context) index = load_index_from_storage(storage_context)
# --- Retrievers --- # --- Retrievers ---