Rename storage_exp/ to store/, remove unused storage/

Update all active scripts, .gitignore, CLAUDE.md, and README.md.
Also fix stale filename references in script header comments.
This commit is contained in:
Eric Furst 2026-02-26 16:36:57 -05:00
commit 13785d667a
5 changed files with 12 additions and 13 deletions

3
.gitignore vendored
View file

@@ -7,8 +7,7 @@ __pycache__/
models/
# Vector stores (large, rebuild with build scripts)
storage_exp/
storage/
store/
storage_clippings/
# Data (symlinks to private files)

View file

@@ -27,7 +27,7 @@ ssearch/
│ └── retrieve_clippings.py # Verbatim clippings chunk retrieval
├── data/ # Symlink to journal .txt files
├── clippings/ # Symlink to clippings (PDFs, TXT, webarchive, RTF)
├── storage_exp/ # Persisted journal vector store (~242 MB)
├── store/ # Persisted journal vector store (~242 MB)
├── storage_clippings/ # Persisted clippings vector store (ChromaDB)
├── models/ # Cached HuggingFace models (offline)
├── archived/ # Superseded script versions

View file

@@ -1,4 +1,4 @@
# build_exp_claude.py
# build_store.py
#
# Build or update the vector store from journal entries in ./data.
#
@@ -26,7 +26,7 @@ import time
# Shared constants
DATA_DIR = Path("./data")
PERSIST_DIR = "./storage_exp"
PERSIST_DIR = "./store"
EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5"
CHUNK_SIZE = 256
CHUNK_OVERLAP = 25

View file

@@ -1,4 +1,4 @@
# query_hybrid_bm25_v4.py
# query_hybrid.py
# Hybrid retrieval: BM25 (sparse) + vector similarity (dense) + cross-encoder
#
# Combines two retrieval strategies to catch both exact term matches and
@@ -40,7 +40,7 @@ import sys
# Globals
#
# Embedding model (must match build_exp_claude.py)
# Embedding model (must match build_store.py)
EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True)
# LLM model for generation
@@ -105,7 +105,7 @@ def main():
# Load persisted vector store
storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
storage_context = StorageContext.from_defaults(persist_dir="./store")
index = load_index_from_storage(storage_context)
# --- Retrievers ---

View file

@@ -1,9 +1,9 @@
# retrieve_hybrid_raw.py
# retrieve.py
# Hybrid verbatim chunk retrieval: BM25 + vector search + cross-encoder, no LLM.
#
# Same hybrid retrieval as query_hybrid_bm25_v4.py but outputs raw chunk text
# Same hybrid retrieval as query_hybrid.py but outputs raw chunk text
# instead of LLM synthesis. Useful for inspecting what the hybrid pipeline
# retrieves and comparing against retrieve_raw.py (vector-only).
# retrieves.
#
# Each chunk is annotated with its source (vector, BM25, or both) so you can
# see which retriever nominated it.
@@ -33,7 +33,7 @@ import textwrap
# Globals
#
# Embedding model (must match build_exp_claude.py)
# Embedding model (must match build_store.py)
EMBED_MODEL = HuggingFaceEmbedding(cache_folder="./models", model_name="BAAI/bge-large-en-v1.5", local_files_only=True)
# Cross-encoder model for re-ranking (cached in ./models/)
@@ -53,7 +53,7 @@ def main():
Settings.embed_model = EMBED_MODEL
# Load persisted vector store
storage_context = StorageContext.from_defaults(persist_dir="./storage_exp")
storage_context = StorageContext.from_defaults(persist_dir="./store")
index = load_index_from_storage(storage_context)
# --- Retrievers ---