Shell script run_retrieve.sh for non-LLM

generation queries (returns only chunks), track
development notes and README.
This commit is contained in:
Eric Furst 2026-03-01 07:39:28 -05:00
commit eb9997326f
4 changed files with 1089 additions and 20 deletions

View file

@ -33,7 +33,7 @@ ssearch/
├── archived/ # Superseded script versions ├── archived/ # Superseded script versions
├── saved_output/ # Saved query results and model comparisons ├── saved_output/ # Saved query results and model comparisons
├── requirements.txt # Python dependencies ├── requirements.txt # Python dependencies
├── devlog.txt # Development log and experimental findings ├── devlog.md # Development log and experimental findings
└── *.ipynb # Jupyter notebooks (HyDE, metrics, sandbox) └── *.ipynb # Jupyter notebooks (HyDE, metrics, sandbox)
``` ```
@ -198,4 +198,4 @@ Three Jupyter notebooks document exploration and analysis:
- **Jan 2026**: Command-line interface, prompt improvements, model comparison (command-r7b selected). - **Jan 2026**: Command-line interface, prompt improvements, model comparison (command-r7b selected).
- **Feb 2026**: Cross-encoder re-ranking, hybrid BM25+vector retrieval, LlamaIndex upgrade to 0.14.14, OpenAI API backend, incremental updates, clippings search (ChromaDB), project reorganization. - **Feb 2026**: Cross-encoder re-ranking, hybrid BM25+vector retrieval, LlamaIndex upgrade to 0.14.14, OpenAI API backend, incremental updates, clippings search (ChromaDB), project reorganization.
See `devlog.txt` for detailed development notes and experimental findings. See `devlog.md` for detailed development notes and experimental findings.

View file

@ -3,9 +3,11 @@
# #
# Usage: ./deploy_public.sh ["optional commit message"] # Usage: ./deploy_public.sh ["optional commit message"]
# #
# Checks out an orphan public branch, copies the public files from main, # Checks out the public branch, updates it with public files from main,
# generates a public README (stripping private sections), commits, and # generates a public README (stripping private sections), commits if
# force-pushes to origin. Then switches back to main. # anything changed, and pushes to origin. Then switches back to main.
#
# On first run (no public branch exists), creates an orphan branch.
# #
# E.M.F. February 2026 # E.M.F. February 2026
@ -50,12 +52,14 @@ MAIN_HEAD=$(git rev-parse --short HEAD)
echo "Deploying main ($MAIN_HEAD) -> $BRANCH..." echo "Deploying main ($MAIN_HEAD) -> $BRANCH..."
# Delete local public branch if it exists # Check out public branch, or create orphan if it doesn't exist yet
git branch -D "$BRANCH" 2>/dev/null || true if git show-ref --verify --quiet "refs/heads/$BRANCH"; then
git checkout "$BRANCH"
# Create fresh orphan else
git checkout --orphan "$BRANCH" echo "No local $BRANCH branch — creating orphan..."
git rm -rf . >/dev/null 2>&1 || true git checkout --orphan "$BRANCH"
git rm -rf . >/dev/null 2>&1 || true
fi
# Copy public files from main # Copy public files from main
for f in "${PUBLIC_FILES[@]}"; do for f in "${PUBLIC_FILES[@]}"; do
@ -74,22 +78,23 @@ awk '
skip { next } skip { next }
/archived\// { next } /archived\// { next }
/saved_output\// { next } /saved_output\// { next }
/devlog\.txt/ { next } /devlog\.md/ { next }
/\*\.ipynb/ { next } /\*\.ipynb/ { next }
{ print } { print }
' README.md > README.tmp && mv README.tmp README.md ' README.md > README.tmp && mv README.tmp README.md
# Stage only the public files (not untracked files on disk) # Stage only the public files (not untracked files on disk)
git add "${PUBLIC_FILES[@]}" README.md git add "${PUBLIC_FILES[@]}" README.md
git commit -m "$COMMIT_MSG
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>" # Commit only if there are changes
if git diff --cached --quiet; then
# Push echo "No changes to deploy."
git push --force "$REMOTE" "$BRANCH" else
git commit -m "$COMMIT_MSG"
git push "$REMOTE" "$BRANCH"
echo ""
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"
fi
# Switch back to main # Switch back to main
git checkout main git checkout main
echo ""
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"

1035
devlog.md Normal file

File diff suppressed because it is too large Load diff

29
run_retrieve.sh Executable file
View file

@ -0,0 +1,29 @@
#!/bin/bash
# run_retrieve.sh -- interactive front end for the retrieval-only query script.
#
# Repeatedly prompts for a query, passes it to the Python script named in
# QUERY_SCRIPT via its --query option, and prints the formatted output
# followed by the elapsed wall-clock time in whole seconds.
# Entering "exit", "quit", or an empty line ends the session; end-of-input
# (e.g. Ctrl-D on an empty prompt) leaves the query empty and so exits too.
#
# E.M.F. August 2025
# Usage: ./run_retrieve.sh

QUERY_SCRIPT="retrieve.py"

printf '%s -- retrieve vector store chunks based on similarity + BM25 with reranking.\n\n' "$QUERY_SCRIPT"

# Loop until input is "exit", "quit", or empty
while true; do
  # -r keeps backslashes in the query literal. The default IFS is left alone
  # on purpose so leading/trailing whitespace is still trimmed, which lets a
  # whitespace-only line count as empty.
  read -r -p "Enter your query (or type 'exit' to quit): " query

  if [[ "$query" == "exit" || "$query" == "quit" || -z "$query" ]]; then
    echo "Exiting..."
    break
  fi

  time_start=$(date +%s)

  # Call the python script with the query and format the output:
  #   expand  - convert tabs to spaces so the wrap width is counted correctly
  #   sed     - replace the text from the last preceding space up to "/data"
  #             with "./data" (presumably hides the absolute path prefix of
  #             the data directory -- confirm against retrieve.py output)
  #   fold    - wrap at 131 columns, breaking at spaces where possible
  python3 "$QUERY_SCRIPT" --query "$query" \
    | expand \
    | sed -E 's|(.* )(.*/data)|\1./data|' \
    | fold -s -w 131

  time_end=$(date +%s)
  elapsed=$((time_end - time_start))
  printf 'Query processed in %s seconds.\n\n' "$elapsed"
done