Shell script run_retrieve.sh for non-LLM
generation queries (returns only chunks); track development notes and README.
This commit is contained in:
parent
d034fdaad0
commit
eb9997326f
4 changed files with 1089 additions and 20 deletions
|
|
@ -33,7 +33,7 @@ ssearch/
|
||||||
├── archived/ # Superseded script versions
|
├── archived/ # Superseded script versions
|
||||||
├── saved_output/ # Saved query results and model comparisons
|
├── saved_output/ # Saved query results and model comparisons
|
||||||
├── requirements.txt # Python dependencies
|
├── requirements.txt # Python dependencies
|
||||||
├── devlog.txt # Development log and experimental findings
|
├── devlog.md # Development log and experimental findings
|
||||||
└── *.ipynb # Jupyter notebooks (HyDE, metrics, sandbox)
|
└── *.ipynb # Jupyter notebooks (HyDE, metrics, sandbox)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -198,4 +198,4 @@ Three Jupyter notebooks document exploration and analysis:
|
||||||
- **Jan 2026**: Command-line interface, prompt improvements, model comparison (command-r7b selected).
|
- **Jan 2026**: Command-line interface, prompt improvements, model comparison (command-r7b selected).
|
||||||
- **Feb 2026**: Cross-encoder re-ranking, hybrid BM25+vector retrieval, LlamaIndex upgrade to 0.14.14, OpenAI API backend, incremental updates, clippings search (ChromaDB), project reorganization.
|
- **Feb 2026**: Cross-encoder re-ranking, hybrid BM25+vector retrieval, LlamaIndex upgrade to 0.14.14, OpenAI API backend, incremental updates, clippings search (ChromaDB), project reorganization.
|
||||||
|
|
||||||
See `devlog.txt` for detailed development notes and experimental findings.
|
See `devlog.md` for detailed development notes and experimental findings.
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,11 @@
|
||||||
#
|
#
|
||||||
# Usage: ./deploy_public.sh ["optional commit message"]
|
# Usage: ./deploy_public.sh ["optional commit message"]
|
||||||
#
|
#
|
||||||
# Checks out an orphan public branch, copies the public files from main,
|
# Checks out the public branch, updates it with public files from main,
|
||||||
# generates a public README (stripping private sections), commits, and
|
# generates a public README (stripping private sections), commits if
|
||||||
# force-pushes to origin. Then switches back to main.
|
# anything changed, and pushes to origin. Then switches back to main.
|
||||||
|
#
|
||||||
|
# On first run (no public branch exists), creates an orphan branch.
|
||||||
#
|
#
|
||||||
# E.M.F. February 2026
|
# E.M.F. February 2026
|
||||||
|
|
||||||
|
|
@ -50,12 +52,14 @@ MAIN_HEAD=$(git rev-parse --short HEAD)
|
||||||
|
|
||||||
echo "Deploying main ($MAIN_HEAD) -> $BRANCH..."
|
echo "Deploying main ($MAIN_HEAD) -> $BRANCH..."
|
||||||
|
|
||||||
# Delete local public branch if it exists
|
# Check out public branch, or create orphan if it doesn't exist yet
|
||||||
git branch -D "$BRANCH" 2>/dev/null || true
|
if git show-ref --verify --quiet "refs/heads/$BRANCH"; then
|
||||||
|
git checkout "$BRANCH"
|
||||||
# Create fresh orphan
|
else
|
||||||
git checkout --orphan "$BRANCH"
|
echo "No local $BRANCH branch — creating orphan..."
|
||||||
git rm -rf . >/dev/null 2>&1 || true
|
git checkout --orphan "$BRANCH"
|
||||||
|
git rm -rf . >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
|
|
||||||
# Copy public files from main
|
# Copy public files from main
|
||||||
for f in "${PUBLIC_FILES[@]}"; do
|
for f in "${PUBLIC_FILES[@]}"; do
|
||||||
|
|
@ -74,22 +78,23 @@ awk '
|
||||||
skip { next }
|
skip { next }
|
||||||
/archived\// { next }
|
/archived\// { next }
|
||||||
/saved_output\// { next }
|
/saved_output\// { next }
|
||||||
/devlog\.txt/ { next }
|
/devlog\.md/ { next }
|
||||||
/\*\.ipynb/ { next }
|
/\*\.ipynb/ { next }
|
||||||
{ print }
|
{ print }
|
||||||
' README.md > README.tmp && mv README.tmp README.md
|
' README.md > README.tmp && mv README.tmp README.md
|
||||||
|
|
||||||
# Stage only the public files (not untracked files on disk)
|
# Stage only the public files (not untracked files on disk)
|
||||||
git add "${PUBLIC_FILES[@]}" README.md
|
git add "${PUBLIC_FILES[@]}" README.md
|
||||||
git commit -m "$COMMIT_MSG
|
|
||||||
|
|
||||||
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>"
|
# Commit only if there are changes
|
||||||
|
if git diff --cached --quiet; then
|
||||||
# Push
|
echo "No changes to deploy."
|
||||||
git push --force "$REMOTE" "$BRANCH"
|
else
|
||||||
|
git commit -m "$COMMIT_MSG"
|
||||||
|
git push "$REMOTE" "$BRANCH"
|
||||||
|
echo ""
|
||||||
|
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"
|
||||||
|
fi
|
||||||
|
|
||||||
# Switch back to main
|
# Switch back to main
|
||||||
git checkout main
|
git checkout main
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"
|
|
||||||
|
|
|
||||||
29
run_retrieve.sh
Executable file
29
run_retrieve.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/bin/bash
# Interactive front end for the Python retrieval script.
# Prompts for a query, passes it to the script named in QUERY_SCRIPT, and
# prints the formatted results. Repeats until the user enters "exit",
# "quit", or an empty line.

# E.M.F. August 2025

# Usage: ./run_retrieve.sh

QUERY_SCRIPT="retrieve.py"

echo -e "$QUERY_SCRIPT -- retrieve vector store chunks based on similarity + BM25 with reranking.\n"

# Loop until input is "exit", "quit", or empty
while true; do
  # -r keeps backslashes in the query literal instead of treating them as escapes
  read -r -p "Enter your query (or type 'exit' to quit): " query
  if [[ "$query" == "exit" || "$query" == "quit" || -z "$query" ]]; then
    echo "Exiting..."
    break
  fi
  time_start=$(date +%s)

  # Call the python script with the query and format the output:
  #   expand - convert tabs to spaces
  #   sed    - on each line, replace the text after the last space that
  #            precedes ".../data" (up to and including "/data") with "./data"
  #   fold   - wrap at 131 columns, breaking at spaces
  python3 "$QUERY_SCRIPT" --query "$query" \
    | expand \
    | sed -E 's|(.* )(.*/data)|\1./data|' \
    | fold -s -w 131

  time_end=$(date +%s)
  elapsed=$((time_end - time_start))
  echo -e "Query processed in $elapsed seconds.\n"
done
|
||||||
Loading…
Add table
Add a link
Reference in a new issue