Shell script run_retrieve.sh for non-LLM
generation queries (returns only chunks), track development notes and README.
This commit is contained in:
parent
d034fdaad0
commit
eb9997326f
4 changed files with 1089 additions and 20 deletions
|
|
@ -33,7 +33,7 @@ ssearch/
|
|||
├── archived/ # Superseded script versions
|
||||
├── saved_output/ # Saved query results and model comparisons
|
||||
├── requirements.txt # Python dependencies
|
||||
├── devlog.txt # Development log and experimental findings
|
||||
├── devlog.md # Development log and experimental findings
|
||||
└── *.ipynb # Jupyter notebooks (HyDE, metrics, sandbox)
|
||||
```
|
||||
|
||||
|
|
@ -198,4 +198,4 @@ Three Jupyter notebooks document exploration and analysis:
|
|||
- **Jan 2026**: Command-line interface, prompt improvements, model comparison (command-r7b selected).
|
||||
- **Feb 2026**: Cross-encoder re-ranking, hybrid BM25+vector retrieval, LlamaIndex upgrade to 0.14.14, OpenAI API backend, incremental updates, clippings search (ChromaDB), project reorganization.
|
||||
|
||||
See `devlog.txt` for detailed development notes and experimental findings.
|
||||
See `devlog.md` for detailed development notes and experimental findings.
|
||||
|
|
|
|||
|
|
@ -3,9 +3,11 @@
|
|||
#
|
||||
# Usage: ./deploy_public.sh ["optional commit message"]
|
||||
#
|
||||
# Checks out an orphan public branch, copies the public files from main,
|
||||
# generates a public README (stripping private sections), commits, and
|
||||
# force-pushes to origin. Then switches back to main.
|
||||
# Checks out the public branch, updates it with public files from main,
|
||||
# generates a public README (stripping private sections), commits if
|
||||
# anything changed, and pushes to origin. Then switches back to main.
|
||||
#
|
||||
# On first run (no public branch exists), creates an orphan branch.
|
||||
#
|
||||
# E.M.F. February 2026
|
||||
|
||||
|
|
@ -50,12 +52,14 @@ MAIN_HEAD=$(git rev-parse --short HEAD)
|
|||
|
||||
echo "Deploying main ($MAIN_HEAD) -> $BRANCH..."
|
||||
|
||||
# Delete local public branch if it exists
|
||||
git branch -D "$BRANCH" 2>/dev/null || true
|
||||
|
||||
# Create fresh orphan
|
||||
git checkout --orphan "$BRANCH"
|
||||
git rm -rf . >/dev/null 2>&1 || true
|
||||
# Check out public branch, or create orphan if it doesn't exist yet
|
||||
if git show-ref --verify --quiet "refs/heads/$BRANCH"; then
|
||||
git checkout "$BRANCH"
|
||||
else
|
||||
echo "No local $BRANCH branch — creating orphan..."
|
||||
git checkout --orphan "$BRANCH"
|
||||
git rm -rf . >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# Copy public files from main
|
||||
for f in "${PUBLIC_FILES[@]}"; do
|
||||
|
|
@ -74,22 +78,23 @@ awk '
|
|||
skip { next }
|
||||
/archived\// { next }
|
||||
/saved_output\// { next }
|
||||
/devlog\.txt/ { next }
|
||||
/devlog\.md/ { next }
|
||||
/\*\.ipynb/ { next }
|
||||
{ print }
|
||||
' README.md > README.tmp && mv README.tmp README.md
|
||||
|
||||
# Stage only the public files (not untracked files on disk)
|
||||
git add "${PUBLIC_FILES[@]}" README.md
|
||||
git commit -m "$COMMIT_MSG
|
||||
|
||||
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>"
|
||||
|
||||
# Push
|
||||
git push --force "$REMOTE" "$BRANCH"
|
||||
# Commit only if there are changes
|
||||
if git diff --cached --quiet; then
|
||||
echo "No changes to deploy."
|
||||
else
|
||||
git commit -m "$COMMIT_MSG"
|
||||
git push "$REMOTE" "$BRANCH"
|
||||
echo ""
|
||||
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"
|
||||
fi
|
||||
|
||||
# Switch back to main
|
||||
git checkout main
|
||||
|
||||
echo ""
|
||||
echo "Done. Deployed main ($MAIN_HEAD) -> $REMOTE/$BRANCH"
|
||||
|
|
|
|||
29
run_retrieve.sh
Executable file
29
run_retrieve.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
# Interactive front end for the Python retrieval script.
# Prompts for a query, runs the script named in QUERY_SCRIPT on it, prints
# the formatted output, and reports the elapsed wall-clock time. Repeats
# until the user enters "exit", "quit", or an empty line.

# E.M.F. August 2025

# Usage: ./run_retrieve.sh

# Resolved relative to the current working directory.
QUERY_SCRIPT="retrieve.py"

printf '%s -- retrieve vector store chunks based on similarity + BM25 with reranking.\n\n' \
  "$QUERY_SCRIPT"

# Loop until input is "exit", "quit", or empty
while true; do
  # -r keeps backslashes in the query literal instead of treating them as
  # escapes. At end-of-input read leaves query empty, which ends the loop
  # through the check below.
  read -r -p "Enter your query (or type 'exit' to quit): " query
  if [[ "$query" == "exit" || "$query" == "quit" || -z "$query" ]]; then
    echo "Exiting..."
    break
  fi
  time_start=$(date +%s)

  # Run the query and tidy the output for the terminal:
  #   expand - convert tabs to spaces so column widths are predictable
  #   sed    - on each line, replace the text between the last preceding
  #            space and the final "/data" with "./data"
  #   fold   - wrap at 131 columns, breaking at spaces
  python3 "$QUERY_SCRIPT" --query "$query" \
    | expand \
    | sed -E 's|(.* )(.*/data)|\1./data|' \
    | fold -s -w 131

  time_end=$(date +%s)
  elapsed=$((time_end - time_start))
  printf 'Query processed in %s seconds.\n\n' "$elapsed"
done
|
||||
Loading…
Add table
Add a link
Reference in a new issue