28 lines
1.3 KiB
Bash
28 lines
1.3 KiB
Bash
|
|
#!/usr/bin/env bash
# test-7b.sh — Run 7B model benchmarks on Q6A at various context sizes
#
# For every context size, llama-cli is run twice on the remote host over ssh:
#   1. with GGML_HEXAGON=1 set (labelled "NPU"), and
#   2. with the libggml-hexagon.so* files in the deploy directory temporarily
#      renamed to *.disabled (labelled "CPU-only").
# Only the output lines matching the timing/memory patterns are printed.
#
# Environment overrides:
#   Q6A         ssh destination             (default: radxa@192.168.1.11)
#   MODEL       model path on the remote host
#   DEPLOY_DIR  directory containing llama-cli, relative to the remote $HOME
#
# Exit status: 0 when every run produced results, 1 otherwise.
set -euo pipefail

Q6A="${Q6A:-radxa@192.168.1.11}"
MODEL="${MODEL:-/home/radxa/models/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf}"
DEPLOY_DIR="${DEPLOY_DIR:-llama/bin}"

CONTEXTS=("2048" "8192" "32768" "65536")

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Emit $1 wrapped in single quotes so a POSIX shell on the remote side reads
# it back as exactly one word, even if it contains spaces or quotes.
sh_quote() {
  local sq="'" esc="'\\''"
  printf "'%s'" "${1//${sq}/${esc}}"
}

# Run REMOTE_CMD ($2) on ${Q6A} and print the output lines matching the
# extended regex PATTERN ($1).
# Returns 0 when the remote command succeeded and at least one line matched,
# 1 otherwise. Aborts the script when ssh itself fails (exit status 255),
# because no later run could succeed either.
run_remote() {
  local pattern=$1 remote_cmd=$2
  local output status=0 rc=0

  output="$(ssh "${Q6A}" "${remote_cmd}")" || status=$?
  if ((status == 255)); then
    die "ssh to ${Q6A} failed"
  fi

  if ! grep -E -- "${pattern}" <<<"${output}"; then
    printf '(no matching output lines)\n' >&2
    rc=1
  fi
  if ((status != 0)); then
    # e.g. 124 from timeout, or a llama-cli failure at a large context size.
    printf '(remote command exited with status %d)\n' "${status}" >&2
    rc=1
  fi
  return "${rc}"
}

# Remote-side snippet for the CPU-only run (plain POSIX sh, not expanded
# locally). It renames every libggml-hexagon.so* file to *.disabled and
# registers traps so the files are renamed back when the remote shell exits,
# including on HUP/INT/TERM. The -L test keeps dangling symlinks in the
# rename set once their target has already been moved.
CPU_PRELUDE="$(
  cat <<'EOF'
restore_libs() {
  for f in libggml-hexagon.so*.disabled; do
    if [ -e "$f" ] || [ -L "$f" ]; then mv -- "$f" "${f%.disabled}"; fi
  done
}
trap restore_libs EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
for f in libggml-hexagon.so*; do
  case "$f" in (*.disabled) continue ;; esac
  if [ -e "$f" ] || [ -L "$f" ]; then mv -- "$f" "$f.disabled"; fi
done
EOF
)"

# Quote once; "~/" stays unquoted so the remote shell still expands it.
model_q="$(sh_quote "${MODEL}")"
remote_dir="~/$(sh_quote "${DEPLOY_DIR}")"
failures=0

echo "=== 7B Model Benchmarks ==="
echo "Model: ${MODEL}"
echo ""

for ctx in "${CONTEXTS[@]}"; do
  # NOTE(review): both runs pass -ngl 0; confirm that is intended for the
  # NPU run as well.
  llama_cmd="LD_LIBRARY_PATH=. timeout 120 ./llama-cli -m ${model_q} -n 8 -p Hello -ngl 0 -c ${ctx} --no-display-prompt 2>&1"

  echo "--- Context ${ctx} (NPU) ---"
  run_remote 'Prompt:|Generation:|memory' \
    "cd ${remote_dir} && GGML_HEXAGON=1 ${llama_cmd}" ||
    failures=$((failures + 1))

  echo ""

  echo "--- Context ${ctx} (CPU-only) ---"
  # "cd ... || exit 1" guards the whole remote script, so no rename or
  # llama-cli invocation can happen outside the deploy directory.
  run_remote 'Prompt:|Generation:' \
    "cd ${remote_dir} || exit 1
${CPU_PRELUDE}
${llama_cmd}" ||
    failures=$((failures + 1))

  echo ""
done

echo "=== Done ==="

if ((failures > 0)); then
  printf '%d benchmark run(s) failed or produced no results\n' "${failures}" >&2
  exit 1
fi
|