llamacpp_on_dragon_wing_q6a.../scripts/deploy-to-q6a.sh
Jimmy Devine e6fa9052b3 Add NPU offload results: offload_op, direct-compute, 10GB, Q8_0 4.3x prompt speedup
- offload_op callback now implemented (MUL_MAT/MUL_MAT_ID)
- Memory raised to 10 GiB
- Direct compute mode bypasses broken dspqueue on this board
- Q8_0 1B model: 115 t/s prompt (4.3x vs CPU 27 t/s)
- Generation 9.6 t/s (27% slower than CPU, expected)
- dspqueue path fails with error 0x0000002e
- llama-cli renamed to llama-simple in current build
- Updated scripts for direct-compute mode
- Docs updated with new findings and instructions
2026-05-02 14:17:27 +02:00

45 lines
1.5 KiB
Bash
Executable file

#!/usr/bin/env bash
# deploy-to-q6a.sh — Deploy llama.cpp ARM binaries + DSP .so to Q6A
#
# Copies llama-simple (installed remotely as llama-cli) and the ggml/llama
# shared libraries from BUILD_DIR/bin to ~/DEPLOY_DIR on the board, then
# installs libggml-htp-v68.so into /usr/lib/dsp/cdsp/ using sudo.
#
# Environment (all optional):
#   Q6A         ssh destination                    (default: radxa@192.168.1.11)
#   Q6A_PASS    password fed to sudo on the board  (default: radxa)
#   BUILD_DIR   local llama.cpp build directory    (default: $HOME/llama.cpp/build-hexagon)
#   DEPLOY_DIR  target dir relative to remote home (default: llama/bin)
set -euo pipefail

Q6A="${Q6A:-radxa@192.168.1.11}"
Q6A_PASS="${Q6A_PASS:-radxa}"
BUILD_DIR="${BUILD_DIR:-$HOME/llama.cpp/build-hexagon}"
DEPLOY_DIR="${DEPLOY_DIR:-llama/bin}"

# Print a diagnostic on stderr so it is not mixed into normal progress output.
warn() { printf '%s\n' "$*" >&2; }

echo "=== Deploying to ${Q6A}:${DEPLOY_DIR} ==="

# Create deploy dir
ssh "${Q6A}" "mkdir -p ~/${DEPLOY_DIR}"

# Deploy ARM binaries
echo "--- ARM binaries ---"

# Deploy llama-simple as llama-cli (old llama-cli has hexagon code baked in)
if [[ -f "${BUILD_DIR}/bin/llama-simple" ]]; then
  scp "${BUILD_DIR}/bin/llama-simple" "${Q6A}:~/${DEPLOY_DIR}/llama-cli"
else
  warn " WARNING: llama-simple not found at ${BUILD_DIR}/bin/llama-simple"
fi

# Shared libraries to deploy; names missing from the build are skipped.
lib_names=(
  libggml-hexagon.so libggml-hexagon.so.0 libggml-hexagon.so.0.9.11
  libggml-base.so libggml-base.so.0 libggml-base.so.0.9.11
  libggml-cpu.so libggml-cpu.so.0 libggml-cpu.so.0.9.11
  libggml.so libggml.so.0 libggml.so.0.9.11
  libllama.so libllama.so.0
)
libs=()
for f in "${lib_names[@]}"; do
  src="${BUILD_DIR}/bin/${f}"
  if [[ -f "$src" ]]; then
    libs+=("$src")
  fi
done

if (( ${#libs[@]} > 0 )); then
  # Single scp invocation for the whole batch (one connection instead of one
  # per file). stderr is intentionally left visible: under `set -e` a failed
  # copy aborts the script, and the scp message is the only explanation.
  scp "${libs[@]}" "${Q6A}:~/${DEPLOY_DIR}/"
else
  warn " WARNING: no shared libraries found in ${BUILD_DIR}/bin"
fi
echo " done"

# Deploy DSP skel
echo "--- DSP .so ---"
DSP_NAME="libggml-htp-v68.so"
DSP_SO="${BUILD_DIR}/ggml/src/ggml-hexagon/${DSP_NAME}"
if [[ -f "$DSP_SO" ]]; then
  scp "$DSP_SO" "${Q6A}:/tmp/"
  # Feed the sudo password over ssh's stdin instead of embedding it in the
  # remote command line: it stays out of process listings and passwords
  # containing quotes no longer break the command. `-p ''` silences the prompt.
  printf '%s\n' "$Q6A_PASS" \
    | ssh "${Q6A}" "sudo -S -p '' cp /tmp/${DSP_NAME} /usr/lib/dsp/cdsp/${DSP_NAME}"
  echo " deployed to /usr/lib/dsp/cdsp/"
else
  warn " WARNING: DSP .so not found at $DSP_SO"
fi

echo "=== Deploy complete ==="
ssh "${Q6A}" "ls -la ~/${DEPLOY_DIR}/"