- offload_op callback now implemented (MUL_MAT/MUL_MAT_ID) - Memory raised to 10 GiB - Direct compute mode bypasses broken dspqueue on this board - Q8_0 1B model: 115 t/s prompt (4.3x vs CPU 27 t/s) - Generation 9.6 t/s (27% slower than CPU, expected) - dspqueue path fails with error 0x0000002e - llama-cli renamed to llama-simple in current build - Updated scripts for direct-compute mode - Docs updated with new findings and instructions
45 lines
1.5 KiB
Bash
Executable file
45 lines
1.5 KiB
Bash
Executable file
#!/usr/bin/env bash
# deploy-to-q6a.sh — Deploy llama.cpp ARM binaries + DSP .so to Q6A
#
# Copies the ARM-side binaries and shared libraries from a local build tree
# to a directory under the remote user's home, then installs the DSP shared
# object into /usr/lib/dsp/cdsp/ on the target via sudo.
#
# Environment (all optional):
#   Q6A         ssh target as user@host         (default: radxa@192.168.1.11)
#   Q6A_PASS    sudo password on the target     (default: radxa)
#   BUILD_DIR   local llama.cpp build directory (default: $HOME/llama.cpp/build-hexagon)
#   DEPLOY_DIR  destination, relative to the remote home (default: llama/bin)
set -euo pipefail

Q6A="${Q6A:-radxa@192.168.1.11}"
Q6A_PASS="${Q6A_PASS:-radxa}"
BUILD_DIR="${BUILD_DIR:-$HOME/llama.cpp/build-hexagon}"
DEPLOY_DIR="${DEPLOY_DIR:-llama/bin}"

# Print a warning on stderr so it is not lost when stdout is redirected.
warn() {
  printf ' WARNING: %s\n' "$*" >&2
}

echo "=== Deploying to ${Q6A}:${DEPLOY_DIR} ==="

# Create deploy dir
ssh "${Q6A}" "mkdir -p ~/${DEPLOY_DIR}"

# Deploy ARM binaries
echo "--- ARM binaries ---"
# Deploy llama-simple as llama-cli (old llama-cli has hexagon code baked in)
if [[ -f "${BUILD_DIR}/bin/llama-simple" ]]; then
  scp "${BUILD_DIR}/bin/llama-simple" "${Q6A}:~/${DEPLOY_DIR}/llama-cli"
else
  warn "llama-simple not found at ${BUILD_DIR}/bin/llama-simple"
fi

# Shared libraries to ship; names that are absent from the build are skipped.
lib_names=(
  libggml-hexagon.so libggml-hexagon.so.0 libggml-hexagon.so.0.9.11
  libggml-base.so libggml-base.so.0 libggml-base.so.0.9.11
  libggml-cpu.so libggml-cpu.so.0 libggml-cpu.so.0.9.11
  libggml.so libggml.so.0 libggml.so.0.9.11
  libllama.so libllama.so.0
)
libs=()
for f in "${lib_names[@]}"; do
  src="${BUILD_DIR}/bin/${f}"
  if [[ -f "$src" ]]; then
    libs+=("$src")
  fi
done

# Copy everything in one scp invocation (one connection instead of one per
# file). stderr is left visible so that a failed copy, which aborts the
# script under `set -e`, also explains itself.
if (( ${#libs[@]} > 0 )); then
  scp "${libs[@]}" "${Q6A}:~/${DEPLOY_DIR}/"
else
  warn "no shared libraries found in ${BUILD_DIR}/bin"
fi
echo " done"

# Deploy DSP skel
echo "--- DSP .so ---"
DSP_SO="${BUILD_DIR}/ggml/src/ggml-hexagon/libggml-htp-v68.so"
if [[ -f "$DSP_SO" ]]; then
  scp "$DSP_SO" "${Q6A}:/tmp/"
  # Feed the password to `sudo -S` through ssh's stdin rather than splicing
  # it into the remote command line: that keeps it out of argv on both
  # machines and stays correct when the password contains quote characters.
  # `-p ''` silences the prompt sudo would otherwise write to stderr.
  printf '%s\n' "${Q6A_PASS}" \
    | ssh "${Q6A}" "sudo -S -p '' cp /tmp/libggml-htp-v68.so /usr/lib/dsp/cdsp/libggml-htp-v68.so"
  echo " deployed to /usr/lib/dsp/cdsp/"
else
  warn "DSP .so not found at $DSP_SO"
fi

echo "=== Deploy complete ==="
ssh "${Q6A}" "ls -la ~/${DEPLOY_DIR}/"