Adds:
- Detailed explanation of why Hexagon NPU doesn't accelerate inference
- offload_op callback is NULL in ggml-hexagon.cpp
- 2048 MiB limit is hardcoded, not hardware-queried
- Q4_K_M not supported by HTP kernels (only Q4_0, Q8_0, IQ4_NL, MXFP4)
- Full benchmark table: 1B and 7B models, 2K/32K/64K context, CPU vs NPU
- All results show CPU and NPU identical within margin of error
- 7B test script (test-7b.sh)
- Updated deploy script with password handling for DSP .so
- Performance baseline in AGENTS.md
- Cross-compile pitfalls (CMAKE_SYSROOT, rpcmem_init)
41 lines
1.3 KiB
Bash
Executable file
41 lines
1.3 KiB
Bash
Executable file
#!/usr/bin/env bash
# deploy-to-q6a.sh — Deploy llama.cpp ARM binaries + DSP .so to Q6A
#
# Usage: deploy-to-q6a.sh
#
# Environment (all optional):
#   Q6A         ssh destination of the board   (default: radxa@192.168.1.11)
#   Q6A_PASS    password fed to sudo on board  (default: radxa)
#   BUILD_DIR   local llama.cpp build tree     (default: $HOME/llama.cpp/build-hexagon)
#   DEPLOY_DIR  target dir, relative to the remote home (default: llama/bin)
set -euo pipefail

Q6A="${Q6A:-radxa@192.168.1.11}"
Q6A_PASS="${Q6A_PASS:-radxa}"
BUILD_DIR="${BUILD_DIR:-$HOME/llama.cpp/build-hexagon}"
DEPLOY_DIR="${DEPLOY_DIR:-llama/bin}"

# Print a message on stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

echo "=== Deploying to ${Q6A}:${DEPLOY_DIR} ==="

# Create deploy dir
ssh "${Q6A}" "mkdir -p ~/${DEPLOY_DIR}"

# Deploy ARM binaries
echo "--- ARM binaries ---"
arm_files=(
  llama-cli
  libggml-hexagon.so libggml-hexagon.so.0 libggml-hexagon.so.0.9.11
  libggml-base.so libggml-base.so.0 libggml-base.so.0.9.11
  libggml-cpu.so libggml-cpu.so.0 libggml-cpu.so.0.9.11
  libggml.so libggml.so.0 libggml.so.0.9.11
  libllama.so libllama.so.0
)
for f in "${arm_files[@]}"; do
  src="${BUILD_DIR}/bin/${f}"
  if [[ -f "$src" ]]; then
    # scp's stderr is intentionally NOT discarded: under `set -e` a failed
    # copy aborts the script, and the reason must be visible.
    scp "$src" "${Q6A}:~/${DEPLOY_DIR}/" || die "failed to copy ${f} to ${Q6A}"
  else
    # Not every build produces every name in the list; skip, but say so.
    printf ' skipped (not found in build): %s\n' "$src" >&2
  fi
done
echo " done"

# Deploy DSP skel
echo "--- DSP .so ---"
DSP_SO="${BUILD_DIR}/ggml/src/ggml-hexagon/libggml-htp-v68.so"
if [[ -f "$DSP_SO" ]]; then
  # Stage in /tmp first: the final location is only writable via sudo.
  scp "$DSP_SO" "${Q6A}:/tmp/"
  # Feed the password over ssh's stdin instead of embedding it in the remote
  # command line: that keeps it out of the process list and works for
  # passwords containing quotes. `-p ''` silences sudo's prompt text.
  ssh "${Q6A}" \
    "sudo -S -p '' cp /tmp/libggml-htp-v68.so /usr/lib/dsp/cdsp/libggml-htp-v68.so 2>&1" \
    <<<"${Q6A_PASS}"
  echo " deployed to /usr/lib/dsp/cdsp/"
else
  echo " WARNING: DSP .so not found at $DSP_SO" >&2
fi

echo "=== Deploy complete ==="
ssh "${Q6A}" "ls -la ~/${DEPLOY_DIR}/"