model : remove duplicate wo_s scale after build_attn (Qwen3, LLaMA) (#22421)

Signed-off-by: Yash Nankani <ynankani@nvidia.com>
This commit is contained in:
ynankani 2026-04-27 07:58:48 +00:00 committed by GitHub
parent d13540becd
commit 0f1bb602dd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 0 additions and 9 deletions

View file

@@ -72,9 +72,6 @@ llm_build_llama<embed>::llm_build_llama(const llama_model & model, const llm_gra
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
if (model.layers[il].wo_s) {
cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
}
cb(cur, "attn_out", il);
}
if (il == n_layer - 1 && inp_out_ids) {

View file

@@ -58,9 +58,6 @@ llm_build_qwen3::llm_build_qwen3(const llama_model & model, const llm_graph_para
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
if (model.layers[il].wo_s) {
cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
}
}
if (il == n_layer - 1 && inp_out_ids) {
cur = ggml_get_rows(ctx0, cur, inp_out_ids);

View file

@@ -58,9 +58,6 @@ llm_build_qwen3moe::llm_build_qwen3moe(const llama_model & model, const llm_grap
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
if (model.layers[il].wo_s) {
cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
}
}
if (il == n_layer - 1 && inp_out_ids) {
cur = ggml_get_rows(ctx0, cur, inp_out_ids);