From 150c499caef86a06332ee56924321c8a47f9c86f Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 1 May 2025 12:49:02 -0700 Subject: [PATCH] qwen25vl: use SiLU instead of GELU for the vision MLP gate activation --- model/models/qwen25vl/model_vision.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/models/qwen25vl/model_vision.go b/model/models/qwen25vl/model_vision.go index 52e4b7ce0..16ea39919 100644 --- a/model/models/qwen25vl/model_vision.go +++ b/model/models/qwen25vl/model_vision.go @@ -108,7 +108,7 @@ func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Visi // Using activation as specified in config (likely GELU or SiLU/Swish) gateOutput := mlp.Gate.Forward(ctx, hiddenStates) upOutput := mlp.Up.Forward(ctx, hiddenStates) - hiddenStates = gateOutput.GELU(ctx).Mul(ctx, upOutput) + hiddenStates = gateOutput.SILU(ctx).Mul(ctx, upOutput) return mlp.Down.Forward(ctx, hiddenStates) }