Bases: Module
MLP for the language component of the Qwen model, which contains a MergedColumnParallelLinear merging 2 outputs via silu activation.
Source code in vllm/model_executor/models/qwen.py
| class QWenMLP(nn.Module):
"""MLP for the language component of the Qwen model, which contains a
MergedColumnParallelLinear merging 2 outputs via silu activation."""
def __init__(
self,
hidden_size: int,
intermediate_size: int,
hidden_act: str = "silu",
quant_config: QuantizationConfig | None = None,
prefix: str = "",
):
super().__init__()
self.gate_up_proj = MergedColumnParallelLinear(
hidden_size,
[intermediate_size] * 2,
bias=False,
quant_config=quant_config,
prefix=f"{prefix}.gate_up_proj",
)
self.c_proj = RowParallelLinear(
intermediate_size,
hidden_size,
bias=False,
quant_config=quant_config,
prefix=f"{prefix}.c_proj",
)
if hidden_act != "silu":
raise ValueError(
f"Unsupported activation: {hidden_act}. Only silu is supported for now."
)
self.act_fn = SiluAndMul()
def forward(self, x: torch.Tensor) -> torch.Tensor:
gate_up, _ = self.gate_up_proj(x)
x = self.act_fn(gate_up)
x, _ = self.c_proj(x)
return x
|