
vllm.model_executor.layers.conv

Conv Layer Class.

CausalConv2dLayer

Bases: Conv2dLayer

A causal version of nn.Conv2d in which each location in the 2D matrix has no access to locations to its right or below. All arguments are the same as nn.Conv2d except padding, which must be set to None.

Source code in vllm/model_executor/layers/conv.py
class CausalConv2dLayer(Conv2dLayer):
    """
    A causal version of nn.Conv2d in which each location in the 2D matrix
    has no access to locations to its right or below.
    All arguments are the same as nn.Conv2d except padding, which must be
    set to None.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int,
        padding: int | None = None,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = "zeros",
        *,
        params_dtype: torch.dtype | None = None,
    ) -> None:
        if padding is not None:
            raise ValueError(
                "Argument padding should be set to None for CausalConv2dLayer."
            )
        self._left_padding: int = kernel_size - 1
        self._right_padding: int = stride - 1
        padding = 0

        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            padding_mode,
            params_dtype=params_dtype,
        )

    def forward(
        self,
        x: torch.Tensor,
    ) -> torch.Tensor:
        x = F.pad(x, pad=(self._left_padding, self._right_padding, 0, 0))
        x = super().forward(x)
        return x
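
A minimal sketch of the padding scheme using plain PyTorch ops, showing what forward does before delegating to the parent convolution. The shapes and values below are illustrative, not from the source:

import torch
import torch.nn.functional as F

# Mirror CausalConv2dLayer's padding: kernel_size - 1 on the left and
# stride - 1 on the right of the width axis; height is left unpadded.
kernel_size, stride = 3, 1
x = torch.randn(1, 4, 8, 8)  # (batch, channels, height, width)
weight = torch.randn(4, 4, kernel_size, kernel_size)

x = F.pad(x, pad=(kernel_size - 1, stride - 1, 0, 0))
y = F.conv2d(x, weight, stride=stride)
print(y.shape)  # torch.Size([1, 4, 6, 8]): width is preserved, height shrinks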

_left_padding instance-attribute

_left_padding: int = kernel_size - 1

_right_padding instance-attribute

_right_padding: int = stride - 1

__init__

__init__(
    in_channels: int,
    out_channels: int,
    kernel_size: int,
    stride: int,
    padding: int | None = None,
    dilation: int = 1,
    groups: int = 1,
    bias: bool = True,
    padding_mode: str = "zeros",
    *,
    params_dtype: dtype | None = None,
) -> None
Source code in vllm/model_executor/layers/conv.py
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    kernel_size: int,
    stride: int,
    padding: int | None = None,
    dilation: int = 1,
    groups: int = 1,
    bias: bool = True,
    padding_mode: str = "zeros",
    *,
    params_dtype: torch.dtype | None = None,
) -> None:
    if padding is not None:
        raise ValueError(
            "Argument padding should be set to None for CausalConv2dLayer."
        )
    self._left_padding: int = kernel_size - 1
    self._right_padding: int = stride - 1
    padding = 0

    super().__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        bias,
        padding_mode,
        params_dtype=params_dtype,
    )

forward

forward(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def forward(
    self,
    x: torch.Tensor,
) -> torch.Tensor:
    x = F.pad(x, pad=(self._left_padding, self._right_padding, 0, 0))
    x = super().forward(x)
    return x

Conv2dLayer

Bases: ConvLayerBase

Conv layer with Conv2d.

Source code in vllm/model_executor/layers/conv.py
@CustomOp.register("conv2d")
class Conv2dLayer(ConvLayerBase):
    """Conv layer with Conv2d."""

    num_dim = 2

    def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
        assert x.dim() == 4
        B, C, H, W = x.shape
        K1, K2 = self.kernel_size
        H, W = H // K1, W // K2
        x = x.unfold(2, K1, K1).unfold(3, K2, K2)
        x = x.permute(0, 2, 3, 1, 4, 5).reshape(-1, self.input_size)
        x = F.linear(
            x,
            self.weight.view(self.out_channels, self.input_size),
            self.bias,
        )
        x = x.view(B, H, W, self.out_channels).permute(0, 3, 1, 2)
        return x

    def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
        assert x.dim() == 4
        x = F.conv2d(
            x,
            self.weight,
            self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
        )
        return x

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """Expected input shape: (batch_size, in_channels, height, width)"""
        assert x.dim() == 4
        if self.enable_linear:
            return self._forward_mulmat(x)
        else:
            return self._forward_conv(x)

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        # By default, we use cuDNN's optimized convolution ops.
        return self._forward_conv(x)
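
The _forward_mulmat path relies on the fact that a convolution whose kernel_size equals its stride, with no padding and groups == 1, reads disjoint patches, so it reduces to a single matmul over flattened patches. A small self-contained check of that equivalence, with illustrative shapes:

import torch
import torch.nn.functional as F

B, C, H, W, K, O = 2, 3, 8, 8, 4, 16
x = torch.randn(B, C, H, W)
weight = torch.randn(O, C, K, K)

ref = F.conv2d(x, weight, stride=K)

# Same computation as the unfold + linear path in _forward_mulmat.
patches = x.unfold(2, K, K).unfold(3, K, K)  # (B, C, H//K, W//K, K, K)
patches = patches.permute(0, 2, 3, 1, 4, 5).reshape(-1, C * K * K)
out = F.linear(patches, weight.view(O, -1))
out = out.view(B, H // K, W // K, O).permute(0, 3, 1, 2)

assert torch.allclose(ref, out, atol=1e-4)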

num_dim class-attribute instance-attribute

num_dim = 2

_forward_conv

_forward_conv(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
    assert x.dim() == 4
    x = F.conv2d(
        x,
        self.weight,
        self.bias,
        stride=self.stride,
        padding=self.padding,
        dilation=self.dilation,
        groups=self.groups,
    )
    return x

_forward_mulmat

_forward_mulmat(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
    assert x.dim() == 4
    B, C, H, W = x.shape
    K1, K2 = self.kernel_size
    H, W = H // K1, W // K2
    x = x.unfold(2, K1, K1).unfold(3, K2, K2)
    x = x.permute(0, 2, 3, 1, 4, 5).reshape(-1, self.input_size)
    x = F.linear(
        x,
        self.weight.view(self.out_channels, self.input_size),
        self.bias,
    )
    x = x.view(B, H, W, self.out_channels).permute(0, 3, 1, 2)
    return x

forward_cuda

forward_cuda(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    # By default, we use cuDNN's optimized convolution ops.
    return self._forward_conv(x)

forward_native

forward_native(x: Tensor) -> Tensor

Expected input shape: (batch_size, in_channels, height, width)

Source code in vllm/model_executor/layers/conv.py
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    """Expected input shape: (batch_size, in_channels, height, width)"""
    assert x.dim() == 4
    if self.enable_linear:
        return self._forward_mulmat(x)
    else:
        return self._forward_conv(x)

Conv3dLayer

Bases: ConvLayerBase

Conv layer with Conv3d.

Source code in vllm/model_executor/layers/conv.py
@CustomOp.register("conv3d")
class Conv3dLayer(ConvLayerBase):
    """Conv layer with Conv3d."""

    num_dim = 3

    def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
        assert x.dim() == 5
        B, C, T, H, W = x.shape
        K1, K2, K3 = self.kernel_size
        T, H, W = T // K1, H // K2, W // K3
        x = x.unfold(2, K1, K1).unfold(3, K2, K2).unfold(4, K3, K3)
        x = x.permute(0, 2, 3, 4, 1, 5, 6, 7).reshape(-1, self.input_size)
        x = F.linear(
            x,
            self.weight.view(self.out_channels, self.input_size),
            self.bias,
        )
        x = x.view(B, T, H, W, self.out_channels).permute(0, 4, 1, 2, 3)
        return x

    def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
        assert x.dim() == 5
        x = F.conv3d(
            x,
            self.weight,
            self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
        )
        return x

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """Expected input shape: (batch_size, in_channels, time, height, width)"""
        if self.enable_linear:
            return self._forward_mulmat(x)
        else:
            return self._forward_conv(x)

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        # PyTorch 2.9.0 disabled cuDNN's Conv3d, which caused a
        # significant performance regression.
        # See: https://github.com/vllm-project/vllm/issues/27406
        # and https://github.com/pytorch/pytorch/issues/166122
        # On that version, fall back to the matmul path when eligible;
        # otherwise, use cuDNN's optimized convolution ops by default.
        if self.enable_linear and is_torch_equal("2.9.0"):
            return self._forward_mulmat(x)
        return self._forward_conv(x)
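
The typical consumer of this fast path is a video patch embedding, where kernel_size == stride carves a clip into non-overlapping spatio-temporal patches. An illustrative sketch with plain F.conv3d (the shapes are hypothetical):

import torch
import torch.nn.functional as F

B, C, T, H, W = 1, 3, 8, 32, 32
Kt, Kh, Kw, O = 2, 16, 16, 64
x = torch.randn(B, C, T, H, W)
weight = torch.randn(O, C, Kt, Kh, Kw)

# kernel_size == stride and no padding: exactly the enable_linear case.
y = F.conv3d(x, weight, stride=(Kt, Kh, Kw))
print(y.shape)  # torch.Size([1, 64, 4, 2, 2])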

num_dim class-attribute instance-attribute

num_dim = 3

_forward_conv

_forward_conv(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
    assert x.dim() == 5
    x = F.conv3d(
        x,
        self.weight,
        self.bias,
        stride=self.stride,
        padding=self.padding,
        dilation=self.dilation,
        groups=self.groups,
    )
    return x

_forward_mulmat

_forward_mulmat(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
    assert x.dim() == 5
    B, C, T, H, W = x.shape
    K1, K2, K3 = self.kernel_size
    T, H, W = T // K1, H // K2, W // K3
    x = x.unfold(2, K1, K1).unfold(3, K2, K2).unfold(4, K3, K3)
    x = x.permute(0, 2, 3, 4, 1, 5, 6, 7).reshape(-1, self.input_size)
    x = F.linear(
        x,
        self.weight.view(self.out_channels, self.input_size),
        self.bias,
    )
    x = x.view(B, T, H, W, self.out_channels).permute(0, 4, 1, 2, 3)
    return x

forward_cuda

forward_cuda(x: Tensor) -> Tensor
Source code in vllm/model_executor/layers/conv.py
def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    # PyTorch 2.9.0 disabled cuDNN's Conv3d, which caused a
    # significant performance regression.
    # See: https://github.com/vllm-project/vllm/issues/27406
    # and https://github.com/pytorch/pytorch/issues/166122
    # On that version, fall back to the matmul path when eligible;
    # otherwise, use cuDNN's optimized convolution ops by default.
    if self.enable_linear and is_torch_equal("2.9.0"):
        return self._forward_mulmat(x)
    return self._forward_conv(x)

forward_native

forward_native(x: Tensor) -> Tensor

Expected input shape: (batch_size, in_channels, time, height, width)

Source code in vllm/model_executor/layers/conv.py
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    """Expected input shape: (batch_size, in_channels, time, height, width)"""
    if self.enable_linear:
        return self._forward_mulmat(x)
    else:
        return self._forward_conv(x)

ConvLayerBase

Bases: CustomOp

Conv layer base class.

Source code in vllm/model_executor/layers/conv.py
class ConvLayerBase(CustomOp):
    """Conv layer base class."""

    num_dim: int

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int | tuple[int, ...],
        stride: int | tuple[int, ...] = 1,
        padding: int | tuple[int, ...] = 0,
        dilation: int | tuple[int, ...] = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = "zeros",
        *,
        params_dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()

        if params_dtype is None:
            params_dtype = torch.get_default_dtype()

        kernel_size = (
            (kernel_size,) * self.num_dim
            if isinstance(kernel_size, int)
            else kernel_size
        )
        stride = (stride,) * self.num_dim if isinstance(stride, int) else stride
        padding = (padding,) * self.num_dim if isinstance(padding, int) else padding
        dilation = (dilation,) * self.num_dim if isinstance(dilation, int) else dilation

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.padding_mode = padding_mode

        self.enable_linear = (
            (self.kernel_size == self.stride)
            and not any(self.padding)
            and self.groups == 1
        )
        self.input_size = in_channels * math.prod(self.kernel_size)

        self.weight = nn.Parameter(
            torch.empty(
                out_channels,
                in_channels // groups,
                *kernel_size,
                dtype=params_dtype,
            ),
        )

        if bias:
            self.bias = nn.Parameter(torch.empty(self.out_channels, dtype=params_dtype))
        else:
            self.register_parameter("bias", None)

    def extra_repr(self) -> str:
        s = f"in_channels={self.in_channels}, "
        s += f"out_channels={self.out_channels}, "
        s += f"kernel_size={self.kernel_size}, "
        s += f"stride={self.stride}, "
        s += f"padding={self.padding}, "
        s += f"bias={self.bias is not None}"
        return s
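
A hypothetical helper mirroring the enable_linear condition after scalar arguments have been broadcast to tuples; it is not part of the vLLM API, just a restatement of the check above:

def enables_linear(kernel_size, stride, padding, groups):
    # True only for non-overlapping "patchify" convolutions.
    return kernel_size == stride and not any(padding) and groups == 1

assert enables_linear((16, 16), (16, 16), (0, 0), 1)  # ViT-style patch embed
assert not enables_linear((3, 3), (1, 1), (1, 1), 1)  # overlapping 3x3 conv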

bias instance-attribute

bias = Parameter(empty(out_channels, dtype=params_dtype))

dilation instance-attribute

dilation = dilation

enable_linear instance-attribute

enable_linear = (
    kernel_size == stride
    and not any(padding)
    and groups == 1
)

groups instance-attribute

groups = groups

in_channels instance-attribute

in_channels = in_channels

input_size instance-attribute

input_size = in_channels * prod(kernel_size)

kernel_size instance-attribute

kernel_size = kernel_size

num_dim instance-attribute

num_dim: int

out_channels instance-attribute

out_channels = out_channels

padding instance-attribute

padding = padding

padding_mode instance-attribute

padding_mode = padding_mode

stride instance-attribute

stride = stride

weight instance-attribute

weight = Parameter(
    empty(
        out_channels,
        in_channels // groups,
        *kernel_size,
        dtype=params_dtype,
    )
)

__init__

__init__(
    in_channels: int,
    out_channels: int,
    kernel_size: int | tuple[int, ...],
    stride: int | tuple[int, ...] = 1,
    padding: int | tuple[int, ...] = 0,
    dilation: int | tuple[int, ...] = 1,
    groups: int = 1,
    bias: bool = True,
    padding_mode: str = "zeros",
    *,
    params_dtype: dtype | None = None,
) -> None
Source code in vllm/model_executor/layers/conv.py
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    kernel_size: int | tuple[int, ...],
    stride: int | tuple[int, ...] = 1,
    padding: int | tuple[int, ...] = 0,
    dilation: int | tuple[int, ...] = 1,
    groups: int = 1,
    bias: bool = True,
    padding_mode: str = "zeros",
    *,
    params_dtype: torch.dtype | None = None,
) -> None:
    super().__init__()

    if params_dtype is None:
        params_dtype = torch.get_default_dtype()

    kernel_size = (
        (kernel_size,) * self.num_dim
        if isinstance(kernel_size, int)
        else kernel_size
    )
    stride = (stride,) * self.num_dim if isinstance(stride, int) else stride
    padding = (padding,) * self.num_dim if isinstance(padding, int) else padding
    dilation = (dilation,) * self.num_dim if isinstance(dilation, int) else dilation

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.groups = groups
    self.padding_mode = padding_mode

    self.enable_linear = (
        (self.kernel_size == self.stride)
        and not any(self.padding)
        and self.groups == 1
    )
    self.input_size = in_channels * math.prod(self.kernel_size)

    self.weight = nn.Parameter(
        torch.empty(
            out_channels,
            in_channels // groups,
            *kernel_size,
            dtype=params_dtype,
        ),
    )

    if bias:
        self.bias = nn.Parameter(torch.empty(self.out_channels, dtype=params_dtype))
    else:
        self.register_parameter("bias", None)

extra_repr

extra_repr() -> str
Source code in vllm/model_executor/layers/conv.py
def extra_repr(self) -> str:
    s = f"in_channels={self.in_channels}, "
    s += f"out_channels={self.out_channels}, "
    s += f"kernel_size={self.kernel_size}, "
    s += f"stride={self.stride}, "
    s += f"padding={self.padding}, "
    s += f"bias={self.bias is not None}"
    return s
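
Since ConvLayerBase ultimately derives from nn.Module via CustomOp, these fields appear when the layer is printed. Hypothetical output for a ViT-style patch embedding:

# print(Conv2dLayer(3, 768, kernel_size=16, stride=16)) would show
# something like:
# Conv2dLayer(in_channels=3, out_channels=768, kernel_size=(16, 16),
#             stride=(16, 16), padding=(0, 0), bias=True)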