Skip to content

vllm.model_executor.models.interfaces_base

VllmModel

Bases: Protocol[T_co]

The interface required for all models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py
@runtime_checkable
class VllmModel(Protocol[T_co]):
    """Structural interface that every model in vLLM must satisfy.

    The protocol is generic in `T_co`, the type returned by `forward`.
    It is decorated with `runtime_checkable`, so it can be used with
    `isinstance`; such checks only look for the presence of the protocol
    members, not at their signatures.
    """

    def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None:
        """Construct the model from `vllm_config`.

        Args:
            vllm_config: The vLLM configuration object.
            prefix: Defaults to the empty string. Presumably a name prefix
                for the model's submodules or weights (TODO confirm).
        """
        ...

    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Apply token embeddings to `input_ids`."""
        ...

    def forward(self, input_ids: torch.Tensor, positions: torch.Tensor) -> T_co:
        """Run the model on `input_ids` and `positions`.

        Returns:
            A value of the protocol's type parameter `T_co`.
        """
        ...

embed_input_ids

embed_input_ids(input_ids: Tensor) -> Tensor

Apply token embeddings to input_ids.

Source code in vllm/model_executor/models/interfaces_base.py
def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Apply token embeddings to `input_ids`.

    This is an interface stub (the body is `...`); classes implementing
    the protocol supply the actual behavior.
    """
    ...

VllmModelForPooling

Bases: VllmModel[T_co], Protocol[T_co]

The interface required for all pooling models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py
@runtime_checkable
class VllmModelForPooling(VllmModel[T_co], Protocol[T_co]):
    """The interface required for all pooling models in vLLM.

    Extends `VllmModel` with class-level defaults (`is_pooling_model`,
    `default_seq_pooling_type`, `default_tok_pooling_type`, `attn_type`)
    and a `pooler` attribute.
    """

    is_pooling_model: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports pooling.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    default_seq_pooling_type: ClassVar[SequencePoolingType] = "LAST"
    """
    Indicates the [vllm.config.pooler.PoolerConfig.seq_pooling_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.default_pooling_type][]
    decorator to conveniently set this field.
    """

    default_tok_pooling_type: ClassVar[TokenPoolingType] = "ALL"
    """
    Indicates the [vllm.config.pooler.PoolerConfig.tok_pooling_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.default_pooling_type][]
    decorator to conveniently set this field.
    """

    attn_type: ClassVar[AttnTypeStr] = "decoder"
    """
    Indicates the
    [vllm.config.model.ModelConfig.attn_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.attn_type][]
    decorator to conveniently set this field.
    """

    # Annotated without a value: unlike the ClassVar fields above, no
    # default is provided here, so the protocol itself does not supply one.
    pooler: Pooler
    """The pooler is only called on TP rank 0."""

attn_type class-attribute

attn_type: AttnTypeStr = 'decoder'

Indicates the vllm.config.model.ModelConfig.attn_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.attn_type decorator to conveniently set this field.

default_seq_pooling_type class-attribute

default_seq_pooling_type: SequencePoolingType = 'LAST'

Indicates the vllm.config.pooler.PoolerConfig.seq_pooling_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.default_pooling_type decorator to conveniently set this field.

default_tok_pooling_type class-attribute

default_tok_pooling_type: TokenPoolingType = 'ALL'

Indicates the vllm.config.pooler.PoolerConfig.tok_pooling_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.default_pooling_type decorator to conveniently set this field.

is_pooling_model class-attribute

is_pooling_model: Literal[True] = True

A flag that indicates this model supports pooling.

Note

There is no need to redefine this flag if this class is in the MRO of your model class.

pooler instance-attribute

pooler: Pooler

The pooler is only called on TP rank 0.

VllmModelForTextGeneration

Bases: VllmModel[T], Protocol[T]

The interface required for all generative models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py
@runtime_checkable
class VllmModelForTextGeneration(VllmModel[T], Protocol[T]):
    """The interface required for all generative models in vLLM.

    Extends `VllmModel` with `compute_logits`. The same type parameter `T`
    is used for the `VllmModel` base, the `hidden_states` argument and the
    non-`None` return value of `compute_logits`.
    """

    def compute_logits(
        self,
        hidden_states: T,
    ) -> T | None:
        """Compute logits from `hidden_states`.

        Returns:
            A value of type `T`, or `None` if TP rank > 0.
        """
        ...

compute_logits

compute_logits(hidden_states: T) -> T | None

Return None if TP rank > 0.

Source code in vllm/model_executor/models/interfaces_base.py
def compute_logits(
    self,
    hidden_states: T,
) -> T | None:
    """Compute logits from `hidden_states`.

    This is an interface stub (the body is `...`); classes implementing
    the protocol supply the actual behavior.

    Returns:
        A value of type `T`, or `None` if TP rank > 0.
    """
    ...

attn_type

attn_type(attn_type: AttnTypeStr)

Decorator to set VllmModelForPooling.attn_type.

Source code in vllm/model_executor/models/interfaces_base.py
def attn_type(attn_type: AttnTypeStr):
    """Build a decorator that sets `VllmModelForPooling.attn_type`.

    Args:
        attn_type: The value to store in the decorated model's
            `attn_type` attribute.

    Returns:
        A decorator that assigns the attribute on the object it receives
        and returns that same object.
    """

    def _apply(model: _T) -> _T:
        # Inside this scope `attn_type` is the argument captured from the
        # enclosing call, not the decorator factory itself.
        setattr(model, "attn_type", attn_type)
        return model

    return _apply

default_pooling_type

default_pooling_type(
    *,
    seq_pooling_type: SequencePoolingType = "LAST",
    tok_pooling_type: TokenPoolingType = "ALL",
)

Decorator to set VllmModelForPooling.default_seq_pooling_type and VllmModelForPooling.default_tok_pooling_type.

Source code in vllm/model_executor/models/interfaces_base.py
def default_pooling_type(
    *,
    seq_pooling_type: SequencePoolingType = "LAST",
    tok_pooling_type: TokenPoolingType = "ALL",
):
    """Build a decorator that sets `VllmModelForPooling.default_*_pooling_type`.

    Both attributes are always written, so an argument that is omitted
    assigns its default value shown in the signature.

    Args:
        seq_pooling_type: Value stored in `default_seq_pooling_type`.
        tok_pooling_type: Value stored in `default_tok_pooling_type`.

    Returns:
        A decorator that assigns both attributes on the object it receives
        and returns that same object.
    """

    def _apply(model: _T) -> _T:
        # Same assignment order as the field declarations: sequence first,
        # then token.
        for field_name, value in (
            ("default_seq_pooling_type", seq_pooling_type),
            ("default_tok_pooling_type", tok_pooling_type),
        ):
            setattr(model, field_name, value)
        return model

    return _apply