Skip to content

vllm.utils

Modules:

Name Description
argparse_utils

Argument parsing utilities for vLLM.

async_utils

Contains helpers related to asynchronous code.

cache
collection_utils

Contains helpers that are applied to collections.

counter
deep_gemm

Compatibility wrapper for DeepGEMM API changes.

flashinfer

Compatibility wrapper for FlashInfer API changes.

func_utils

Contains helpers that are applied to functions.

gc_utils
hashing
import_utils

Contains helpers related to importing modules.

jsontree

Helper functions to work with nested JSON structures.

math_utils

Math utility functions for vLLM.

mem_constants
mem_utils
nccl
network_utils
platform_utils
profiling
registry
serial_utils
system_utils
tensor_schema
torch_utils

STR_BACKEND_ENV_VAR module-attribute

STR_BACKEND_ENV_VAR: str = 'VLLM_ATTENTION_BACKEND'

STR_FLASHINFER_ATTN_VAL module-attribute

STR_FLASHINFER_ATTN_VAL: str = 'FLASHINFER'

STR_FLASH_ATTN_VAL module-attribute

STR_FLASH_ATTN_VAL: str = 'FLASH_ATTN'

STR_INVALID_VAL module-attribute

STR_INVALID_VAL: str = 'INVALID'

STR_XFORMERS_ATTN_VAL module-attribute

STR_XFORMERS_ATTN_VAL: str = 'XFORMERS'

_DEPRECATED_MAPPINGS module-attribute

# Deprecated attribute names that used to be reachable directly on this module.
# Each key is the old attribute name; each value is the name of the submodule
# (under this package) that the module-level __getattr__ imports to resolve it.
_DEPRECATED_MAPPINGS = {
    "cprofile": "profiling",
    "cprofile_context": "profiling",
    "get_open_port": "network_utils",
}

logger module-attribute

logger = init_logger(__name__)

__dir__

__dir__() -> list[str]
Source code in vllm/utils/__init__.py
def __dir__() -> list[str]:
    """Return the sorted attribute names of this module, deprecated ones included."""
    # Deprecated names are added on purpose so that dir() and tab-completion
    # keep advertising them (better UX while they are still supported).
    visible_names = [*globals(), *_DEPRECATED_MAPPINGS]
    visible_names.sort()
    return visible_names

__getattr__

__getattr__(name: str) -> Any

Module-level getattr to handle deprecated utilities.

Source code in vllm/utils/__init__.py
def __getattr__(name: str) -> Any:  # noqa: D401 - short deprecation docstring
    """Module-level getattr to handle deprecated utilities.

    Python invokes this hook only after normal attribute lookup on the module
    has failed, so it is reached for names that are not defined here.

    Args:
        name: The attribute name being looked up on this module.

    Returns:
        The attribute called ``name`` taken from the submodule listed for it
        in ``_DEPRECATED_MAPPINGS``.

    Raises:
        AttributeError: If ``name`` is not a known deprecated attribute.
    """
    if name not in _DEPRECATED_MAPPINGS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Local import keeps this block self-contained; importlib is the
    # documented replacement for calling __import__ directly.
    import importlib

    submodule_name = _DEPRECATED_MAPPINGS[name]
    warnings.warn(
        f"vllm.utils.{name} is deprecated and will be removed in a future version. "
        f"Use vllm.utils.{submodule_name}.{name} instead.",
        DeprecationWarning,
        # stacklevel=2 attributes the warning to the code that accessed the
        # attribute rather than to this hook.
        stacklevel=2,
    )
    # The submodule is imported lazily, only when a deprecated name is used.
    module = importlib.import_module(f"vllm.utils.{submodule_name}")
    return getattr(module, name)

length_from_prompt_token_ids_or_embeds

length_from_prompt_token_ids_or_embeds(
    prompt_token_ids: list[int] | None,
    prompt_embeds: Tensor | None,
) -> int

Calculate the request length (in number of tokens) given either prompt_token_ids or prompt_embeds.

Source code in vllm/utils/__init__.py
def length_from_prompt_token_ids_or_embeds(
    prompt_token_ids: list[int] | None,
    prompt_embeds: torch.Tensor | None,
) -> int:
    """Calculate the request length (in number of tokens) give either
    prompt_token_ids or prompt_embeds.
    """
    prompt_token_len = None if prompt_token_ids is None else len(prompt_token_ids)
    prompt_embeds_len = None if prompt_embeds is None else len(prompt_embeds)

    if prompt_token_len is None:
        if prompt_embeds_len is None:
            raise ValueError("Neither prompt_token_ids nor prompt_embeds were defined.")
        return prompt_embeds_len
    else:
        if prompt_embeds_len is not None and prompt_embeds_len != prompt_token_len:
            raise ValueError(
                "Prompt token ids and prompt embeds had different lengths"
                f" prompt_token_ids={prompt_token_len}"
                f" prompt_embeds={prompt_embeds_len}"
            )
        return prompt_token_len

random_uuid

random_uuid() -> str
Source code in vllm/utils/__init__.py
def random_uuid() -> str:
    """Return a freshly generated random UUID (version 4) as a 32-character
    lowercase hexadecimal string, without dashes.
    """
    generated = uuid.uuid4()
    # UUID.hex is already a str, so no further conversion is needed.
    return generated.hex