vllm.v1.structured_output.backend_guidance

GuidanceGrammar dataclass

Bases: StructuredOutputGrammar

Source code in vllm/v1/structured_output/backend_guidance.py
@dataclass
class GuidanceGrammar(StructuredOutputGrammar):
    ll_matcher: llguidance.LLMatcher
    ll_tokenizer: llguidance.LLTokenizer
    vocab_size: int
    printed_error: bool = False
    terminated: bool = False
    rollback_lag: int = 0

    def check_error(self):
        if not self.printed_error:
            err = self.ll_matcher.get_error()
            if err:
                self.printed_error = True
                logger.warning("LLMatcher error: %s", err)

    def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
        """Accepts a list of tokens and advances the parser.

        Returns True if the parser was advanced successfully.
        Returns False if the parser failed to advance.
        """

        if self.ll_tokenizer.eos_token in tokens:
            if self.ll_matcher.is_stopped() and not self.terminated:
                # The matcher had already stopped before this EOS arrived, so
                # the EOS is never consumed by ll_matcher. Record that the
                # matcher is one token behind, so rollback() can compensate.
                self.rollback_lag = 1
            self.terminated = True

        if self.ll_matcher.is_stopped():
            return True

        # TODO - Add jump decoding support in the future:
        # self.ll_matcher.compute_ff_bytes() - this should always work
        # self.ll_matcher.compute_ff_tokens() - this only works for
        #   "canonical" tokenizers
        # For conversion between the two, see
        # https://github.com/guidance-ai/llguidance/blob/main/docs/fast_forward.md

        r = self.ll_matcher.consume_tokens(tokens)

        self.check_error()

        return r

    def validate_tokens(self, tokens: list[int]) -> list[int]:
        """Checks whether the given tokens are accepted by the parser
        in sequence. Does not advance the parser.

        Returns the prefix of the tokens that is accepted by the parser.
        """
        if len(tokens) == 0:
            return []
        if self.ll_matcher.is_stopped():
            return []

        num_tokens = self.ll_matcher.validate_tokens(tokens)

        self.check_error()

        return tokens[:num_tokens]

    def rollback(self, num_tokens: int) -> None:
        if num_tokens > 0:
            # If the last accepted token was an EOS that the matcher never
            # consumed (rollback_lag == 1), rewind the matcher by one fewer.
            self.ll_matcher.rollback(num_tokens - self.rollback_lag)
            self.terminated = False
            self.rollback_lag = 0
            self.check_error()

    def fill_bitmask(self, bitmask: torch.Tensor, idx: int) -> None:
        # This automatically yields an EOS-only mask if the matcher is
        # stopped or otherwise in an error state.
        llguidance_torch.fill_next_token_bitmask(self.ll_matcher, bitmask, idx)
        self.check_error()

    def is_terminated(self) -> bool:
        return self.terminated

    def reset(self):
        # TODO: this method may no longer be needed.
        self.ll_matcher.reset()

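As an orientation aid, the sketch below shows how a GuidanceGrammar can drive a single masked decoding step. Construction of the underlying LLMatcher/LLTokenizer pair is elided (vLLM's guidance backend handles it), and the bitmask helpers are assumed to live in llguidance.torch with an xgrammar-style allocate/apply interface — verify the exact names against your installed llguidance version.

import torch
import llguidance.torch as llguidance_torch

def masked_decode_step(grammar: "GuidanceGrammar", logits: torch.Tensor) -> int:
    # Assumption: `grammar` was built by the backend and `logits` has shape
    # (1, vocab_size) for a single sequence.
    bitmask = llguidance_torch.allocate_token_bitmask(1, grammar.vocab_size)
    grammar.fill_bitmask(bitmask, idx=0)

    # Mask out (set to -inf) every token the grammar currently forbids.
    llguidance_torch.apply_token_bitmask_inplace(logits, bitmask.to(logits.device))

    # Greedy sampling for illustration; vLLM's sampler is more involved.
    token = int(torch.argmax(logits, dim=-1).item())

    # Advance the matcher; a False return means the token was rejected.
    assert grammar.accept_tokens("req-0", [token])
    return token
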
accept_tokens

accept_tokens(request_id: str, tokens: list[int]) -> bool

Accepts a list of tokens and advances the parser.

Returns True if the parser was advanced successfully. Returns False if the parser failed to advance.

Source code in vllm/v1/structured_output/backend_guidance.py
def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
    """Accepts a list of tokens and advances the parser.

    Returns True if the parser was advanced successfully.
    Returns False if the parser failed to advance.
    """

    if self.ll_tokenizer.eos_token in tokens:
        if self.ll_matcher.is_stopped() and not self.terminated:
            # The matcher had already stopped before this EOS arrived, so
            # the EOS is never consumed by ll_matcher. Record that the
            # matcher is one token behind, so rollback() can compensate.
            self.rollback_lag = 1
        self.terminated = True

    if self.ll_matcher.is_stopped():
        return True

    # TODO - Add jump decoding support in the future:
    # self.ll_matcher.compute_ff_bytes() - this should always work
    # self.ll_matcher.compute_ff_tokens() - this only works for
    #   "canonical" tokenizers
    # For conversion between the two, see
    # https://github.com/guidance-ai/llguidance/blob/main/docs/fast_forward.md

    r = self.ll_matcher.consume_tokens(tokens)

    self.check_error()

    return r

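The EOS handling above is subtle: when the matcher has already stopped, an EOS appended by the sampler is never consumed by ll_matcher, so the grammar tracks one more token than the matcher and rollback_lag records the difference. A hedged walkthrough of the resulting behavior, assuming `grammar` is a GuidanceGrammar whose matcher has just consumed the grammar's final token:

# ll_matcher.is_stopped() is True; the sampler now emits EOS.
eos = grammar.ll_tokenizer.eos_token
assert grammar.accept_tokens("req-0", [eos])
assert grammar.is_terminated()
# The EOS was *not* consumed by ll_matcher; rollback_lag is now 1.

# Rolling the EOS back (e.g. after a rejected speculative draft) must
# therefore rewind the matcher by num_tokens - rollback_lag = 0 tokens.
grammar.rollback(1)
assert not grammar.is_terminated()
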
validate_tokens

validate_tokens(tokens: list[int]) -> list[int]

Checks whether the given tokens are accepted by the parser in sequence. Does not advance the parser.

Returns the prefix of the tokens that is accepted by the parser.

Source code in vllm/v1/structured_output/backend_guidance.py
def validate_tokens(self, tokens: list[int]) -> list[int]:
    """Checks whether the given tokens are accepted by the parser
    in sequence. Does not advance the parser.

    Returns the prefix of the tokens that is accepted by the parser.
    """
    if len(tokens) == 0:
        return []
    if self.ll_matcher.is_stopped():
        return []

    num_tokens = self.ll_matcher.validate_tokens(tokens)

    self.check_error()

    return tokens[:num_tokens]

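Because validate_tokens does not mutate matcher state, it pairs naturally with speculative decoding: probe a draft against the grammar first, then commit only the accepted prefix. A minimal sketch (the helper name is illustrative, not part of the backend):

def commit_draft(grammar: "GuidanceGrammar", request_id: str,
                 draft: list[int]) -> list[int]:
    # Probe without side effects; the matcher is not advanced here.
    accepted = grammar.validate_tokens(draft)

    # Commit only the grammar-accepted prefix; the caller re-samples
    # from the first rejected position onwards.
    if accepted:
        grammar.accept_tokens(request_id, accepted)
    return accepted
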
has_guidance_unsupported_json_features

has_guidance_unsupported_json_features(
    schema: dict[str, Any],
) -> bool

Check whether a JSON schema contains features unsupported by guidance/llguidance.

Source code in vllm/v1/structured_output/backend_guidance.py
def has_guidance_unsupported_json_features(schema: dict[str, Any]) -> bool:
    """Check whether a JSON schema contains features unsupported by
    guidance/llguidance."""

    def check_object(obj: dict[str, Any]) -> bool:
        if not isinstance(obj, dict):
            return False

        # patternProperties is not supported by llguidance
        if "patternProperties" in obj:
            return True

        # Recursively check all nested objects and arrays
        for value in obj.values():
            if isinstance(value, dict):
                if check_object(value):
                    return True
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict) and check_object(item):
                        return True

        return False

    return check_object(schema)
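
For example, a schema using patternProperties is flagged even when it is nested inside properties or an array of items, while a plain properties-only schema passes:

nested = {
    "type": "object",
    "properties": {
        "tags": {
            "type": "array",
            "items": {
                "type": "object",
                "patternProperties": {"^x-": {"type": "string"}},
            },
        },
    },
}
assert has_guidance_unsupported_json_features(nested)

plain = {"type": "object", "properties": {"name": {"type": "string"}}}
assert not has_guidance_unsupported_json_features(plain)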