render completion request and return engine prompts without generating.
Source code in vllm/entrypoints/openai/completion/api_router.py
| @router.post(
"/v1/completions/render",
dependencies=[Depends(validate_json_request)],
response_model=list,
responses={
HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
HTTPStatus.NOT_FOUND.value: {"model": ErrorResponse},
HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
},
)
async def render_completion(request: CompletionRequest, raw_request: Request):
"""render completion request and return engine prompts without generating."""
handler = completion(raw_request)
if handler is None:
base_server = raw_request.app.state.openai_serving_tokenization
return base_server.create_error_response(
message="The model does not support Completions API"
)
try:
result = await handler.render_completion_request(request)
except Exception as e:
return handler.create_error_response(e)
if isinstance(result, ErrorResponse):
return JSONResponse(content=result.model_dump(), status_code=result.error.code)
return JSONResponse(content=result)
|