Skip to content

[Bug]: Kwai-Keye/Keye-VL-8B-Preview failed to start in graph and eager mode due to Keye-VL does not support _Backend.TORCH_SDPA backend now #1961

@zhangxinyuehfad

Description

@zhangxinyuehfad

Your current environment

image : v0.9.2rc1

VLLM_USE_MODELSCOPE=True vllm serve Kwai-Keye/Keye-VL-8B-Preview --tensor_parallel_size 1 --trust_remote_code --enforce-eager & VLLM_USE_MODELSCOPE=True vllm serve Kwai-Keye/Keye-VL-8B-Preview --tensor_parallel_size 1 --trust_remote_code & 

🐛 Describe the bug

bug:

WARNING 07-22 06:24:26 [_custom_ops.py:20] Failed to import from vllm._C with ModuleNotFoundError("No module named 'vllm._C'") INFO 07-22 06:24:31 [parallel_state.py:1076] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0 INFO 07-22 06:24:38 [model_runner_v1.py:1798] Starting to load model /root/.cache/modelscope/hub/models/Kwai-Keye/Keye-VL-8B-Preview... ERROR 07-22 06:24:40 [core.py:586] EngineCore failed to start. ERROR 07-22 06:24:40 [core.py:586] Traceback (most recent call last): ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 577, in run_engine_core ERROR 07-22 06:24:40 [core.py:586] engine_core = EngineCoreProc(*args, **kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 404, in __init__ ERROR 07-22 06:24:40 [core.py:586] super().__init__(vllm_config, executor_class, log_stats, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 75, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.model_executor = executor_class(vllm_config) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/executor_base.py", line 53, in __init__ ERROR 07-22 06:24:40 [core.py:586] self._init_executor() ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 48, in _init_executor ERROR 07-22 06:24:40 [core.py:586] self.collective_rpc("load_model") ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 57, in collective_rpc ERROR 07-22 06:24:40 [core.py:586] answer = run_method(self.driver_worker, method, args, kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/utils/__init__.py", line 2736, in 
run_method ERROR 07-22 06:24:40 [core.py:586] return func(*args, **kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 213, in load_model ERROR 07-22 06:24:40 [core.py:586] self.model_runner.load_model() ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1801, in load_model ERROR 07-22 06:24:40 [core.py:586] self.model = get_model(vllm_config=self.vllm_config) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/__init__.py", line 59, in get_model ERROR 07-22 06:24:40 [core.py:586] return loader.load_model(vllm_config=vllm_config, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model ERROR 07-22 06:24:40 [core.py:586] model = initialize_model(vllm_config=vllm_config, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model ERROR 07-22 06:24:40 [core.py:586] return model_class(vllm_config=vllm_config, prefix=prefix) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 1364, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.visual = KeyeSiglipVisionModel( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 743, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.vision_model = KeyeSiglipVisionTransformer( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 663, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.encoder = KeyeSiglipEncoder( ERROR 07-22 06:24:40 [core.py:586] File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 569, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.layers = nn.ModuleList([ ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 570, in <listcomp> ERROR 07-22 06:24:40 [core.py:586] KeyeSiglipEncoderLayer( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 512, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.self_attn = KeyeSiglipAttention( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 389, in __init__ ERROR 07-22 06:24:40 [core.py:586] raise RuntimeError( ERROR 07-22 06:24:40 [core.py:586] RuntimeError: Keye-VL does not support _Backend.TORCH_SDPA backend now. Process EngineCore_0: Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap self.run() File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 590, in run_engine_core raise e File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 577, in run_engine_core engine_core = EngineCoreProc(*args, **kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 404, in __init__ super().__init__(vllm_config, executor_class, log_stats, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 75, in __init__ self.model_executor = executor_class(vllm_config) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/executor_base.py", line 53, in __init__ self._init_executor() File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 48, in _init_executor self.collective_rpc("load_model") File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 57, in collective_rpc answer = run_method(self.driver_worker, method, args, kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/utils/__init__.py", line 2736, in run_method return func(*args, **kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 213, in load_model self.model_runner.load_model() File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1801, in load_model self.model = get_model(vllm_config=self.vllm_config) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/__init__.py", line 59, in get_model return loader.load_model(vllm_config=vllm_config, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model model = initialize_model(vllm_config=vllm_config, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model return model_class(vllm_config=vllm_config, prefix=prefix) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 1364, in __init__ self.visual = KeyeSiglipVisionModel( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 743, in __init__ self.vision_model = KeyeSiglipVisionTransformer( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 663, in __init__ self.encoder = KeyeSiglipEncoder( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 569, in __init__ self.layers = nn.ModuleList([ File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 570, in <listcomp> KeyeSiglipEncoderLayer( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 512, in __init__ self.self_attn = KeyeSiglipAttention( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 389, in __init__ raise RuntimeError( RuntimeError: Keye-VL does not support _Backend.TORCH_SDPA backend now. Traceback (most recent call last): File "/usr/local/python3.10.17/bin/vllm", line 8, in <module> sys.exit(main()) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/cli/main.py", line 65, in main args.dispatch_function(args) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/cli/serve.py", line 55, in cmd uvloop.run(run_server(args)) File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run return loop.run_until_complete(wrapper()) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper return await main File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 1431, in run_server await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 1451, in run_server_worker async with build_async_engine_client(args, client_config) as engine_client: File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await anext(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client async with build_async_engine_client_from_engine_args( File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await 
anext(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args async_llm = AsyncLLM.from_vllm_config( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/async_llm.py", line 162, in from_vllm_config return cls( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/async_llm.py", line 124, in __init__ self.engine_core = EngineCoreClient.make_async_mp_client( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 96, in make_async_mp_client return AsyncMPClient(*client_args) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 666, in __init__ super().__init__( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 403, in __init__ with launch_core_engines(vllm_config, executor_class, File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 142, in __exit__ next(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/utils.py", line 434, in launch_core_engines wait_for_engine_startup( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/utils.py", line 484, in wait_for_engine_startup raise RuntimeError("Engine core initialization failed. " RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} [ERROR] 2025-07-22-06:24:48 (PID:13561, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception 

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions