Skip to content

[Bug]: Kwai-Keye/Keye-VL-8B-Preview failed to start in graph and eager mode due to Keye-VL does not support _Backend.TORCH_SDPA backend now #1961

@zhangxinyuehfad

Description

@zhangxinyuehfad

Your current environment

image : v0.9.2rc1

VLLM_USE_MODELSCOPE=True vllm serve Kwai-Keye/Keye-VL-8B-Preview --tensor_parallel_size 1 --trust_remote_code --enforce-eager & VLLM_USE_MODELSCOPE=True vllm serve Kwai-Keye/Keye-VL-8B-Preview --tensor_parallel_size 1 --trust_remote_code & 

🐛 Describe the bug

bug:

WARNING 07-22 06:24:26 [_custom_ops.py:20] Failed to import from vllm._C with ModuleNotFoundError("No module named 'vllm._C'") INFO 07-22 06:24:31 [parallel_state.py:1076] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0 INFO 07-22 06:24:38 [model_runner_v1.py:1798] Starting to load model /root/.cache/modelscope/hub/models/Kwai-Keye/Keye-VL-8B-Preview... ERROR 07-22 06:24:40 [core.py:586] EngineCore failed to start. ERROR 07-22 06:24:40 [core.py:586] Traceback (most recent call last): ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 577, in run_engine_core ERROR 07-22 06:24:40 [core.py:586] engine_core = EngineCoreProc(*args, **kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 404, in __init__ ERROR 07-22 06:24:40 [core.py:586] super().__init__(vllm_config, executor_class, log_stats, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 75, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.model_executor = executor_class(vllm_config) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/executor_base.py", line 53, in __init__ ERROR 07-22 06:24:40 [core.py:586] self._init_executor() ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 48, in _init_executor ERROR 07-22 06:24:40 [core.py:586] self.collective_rpc("load_model") ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 57, in collective_rpc ERROR 07-22 06:24:40 [core.py:586] answer = run_method(self.driver_worker, method, args, kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/utils/__init__.py", line 2736, in 
run_method ERROR 07-22 06:24:40 [core.py:586] return func(*args, **kwargs) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 213, in load_model ERROR 07-22 06:24:40 [core.py:586] self.model_runner.load_model() ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1801, in load_model ERROR 07-22 06:24:40 [core.py:586] self.model = get_model(vllm_config=self.vllm_config) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/__init__.py", line 59, in get_model ERROR 07-22 06:24:40 [core.py:586] return loader.load_model(vllm_config=vllm_config, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model ERROR 07-22 06:24:40 [core.py:586] model = initialize_model(vllm_config=vllm_config, ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model ERROR 07-22 06:24:40 [core.py:586] return model_class(vllm_config=vllm_config, prefix=prefix) ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 1364, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.visual = KeyeSiglipVisionModel( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 743, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.vision_model = KeyeSiglipVisionTransformer( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 663, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.encoder = KeyeSiglipEncoder( ERROR 07-22 06:24:40 [core.py:586] File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 569, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.layers = nn.ModuleList([ ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 570, in <listcomp> ERROR 07-22 06:24:40 [core.py:586] KeyeSiglipEncoderLayer( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 512, in __init__ ERROR 07-22 06:24:40 [core.py:586] self.self_attn = KeyeSiglipAttention( ERROR 07-22 06:24:40 [core.py:586] File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 389, in __init__ ERROR 07-22 06:24:40 [core.py:586] raise RuntimeError( ERROR 07-22 06:24:40 [core.py:586] RuntimeError: Keye-VL does not support _Backend.TORCH_SDPA backend now. Process EngineCore_0: Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap self.run() File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 590, in run_engine_core raise e File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 577, in run_engine_core engine_core = EngineCoreProc(*args, **kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 404, in __init__ super().__init__(vllm_config, executor_class, log_stats, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core.py", line 75, in __init__ self.model_executor = executor_class(vllm_config) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/executor_base.py", line 53, in __init__ self._init_executor() File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 48, in _init_executor self.collective_rpc("load_model") File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/executor/uniproc_executor.py", line 57, in collective_rpc answer = run_method(self.driver_worker, method, args, kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/utils/__init__.py", line 2736, in run_method return func(*args, **kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 213, in load_model self.model_runner.load_model() File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1801, in load_model self.model = get_model(vllm_config=self.vllm_config) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/__init__.py", line 59, in get_model return loader.load_model(vllm_config=vllm_config, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/base_loader.py", line 38, in load_model model = initialize_model(vllm_config=vllm_config, File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/model_loader/utils.py", line 64, in initialize_model return model_class(vllm_config=vllm_config, prefix=prefix) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 1364, in __init__ self.visual = KeyeSiglipVisionModel( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 743, in __init__ self.vision_model = KeyeSiglipVisionTransformer( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 663, in __init__ self.encoder = KeyeSiglipEncoder( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 569, in __init__ self.layers = nn.ModuleList([ File 
"/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 570, in <listcomp> KeyeSiglipEncoderLayer( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 512, in __init__ self.self_attn = KeyeSiglipAttention( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/model_executor/models/keye.py", line 389, in __init__ raise RuntimeError( RuntimeError: Keye-VL does not support _Backend.TORCH_SDPA backend now. Traceback (most recent call last): File "/usr/local/python3.10.17/bin/vllm", line 8, in <module> sys.exit(main()) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/cli/main.py", line 65, in main args.dispatch_function(args) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/cli/serve.py", line 55, in cmd uvloop.run(run_server(args)) File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run return loop.run_until_complete(wrapper()) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper return await main File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 1431, in run_server await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 1451, in run_server_worker async with build_async_engine_client(args, client_config) as engine_client: File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await anext(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client async with build_async_engine_client_from_engine_args( File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__ return await 
anext(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args async_llm = AsyncLLM.from_vllm_config( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/async_llm.py", line 162, in from_vllm_config return cls( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/async_llm.py", line 124, in __init__ self.engine_core = EngineCoreClient.make_async_mp_client( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 96, in make_async_mp_client return AsyncMPClient(*client_args) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 666, in __init__ super().__init__( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/core_client.py", line 403, in __init__ with launch_core_engines(vllm_config, executor_class, File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 142, in __exit__ next(self.gen) File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/utils.py", line 434, in launch_core_engines wait_for_engine_startup( File "/__w/vllm-benchmarks/vllm-benchmarks/vllm-empty/vllm/v1/engine/utils.py", line 484, in wait_for_engine_startup raise RuntimeError("Engine core initialization failed. " RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} [ERROR] 2025-07-22-06:24:48 (PID:13561, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception 

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions