Skip to content

Commit 506ed87

Browse files
[ROCm][CI][Bugfix] Disable Flash/MemEfficient SDP on ROCm to avoid HF Transformers accuracy issues (#29909)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
1 parent 4dd7978 commit 506ed87

File tree

3 files changed

+22
-7
lines changed

3 files changed

+22
-7
lines changed

docker/Dockerfile.rocm

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
6565
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
6666
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
6767
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
68-
# Centralized v1 package - copied to both test and final stages
6968
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
7069

7170
# -----------------------
@@ -98,7 +97,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
9897
uv pip install --system hf_transfer
9998
ENV HF_HUB_ENABLE_HF_TRANSFER=1
10099

101-
# Copy in the v1 package
100+
# Copy in the v1 package (for python-only install test group)
102101
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
103102

104103
# Source code is used in the `python_only_compile.sh` test
@@ -130,9 +129,6 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
130129
&& pip uninstall -y vllm \
131130
&& uv pip install --system *.whl
132131

133-
# Copy in the v1 package
134-
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1
135-
136132
ARG COMMON_WORKDIR
137133

138134
# Copy over the benchmark scripts as well

requirements/rocm-test.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ torchgeo==0.7.0
7070
mteb==2.1.2
7171

7272
# Data processing
73-
xgrammar @ git+https://github.com/mlc-ai/xgrammar.git@eafd4db51b78acc64b3f0764ef27dfd206c28628
74-
# Test async scheduling
73+
xgrammar==0.1.27
74+
# Test async scheduling
7575

7676
# Utilities
7777
num2words==0.5.14
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
"""Pytest configuration for vLLM tests."""
4+
5+
import torch
6+
7+
from vllm.platforms import current_platform
8+
9+
10+
def pytest_configure(config):
    """Pin scaled-dot-product attention to the math kernel on ROCm.

    Pytest invokes this hook once while configuring the test session.
    On ROCm the Flash and memory-efficient SDP kernels are switched off
    and the math kernel is switched on, to avoid accuracy issues seen
    with HF Transformers. On any other platform nothing is changed.

    Args:
        config: The pytest config object passed to the hook; not used.
    """
    if current_platform.is_rocm():
        sdp_backends = torch.backends.cuda
        # Turn off the two kernels associated with the accuracy issues...
        sdp_backends.enable_flash_sdp(False)
        sdp_backends.enable_mem_efficient_sdp(False)
        # ...and make sure the math implementation is enabled.
        sdp_backends.enable_math_sdp(True)

0 commit comments

Comments
 (0)