-
-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CI/Build] build on empty device for better dev experience #4773
Changes from 12 commits
03850aa
f47bc9c
bc83673
947c2fa
24c513e
3c1819c
c68f261
b77ea4c
cd0729f
18b33ff
f632c8e
37c40ed
cf75037
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,9 +61,11 @@ def embed_commit_hash(): | |
|
||
VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE | ||
|
||
# vLLM only supports Linux platform | ||
assert sys.platform.startswith( | ||
"linux"), "vLLM only supports Linux platform (including WSL)." | ||
if not sys.platform.startswith("linux"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change actually makes it possible to install the published |
||
logger.info(f"vLLM only supports Linux platform (including WSL). " | ||
f"Building on {sys.platform}, " | ||
f"so vLLM may not be able to run correctly",) | ||
VLLM_TARGET_DEVICE = "empty" | ||
|
||
MAIN_CUDA_VERSION = "12.1" | ||
|
||
|
@@ -231,6 +233,10 @@ def build_extensions(self) -> None: | |
subprocess.check_call(["cmake", *build_args], cwd=self.build_temp) | ||
|
||
|
||
def _no_device() -> bool: | ||
return VLLM_TARGET_DEVICE == "empty" | ||
|
||
|
||
def _is_cuda() -> bool: | ||
has_cuda = torch.version.cuda is not None | ||
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda | ||
|
@@ -350,7 +356,9 @@ def find_version(filepath: str) -> str: | |
def get_vllm_version() -> str: | ||
version = find_version(get_path("vllm", "version.py")) | ||
|
||
if _is_cuda(): | ||
if _no_device(): | ||
version += "+empty" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was actually not sure if it is better to add "+empty" to the version or not.. WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. adding "+empty" looks good to me. |
||
elif _is_cuda(): | ||
cuda_version = str(get_nvcc_cuda_version()) | ||
if cuda_version != MAIN_CUDA_VERSION: | ||
cuda_version_str = cuda_version.replace(".", "")[:3] | ||
|
@@ -404,7 +412,9 @@ def _read_requirements(filename: str) -> List[str]: | |
resolved_requirements.append(line) | ||
return resolved_requirements | ||
|
||
if _is_cuda(): | ||
if _no_device(): | ||
requirements = _read_requirements("requirements-cuda.txt") | ||
elif _is_cuda(): | ||
requirements = _read_requirements("requirements-cuda.txt") | ||
cuda_major, cuda_minor = torch.version.cuda.split(".") | ||
modified_requirements = [] | ||
|
@@ -453,6 +463,9 @@ def _read_requirements(filename: str) -> List[str]: | |
ext_modules = [] | ||
package_data["vllm"].append("*.so") | ||
|
||
if _no_device(): | ||
ext_modules = [] | ||
|
||
setup( | ||
name="vllm", | ||
version=get_vllm_version(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is because we take the CUDA requirements for the "empty" device wheel, and
`xformers` and `vllm-flash-attn` are available only on Linux.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`platform_system == 'Linux'` makes sense to me. Is
`platform_machine == 'x86_64'`
necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`vllm-flash-attn` has published wheels only for `x86_64`, and no published tar.gz — https://pypi.org/project/vllm-flash-attn/#files
`xformers` also has wheels only for 64-bit machines. It does have a tar.gz, but from what I found online it can't be installed on 32-bit — https://pypi.org/project/xformers/#files
So I'm pretty sure it's needed for both.