[Package] Add scripts for Python packaging

This PR adds the scripts for Python packaging. The CD workflow will follow in a separate PR.
mlc-ai · Nov 17, 2024 · 5992591 · 5992591
1 parent 769d18c
commit 5992591
Show file tree

Hide file tree

Showing 5 changed files with 551 additions and 3 deletions.
diff --git a/python/requirements.txt b/python/requirements.txt
@@ -1,3 +1,5 @@
-# ninja==1.11.1.1
-# setuptools==68.2.2
-# transformers==4.42.3
+torch
+transformers
+pybind11
+pydantic
+pytest
diff --git a/scripts/build_xgrammar_wheel_manylinux.sh b/scripts/build_xgrammar_wheel_manylinux.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+
+source /multibuild/manylinux_utils.sh
+source /opt/rh/gcc-toolset-11/enable # GCC-11 is the hightest GCC version compatible with NVCC < 12
+
+# Print a one-line usage summary (the name this script was invoked as) to stdout.
+function usage() {
+	printf 'Usage: %s\n' "$0"
+}
+
+# Check whether a key occurs in a list of words.
+# Arguments:
+#   $1   - key to look for
+#   $2.. - candidate values; every argument is additionally split on
+#          whitespace, so both `in_array k "${arr[*]}"` (one space-joined
+#          string) and `in_array k "${arr[@]}"` (separate words) work
+# Returns: 0 if the key is found, 1 otherwise
+function in_array() {
+	local key=$1
+	shift
+	local e
+	# Unquoted on purpose: a list passed as a single space-joined string
+	# must still be split into its elements.
+	# shellcheck disable=SC2048
+	for e in $*; do
+		if [[ "$e" == "$key" ]]; then
+			return 0
+		fi
+	done
+	return 1
+}
+
+# Build a wheel with one specific Python interpreter via `setup.py bdist_wheel`.
+# Globals:   XGRAMMAR_PYTHON_DIR (read) - directory in which setup.py is run
+# Arguments: $1 - Python installation prefix; the interpreter used is
+#                 <prefix>/bin/python
+# Returns:   the status of `cd`, or of the setup.py invocation
+# Note: the working directory of the caller is changed to XGRAMMAR_PYTHON_DIR.
+function build_xgrammar_wheel() {
+	local python_dir=$1
+	local python_bin="${python_dir}/bin/python"
+
+	# Quoted so that a prefix containing spaces is still one command word.
+	cd "${XGRAMMAR_PYTHON_DIR}" &&
+		"${python_bin}" setup.py bdist_wheel
+}
+
+# Run `auditwheel repair` on the wheel built for one Python version, then
+# remove the intermediate build artifacts.
+# Globals:   XGRAMMAR_PYTHON_DIR (read) - directory holding dist/ and build/
+#            AUDITWHEEL_OPTS (read)     - space-separated auditwheel options
+# Arguments: $1 - Python version without the dot (e.g. "310"), used to
+#                 select dist/*cp<version>*.whl
+# Returns:   0 on success, otherwise the status of the failing step; the
+#            cleanup runs in either case
+function audit_xgrammar_wheel() {
+	local python_version_str=$1
+	local status=0
+
+	# AUDITWHEEL_OPTS is a space-separated option string, so it has to be
+	# expanded unquoted.
+	# shellcheck disable=SC2086
+	cd "${XGRAMMAR_PYTHON_DIR:?XGRAMMAR_PYTHON_DIR must be set}" &&
+		mkdir -p repaired_wheels &&
+		auditwheel repair ${AUDITWHEEL_OPTS} dist/*cp"${python_version_str}"*.whl ||
+		status=$?
+
+	# `:?` aborts instead of expanding to "/dist/", "/build/" and "/*.egg-info"
+	# should the variable ever be empty.
+	rm -rf "${XGRAMMAR_PYTHON_DIR:?}/dist/" \
+		"${XGRAMMAR_PYTHON_DIR:?}/build/" \
+		"${XGRAMMAR_PYTHON_DIR:?}"/*.egg-info
+
+	return "${status}"
+}
+
+XGRAMMAR_PYTHON_DIR="/workspace/xgrammar/python"
+PYTHON_VERSIONS_CPU=("3.9" "3.10" "3.11" "3.12")
+
+# Parse the command line. The script accepts nothing but -h/--help; every
+# branch exits, so the loop body runs at most once.
+while [[ $# -gt 0 ]]; do
+	arg="$1"
+	case "$arg" in
+	-h | --help)
+		# Asking for help is not an error.
+		usage
+		exit 0
+		;;
+	*) # unknown option
+		echo "Unknown argument: $arg" >&2
+		echo >&2
+		usage >&2
+		# Exit statuses are limited to 0-255; `exit -1` only worked by wrapping to 255.
+		exit 1
+		;;
+	esac
+done
+
+echo "Building XGrammar for CPU only"
+PYTHON_VERSIONS=${PYTHON_VERSIONS_CPU[*]}
+
+AUDITWHEEL_OPTS="--plat ${AUDITWHEEL_PLAT} -w repaired_wheels/"
+AUDITWHEEL_OPTS="--exclude libtorch --exclude libtorch_cpu --exclude libtorch_python ${AUDITWHEEL_OPTS}"
+
+# config the cmake
+cd /workspace/xgrammar
+
+# setup config.cmake
+echo set\(XGRAMMAR_BUILD_PYTHON_BINDINGS ON\) >>config.cmake
+echo set\(XGRAMMAR_BUILD_KERNELS OFF\) >>config.cmake
+echo set\(XGRAMMAR_BUILD_CUDA_KERNELS OFF\) >>config.cmake
+echo set\(XGRAMMAR_BUILD_CXX_TESTS OFF\) >>config.cmake
+
+# compile the xgrammar
+python3 -m pip install ninja pybind11
+python3 -m pip install torch --index-url https://download.pytorch.org/whl/cpu
+mkdir -p build
+cd build
+cmake .. -G Ninja
+ninja -j 4
+find . -type d -name 'CMakeFiles' -exec rm -rf {} +
+
+UNICODE_WIDTH=32 # Dummy value, irrelevant for Python 3
+
+# Not all manylinux Docker images will have all Python versions,
+# so check the existing python versions before generating packages
+for python_version in ${PYTHON_VERSIONS[*]}; do
+	echo "> Looking for Python ${python_version}."
+
+	# Remove the . in version string, e.g. "3.8" turns into "38"
+	python_version_str="$(echo "${python_version}" | sed -r 's/\.//g')"
+	cpython_dir="/opt/conda/envs/py${python_version_str}/"
+
+	# For compatibility in environments where Conda is not installed,
+	# revert back to previous method of locating cpython_dir.
+	if ! [ -d "${cpython_dir}" ]; then
+		cpython_dir=$(cpython_path "${python_version}" "${UNICODE_WIDTH}" 2>/dev/null)
+	fi
+
+	if [ -d "${cpython_dir}" ]; then
+		echo "Generating package for Python ${python_version}."
+		build_xgrammar_wheel ${cpython_dir}
+
+		echo "Running auditwheel on package for Python ${python_version}."
+		audit_xgrammar_wheel ${python_version_str}
+	else
+		echo "Python ${python_version} not found. Skipping."
+	fi
+
+done
diff --git a/scripts/docker/bash.sh b/scripts/docker/bash.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+#
+# Start a Docker container with the current directory mounted at /workspace.
+#
+# Usage: docker/bash.sh [--no-gpu] <CONTAINER_NAME>
+#     Starts an interactive session
+#
+# Usage2: docker/bash.sh [--no-gpu] <CONTAINER_NAME> [COMMAND]
+#     Execute command in the docker image, non-interactive
+#
+# --no-gpu is only recognized as the first argument.
+#
+USAGE="Usage: docker/bash.sh [--no-gpu] <CONTAINER_NAME> [COMMAND]"
+
+if [ "$#" -lt 1 ]; then
+    echo "${USAGE}" >&2
+    exit 1
+fi
+
+if [ "$1" == "--no-gpu" ]; then
+    ENABLE_NV_DOCKER=0
+    shift
+else
+    ENABLE_NV_DOCKER=1
+fi
+
+# `--no-gpu` on its own leaves no image name behind.
+if [ "$#" -lt 1 ]; then
+    echo "${USAGE}" >&2
+    exit 1
+fi
+
+DOCKER_IMAGE_NAME=("$1")
+
+
+# Decide what to run inside the container. COMMAND and DOCKER_EXTRA_PARAMS
+# are real arrays with one word per element.
+if [ "$#" -eq 1 ]; then
+    # Only the image name was given: start an interactive shell.
+    COMMAND=("bash")
+    if [[ $(uname) == "Darwin" ]]; then
+        # Docker's host networking driver isn't supported on macOS.
+        # Use default bridge network and expose port for jupyter notebook.
+        DOCKER_EXTRA_PARAMS=(-it -p 8888:8888)
+    else
+        DOCKER_EXTRA_PARAMS=(-it --net=host)
+    fi
+else
+    # Drop the image name; everything after it is the command to run.
+    # DOCKER_EXTRA_PARAMS is not assigned in this branch.
+    shift 1
+    COMMAND=("$@")
+fi
+
+# The directory the script was invoked from, and the directory holding this script.
+WORKSPACE="$(pwd)"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Forward CUDA_VISIBLE_DEVICES into the container when it is set and non-empty.
+CUDA_ENV=""
+if [[ -n $CUDA_VISIBLE_DEVICES ]]; then
+    CUDA_ENV="-e CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
+fi
+
+# For a wheel test command, turn WHEEL_TEST into the matching `-e` option
+# for docker.
+if [[ -n $WHEEL_TEST ]]; then
+    WHEEL_TEST="-e WHEEL_TEST=${WHEEL_TEST}"
+fi
+
+# Plain docker is the default. For an image whose name contains "cu", and
+# unless --no-gpu was given, prefer nvidia-docker when it is available and
+# otherwise fall back to `docker --gpus all`.
+DOCKER_BINARY="docker"
+if [[ "${DOCKER_IMAGE_NAME}" == *"cu"* ]] && [ "$ENABLE_NV_DOCKER" -eq 1 ]; then
+    if type "nvidia-docker" >/dev/null 2>&1; then
+        DOCKER_BINARY="nvidia-docker"
+    else
+        CUDA_ENV=" --gpus all ${CUDA_ENV}"
+    fi
+fi
+
+# Print arguments.
+echo "WORKSPACE: ${WORKSPACE}"
+echo "DOCKER CONTAINER NAME: ${DOCKER_IMAGE_NAME}"
+echo ""
+
+echo "Running '${COMMAND[*]}' inside ${DOCKER_IMAGE_NAME}..."
+
+# By default we cleanup - remove the container once it finish running (--rm)
+# and share the PID namespace (--pid=host) so the process inside does not have
+# pid 1 and SIGKILL is propagated to the process inside (jenkins can kill it).
+#
+# CUDA_ENV, WHEEL_TEST and DOCKER_EXTRA_PARAMS hold zero or more
+# space-separated options and are therefore expanded unquoted on purpose.
+# The mount paths, the image name and the user command are quoted so that
+# spaces inside them survive (e.g. `bash -c "echo a b"`).
+# shellcheck disable=SC2086,SC2068
+"${DOCKER_BINARY}" run --rm --pid=host \
+    -v "${WORKSPACE}:/workspace" \
+    -v "${SCRIPT_DIR}:/docker" \
+    -w /workspace \
+    ${CUDA_ENV} \
+    ${WHEEL_TEST} \
+    ${DOCKER_EXTRA_PARAMS[@]} \
+    "${DOCKER_IMAGE_NAME}" \
+    "${COMMAND[@]}"