diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ce067955..2d18a3f8 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -91,7 +91,7 @@ jobs: # Install torch $cudaVersion = $env:CUDA_VERSION.Replace('.', '') $cudaVersionPytorch = $cudaVersion.Substring(0, $cudaVersion.Length - 1) - if ([int]$cudaVersionPytorch -gt 118) { $pytorchVersion = "torch==2.2.0" } else {$pytorchVersion = "torch==2.0.1"} + if ([int]$cudaVersionPytorch -gt 118) { $pytorchVersion = "torch==2.3.1" } else {$pytorchVersion = "torch==2.3.1"} python -m pip install --upgrade --no-cache-dir $pytorchVersion+cu$cudaVersionPytorch --index-url https://download.pytorch.org/whl/cu$cudaVersionPytorch python -m pip install build setuptools wheel ninja requests @@ -210,9 +210,9 @@ jobs: python -m pip install --upgrade build setuptools wheel requests if [[ "${{ matrix.rocm }}" == "5.7.1" ]]; then - python -m pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/rocm5.7 + python -m pip install torch==2.3.1 --index-url https://download.pytorch.org/whl/rocm5.7 elif [[ "${{ matrix.rocm }}" == "5.6.1" ]]; then - python -m pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/rocm5.6 + python -m pip install torch==2.3.1 --index-url https://download.pytorch.org/whl/rocm5.6 else echo Unknown rocm version for python install exit 1 @@ -230,4 +230,4 @@ jobs: uses: shogo82148/actions-upload-release-asset@v1 with: upload_url: ${{ needs.release.outputs.upload_url }} - asset_path: ./dist/*.whl \ No newline at end of file + asset_path: ./dist/*.whl diff --git a/setup.py b/setup.py index dd601988..23adf1f5 100644 --- a/setup.py +++ b/setup.py @@ -1,159 +1,159 @@ -import os -import torch -import platform -import requests -from pathlib import Path -from setuptools import setup, find_packages -from torch.utils.cpp_extension import CUDAExtension - - -def get_latest_kernels_version(repo): - """ - Get the latest version of the kernels from the 
github repo. - """ - response = requests.get(f"https://api.github.com/repos/{repo}/releases/latest") - data = response.json() - tag_name = data["tag_name"] - version = tag_name.replace("v", "") - return version - - -def get_kernels_whl_url( - gpu_system_version, - release_version, - python_version, - platform, - architecture, -): - """ - Get the url for the kernels wheel file. - """ - return f"https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v{release_version}/autoawq_kernels-{release_version}+{gpu_system_version}-cp{python_version}-cp{python_version}-{platform}_{architecture}.whl" - - -AUTOAWQ_VERSION = "0.2.5" -PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1" -IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available() - -CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda -if CUDA_VERSION: - CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3] - -ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip -if ROCM_VERSION: - if ROCM_VERSION.startswith("5.6"): - ROCM_VERSION = "5.6.1" - elif ROCM_VERSION.startswith("5.7"): - ROCM_VERSION = "5.7.1" - - ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3] - -if not PYPI_BUILD: - if IS_CPU_ONLY: - AUTOAWQ_VERSION += "+cpu" - elif CUDA_VERSION: - AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}" - elif ROCM_VERSION: - AUTOAWQ_VERSION += f"+rocm{ROCM_VERSION}" - else: - raise RuntimeError( - "Your system must have either Nvidia or AMD GPU to build this package." 
- ) - -common_setup_kwargs = { - "version": AUTOAWQ_VERSION, - "name": "autoawq", - "author": "Casper Hansen", - "license": "MIT", - "python_requires": ">=3.8.0", - "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.", - "long_description": (Path(__file__).parent / "README.md").read_text( - encoding="UTF-8" - ), - "long_description_content_type": "text/markdown", - "url": "https://github.com/casper-hansen/AutoAWQ", - "keywords": ["awq", "autoawq", "quantization", "transformers"], - "platforms": ["linux", "windows"], - "classifiers": [ - "Environment :: GPU :: NVIDIA CUDA :: 11.8", - "Environment :: GPU :: NVIDIA CUDA :: 12", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: C++", - ], -} - -requirements = [ - "torch>=2.0.1", - "transformers>=4.35.0", - "tokenizers>=0.12.1", - "typing_extensions>=4.8.0", - "accelerate", - "datasets", - "zstandard", -] - -try: - if ROCM_VERSION: - import exlv2_ext - else: - import awq_ext - - KERNELS_INSTALLED = True -except ImportError: - KERNELS_INSTALLED = False - -# kernels can be downloaded from pypi for cuda+121 only -# for everything else, we need to download the wheels from github -if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION): - if CUDA_VERSION and CUDA_VERSION.startswith("12"): - requirements.append("autoawq-kernels") - elif CUDA_VERSION and CUDA_VERSION.startswith("11") or ROCM_VERSION in ["561", "571"]: - gpu_system_version = ( - f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}" - ) - kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels") - python_version = "".join(platform.python_version_tuple()[:2]) - platform_name = platform.system().lower() - architecture = platform.machine().lower() - 
latest_rocm_kernels_wheels = get_kernels_whl_url( - gpu_system_version, - kernels_version, - python_version, - platform_name, - architecture, - ) - requirements.append(f"autoawq-kernels@{latest_rocm_kernels_wheels}") - else: - raise RuntimeError( - "Your system have a GPU with an unsupported CUDA or ROCm version. " - "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels" - ) -elif IS_CPU_ONLY: - requirements.append("intel-extension-for-transformers>=1.4.2") - -force_extension = os.getenv("PYPI_FORCE_TAGS", "0") -if force_extension == "1": - # NOTE: We create an empty CUDAExtension because torch helps us with - # creating the right boilerplate to enable correct targeting of - # the autoawq-kernels package - common_setup_kwargs["ext_modules"] = [ - CUDAExtension( - name="test_kernel", - sources=[], - ) - ] - -setup( - packages=find_packages(), - install_requires=requirements, - extras_require={ - "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"], - "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"] - }, - **common_setup_kwargs, -) +import os +import torch +import platform +import requests +from pathlib import Path +from setuptools import setup, find_packages +from torch.utils.cpp_extension import CUDAExtension + + +def get_latest_kernels_version(repo): + """ + Get the latest version of the kernels from the github repo. + """ + response = requests.get(f"https://api.github.com/repos/{repo}/releases/latest") + data = response.json() + tag_name = data["tag_name"] + version = tag_name.replace("v", "") + return version + + +def get_kernels_whl_url( + gpu_system_version, + release_version, + python_version, + platform, + architecture, +): + """ + Get the url for the kernels wheel file. 
+ """ + return f"https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v{release_version}/autoawq_kernels-{release_version}+{gpu_system_version}-cp{python_version}-cp{python_version}-{platform}_{architecture}.whl" + + +AUTOAWQ_VERSION = "0.2.5" +PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1" +IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available() + +CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda +if CUDA_VERSION: + CUDA_VERSION = "".join(CUDA_VERSION.split("."))[:3] + +ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip +if ROCM_VERSION: + if ROCM_VERSION.startswith("5.6"): + ROCM_VERSION = "5.6.1" + elif ROCM_VERSION.startswith("5.7"): + ROCM_VERSION = "5.7.1" + + ROCM_VERSION = "".join(ROCM_VERSION.split("."))[:3] + +if not PYPI_BUILD: + if IS_CPU_ONLY: + AUTOAWQ_VERSION += "+cpu" + elif CUDA_VERSION: + AUTOAWQ_VERSION += f"+cu{CUDA_VERSION}" + elif ROCM_VERSION: + AUTOAWQ_VERSION += f"+rocm{ROCM_VERSION}" + else: + raise RuntimeError( + "Your system must have either Nvidia or AMD GPU to build this package." 
+ ) + +common_setup_kwargs = { + "version": AUTOAWQ_VERSION, + "name": "autoawq", + "author": "Casper Hansen", + "license": "MIT", + "python_requires": ">=3.8.0", + "description": "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.", + "long_description": (Path(__file__).parent / "README.md").read_text( + encoding="UTF-8" + ), + "long_description_content_type": "text/markdown", + "url": "https://github.com/casper-hansen/AutoAWQ", + "keywords": ["awq", "autoawq", "quantization", "transformers"], + "platforms": ["linux", "windows"], + "classifiers": [ + "Environment :: GPU :: NVIDIA CUDA :: 11.8", + "Environment :: GPU :: NVIDIA CUDA :: 12", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: C++", + ], +} + +requirements = [ + "torch==2.3.1", + "transformers>=4.35.0", + "tokenizers>=0.12.1", + "typing_extensions>=4.8.0", + "accelerate", + "datasets", + "zstandard", +] + +try: + if ROCM_VERSION: + import exlv2_ext + else: + import awq_ext + + KERNELS_INSTALLED = True +except ImportError: + KERNELS_INSTALLED = False + +# kernels can be downloaded from pypi for cuda+121 only +# for everything else, we need to download the wheels from github +if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION): + if CUDA_VERSION and CUDA_VERSION.startswith("12"): + requirements.append("autoawq-kernels") + elif CUDA_VERSION and CUDA_VERSION.startswith("11") or ROCM_VERSION in ["561", "571"]: + gpu_system_version = ( + f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}" + ) + kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels") + python_version = "".join(platform.python_version_tuple()[:2]) + platform_name = platform.system().lower() + architecture = platform.machine().lower() + 
latest_rocm_kernels_wheels = get_kernels_whl_url( + gpu_system_version, + kernels_version, + python_version, + platform_name, + architecture, + ) + requirements.append(f"autoawq-kernels@{latest_rocm_kernels_wheels}") + else: + raise RuntimeError( + "Your system has a GPU with an unsupported CUDA or ROCm version. " + "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels" + ) +elif IS_CPU_ONLY: + requirements.append("intel-extension-for-transformers>=1.4.2") + +force_extension = os.getenv("PYPI_FORCE_TAGS", "0") +if force_extension == "1": + # NOTE: We create an empty CUDAExtension because torch helps us with + # creating the right boilerplate to enable correct targeting of + # the autoawq-kernels package + common_setup_kwargs["ext_modules"] = [ + CUDAExtension( + name="test_kernel", + sources=[], + ) + ] + +setup( + packages=find_packages(), + install_requires=requirements, + extras_require={ + "eval": ["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"], + "dev": ["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"] + }, + **common_setup_kwargs, +)