Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into feature/online-flow
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Nov 14, 2024
2 parents 74d660a + ae8de6d commit 1875733
Show file tree
Hide file tree
Showing 216 changed files with 19,876 additions and 18,305 deletions.
9 changes: 5 additions & 4 deletions .devops/llama-cli-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target llama-cli -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /

ENTRYPOINT [ "/llama-cli" ]
7 changes: 4 additions & 3 deletions .devops/llama-cli-musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@ WORKDIR /app
COPY . .

RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target llama-cli -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
7 changes: 4 additions & 3 deletions .devops/llama-server-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-server -j$(nproc)
cmake --build build --config Release --target llama-server -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
Expand Down
7 changes: 4 additions & 3 deletions .devops/llama-server-musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@ WORKDIR /app
COPY . .

RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-server -j$(nproc)
cmake --build build --config Release --target llama-server -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
Expand Down
6 changes: 3 additions & 3 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
};

postPatch = ''
substituteInPlace ./ggml/src/ggml-metal.m \
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
substituteInPlace ./ggml/src/ggml-metal.m \
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
'';

Expand Down Expand Up @@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
(cmakeBool "GGML_NATIVE" false)
(cmakeBool "GGML_BLAS" useBlas)
(cmakeBool "GGML_CUDA" useCuda)
(cmakeBool "GGML_HIPBLAS" useRocm)
(cmakeBool "GGML_HIP" useRocm)
(cmakeBool "GGML_METAL" useMetalKit)
(cmakeBool "GGML_VULKAN" useVulkan)
(cmakeBool "GGML_STATIC" enableStatic)
Expand Down
27 changes: 20 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,13 @@ jobs:
sysctl -a
mkdir build
cd build
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
cmake .. \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_CURL=ON \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DGGML_RPC=ON \
-DBUILD_SHARED_LIBS=OFF
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
- name: Test
Expand Down Expand Up @@ -113,7 +119,12 @@ jobs:
sysctl -a
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_CURL=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON \
-DBUILD_SHARED_LIBS=OFF
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
- name: Test
Expand Down Expand Up @@ -394,13 +405,13 @@ jobs:
- name: Build with native CMake HIP support
id: cmake_build
run: |
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
cmake --build build --config Release -j $(nproc)
- name: Build with legacy HIP support
id: cmake_build_legacy_hip
run: |
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
cmake --build build2 --config Release -j $(nproc)
ubuntu-22-cmake-sycl:
Expand Down Expand Up @@ -569,6 +580,7 @@ jobs:
mkdir build
cd build
cmake -G Xcode .. \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
Expand Down Expand Up @@ -599,6 +611,7 @@ jobs:
mkdir build
cd build
cmake -G Xcode .. \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
Expand Down Expand Up @@ -734,7 +747,7 @@ jobs:
id: clone_kompute
if: ${{ matrix.build == 'kompute-x64' }}
run: |
git submodule update --init ggml/src/kompute
git submodule update --init ggml/src/ggml-kompute/kompute
- name: Download OpenBLAS
id: get_openblas
Expand Down Expand Up @@ -1001,7 +1014,7 @@ jobs:
run: |
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
windows-latest-cmake-hip-release:
Expand Down Expand Up @@ -1037,7 +1050,7 @@ jobs:
run: |
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "kompute"]
path = ggml/src/kompute
path = ggml/src/ggml-kompute/kompute
url = https://github.com/nomic-ai/kompute.git
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")


# At the moment some compile definitions are placed within the ggml/src
# directory but not exported on the `ggml` target. This could be improved by
# determining _precisely_ which defines are necessary for the llama-config
Expand Down
Loading

0 comments on commit 1875733

Please sign in to comment.