diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 40dc46088..fbd72c101 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -29,26 +29,9 @@ jobs: run: if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi extract-cuda-backend-branch: - name: Extract cuda branch name - runs-on: ubuntu-22.04 - outputs: - cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Extract Private Branch from PR Description - id: extract - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body') - echo "PR Description: $DESCRIPTION" - CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true - if [ -z "$CUDA_BE_BRANCH" ]; then - CUDA_BE_BRANCH="main" # Default branch if not specified - fi - echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH" - echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH" + uses: ./.github/workflows/extract-backends.yml + with: + pr-number: ${{ github.event.pull_request.number }} test-linux-curve: name: Test on Linux diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index 5f7d5b5c2..c5240e6db 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -24,26 +24,9 @@ jobs: uses: ./.github/workflows/check-changed-files.yml extract-cuda-backend-branch: - name: Extract cuda branch name - runs-on: ubuntu-22.04 - outputs: - cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Extract Private Branch from PR Description - id: extract - env: - 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body') - echo "PR Description: $DESCRIPTION" - CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true - if [ -z "$CUDA_BE_BRANCH" ]; then - CUDA_BE_BRANCH="main" # Default branch if not specified - fi - echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH" - echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH" + uses: ./.github/workflows/extract-backends.yml + with: + pr-number: ${{ github.event.pull_request.number }} run-examples: runs-on: [self-hosted, Linux, X64, icicle, examples] diff --git a/.github/workflows/extract-backends.yml b/.github/workflows/extract-backends.yml new file mode 100644 index 000000000..80aac40d9 --- /dev/null +++ b/.github/workflows/extract-backends.yml @@ -0,0 +1,36 @@ +name: Extract Icicle Backend Branch + +on: + workflow_call: + inputs: + pr-number: + description: 'The PR number to fetch the description for' + required: true + type: number + outputs: + cuda-backend-branch: + description: "Branch name for cuda backend" + value: ${{ jobs.extract-cuda-backend-branch.outputs.cuda-backend-branch }} + +jobs: + extract-cuda-backend-branch: + name: Extract cuda branch name + runs-on: ubuntu-22.04 + outputs: + cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Extract Private Branch from PR Description + id: extract + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + DESCRIPTION=$(gh pr view ${{ inputs.pr-number }} --json body -q '.body') + echo "PR Description: $DESCRIPTION" + CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true + if [ -z "$CUDA_BE_BRANCH" ]; then + CUDA_BE_BRANCH="main" # Default branch if not specified + fi + echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH" + echo "cuda-backend-branch=$CUDA_BE_BRANCH" >> 
"$GITHUB_OUTPUT" \ No newline at end of file diff --git a/.github/workflows/golang.yml b/.github/workflows/golang.yml new file mode 100644 index 000000000..997728a0c --- /dev/null +++ b/.github/workflows/golang.yml @@ -0,0 +1,122 @@ +name: GoLang + +on: + pull_request: + branches: + - V3 + - yshekel/V3 # TODO remove when merged to V3 + push: + branches: + - V3 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + check-changed-files: + uses: ./.github/workflows/check-changed-files.yml + + check-format: + name: Check Code Format + runs-on: ubuntu-22.04 + needs: check-changed-files + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup go + uses: actions/setup-go@v5 + with: + go-version: '1.20.0' + - name: Check gofmt + if: needs.check-changed-files.outputs.golang == 'true' + run: if [[ $(go list ./... | xargs go fmt) ]]; then echo "Please run go fmt"; exit 1; fi + + extract-cuda-backend-branch: + uses: ./.github/workflows/extract-backends.yml + with: + pr-number: ${{ github.event.pull_request.number }} + + build-curves-linux: + name: Build and test curves on Linux + runs-on: [self-hosted, Linux, X64, icicle] + needs: [check-changed-files, check-format, extract-cuda-backend-branch] + strategy: + matrix: + curve: + - name: bn254 + build_args: + - name: bls12_381 + build_args: + - name: bls12_377 + build_args: + - name: bw6_761 + build_args: + - name: grumpkin + build_args: + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + - name: Checkout CUDA Backend + uses: actions/checkout@v4 + with: + repository: ingonyama-zk/icicle-cuda-backend + path: ./icicle/backend/cuda + token: ${{ secrets.GITHUB_TOKEN }} + ssh-key: ${{ secrets.CUDA_PULL_KEY }} + ref: ${{ needs.extract-cuda-backend-branch.outputs.cuda-backend-branch }} + - name: Setup go + uses: actions/setup-go@v5 + with: + go-version: '1.20.0' + - name: Build + working-directory: ./wrappers/golang + if: needs.check-changed-files.outputs.golang 
== 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true' + # builds a single curve with the curve's specified build args + run: | + ./build.sh -curve=${{ matrix.curve.name }} ${{ matrix.curve.build_args }} -cuda_backend=local + - name: Test + working-directory: ./wrappers/golang/curves + if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true' + run: | + CURVE=$(echo ${{ matrix.curve.name }} | sed -e 's/_//g') + export ICICLE_BACKEND_INSTALL_DIR=/usr/local/lib + go test ./$CURVE/tests -count=1 -failfast -p 2 -timeout 60m -v + + build-fields-linux: + name: Build and test fields on Linux + runs-on: [self-hosted, Linux, X64, icicle] + needs: [check-changed-files, check-format, extract-cuda-backend-branch] + strategy: + matrix: + field: + - name: babybear + build_args: + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + - name: Checkout CUDA Backend + uses: actions/checkout@v4 + with: + repository: ingonyama-zk/icicle-cuda-backend + path: ./icicle/backend/cuda + token: ${{ secrets.GITHUB_TOKEN }} + ssh-key: ${{ secrets.CUDA_PULL_KEY }} + ref: ${{ needs.extract-cuda-backend-branch.outputs.cuda-backend-branch }} + - name: Setup go + uses: actions/setup-go@v5 + with: + go-version: '1.20.0' + - name: Build + working-directory: ./wrappers/golang + if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true' + # builds a single field with the fields specified build args + run: | + ./build.sh -field=${{ matrix.field.name }} ${{ matrix.field.build_args }} -cuda_backend=local + - name: Test + working-directory: ./wrappers/golang/fields + if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true' + run: | + FIELD=$(echo ${{ matrix.field.name }} | sed -e 's/_//g') + export ICICLE_BACKEND_INSTALL_DIR=/usr/local/lib + go test ./$FIELD/tests -count=1 -failfast -p 2 -timeout 60m -v diff --git 
a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 41da8b9eb..fd96fc9b9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -35,26 +35,9 @@ jobs: run: if [[ $(find . -path ./icicle-curves/icicle-curve-template -prune -o -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --) ]]; then echo "Please run cargo fmt"; exit 1; fi extract-cuda-backend-branch: - name: Extract cuda branch name - runs-on: ubuntu-22.04 - outputs: - cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Extract Private Branch from PR Description - id: extract - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body') - echo "PR Description: $DESCRIPTION" - CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true - if [ -z "$CUDA_BE_BRANCH" ]; then - CUDA_BE_BRANCH="main" # Default branch if not specified - fi - echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH" - echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH" + uses: ./.github/workflows/extract-backends.yml + with: + pr-number: ${{ github.event.pull_request.number }} test-linux: name: Test on Linux diff --git a/README.md b/README.md index 8a6bd03b3..a2bf0c134 100644 --- a/README.md +++ b/README.md @@ -96,13 +96,42 @@ cargo build --release ### Go +There are two ways to build from source in Go: + +1. 
Clone the repo, update your go.mod to point to the local clone, and build ICICLE within the clone + +```sh +git clone https://github.com/ingonyama-zk/icicle.git +``` + Add ICICLE v3 to your go.mod file: +```go +require github.com/ingonyama-zk/icicle/v3 v3.0.0 + +replace github.com/ingonyama-zk/icicle/v3 => ../path/to/cloned/icicle +``` + +Navigate to the cloned repo's golang bindings and build the library using the supplied [build script][ICICLE-GO-BUILD-SCRIPT] + +```sh +cd icicle/wrappers/golang +chmod +x build.sh +./build.sh -curve=bn254 +``` + +2. Update your go.mod to include ICICLE as a dependency, navigate to the dependency in your GOMODCACHE and build ICICLE there + ```sh go get github.com/ingonyama-zk/icicle/v3 +cd $(go env GOMODCACHE)/github.com/ingonyama-zk/icicle/v3@<version>/wrappers/golang +chmod +x build.sh +./build.sh -curve=bn254 ``` -Before you can use ICICLE in your project you will need to build it using the provided [build script][ICICLE-GO-BUILD-SCRIPT]. +> [!NOTE] +> To specify the field, use the flag -field=<field>, where <field> can be one of the following: babybear, stark252, m31. +> To specify a curve, use the flag -curve=<curve>, where <curve> can be one of the following: bn254, bls12_377, bls12_381, bw6_761, grumpkin.
Once ICICLE has been built, you can add specific packages when you need them in your application: diff --git a/docs/docs/icicle/getting_started.md b/docs/docs/icicle/getting_started.md index f1e7bc0cb..4476ef08b 100644 --- a/docs/docs/icicle/getting_started.md +++ b/docs/docs/icicle/getting_started.md @@ -36,7 +36,7 @@ Each ICICLE release includes a tar file named `icicle30-.tar.gz`, - [Full C++ example](https://github.com/ingonyama-zk/icicle/tree/yshekel/V3/examples/c++/install-and-use-icicle) - [Full Rust example](https://github.com/ingonyama-zk/icicle/tree/yshekel/V3/examples/rust/install-and-use-icicle) -- Full GO example (TODO) +- [Full Go example](https://github.com/ingonyama-zk/icicle/tree/yshekel/V3/examples/golang/install-and-use-icicle) *(TODO update links to main branch when merged) diff --git a/docs/docs/icicle/golang-bindings.md b/docs/docs/icicle/golang-bindings.md index c006faeec..fd78edce1 100644 --- a/docs/docs/icicle/golang-bindings.md +++ b/docs/docs/icicle/golang-bindings.md @@ -1,7 +1,5 @@ # Golang bindings -TODO update for V3 - Golang bindings allow you to use ICICLE as a golang library. The source code for all Golang packages can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang). @@ -9,57 +7,60 @@ The Golang bindings are comprised of multiple packages. [`core`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/core) which defines all shared methods and structures, such as configuration structures, or memory slices. -[`cuda-runtime`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/cuda_runtime) which defines abstractions for CUDA methods for allocating memory, initializing and managing streams, and `DeviceContext` which enables users to define and keep track of devices. 
+[`runtime`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/runtime) which defines abstractions for ICICLE methods for allocating memory, initializing and managing streams, and `Device` which enables users to define and keep track of devices. -Each supported curve, field, and hash has its own package which you can find in the respective directories [here](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang). If your project uses BN254 you only need to import that single package named [`bn254`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/curves/bn254). +Each supported curve and field has its own package which you can find in the respective directories [here](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang). If your project uses BN254 you only need to import that single package named [`bn254`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/curves/bn254). ## Using ICICLE Golang bindings in your project To add ICICLE to your `go.mod` file. 
```bash -go get github.com/ingonyama-zk/icicle +go get github.com/ingonyama-zk/icicle/v3 ``` If you want to specify a specific branch ```bash -go get github.com/ingonyama-zk/icicle@<branch_name> +go get github.com/ingonyama-zk/icicle/v3@<branch_name> ``` For a specific commit ```bash -go get github.com/ingonyama-zk/icicle@<commit_id> +go get github.com/ingonyama-zk/icicle/v3@<commit_id> ``` -To build the shared libraries you can run [this](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/build.sh) script: +### Building from source + +To build the shared libraries you can run [this](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/build.sh) script inside the downloaded go dependency: ```sh -./build.sh [-curve=<curve>] [-field=<field>] [-hash=<hash>] [-cuda_version=<version>] [-g2] [-ecntt] [-devmode] +./build.sh [-curve=<curve>] [-field=<field>] [-cuda_version=<version>] [-skip_msm] [-skip_ntt] [-skip_g2] [-skip_ecntt] [-skip_fieldext] curve - The name of the curve to build or "all" to build all supported curves field - The name of the field to build or "all" to build all supported fields -hash - The name of the hash to build or "all" to build all supported hashes --g2 - Optional - build with G2 enabled --ecntt - Optional - build with ECNTT enabled --devmode - Optional - build in devmode +-skip_msm - Optional - build with MSM disabled +-skip_ntt - Optional - build with NTT disabled +-skip_g2 - Optional - build with G2 disabled +-skip_ecntt - Optional - build with ECNTT disabled +-skip_fieldext - Optional - build without field extension -help - Optional - Displays usage information ``` :::note -If more than one curve or more than one field or more than one hash is supplied, the last one supplied will be built +If more than one curve or more than one field is supplied, the last one supplied will be built ::: -To build ICICLE libraries for all supported curves with G2 and ECNTT enabled. +To build ICICLE libraries for all supported curves without certain features, you can use their -skip_<feature> flags.
For example, for disabling G2 and ECNTT: ```bash -./build.sh -curve=all -g2 -ecntt +./build.sh -curve=all -skip_g2 -skip_ecntt ``` -If you wish to build for a specific curve, for example bn254, without G2 or ECNTT enabled. +By default, all features are enabled. To build for a specific field or curve, you can pass the `-field=<field_name>` or `-curve=<curve_name>` flags: ``` bash ./build.sh -curve=bn254 @@ -69,15 +70,24 @@ Now you can import ICICLE into your project ```go import ( - "github.com/stretchr/testify/assert" - "testing" - - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) ... ``` +### Building with precompiled libs + +Download the frontend release binaries from our [github release page](https://github.com/ingonyama-zk/icicle/releases), for example: icicle30-ubuntu22.tar.gz for ICICLE v3 on ubuntu 22.04 + +Extract the libs and move them to the downloaded go dependency in your GOMODCACHE + +```sh +# extract frontend part +tar xzvf icicle30-ubuntu22.tar.gz +cp -r ./icicle/lib/* $(go env GOMODCACHE)/github.com/ingonyama-zk/icicle/v3@<version>/build/lib/ +``` + ## Running tests To run all tests, for all curves: @@ -86,23 +96,23 @@ To run all tests, for all curves: go test ./... -count=1 ``` -If you wish to run test for a specific curve: +If you wish to run tests for a specific curve or field: ```bash -go test <path_to_curve> -count=1 +go test <path_to_curve_or_field> -count=1 ``` ## How do Golang bindings work? -The libraries produced from the CUDA code compilation are used to bind Golang to ICICLE's CUDA code. +The golang packages are bound to the libraries produced from compiling ICICLE using cgo. -1. These libraries (named `libingo_curve_.a` and `libingo_field_.a`) can be imported in your Go project to leverage the GPU accelerated functionalities provided by ICICLE. +1.
These libraries (named `libicicle_curve_<curve>.a` and `libicicle_field_<field>.a`) can be imported in your Go project to leverage the accelerated functionalities provided by ICICLE. 2. In your Go project, you can use `cgo` to link these libraries. Here's a basic example on how you can use `cgo` to link these libraries: ```go /* -#cgo LDFLAGS: -L/path/to/shared/libs -lingo_curve_bn254 -L$/path/to/shared/libs -lingo_field_bn254 -lstdc++ -lm +#cgo LDFLAGS: -L/path/to/shared/libs -licicle_device -lstdc++ -lm -Wl,-rpath=/path/to/shared/libs #include "icicle.h" // make sure you use the correct header file(s) */ import "C" diff --git a/docs/docs/icicle/golang-bindings/ecntt.md b/docs/docs/icicle/golang-bindings/ecntt.md index 90b0b9617..4fd1acf8e 100644 --- a/docs/docs/icicle/golang-bindings/ecntt.md +++ b/docs/docs/icicle/golang-bindings/ecntt.md @@ -1,13 +1,11 @@ # ECNTT -TODO update for V3 - ## ECNTT Method The `ECNtt[T any]()` function performs the Elliptic Curve Number Theoretic Transform (EC-NTT) on the input points slice, using the provided dir (direction), cfg (configuration), and stores the results in the results slice. ```go -func ECNtt[T any](points core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTConfig[T], results core.HostOrDeviceSlice) core.IcicleError +func ECNtt[T any](points core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTConfig[T], results core.HostOrDeviceSlice) runtime.EIcicleError ``` ### Parameters @@ -19,7 +17,7 @@ func ECNtt[T any](points core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTC ### Return Value -- **`CudaError`**: A `core.IcicleError` value, which will be `core.IcicleErrorCode(0)` if the EC-NTT operation was successful, or an error if something went wrong. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the EC-NTT operation was successful, or an error if something went wrong.
## NTT Configuration (NTTConfig) @@ -27,29 +25,29 @@ The `NTTConfig` structure holds configuration parameters for the NTT operation, ```go type NTTConfig[T any] struct { - Ctx cr.DeviceContext - CosetGen T - BatchSize int32 - ColumnsBatch bool - Ordering Ordering - areInputsOnDevice bool - areOutputsOnDevice bool - IsAsync bool - NttAlgorithm NttAlgorithm + StreamHandle runtime.Stream + CosetGen T + BatchSize int32 + ColumnsBatch bool + Ordering Ordering + areInputsOnDevice bool + areOutputsOnDevice bool + IsAsync bool + Ext config_extension.ConfigExtensionHandler } ``` ### Fields -- **`Ctx`**: Device context containing details like device ID and stream ID. -- **`CosetGen`**: Coset generator used for coset (i)NTTs, defaulting to no coset being used. +- **`StreamHandle`**: Specifies the stream (queue) to use for async execution. +- **`CosetGen`**: Coset generator. Used to perform coset (i)NTTs. - **`BatchSize`**: The number of NTTs to compute in one operation, defaulting to 1. -- **`ColumnsBatch`**: If true the function will compute the NTTs over the columns of the input matrix and not over the rows. Defaults to `false`. +- **`ColumnsBatch`**: If true the function will compute the NTTs over the columns of the input matrix and not over the rows. - **`Ordering`**: Ordering of inputs and outputs (`KNN`, `KNR`, `KRN`, `KRR`), affecting how data is arranged. - **`areInputsOnDevice`**: Indicates if input scalars are located on the device. - **`areOutputsOnDevice`**: Indicates if results are stored on the device. - **`IsAsync`**: Controls whether the NTT operation runs asynchronously. -- **`NttAlgorithm`**: Explicitly select the NTT algorithm. ECNTT supports running on `Radix2` algoruithm. +- **`Ext`**: Extended configuration for backend. 
### Default Configuration @@ -65,30 +63,38 @@ func GetDefaultNTTConfig[T any](cosetGen T) NTTConfig[T] package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/ecntt" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/ntt" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func Main() { - // Obtain the default NTT configuration with a predefined coset generator. - cfg := GetDefaultNttConfig() - - // Define the size of the input scalars. - size := 1 << 18 - - // Generate Points for the ECNTT operation. - points := GenerateProjectivePoints(size) - - // Set the direction of the NTT (forward or inverse). - dir := core.KForward - - // Allocate memory for the results of the NTT operation. - results := make(core.HostSlice[Projective], size) - - // Perform the NTT operation. - err := ECNtt(points, dir, &cfg, results) - if err != cr.CudaSuccess { - panic("ECNTT operation failed") - } + // Load backend using env path + runtime.LoadBackendFromEnvOrDefault() + // Set Cuda device to perform + device := runtime.CreateDevice("CUDA", 0) + runtime.SetDevice(&device) + // Obtain the default NTT configuration with a predefined coset generator. + cfg := ntt.GetDefaultNttConfig() + + // Define the size of the input scalars. + size := 1 << 18 + + // Generate Points for the ECNTT operation. + points := bn254.GenerateProjectivePoints(size) + + // Set the direction of the NTT (forward or inverse). + dir := core.KForward + + // Allocate memory for the results of the NTT operation. + results := make(core.HostSlice[bn254.Projective], size) + + // Perform the NTT operation. 
+ err := ecntt.ECNtt(points, dir, &cfg, results) + if err != runtime.Success { + panic("ECNTT operation failed") + } } ``` diff --git a/docs/docs/icicle/golang-bindings/keccak.md b/docs/docs/icicle/golang-bindings/keccak.md deleted file mode 100644 index 930f797d6..000000000 --- a/docs/docs/icicle/golang-bindings/keccak.md +++ /dev/null @@ -1,96 +0,0 @@ -# Keccak - -TODO update for V3 - -## Keccak Example - -```go -package main - -import ( - "encoding/hex" - - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/hash/keccak" -) - -func createHostSliceFromHexString(hexString string) core.HostSlice[uint8] { - byteArray, err := hex.DecodeString(hexString) - if err != nil { - panic("Not a hex string") - } - return core.HostSliceFromElements([]uint8(byteArray)) -} - -func main() { - input := createHostSliceFromHexString("1725b6") - outHost256 := make(core.HostSlice[uint8], 32) - - cfg := keccak.GetDefaultHashConfig() - e := keccak.Keccak256(input, int32(input.Len()), 1, outHost256, &cfg) - if e.CudaErrorCode != cr.CudaSuccess { - panic("Keccak256 hashing failed") - } - - outHost512 := make(core.HostSlice[uint8], 64) - e = keccak.Keccak512(input, int32(input.Len()), 1, outHost512, &cfg) - if e.CudaErrorCode != cr.CudaSuccess { - panic("Keccak512 hashing failed") - } - - numberOfBlocks := 3 - outHostBatch256 := make(core.HostSlice[uint8], 32*numberOfBlocks) - e = keccak.Keccak256(input, int32(input.Len()/numberOfBlocks), int32(numberOfBlocks), outHostBatch256, &cfg) - if e.CudaErrorCode != cr.CudaSuccess { - panic("Keccak256 batch hashing failed") - } -} -``` - -## Keccak Methods - -```go -func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError -func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output 
core.HostOrDeviceSlice, config *HashConfig) core.IcicleError -``` - -### Parameters - -- **`input`**: A slice containing the input data for the Keccak256 hash function. It can reside in either host memory or device memory. -- **`inputBlockSize`**: An integer specifying the size of the input data for a single hash. -- **`numberOfBlocks`**: An integer specifying the number of results in the hash batch. -- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory. -- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation. - -### Return Value - -- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation. - -## HashConfig - -The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware. - -```go -type HashConfig struct { - Ctx cr.DeviceContext - areInputsOnDevice bool - areOutputsOnDevice bool - IsAsync bool -} -``` - -### Fields - -- **`Ctx`**: Device context containing details like device id and stream. -- **`areInputsOnDevice`**: Indicates if input data is located on the device. -- **`areOutputsOnDevice`**: Indicates if output hash is stored on the device. -- **`IsAsync`**: If true, runs the Keccak256/Keccak512 operation asynchronously. - -### Default Configuration - -Use `GetDefaultHashConfig` to obtain a default configuration, which can then be customized as needed. 
- -```go -func GetDefaultHashConfig() HashConfig -``` \ No newline at end of file diff --git a/docs/docs/icicle/golang-bindings/msm-pre-computation.md b/docs/docs/icicle/golang-bindings/msm-pre-computation.md index 5a542ba79..f13998d3e 100644 --- a/docs/docs/icicle/golang-bindings/msm-pre-computation.md +++ b/docs/docs/icicle/golang-bindings/msm-pre-computation.md @@ -1,14 +1,12 @@ # MSM Pre computation -TODO update for V3 - To understand the theory behind MSM pre computation technique refer to Niall Emmart's [talk](https://youtu.be/KAWlySN7Hm8?feature=shared&t=1734). ## Core package -### MSM PrecomputePoints +### MSM PrecomputeBases -`PrecomputePoints` and `G2PrecomputePoints` exists for all supported curves. +`PrecomputeBases` and `G2PrecomputeBases` exists for all supported curves. #### Description @@ -16,18 +14,17 @@ This function extends each provided base point $(P)$ with its multiples $(2^lP, The precomputation process is crucial for optimizing MSM operations, especially when dealing with large sets of points and scalars. By precomputing and storing multiples of the base points, the MSM function can more efficiently compute the scalar-point multiplications. -#### `PrecomputePoints` +#### `PrecomputeBases` Precomputes points for MSM by extending each base point with its multiples. ```go -func PrecomputePoints(points core.HostOrDeviceSlice, msmSize int, cfg *core.MSMConfig, outputBases core.DeviceSlice) cr.CudaError +func PrecomputeBases(bases core.HostOrDeviceSlice, cfg *core.MSMConfig, outputBases core.DeviceSlice) runtime.EIcicleError ``` ##### Parameters -- **`points`**: A slice of the original affine points to be extended with their multiples. -- **`msmSize`**: The size of a single msm in order to determine optimal parameters. +- **`bases`**: A slice of the original affine points to be extended with their multiples. - **`cfg`**: The MSM configuration parameters. - **`outputBases`**: The device slice allocated for storing the extended points. 
@@ -39,37 +36,43 @@ package main import ( "log" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { - cfg := bn254.GetDefaultMSMConfig() + // Load backend using env path + runtime.LoadBackendFromEnvOrDefault() + // Set Cuda device to perform + device := runtime.CreateDevice("CUDA", 0) + runtime.SetDevice(&device) + + cfg := core.GetDefaultMSMConfig() points := bn254.GenerateAffinePoints(1024) - var precomputeFactor int32 = 8 + cfg.PrecomputeFactor = 8 var precomputeOut core.DeviceSlice - precomputeOut.Malloc(points[0].Size()*points.Len()*int(precomputeFactor), points[0].Size()) + precomputeOut.Malloc(points[0].Size(), points.Len()*int(cfg.PrecomputeFactor)) - err := bn254.PrecomputePoints(points, 1024, &cfg, precomputeOut) - if err != cr.CudaSuccess { + err := msm.PrecomputeBases(points, &cfg, precomputeOut) + if err != runtime.Success { log.Fatalf("PrecomputeBases failed: %v", err) } } ``` -#### `G2PrecomputePoints` +#### `G2PrecomputeBases` This method is the same as `PrecomputePoints` but for G2 points. Extends each G2 curve base point with its multiples for optimized MSM computations. ```go -func G2PrecomputePoints(points core.HostOrDeviceSlice, msmSize int, cfg *core.MSMConfig, outputBases core.DeviceSlice) cr.CudaError +func G2PrecomputeBases(bases core.HostOrDeviceSlice, cfg *core.MSMConfig, outputBases core.DeviceSlice) runtime.EIcicleError ``` ##### Parameters -- **`points`**: A slice of the original affine points to be extended with their multiples. -- **`msmSize`**: The size of a single msm in order to determine optimal parameters. 
+- **`bases`**: A slice of the original affine points to be extended with their multiples. - **`cfg`**: The MSM configuration parameters. - **`outputBases`**: The device slice allocated for storing the extended points. @@ -81,20 +84,26 @@ package main import ( "log" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - g2 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/g2" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/g2" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { - cfg := g2.G2GetDefaultMSMConfig() + // Load backend using env path + runtime.LoadBackendFromEnvOrDefault() + // Set Cuda device to perform + device := runtime.CreateDevice("CUDA", 0) + runtime.SetDevice(&device) + + cfg := core.GetDefaultMSMConfig() points := g2.G2GenerateAffinePoints(1024) - var precomputeFactor int32 = 8 + cfg.PrecomputeFactor = 8 var precomputeOut core.DeviceSlice - precomputeOut.Malloc(points[0].Size()*points.Len()*int(precomputeFactor), points[0].Size()) + precomputeOut.Malloc(points[0].Size(), points.Len()*int(cfg.PrecomputeFactor)) - err := g2.G2PrecomputePoints(points, 1024, 0, &cfg, precomputeOut) - if err != cr.CudaSuccess { + err := g2.G2PrecomputeBases(points, &cfg, precomputeOut) + if err != runtime.Success { log.Fatalf("PrecomputeBases failed: %v", err) } } diff --git a/docs/docs/icicle/golang-bindings/msm.md b/docs/docs/icicle/golang-bindings/msm.md index 7463e9f1e..7209eca47 100644 --- a/docs/docs/icicle/golang-bindings/msm.md +++ b/docs/docs/icicle/golang-bindings/msm.md @@ -1,20 +1,24 @@ # MSM -TODO update for V3 - ## MSM Example ```go package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" - bn254_msm 
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/msm" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { + // Load backend using env path + runtime.LoadBackendFromEnvOrDefault() + // Set Cuda device to perform + device := runtime.CreateDevice("CUDA", 0) + runtime.SetDevice(&device) + // Obtain the default MSM configuration. cfg := core.GetDefaultMSMConfig() @@ -26,43 +30,43 @@ func main() { points := bn254.GenerateAffinePoints(size) // Create a CUDA stream for asynchronous operations. - stream, _ := cr.CreateStream() + stream, _ := runtime.CreateStream() var p bn254.Projective // Allocate memory on the device for the result of the MSM operation. var out core.DeviceSlice - _, e := out.MallocAsync(p.Size(), p.Size(), stream) + _, e := out.MallocAsync(p.Size(), 1, stream) - if e != cr.CudaSuccess { + if e != runtime.Success { panic(e) } // Set the CUDA stream in the MSM configuration. - cfg.Ctx.Stream = &stream + cfg.StreamHandle = stream cfg.IsAsync = true // Perform the MSM operation. - e = bn254_msm.Msm(scalars, points, &cfg, out) + e = msm.Msm(scalars, points, &cfg, out) - if e != cr.CudaSuccess { + if e != runtime.Success { panic(e) } // Allocate host memory for the results and copy the results from the device. outHost := make(core.HostSlice[bn254.Projective], 1) - cr.SynchronizeStream(&stream) + runtime.SynchronizeStream(stream) + runtime.DestroyStream(stream) outHost.CopyFromDevice(&out) // Free the device memory allocated for the results. 
out.Free() } - ``` ## MSM Method ```go -func Msm(scalars core.HostOrDeviceSlice, points core.HostOrDeviceSlice, cfg *core.MSMConfig, results core.HostOrDeviceSlice) cr.CudaError +func Msm(scalars core.HostOrDeviceSlice, points core.HostOrDeviceSlice, cfg *core.MSMConfig, results core.HostOrDeviceSlice) runtime.EIcicleError ``` ### Parameters @@ -74,7 +78,7 @@ func Msm(scalars core.HostOrDeviceSlice, points core.HostOrDeviceSlice, cfg *cor ### Return Value -- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the MSM operation. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. ## MSMConfig @@ -82,37 +86,37 @@ The `MSMConfig` structure holds configuration parameters for the MSM operation, ```go type MSMConfig struct { - Ctx cr.DeviceContext - PrecomputeFactor int32 - C int32 - Bitsize int32 - LargeBucketFactor int32 - batchSize int32 - areScalarsOnDevice bool - AreScalarsMontgomeryForm bool - arePointsOnDevice bool - ArePointsMontgomeryForm bool - areResultsOnDevice bool - IsBigTriangle bool - IsAsync bool + StreamHandle runtime.Stream + PrecomputeFactor int32 + C int32 + Bitsize int32 + BatchSize int32 + ArePointsSharedInBatch bool + areScalarsOnDevice bool + AreScalarsMontgomeryForm bool + areBasesOnDevice bool + AreBasesMontgomeryForm bool + areResultsOnDevice bool + IsAsync bool + Ext config_extension.ConfigExtensionHandler } ``` ### Fields -- **`Ctx`**: Device context containing details like device id and stream. +- **`StreamHandle`**: Specifies the stream (queue) to use for async execution. - **`PrecomputeFactor`**: Controls the number of extra points to pre-compute. - **`C`**: Window bitsize, a key parameter in the "bucket method" for MSM. - **`Bitsize`**: Number of bits of the largest scalar. -- **`LargeBucketFactor`**: Sensitivity to frequently occurring buckets. -- **`batchSize`**: Number of results to compute in one batch. 
+- **`BatchSize`**: Number of results to compute in one batch. +- **`ArePointsSharedInBatch`**: Bases are shared for batch. Set to true if all MSMs use the same bases. Otherwise, the number of bases and number of scalars are expected to be equal. - **`areScalarsOnDevice`**: Indicates if scalars are located on the device. - **`AreScalarsMontgomeryForm`**: True if scalars are in Montgomery form. -- **`arePointsOnDevice`**: Indicates if points are located on the device. -- **`ArePointsMontgomeryForm`**: True if point coordinates are in Montgomery form. +- **`areBasesOnDevice`**: Indicates if bases are located on the device. +- **`AreBasesMontgomeryForm`**: True if point coordinates are in Montgomery form. - **`areResultsOnDevice`**: Indicates if results are stored on the device. -- **`IsBigTriangle`**: If `true` MSM will run in Large triangle accumulation if `false` Bucket accumulation will be chosen. Default value: false. - **`IsAsync`**: If true, runs MSM asynchronously. +- **`Ext`**: Extended configuration for backend. ### Default Configuration @@ -122,9 +126,9 @@ Use `GetDefaultMSMConfig` to obtain a default configuration, which can then be c func GetDefaultMSMConfig() MSMConfig ``` -## How do I toggle between the supported algorithms? +## Batched msm -When creating your MSM Config you may state which algorithm you wish to use. `cfg.Ctx.IsBigTriangle = true` will activate Large triangle reduction and `cfg.Ctx.IsBigTriangle = false` will activate iterative reduction. +For batch msm, simply allocate the results array with size corresponding to batch size and set the `ArePointsSharedInBatch` flag in config struct. ```go ... @@ -149,7 +153,7 @@ The number of results is interpreted from the size of `var out core.DeviceSlice` batchSize := 3 var p G2Projective var out core.DeviceSlice -out.Malloc(batchSize*p.Size(), p.Size()) +out.Malloc(p.Size(), batchSize) ... ``` @@ -166,7 +170,7 @@ Now you may import `g2` package of the specified curve. 
```go import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/g2" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/g2" ) ``` @@ -176,25 +180,26 @@ This package include `G2Projective` and `G2Affine` points as well as a `G2Msm` m package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" - g2 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/g2" + "log" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/g2" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { cfg := core.GetDefaultMSMConfig() - size := 1 << 12 - batchSize := 3 - totalSize := size * batchSize - scalars := bn254.GenerateScalars(totalSize) - points := g2.G2GenerateAffinePoints(totalSize) - - var p g2.G2Projective - var out core.DeviceSlice - out.Malloc(batchSize*p.Size(), p.Size()) - g2.G2Msm(scalars, points, &cfg, out) + scalars := bn254.GenerateScalars(1024) + points := g2.G2GenerateAffinePoints(1024) + var p g2.G2Projective + var out core.DeviceSlice + out.Malloc(p.Size(), 1) + + if err := g2.G2Msm(scalars, points, &cfg, out); err != runtime.Success { + log.Fatalf("G2Msm failed: %v", err) + } } - ``` `G2Msm` works the same way as normal MSM, the difference is that it uses G2 Points. diff --git a/docs/docs/icicle/golang-bindings/multi-gpu.md b/docs/docs/icicle/golang-bindings/multi-gpu.md index 9706223a6..186c02018 100644 --- a/docs/docs/icicle/golang-bindings/multi-gpu.md +++ b/docs/docs/icicle/golang-bindings/multi-gpu.md @@ -1,7 +1,5 @@ # Multi GPU APIs -TODO update for V3 - To learn more about the theory of Multi GPU programming refer to [this part](../multi-gpu.md) of documentation.
Here we will cover the core multi GPU apis and an [example](#a-multi-gpu-example) @@ -21,48 +19,64 @@ import ( "fmt" "sync" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + bn254 "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { - numDevices, _ := cr.GetDeviceCount() + // Load backend using env path + runtime.LoadBackendFromEnvOrDefault() + + device := runtime.CreateDevice("CUDA", 0) + err := runtime.SetDevice(&device) + numDevices, _ := runtime.GetDeviceCount() fmt.Println("There are ", numDevices, " devices available") + + if err != runtime.Success { + panic(err) + } wg := sync.WaitGroup{} for i := 0; i < numDevices; i++ { + internalDevice := runtime.Device{DeviceType: device.DeviceType, Id: int32(i)} wg.Add(1) - // RunOnDevice makes sure each MSM runs on a single thread - cr.RunOnDevice(i, func(args ...any) { + runtime.RunOnDevice(&internalDevice, func(args ...any) { defer wg.Done() - cfg := bn254.GetDefaultMSMConfig() + currentDevice, err := runtime.GetActiveDevice() + if err != runtime.Success { + panic("Failed to get current device") + } + + fmt.Println("Running on ", currentDevice.GetDeviceType(), " ", currentDevice.Id, " device") + + cfg := msm.GetDefaultMSMConfig() cfg.IsAsync = true - for _, power := range []int{10, 18} { - size := 1 << power // 2^pwr - - // generate random scalars - scalars := bn254.GenerateScalars(size) - points := bn254.GenerateAffinePoints(size) - - // create a stream and allocate result pointer - stream, _ := cr.CreateStream() - var p bn254.Projective - var out core.DeviceSlice - out.MallocAsync(p.Size(), p.Size(), stream) - // assign stream to device context - 
cfg.Ctx.Stream = &stream - - // execute MSM - bn254.Msm(scalars, points, &cfg, out) - // read result from device - outHost := make(core.HostSlice[bn254.Projective], 1) - outHost.CopyFromDeviceAsync(&out, stream) - out.FreeAsync(stream) - - // sync the stream - cr.SynchronizeStream(&stream) + size := 1 << 10 + scalars := bn254.GenerateScalars(size) + points := bn254.GenerateAffinePoints(size) + + stream, _ := runtime.CreateStream() + var p bn254.Projective + var out core.DeviceSlice + _, err = out.MallocAsync(p.Size(), 1, stream) + if err != runtime.Success { + panic("Allocating bytes on device for Projective results failed") + } + cfg.StreamHandle = stream + + err = msm.Msm(scalars, points, &cfg, out) + if err != runtime.Success { + panic("Msm failed") } + outHost := make(core.HostSlice[bn254.Projective], 1) + outHost.CopyFromDeviceAsync(&out, stream) + out.FreeAsync(stream) + + runtime.SynchronizeStream(stream) + runtime.DestroyStream(stream) + // Check with gnark-crypto }) } wg.Wait() @@ -73,7 +87,7 @@ This example demonstrates a basic pattern for distributing tasks across multiple ## Device Management API -To streamline device management we offer as part of `cuda_runtime` package methods for dealing with devices. +To streamline device management we offer as part of `runtime` package methods for dealing with devices. ### `RunOnDevice` @@ -89,7 +103,7 @@ While the goroutine is locked to the host thread, the Go runtime will not assign **Parameters:** -- **`deviceId int`**: The ID of the device on which to run the provided function. Device IDs start from 0. +- **`device *Device`**: A pointer to the `Device` instance to be used to run code. - **`funcToRun func(args ...any)`**: The function to be executed on the specified device. - **`args ...any`**: Arguments to be passed to `funcToRun`. 
@@ -104,7 +118,8 @@ Any goroutines launched within `funcToRun` are not automatically bound to the sa **Example:** ```go -RunOnDevice(0, func(args ...any) { +device := runtime.CreateDevice("CUDA", 0) +RunOnDevice(&device, func(args ...any) { fmt.Println("This runs on GPU 0") // CUDA-related operations here will target GPU 0 }, nil) @@ -112,7 +127,7 @@ RunOnDevice(0, func(args ...any) { ### `SetDevice` -Sets the active device for the current host thread. All subsequent CUDA calls made from this thread will target the specified device. +Sets the active device for the current host thread. All subsequent calls made from this thread will target the specified device. :::warning This function should not be used directly in conjunction with goroutines. If you want to run multi-gpu scenarios with goroutines you should use [RunOnDevice](#runondevice) @@ -120,38 +135,27 @@ This function should not be used directly in conjunction with goroutines. If you **Parameters:** -- **`device int`**: The ID of the device to set as the current device. +- **`device *Device`**: A pointer to the `Device` instance to set as the active device. **Returns:** -- **`CudaError`**: Error code indicating the success or failure of the operation. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. ### `GetDeviceCount` -Retrieves the number of CUDA-capable devices available on the host. +Retrieves the number of devices available on the host. **Returns:** -- **`(int, CudaError)`**: The number of devices and an error code indicating the success or failure of the operation. +- **`(int, EIcicleError)`**: The number of devices and an error code indicating the success or failure of the operation. -### `GetDevice` +### `GetActiveDevice` -Gets the ID of the currently active device for the calling host thread. +Gets the currently active device for the calling host thread.
**Returns:** -- **`(int, CudaError)`**: The ID of the current device and an error code indicating the success or failure of the operation. - -### `GetDeviceFromPointer` - -Retrieves the device associated with a given pointer. - -**Parameters:** - -- **`ptr unsafe.Pointer`**: Pointer to query. - -**Returns:** +- **`(*Device, EIcicleError)`**: The device pointer and an error code indicating the success or failure of the operation. -- **`int`**: The device ID associated with the memory pointed to by `ptr`. -This documentation should provide a clear understanding of how to effectively manage multiple GPUs in Go applications using CUDA, with a particular emphasis on the `RunOnDevice` function for executing tasks on specific GPUs. +This documentation should provide a clear understanding of how to effectively manage multiple GPUs in Go applications using CUDA and other backends, with a particular emphasis on the `RunOnDevice` function for executing tasks on specific GPUs. diff --git a/docs/docs/icicle/golang-bindings/ntt.md b/docs/docs/icicle/golang-bindings/ntt.md index eeccd12ac..9a947603e 100644 --- a/docs/docs/icicle/golang-bindings/ntt.md +++ b/docs/docs/icicle/golang-bindings/ntt.md @@ -1,63 +1,71 @@ # NTT -TODO update for V3 - ## NTT Example ```go package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/ntt" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" - "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" ) func init() { - cfg := bn254.GetDefaultNttConfig() - initDomain(18, cfg) + // Load backend using env path + 
runtime.LoadBackendFromEnvOrDefault() + // Set Cuda device to perform + device := runtime.CreateDevice("CUDA", 0) + runtime.SetDevice(&device) + + cfg := core.GetDefaultNTTInitDomainConfig() + initDomain(18, cfg) } -func initDomain[T any](largestTestSize int, cfg core.NTTConfig[T]) core.IcicleError { - rouMont, _ := fft.Generator(uint64(1 << largestTestSize)) - rou := rouMont.Bits() - rouIcicle := bn254.ScalarField{} +func initDomain(largestTestSize int, cfg core.NTTInitDomainConfig) runtime.EIcicleError { + rouMont, _ := fft.Generator(uint64(1 << largestTestSize)) + rou := rouMont.Bits() + rouIcicle := bn254.ScalarField{} + limbs := core.ConvertUint64ArrToUint32Arr(rou[:]) - rouIcicle.FromLimbs(rou[:]) - e := bn254.InitDomain(rouIcicle, cfg.Ctx, false) - return e + rouIcicle.FromLimbs(limbs) + e := ntt.InitDomain(rouIcicle, cfg) + return e } func main() { - // Obtain the default NTT configuration with a predefined coset generator. - cfg := bn254.GetDefaultNttConfig() + // Obtain the default NTT configuration with a predefined coset generator. + cfg := ntt.GetDefaultNttConfig() + + // Define the size of the input scalars. + size := 1 << 18 - // Define the size of the input scalars. - size := 1 << 18 + // Generate scalars for the NTT operation. + scalars := bn254.GenerateScalars(size) - // Generate scalars for the NTT operation. - scalars := bn254.GenerateScalars(size) + // Set the direction of the NTT (forward or inverse). + dir := core.KForward - // Set the direction of the NTT (forward or inverse). - dir := core.KForward + // Allocate memory for the results of the NTT operation. + results := make(core.HostSlice[bn254.ScalarField], size) - // Allocate memory for the results of the NTT operation. - results := make(core.HostSlice[bn254.ScalarField], size) + // Perform the NTT operation. + err := ntt.Ntt(scalars, dir, &cfg, results) + if err != runtime.Success { + panic("NTT operation failed") + } - // Perform the NTT operation. 
- err := bn254.Ntt(scalars, dir, &cfg, results) - if err.CudaErrorCode != cr.CudaSuccess { - panic("NTT operation failed") - } + ntt.ReleaseDomain() } ``` ## NTT Method ```go -func Ntt[T any](scalars core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTConfig[T], results core.HostOrDeviceSlice) core.IcicleError +func Ntt[T any](scalars core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTConfig[T], results core.HostOrDeviceSlice) runtime.EIcicleError ``` ### Parameters @@ -69,7 +77,7 @@ func Ntt[T any](scalars core.HostOrDeviceSlice, dir core.NTTDir, cfg *core.NTTCo ### Return Value -- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the NTT operation. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. ## NTT Configuration (NTTConfig) @@ -77,29 +85,29 @@ The `NTTConfig` structure holds configuration parameters for the NTT operation, ```go type NTTConfig[T any] struct { - Ctx cr.DeviceContext - CosetGen T - BatchSize int32 - ColumnsBatch bool - Ordering Ordering - areInputsOnDevice bool - areOutputsOnDevice bool - IsAsync bool - NttAlgorithm NttAlgorithm + StreamHandle runtime.Stream + CosetGen T + BatchSize int32 + ColumnsBatch bool + Ordering Ordering + areInputsOnDevice bool + areOutputsOnDevice bool + IsAsync bool + Ext config_extension.ConfigExtensionHandler } ``` ### Fields -- **`Ctx`**: Device context containing details like device ID and stream ID. -- **`CosetGen`**: Coset generator used for coset (i)NTTs, defaulting to no coset being used. +- **`StreamHandle`**: Specifies the stream (queue) to use for async execution. +- **`CosetGen`**: Coset generator. Used to perform coset (i)NTTs. - **`BatchSize`**: The number of NTTs to compute in one operation, defaulting to 1. -- **`ColumnsBatch`**: If true the function will compute the NTTs over the columns of the input matrix and not over the rows. Defaults to `false`. 
-- **`Ordering`**: Ordering of inputs and outputs (`KNN`, `KNR`, `KRN`, `KRR`, `KMN`, `KNM`), affecting how data is arranged. +- **`ColumnsBatch`**: If true the function will compute the NTTs over the columns of the input matrix and not over the rows. +- **`Ordering`**: Ordering of inputs and outputs (`KNN`, `KNR`, `KRN`, `KRR`), affecting how data is arranged. - **`areInputsOnDevice`**: Indicates if input scalars are located on the device. - **`areOutputsOnDevice`**: Indicates if results are stored on the device. - **`IsAsync`**: Controls whether the NTT operation runs asynchronously. -- **`NttAlgorithm`**: Explicitly select the NTT algorithm. Default value: Auto (the implementation selects radix-2 or mixed-radix algorithm based on heuristics). +- **`Ext`**: Extended configuration for backend. ### Default Configuration @@ -114,7 +122,7 @@ func GetDefaultNTTConfig[T any](cosetGen T) NTTConfig[T] Before performing NTT operations, it's necessary to initialize the NTT domain; it only needs to be called once per GPU since the twiddles are cached. ```go -func InitDomain(primitiveRoot ScalarField, ctx cr.DeviceContext, fastTwiddles bool) core.IcicleError +func InitDomain(primitiveRoot bn254.ScalarField, cfg core.NTTInitDomainConfig) runtime.EIcicleError ``` This function initializes the domain with a given primitive root, optionally using fast twiddle factors to optimize the computation. @@ -124,30 +132,9 @@ This function initializes the domain with a given primitive root, optionally usi The `ReleaseDomain` function is responsible for releasing the resources associated with a specific domain in the CUDA device context. ```go -func ReleaseDomain(ctx cr.DeviceContext) core.IcicleError +func ReleaseDomain() runtime.EIcicleError ``` -### Parameters - -- **`ctx`**: a reference to the `DeviceContext` object, which represents the CUDA device context. - ### Return Value -The function returns a `core.IcicleError`, which represents the result of the operation. 
If the operation is successful, the function returns `core.IcicleErrorCode(0)`. - -### Example - -```go -import ( - "github.com/icicle-crypto/icicle-core/cr" - "github.com/icicle-crypto/icicle-core/core" -) - -func example() { - cfg := GetDefaultNttConfig() - err := ReleaseDomain(cfg.Ctx) - if err != nil { - // Handle the error - } -} -``` +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. diff --git a/docs/docs/icicle/golang-bindings/vec-ops.md b/docs/docs/icicle/golang-bindings/vec-ops.md index c39c8f2f0..e93d9a0a2 100644 --- a/docs/docs/icicle/golang-bindings/vec-ops.md +++ b/docs/docs/icicle/golang-bindings/vec-ops.md @@ -17,9 +17,10 @@ Icicle exposes a number of vector operations which a user can use: package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/vecOps" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { @@ -30,8 +31,8 @@ func main() { cfg := core.DefaultVecOpsConfig() // Perform vector multiplication - err := bn254.VecOp(a, b, out, cfg, core.Add) - if err != cr.CudaSuccess { + err := vecOps.VecOp(a, b, out, cfg, core.Add) + if err != runtime.Success { panic("Vector addition failed") } } @@ -43,9 +44,10 @@ func main() { package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + 
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/vecOps" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { @@ -56,8 +58,8 @@ func main() { cfg := core.DefaultVecOpsConfig() // Perform vector multiplication - err := bn254.VecOp(a, b, out, cfg, core.Sub) - if err != cr.CudaSuccess { + err := vecOps.VecOp(a, b, out, cfg, core.Sub) + if err != runtime.Success { panic("Vector subtraction failed") } } @@ -69,9 +71,10 @@ func main() { package main import ( - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/vecOps" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" ) func main() { @@ -82,8 +85,8 @@ func main() { cfg := core.DefaultVecOpsConfig() // Perform vector multiplication - err := bn254.VecOp(a, b, out, cfg, core.Mul) - if err != cr.CudaSuccess { + err := vecOps.VecOp(a, b, out, cfg, core.Mul) + if err != runtime.Success { panic("Vector multiplication failed") } } @@ -92,7 +95,7 @@ func main() { ### VecOps Method ```go -func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) +func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret runtime.EIcicleError) ``` #### Parameters @@ -105,7 +108,7 @@ func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.V #### Return Value -- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the vector operation. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. 
### VecOpsConfig @@ -113,21 +116,23 @@ The `VecOpsConfig` structure holds configuration parameters for the vector opera ```go type VecOpsConfig struct { - Ctx cr.DeviceContext - isAOnDevice bool - isBOnDevice bool - isResultOnDevice bool - IsAsync bool + StreamHandle runtime.Stream + isAOnDevice bool + isBOnDevice bool + isResultOnDevice bool + IsAsync bool + Ext config_extension.ConfigExtensionHandler } ``` #### Fields -- **Ctx**: Device context containing details like device ID and stream ID. -- **isAOnDevice**: Indicates if vector `a` is located on the device. -- **isBOnDevice**: Indicates if vector `b` is located on the device. -- **isResultOnDevice**: Specifies where the result vector should be stored (device or host memory). -- **IsAsync**: Controls whether the vector operation runs asynchronously. +- **`StreamHandle`**: Specifies the stream (queue) to use for async execution. +- **`isAOnDevice`**: Indicates if vector `a` is located on the device. +- **`isBOnDevice`**: Indicates if vector `b` is located on the device. +- **`isResultOnDevice`**: Specifies where the result vector should be stored (device or host memory). +- **`IsAsync`**: Controls whether the vector operation runs asynchronously. +- **`Ext`**: Extended configuration for backend. #### Default Configuration @@ -146,7 +151,7 @@ The function takes a matrix represented as a 1D slice and transposes it, storing ### Function ```go -func TransposeMatrix(in, out core.HostOrDeviceSlice, columnSize, rowSize int, ctx cr.DeviceContext, onDevice, isAsync bool) (ret core.IcicleError) +func TransposeMatrix(in, out core.HostOrDeviceSlice, columnSize, rowSize int, config core.VecOpsConfig) runtime.EIcicleError ``` ## Parameters @@ -155,13 +160,11 @@ func TransposeMatrix(in, out core.HostOrDeviceSlice, columnSize, rowSize int, ct - **`out`**: The output matrix is a `core.HostOrDeviceSlice`, which will be the transpose of the input matrix, stored as a 1D slice. 
- **`columnSize`**: The number of columns in the input matrix. - **`rowSize`**: The number of rows in the input matrix. -- **`ctx`**: The device context `cr.DeviceContext` to be used for the matrix transpose operation. -- **`onDevice`**: Indicates whether the input and output slices are stored on the device (GPU) or the host (CPU). -- **`isAsync`**: Indicates whether the matrix transpose operation should be executed asynchronously. +- **`config`**: A `VecOpsConfig` object containing various configuration options for the vector operations. ## Return Value -The function returns a `core.IcicleError` value, which represents the result of the matrix transpose operation. If the operation is successful, the returned value will be `0`. +- **`EIcicleError`**: A `runtime.EIcicleError` value, which will be `runtime.Success` if the operation was successful, or an error if something went wrong. ## Example Usage @@ -173,11 +176,11 @@ var output = make(core.HostSlice[ScalarField], 20) // ... // Get device context -ctx, _ := cr.GetDefaultDeviceContext() +cfg := core.DefaultVecOpsConfig() // Transpose the matrix -err := TransposeMatrix(input, output, 5, 4, ctx, false, false) -if err.IcicleErrorCode != core.IcicleErrorCode(0) { +err := TransposeMatrix(input, output, 5, 4, cfg) +if err != runtime.Success { // Handle the error } diff --git a/docs/docs/icicle/programmers_guide/go.md b/docs/docs/icicle/programmers_guide/go.md index 30404ce4c..1cd6bec11 100644 --- a/docs/docs/icicle/programmers_guide/go.md +++ b/docs/docs/icicle/programmers_guide/go.md @@ -1 +1,296 @@ -TODO \ No newline at end of file +# ICICLE Golang Usage Guide + +## Overview + +This guide covers the usage of Icicle's Golang API, including device management, memory operations, data transfer, synchronization, and compute APIs.
+ +## Device Management + +:::note +See all ICICLE runtime APIs in [runtime.go](https://github.com/ingonyama-zk/icicle/blob/yshekel/V3/wrappers/golang/runtime/runtime.go) +::: + +### Loading a Backend + +The backend can be loaded from a specific path or from an environment variable. This is essential for setting up the computing environment. + +```go +import "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + +result := runtime.LoadBackendFromEnvOrDefault() +// or load from custom install dir +result := runtime.LoadBackend("/path/to/backend/installdir", true) +``` + +### Setting and Getting Active Device + +You can set the active device for the current thread and retrieve it when needed: + +```go +device := runtime.CreateDevice("CUDA", 0) // or other +result := runtime.SetDevice(&device) +// or query current (thread) device +activeDevice, err := runtime.GetActiveDevice() +``` + +### Querying Device Information + +Retrieve the number of available devices and check if a pointer is allocated on the host or on the active device: + +```go +numDevices := runtime.GetDeviceCount() + +var ptr unsafe.Pointer +isHostMemory := runtime.IsHostMemory(ptr) +isDeviceMemory := runtime.IsActiveDeviceMemory(ptr) +``` + +## Memory Management + +### Allocating and Freeing Memory + +Memory can be allocated and freed on the active device: + +```go +ptr, err := runtime.Malloc(1024) // Allocate 1024 bytes +err := runtime.Free(ptr) // Free the allocated memory +``` + +### Asynchronous Memory Operations + +You can perform memory allocation and deallocation asynchronously using streams: + +```go +stream, err := runtime.CreateStream() + +ptr, err := runtime.MallocAsync(1024, stream) +err = runtime.FreeAsync(ptr, stream) +``` + +### Querying Available Memory + +Retrieve the total and available memory on the active device: + +```go +// Query the free and total memory of the active device +availableMemory, err := runtime.GetAvailableMemory() +freeMemory := availableMemory.Free +totalMemory :=
availableMemory.Total +``` + +### Setting Memory Values + +Set memory to a specific value on the active device, synchronously or asynchronously: + +```go +err := runtime.Memset(ptr, 0, 1024) // Set 1024 bytes to 0 +err := runtime.MemsetAsync(ptr, 0, 1024, stream) +``` + +## Data Transfer + +### Explicit Data Transfers + +To avoid device-inference overhead, use explicit copy functions: + +```go +result := runtime.CopyToHost(host_dst, device_src, size) +result := runtime.CopyToHostAsync(host_dst, device_src, size, stream) +result := runtime.CopyToDevice(device_dst, host_src, size) +result := runtime.CopyToDeviceAsync(device_dst, host_src, size, stream) +``` + +## Stream Management + +### Creating and Destroying Streams + +Streams are used to manage asynchronous operations: + +```go +stream, err := runtime.CreateStream() +err = runtime.DestroyStream(stream) +``` + +## Synchronization + +### Synchronizing Streams and Devices + +Ensure all previous operations on a stream or device are completed before proceeding: + +```go +err := runtime.StreamSynchronize(stream) +err := runtime.DeviceSynchronize() +``` + +## Device Properties + +### Checking Device Availability + +Check if a device is available and retrieve a list of registered devices: + +```go +dev := runtime.CreateDevice("CPU", 0) +isCPUAvail := runtime.IsDeviceAvailable(dev) +``` + +### Querying Device Properties + +Retrieve properties of the active device: + +```go +properties, err := runtime.GetDeviceProperties(properties); + +/******************/ +// where DeviceProperties is +type DeviceProperties struct { + UsingHostMemory bool // Indicates if the device uses host memory + NumMemoryRegions int32 // Number of memory regions available on the device + SupportsPinnedMemory bool // Indicates if the device supports pinned memory +} +``` + +## Compute APIs + +### Multi-Scalar Multiplication (MSM) Example + +Icicle provides high-performance compute APIs such as the Multi-Scalar Multiplication (MSM) for cryptographic 
operations. Here's a simple example of how to use the MSM API. + +```go +package main + +import ( + "fmt" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + bn254Msm "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" +) + +func main() { + + // Load installed backends + runtime.LoadBackendFromEnvOrDefault() + + // trying to choose CUDA if available, or fallback to CPU otherwise (default device) + deviceCuda := runtime.CreateDevice("CUDA", 0) // GPU-0 + if runtime.IsDeviceAvailable(&deviceCuda) { + runtime.SetDevice(&deviceCuda) + } // else we stay on CPU backend + + // Setup inputs + const size = 1 << 18 + + // Generate random inputs + scalars := bn254.GenerateScalars(size) + points := bn254.GenerateAffinePoints(size) + + // (optional) copy scalars to device memory explicitly + var scalarsDevice core.DeviceSlice + scalars.CopyToDevice(&scalarsDevice, true) + + // MSM configuration + cfgBn254 := core.GetDefaultMSMConfig() + + // allocate memory for the result + result := make(core.HostSlice[bn254.Projective], 1) + + // execute bn254 MSM on device + err := bn254Msm.Msm(scalarsDevice, points, &cfgBn254, result) + + // Check for errors + if err != runtime.Success { + errorString := fmt.Sprint( + "bn254 Msm failed: ", err) + panic(errorString) + } + + // free explicitly allocated device memory + scalarsDevice.Free() +} +``` + +### Polynomial Operations Example + +Here's another example demonstrating polynomial operations using Icicle: + +```go +package main + +import ( + "fmt" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/ntt" + 
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/polynomial" +) + +func initBabybearDomain() runtime.EIcicleError { + cfgInitDomain := core.GetDefaultNTTInitDomainConfig() + rouIcicle := babybear.ScalarField{} + rouIcicle.FromUint32(1461624142) + return ntt.InitDomain(rouIcicle, cfgInitDomain) +} + +func init() { + // Load installed backends + runtime.LoadBackendFromEnvOrDefault() + + // trying to choose CUDA if available, or fallback to CPU otherwise (default device) + deviceCuda := runtime.CreateDevice("CUDA", 0) // GPU-0 + if runtime.IsDeviceAvailable(&deviceCuda) { + runtime.SetDevice(&deviceCuda) + } // else we stay on CPU backend + + // build domain for ntt is required for some polynomial ops that rely on ntt + err := initBabybearDomain() + if err != runtime.Success { + errorString := fmt.Sprint( + "Babybear Domain initialization failed: ", err) + panic(errorString) + } +} + +func main() { + + // Setup inputs + const polySize = 1 << 10 + + // randomize two polynomials over babybear field + var fBabybear polynomial.DensePolynomial + defer fBabybear.Delete() + var gBabybear polynomial.DensePolynomial + defer gBabybear.Delete() + fBabybear.CreateFromCoeffecitients(babybear.GenerateScalars(polySize)) + gBabybear.CreateFromCoeffecitients(babybear.GenerateScalars(polySize / 2)) + + // Perform polynomial multiplication + rBabybear := fBabybear.Multiply(&gBabybear) // Executes on the current device + defer rBabybear.Delete() + rDegree := rBabybear.Degree() + + fmt.Println("f Degree: ", fBabybear.Degree()) + fmt.Println("g Degree: ", gBabybear.Degree()) + fmt.Println("r Degree: ", rDegree) +} +``` + +In this example, the polynomial multiplication is used to perform polynomial multiplication on CUDA or CPU, showcasing the flexibility and power of Icicle's compute APIs. + +## Error Handling + +### Checking for Errors + +Icicle APIs return an `EIcicleError` enumeration value. Always check the returned value to ensure that operations were successful. 
+ +```go +if result != runtime.Success { + // Handle error +} +``` + +This guide provides an overview of the essential APIs available in Icicle for Go. The provided examples should help you get started with integrating Icicle into your high-performance computing projects. diff --git a/examples/ZKContainer.md b/examples/ZKContainer.md deleted file mode 100644 index 1eacf3c29..000000000 --- a/examples/ZKContainer.md +++ /dev/null @@ -1,23 +0,0 @@ -# ZKContainer - -We recommend using [ZKContainer](https://www.ingonyama.com/blog/product-announcement-zk-containers), where we have already preinstalled all the required dependencies, to run Icicle examples. -To use our containers you will need [Docker](https://www.docker.com/) and [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html). - -In each example directory, ZKContainer files are located in a subdirectory `.devcontainer`. - -- File `Dockerfile` specifies how to build an image of a ZKContainer. -- File `devcontainer.json` enables running ZKContainer from Visual Studio Code. - -## Running ZKContainer from shell - -```sh -docker build -t icicle-example-poseidon -f .devcontainer/Dockerfile . -``` - -To run the example interactively, start the container - -```sh -docker run -it --rm --gpus all -v .:/icicle-example icicle-example-poseidon -``` - -Inside the container, run the commands for building the library for whichever [build system](../README.md#build-systems) you choose to use. diff --git a/examples/golang/install-and-use-icicle/README.md b/examples/golang/install-and-use-icicle/README.md new file mode 100644 index 000000000..b3e7d4e29 --- /dev/null +++ b/examples/golang/install-and-use-icicle/README.md @@ -0,0 +1,96 @@ +# Example: Install and use ICICLE + +This example shows how to install CUDA backend and use it in a Go application.
+ +Download release binaries from our [github release page](https://github.com/ingonyama-zk/icicle/releases): +- **Backend** icicle30-ubuntu22-cuda122.tar.gz + +> [!NOTE] +> The names of the files are based on the release version. Ensure you update the tar file names in the example if you are using a different release. + +## Optional: Using Docker with Ubuntu 22 + +While not mandatory, this example can be demonstrated in an Ubuntu 22 Docker container. +```bash +docker run -it --rm --gpus all -v ./:/workspace -w /workspace icicle-release-ubuntu22-cuda122 bash +``` + +This command starts a bash session in the Docker container with GPUs enabled and the example files mapped to /workspace in the container. + +### Building the docker image + +The Docker image is based on NVIDIA’s image for Ubuntu 22.04 and can be built from the following Dockerfile: +```dockerfile +# Use the official NVIDIA development runtime image for Ubuntu 22.04 +FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 + +# Install necessary packages +RUN apt-get update && apt-get install -y \ + build-essential \ + cmake \ + tar + +ENV GOLANG_VERSION 1.21.1 +RUN curl -L https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz | tar -xz -C /usr/local +ENV PATH="/usr/local/go/bin:${PATH}" +``` + +Build the Docker image with the following command: +```bash +docker build -t icicle-release-ubuntu22-cuda122 -f Dockerfile.ubuntu22 .
+``` + +## Extract and Install the CUDA Backend + +```bash +cd release +# extract CUDA backend +tar xzvf icicle30-ubuntu22-cuda122.tar.gz -C /opt +``` + +## Build the Go Frontend and Execute + +Update your go.mod to include ICICLE as a dependency, navigate to the dependency in your GOMODCACHE and build ICICLE there + +```sh +go get github.com/ingonyama-zk/icicle/v3 +cd $(go env GOMODCACHE)/github.com/ingonyama-zk/icicle/v3@<version>/wrappers/golang +chmod +x build.sh +./build.sh -curve=bn254 +./build.sh -curve=bls12_377 +``` + +Once ICICLE has been built, you can add specific packages when you need them in your application and load the backend + +```go +import ( + runtime "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + core "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + bn254 "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + bn254MSM "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" +) + +// This loads the CUDA backend that you extracted to /opt +runtime.LoadBackendFromEnvOrDefault() +``` + +## Install in a Custom Location +If you prefer to install the CUDA backend in a custom location such as /custom/path, follow these steps: + +```bash +mkdir -p /custom/path +tar xzvf icicle30-ubuntu22-cuda122.tar.gz -C /custom/path +``` + +If installed in a custom location, you need to define the environment variable: + +```bash +export ICICLE_BACKEND_INSTALL_DIR=/custom/path/icicle/lib/backend +``` + +Alternatively, you can load the backend programmatically in your Go code using the `LoadBackend` function from the `runtime` package: +```go +func LoadBackend(path string, isRecursive bool) EIcicleError + +runtime.LoadBackend("/custom/path/to/backend", true) +``` diff --git a/examples/golang/install-and-use-icicle/go.mod b/examples/golang/install-and-use-icicle/go.mod new file mode 100644 index 000000000..a1cbf320a --- /dev/null +++ b/examples/golang/install-and-use-icicle/go.mod @@ -0,0 +1,10 @@ +module
ingonyama.com/install_and_use_icicle + +go 1.22.1 + +// TODO - When v3 is pushed to main, switch to this +// require ( +// github.com/ingonyama-zk/icicle/v3 v3.0.0 +// ) + +require github.com/ingonyama-zk/icicle/v3 v3.0.0-20240902084701-0e5a616c7ea6 diff --git a/examples/golang/install-and-use-icicle/go.sum b/examples/golang/install-and-use-icicle/go.sum new file mode 100644 index 000000000..3fa728cd9 --- /dev/null +++ b/examples/golang/install-and-use-icicle/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/ingonyama-zk/icicle/v3 v3.0.0-20240902084701-0e5a616c7ea6 h1:xxK3awsYhfTRNT3IeUzIF31miwq8ECla0NxHaibGi9U= +github.com/ingonyama-zk/icicle/v3 v3.0.0-20240902084701-0e5a616c7ea6/go.mod h1:jWb8eWG0p+5PAOLtweqW3NOxR/HwM3ywrjXM/diiUcY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/examples/golang/install-and-use-icicle/main.go b/examples/golang/install-and-use-icicle/main.go new file mode 100644 index 000000000..ae3a20217 --- /dev/null +++ b/examples/golang/install-and-use-icicle/main.go @@ -0,0 +1,221 @@ +package main + +import ( + "flag" + "fmt" + "time" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377" + bls12377G2 "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377/g2" + bls12377Msm 
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377/msm" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + bn254G2 "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/g2" + bn254Msm "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" +) + +func main() { + runtime.LoadBackendFromEnvOrDefault() + + var logSizeMin int + var logSizeMax int + var deviceType string + + flag.IntVar(&logSizeMin, "l", 10, "Minimum log size") + flag.IntVar(&logSizeMax, "u", 10, "Maximum log size") + flag.StringVar(&deviceType, "device", "CUDA", "Device type") + flag.Parse() + + device := runtime.CreateDevice(deviceType, 0) + runtime.SetDevice(&device) + + sizeMax := 1 << logSizeMax + + print("Generating BN254 scalars ... ") + startTime := time.Now() + scalarsBn254Max := bn254.GenerateScalars(sizeMax) + println(time.Since(startTime).String()) + + print("Generating BN254 points ... ") + startTime = time.Now() + pointsBn254Max := bn254.GenerateAffinePoints(sizeMax) + println(time.Since(startTime).String()) + + print("Generating BN254 G2 points ... ") + startTime = time.Now() + pointsBn254G2Max := bn254G2.G2GenerateAffinePoints(sizeMax) + println(time.Since(startTime).String()) + + print("Generating BLS12_377 scalars ... ") + startTime = time.Now() + scalarsBls12377Max := bls12377.GenerateScalars(sizeMax) + println(time.Since(startTime).String()) + + print("Generating BLS12_377 points ... ") + startTime = time.Now() + pointsBls12377Max := bls12377.GenerateAffinePoints(sizeMax) + println(time.Since(startTime).String()) + + print("Generating BLS12_377 G2 points ... ") + startTime = time.Now() + pointsBls12377G2Max := bls12377G2.G2GenerateAffinePoints(sizeMax) + println(time.Since(startTime).String()) + + for logSize := logSizeMin; logSize <= logSizeMax; logSize++ { + + // Define the size of the problem, here 2^18. 
+ size := 1 << logSize + + fmt.Printf("---------------------- MSM size 2^%d=%d ------------------------\n", logSize, size) + + // println(scalarsBls12377, pointsBls12377, pointsBn254G2) + // println(scalarsBn254, pointsBn254, pointsBls12377G2) + + print("Configuring bn254 MSM ... ") + startTime = time.Now() + + scalarsBn254 := scalarsBn254Max[:size] + pointsBn254 := pointsBn254Max[:size] + pointsBn254G2 := pointsBn254G2Max[:size] + + cfgBn254 := core.GetDefaultMSMConfig() + cfgBn254G2 := core.GetDefaultMSMConfig() + cfgBn254.IsAsync = true + cfgBn254G2.IsAsync = true + + streamBn254, _ := runtime.CreateStream() + streamBn254G2, _ := runtime.CreateStream() + + cfgBn254.StreamHandle = streamBn254 + cfgBn254G2.StreamHandle = streamBn254G2 + + var projectiveBn254 bn254.Projective + var projectiveBn254G2 bn254G2.G2Projective + + var msmResultBn254 core.DeviceSlice + var msmResultBn254G2 core.DeviceSlice + + _, e := msmResultBn254.MallocAsync(projectiveBn254.Size(), 1, streamBn254) + if e != runtime.Success { + errorString := fmt.Sprint( + "Bn254 Malloc failed: ", e) + panic(errorString) + } + _, e = msmResultBn254G2.MallocAsync(projectiveBn254G2.Size(), 1, streamBn254G2) + if e != runtime.Success { + errorString := fmt.Sprint( + "Bn254 Malloc G2 failed: ", e) + panic(errorString) + } + + println(time.Since(startTime).String()) + + print("Configuring Bls12377 MSM ... 
") + startTime = time.Now() + + scalarsBls12377 := scalarsBls12377Max[:size] + pointsBls12377 := pointsBls12377Max[:size] + pointsBls12377G2 := pointsBls12377G2Max[:size] + + cfgBls12377 := core.GetDefaultMSMConfig() + cfgBls12377G2 := core.GetDefaultMSMConfig() + cfgBls12377.IsAsync = true + cfgBls12377G2.IsAsync = true + + streamBls12377, _ := runtime.CreateStream() + streamBls12377G2, _ := runtime.CreateStream() + + cfgBls12377.StreamHandle = streamBls12377 + cfgBls12377G2.StreamHandle = streamBls12377G2 + + var projectiveBls12377 bls12377.Projective + var projectiveBls12377G2 bls12377G2.G2Projective + + var msmResultBls12377 core.DeviceSlice + var msmResultBls12377G2 core.DeviceSlice + + _, e = msmResultBls12377.MallocAsync(projectiveBls12377.Size(), 1, streamBls12377) + if e != runtime.Success { + errorString := fmt.Sprint( + "Bls12_377 Malloc failed: ", e) + panic(errorString) + } + _, e = msmResultBls12377G2.MallocAsync(projectiveBls12377G2.Size(), 1, streamBls12377G2) + if e != runtime.Success { + errorString := fmt.Sprint( + "Bls12_377 Malloc G2 failed: ", e) + panic(errorString) + } + + println(time.Since(startTime).String()) + + print("Executing bn254 MSM on device ... 
") + startTime = time.Now() + + currentDevice, _ := runtime.GetActiveDevice() + print("Device: ", currentDevice.GetDeviceType()) + + e = bn254Msm.Msm(scalarsBn254, pointsBn254, &cfgBn254, msmResultBn254) + if e != runtime.Success { + errorString := fmt.Sprint( + "bn254 Msm failed: ", e) + panic(errorString) + } + e = bn254G2.G2Msm(scalarsBn254, pointsBn254G2, &cfgBn254G2, msmResultBn254G2) + if e != runtime.Success { + errorString := fmt.Sprint( + "bn254 Msm G2 failed: ", e) + panic(errorString) + } + + msmResultBn254Host := make(core.HostSlice[bn254.Projective], 1) + msmResultBn254G2Host := make(core.HostSlice[bn254G2.G2Projective], 1) + + msmResultBn254Host.CopyFromDeviceAsync(&msmResultBn254, streamBn254) + msmResultBn254G2Host.CopyFromDeviceAsync(&msmResultBn254G2, streamBn254G2) + + msmResultBn254.FreeAsync(streamBn254) + msmResultBn254G2.FreeAsync(streamBn254G2) + + runtime.SynchronizeStream(streamBn254) + runtime.SynchronizeStream(streamBn254G2) + + println(time.Since(startTime).String()) + + print("Executing Bls12377 MSM on device ... 
") + startTime = time.Now() + + currentDevice, _ = runtime.GetActiveDevice() + print("Device: ", currentDevice.GetDeviceType()) + + e = bls12377Msm.Msm(scalarsBls12377, pointsBls12377, &cfgBls12377, msmResultBls12377) + if e != runtime.Success { + errorString := fmt.Sprint( + "bls12_377 Msm failed: ", e) + panic(errorString) + } + e = bls12377G2.G2Msm(scalarsBls12377, pointsBls12377G2, &cfgBls12377G2, msmResultBls12377G2) + if e != runtime.Success { + errorString := fmt.Sprint( + "bls12_377 Msm G2 failed: ", e) + panic(errorString) + } + + msmResultBls12377Host := make(core.HostSlice[bls12377.Projective], 1) + msmResultBls12377G2Host := make(core.HostSlice[bls12377G2.G2Projective], 1) + + msmResultBls12377Host.CopyFromDeviceAsync(&msmResultBls12377, streamBls12377) + msmResultBls12377G2Host.CopyFromDeviceAsync(&msmResultBls12377G2, streamBls12377G2) + + msmResultBls12377.FreeAsync(streamBls12377) + msmResultBls12377G2.FreeAsync(streamBls12377G2) + + runtime.SynchronizeStream(streamBls12377) + runtime.SynchronizeStream(streamBls12377G2) + + println(time.Since(startTime).String()) + } +} diff --git a/examples/golang/msm/main.go b/examples/golang/msm/main.go index ffc2b407a..48658194c 100644 --- a/examples/golang/msm/main.go +++ b/examples/golang/msm/main.go @@ -5,27 +5,34 @@ import ( "fmt" "time" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377" - bls12377G2 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377/g2" - bls12377Msm "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377/msm" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + bls12377G2 
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377/g2" + bls12377Msm "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377/msm" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" - bn254G2 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/g2" - bn254Msm "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/msm" + bn254G2 "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/g2" + bn254Msm "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/msm" ) func main() { + runtime.LoadBackendFromEnvOrDefault() + var logSizeMin int var logSizeMax int + var deviceType string - flag.IntVar(&logSizeMin, "l", 17, "Minimum log size") - flag.IntVar(&logSizeMax, "u", 22, "Maximum log size") + flag.IntVar(&logSizeMin, "l", 10, "Minimum log size") + flag.IntVar(&logSizeMax, "u", 10, "Maximum log size") + flag.StringVar(&deviceType, "device", "CUDA", "Device type") flag.Parse() + device := runtime.CreateDevice(deviceType, 0) + runtime.SetDevice(&device) + sizeMax := 1 << logSizeMax print("Generating BN254 scalars ... 
") @@ -80,11 +87,11 @@ func main() { cfgBn254.IsAsync = true cfgBn254G2.IsAsync = true - streamBn254, _ := cr.CreateStream() - streamBn254G2, _ := cr.CreateStream() + streamBn254, _ := runtime.CreateStream() + streamBn254G2, _ := runtime.CreateStream() - cfgBn254.Ctx.Stream = &streamBn254 - cfgBn254G2.Ctx.Stream = &streamBn254G2 + cfgBn254.StreamHandle = streamBn254 + cfgBn254G2.StreamHandle = streamBn254G2 var projectiveBn254 bn254.Projective var projectiveBn254G2 bn254G2.G2Projective @@ -92,14 +99,14 @@ func main() { var msmResultBn254 core.DeviceSlice var msmResultBn254G2 core.DeviceSlice - _, e := msmResultBn254.MallocAsync(projectiveBn254.Size(), projectiveBn254.Size(), streamBn254) - if e != cr.CudaSuccess { + _, e := msmResultBn254.MallocAsync(projectiveBn254.Size(), 1, streamBn254) + if e != runtime.Success { errorString := fmt.Sprint( "Bn254 Malloc failed: ", e) panic(errorString) } - _, e = msmResultBn254G2.MallocAsync(projectiveBn254G2.Size(), projectiveBn254G2.Size(), streamBn254G2) - if e != cr.CudaSuccess { + _, e = msmResultBn254G2.MallocAsync(projectiveBn254G2.Size(), 1, streamBn254G2) + if e != runtime.Success { errorString := fmt.Sprint( "Bn254 Malloc G2 failed: ", e) panic(errorString) @@ -119,11 +126,11 @@ func main() { cfgBls12377.IsAsync = true cfgBls12377G2.IsAsync = true - streamBls12377, _ := cr.CreateStream() - streamBls12377G2, _ := cr.CreateStream() + streamBls12377, _ := runtime.CreateStream() + streamBls12377G2, _ := runtime.CreateStream() - cfgBls12377.Ctx.Stream = &streamBls12377 - cfgBls12377G2.Ctx.Stream = &streamBls12377G2 + cfgBls12377.StreamHandle = streamBls12377 + cfgBls12377G2.StreamHandle = streamBls12377G2 var projectiveBls12377 bls12377.Projective var projectiveBls12377G2 bls12377G2.G2Projective @@ -131,14 +138,14 @@ func main() { var msmResultBls12377 core.DeviceSlice var msmResultBls12377G2 core.DeviceSlice - _, e = msmResultBls12377.MallocAsync(projectiveBls12377.Size(), projectiveBls12377.Size(), streamBls12377) - if e 
!= cr.CudaSuccess { + _, e = msmResultBls12377.MallocAsync(projectiveBls12377.Size(), 1, streamBls12377) + if e != runtime.Success { errorString := fmt.Sprint( "Bls12_377 Malloc failed: ", e) panic(errorString) } - _, e = msmResultBls12377G2.MallocAsync(projectiveBls12377G2.Size(), projectiveBls12377G2.Size(), streamBls12377G2) - if e != cr.CudaSuccess { + _, e = msmResultBls12377G2.MallocAsync(projectiveBls12377G2.Size(), 1, streamBls12377G2) + if e != runtime.Success { errorString := fmt.Sprint( "Bls12_377 Malloc G2 failed: ", e) panic(errorString) @@ -149,14 +156,17 @@ func main() { print("Executing bn254 MSM on device ... ") startTime = time.Now() + currentDevice, _ := runtime.GetActiveDevice() + print("Device: ", currentDevice.GetDeviceType()) + e = bn254Msm.Msm(scalarsBn254, pointsBn254, &cfgBn254, msmResultBn254) - if e != cr.CudaSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "bn254 Msm failed: ", e) panic(errorString) } e = bn254G2.G2Msm(scalarsBn254, pointsBn254G2, &cfgBn254G2, msmResultBn254G2) - if e != cr.CudaSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "bn254 Msm G2 failed: ", e) panic(errorString) @@ -171,22 +181,25 @@ func main() { msmResultBn254.FreeAsync(streamBn254) msmResultBn254G2.FreeAsync(streamBn254G2) - cr.SynchronizeStream(&streamBn254) - cr.SynchronizeStream(&streamBn254G2) + runtime.SynchronizeStream(streamBn254) + runtime.SynchronizeStream(streamBn254G2) println(time.Since(startTime).String()) print("Executing Bls12377 MSM on device ... 
") startTime = time.Now() + currentDevice, _ = runtime.GetActiveDevice() + print("Device: ", currentDevice.GetDeviceType()) + e = bls12377Msm.Msm(scalarsBls12377, pointsBls12377, &cfgBls12377, msmResultBls12377) - if e != cr.CudaSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "bls12_377 Msm failed: ", e) panic(errorString) } e = bls12377G2.G2Msm(scalarsBls12377, pointsBls12377G2, &cfgBls12377G2, msmResultBls12377G2) - if e != cr.CudaSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "bls12_377 Msm G2 failed: ", e) panic(errorString) @@ -201,8 +214,8 @@ func main() { msmResultBls12377.FreeAsync(streamBls12377) msmResultBls12377G2.FreeAsync(streamBls12377G2) - cr.SynchronizeStream(&streamBls12377) - cr.SynchronizeStream(&streamBls12377G2) + runtime.SynchronizeStream(streamBls12377) + runtime.SynchronizeStream(streamBls12377G2) println(time.Since(startTime).String()) } diff --git a/examples/golang/ntt/main.go b/examples/golang/ntt/main.go index 4f1ee03ac..4594d5ecc 100644 --- a/examples/golang/ntt/main.go +++ b/examples/golang/ntt/main.go @@ -5,26 +5,33 @@ import ( "fmt" "time" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + runtime "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377" - bls12377Ntt "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377/ntt" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" + bls12377Ntt "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bls12377/ntt" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" - bn254Ntt "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/ntt" + bn254Ntt "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/ntt" bls12377Fft 
"github.com/consensys/gnark-crypto/ecc/bls12-377/fr/fft" bn254Fft "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" ) func main() { + runtime.LoadBackendFromEnvOrDefault() + var logSize int + var deviceType string flag.IntVar(&logSize, "s", 20, "Log size") + flag.StringVar(&deviceType, "device", "CUDA", "Device type") flag.Parse() + device := runtime.CreateDevice(deviceType, 0) + runtime.SetDevice(&device) + size := 1 << logSize fmt.Printf("---------------------- NTT size 2^%d=%d ------------------------\n", logSize, size) @@ -44,32 +51,33 @@ func main() { cfgBls12377 := bls12377Ntt.GetDefaultNttConfig() cfgBls12377.IsAsync = true + cfgInitDomainBls := core.GetDefaultNTTInitDomainConfig() rouMontBn254, _ := bn254Fft.Generator(uint64(size)) rouBn254 := rouMontBn254.Bits() rouIcicleBn254 := bn254.ScalarField{} limbsBn254 := core.ConvertUint64ArrToUint32Arr(rouBn254[:]) rouIcicleBn254.FromLimbs(limbsBn254) - bn254Ntt.InitDomain(rouIcicleBn254, cfgBn254.Ctx, false) + bn254Ntt.InitDomain(rouIcicleBn254, cfgInitDomainBls) rouMontBls12377, _ := bls12377Fft.Generator(uint64(size)) rouBls12377 := rouMontBls12377.Bits() rouIcicleBls12377 := bls12377.ScalarField{} limbsBls12377 := core.ConvertUint64ArrToUint32Arr(rouBls12377[:]) rouIcicleBls12377.FromLimbs(limbsBls12377) - bls12377Ntt.InitDomain(rouIcicleBls12377, cfgBls12377.Ctx, false) + bls12377Ntt.InitDomain(rouIcicleBls12377, cfgInitDomainBls) print("Configuring bn254 NTT ... 
") startTime = time.Now() - streamBn254, _ := cr.CreateStream() + streamBn254, _ := runtime.CreateStream() - cfgBn254.Ctx.Stream = &streamBn254 + cfgBn254.StreamHandle = streamBn254 var nttResultBn254 core.DeviceSlice - _, e := nttResultBn254.MallocAsync(size*scalarsBn254.SizeOfElement(), scalarsBn254.SizeOfElement(), streamBn254) - if e != cr.CudaSuccess { + _, e := nttResultBn254.MallocAsync(size*scalarsBn254.SizeOfElement(), 1, streamBn254) + if e != runtime.Success { errorString := fmt.Sprint( "Bn254 Malloc failed: ", e) panic(errorString) @@ -80,14 +88,14 @@ func main() { print("Configuring Bls12377 NTT ... ") startTime = time.Now() - streamBls12377, _ := cr.CreateStream() + streamBls12377, _ := runtime.CreateStream() - cfgBls12377.Ctx.Stream = &streamBls12377 + cfgBls12377.StreamHandle = streamBls12377 var nttResultBls12377 core.DeviceSlice - _, e = nttResultBls12377.MallocAsync(size*scalarsBls12377.SizeOfElement(), scalarsBls12377.SizeOfElement(), streamBls12377) - if e != cr.CudaSuccess { + _, e = nttResultBls12377.MallocAsync(size*scalarsBls12377.SizeOfElement(), 1, streamBls12377) + if e != runtime.Success { errorString := fmt.Sprint( "Bls12_377 Malloc failed: ", e) panic(errorString) @@ -99,7 +107,7 @@ func main() { startTime = time.Now() err := bn254Ntt.Ntt(scalarsBn254, core.KForward, &cfgBn254, nttResultBn254) - if err.CudaErrorCode != cr.CudaSuccess { + if err != runtime.Success { errorString := fmt.Sprint( "bn254 Ntt failed: ", e) panic(errorString) @@ -108,14 +116,14 @@ func main() { nttResultBn254Host := make(core.HostSlice[bn254.ScalarField], size) nttResultBn254Host.CopyFromDeviceAsync(&nttResultBn254, streamBn254) nttResultBn254.FreeAsync(streamBn254) - cr.SynchronizeStream(&streamBn254) + runtime.SynchronizeStream(streamBn254) println(time.Since(startTime).String()) print("Executing Bls12377 NTT on device ... 
") startTime = time.Now() err = bls12377Ntt.Ntt(scalarsBls12377, core.KForward, &cfgBls12377, nttResultBls12377) - if err.CudaErrorCode != cr.CudaSuccess { + if err != runtime.Success { errorString := fmt.Sprint( "bls12_377 Ntt failed: ", e) panic(errorString) @@ -125,7 +133,7 @@ func main() { nttResultBls12377Host.CopyFromDeviceAsync(&nttResultBls12377, streamBls12377) nttResultBls12377.FreeAsync(streamBls12377) - cr.SynchronizeStream(&streamBls12377) + runtime.SynchronizeStream(streamBls12377) println(time.Since(startTime).String()) } diff --git a/examples/golang/polynomials/main.go b/examples/golang/polynomials/main.go index 5238a92c3..563dfeedd 100644 --- a/examples/golang/polynomials/main.go +++ b/examples/golang/polynomials/main.go @@ -5,56 +5,61 @@ import ( "fmt" bn254Fft "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" - cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime" - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254" - bn254Ntt "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/ntt" - bn254Polynomial "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/polynomial" - - "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core" - babybear "github.com/ingonyama-zk/icicle/v2/wrappers/golang/fields/babybear" - babybearNtt "github.com/ingonyama-zk/icicle/v2/wrappers/golang/fields/babybear/ntt" - babybearPolynomial "github.com/ingonyama-zk/icicle/v2/wrappers/golang/fields/babybear/polynomial" + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254" + bn254Ntt "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/ntt" + bn254Polynomial "github.com/ingonyama-zk/icicle/v3/wrappers/golang/curves/bn254/polynomial" + runtime "github.com/ingonyama-zk/icicle/v3/wrappers/golang/runtime" + + "github.com/ingonyama-zk/icicle/v3/wrappers/golang/core" + babybear "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear" + babybearNtt 
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/ntt" + babybearPolynomial "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/polynomial" ) var maxNttLogSize uint var polyLogSize uint -func initBn254Domain() core.IcicleError { - deviceCfg, _ := cr.GetDefaultDeviceContext() +func initBn254Domain() runtime.EIcicleError { + cfgInitDomain := core.GetDefaultNTTInitDomainConfig() rouMontBn254, _ := bn254Fft.Generator(uint64(1 << maxNttLogSize)) rouBn254 := rouMontBn254.Bits() rouIcicleBn254 := bn254.ScalarField{} limbsBn254 := core.ConvertUint64ArrToUint32Arr(rouBn254[:]) rouIcicleBn254.FromLimbs(limbsBn254) - return bn254Ntt.InitDomain(rouIcicleBn254, deviceCfg, false) + return bn254Ntt.InitDomain(rouIcicleBn254, cfgInitDomain) } -func initBabybearDomain() core.IcicleError { - deviceCfg, _ := cr.GetDefaultDeviceContext() +func initBabybearDomain() runtime.EIcicleError { + cfgInitDomain := core.GetDefaultNTTInitDomainConfig() rouIcicle := babybear.ScalarField{} rouIcicle.FromUint32(1461624142) - return babybearNtt.InitDomain(rouIcicle, deviceCfg, false) + return babybearNtt.InitDomain(rouIcicle, cfgInitDomain) } func init() { + runtime.LoadBackendFromEnvOrDefault() + + var deviceType string + flag.UintVar(&maxNttLogSize, "maxNttLogSize", 20, "") flag.UintVar(&polyLogSize, "polyLogSize", 15, "") + flag.StringVar(&deviceType, "device", "CUDA", "Device type") + + device := runtime.CreateDevice(deviceType, 0) + runtime.SetDevice(&device) e := initBn254Domain() - if e.IcicleErrorCode != core.IcicleSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "Bn254 Domain initialization failed: ", e) panic(errorString) } e = initBabybearDomain() - if e.IcicleErrorCode != core.IcicleSuccess { + if e != runtime.Success { errorString := fmt.Sprint( "Babybear Domain initialization failed: ", e) panic(errorString) } - - bn254Polynomial.InitPolyBackend() - babybearPolynomial.InitPolyBackend() } func main() { polySize := 1 << polyLogSize @@ -94,8 
+99,8 @@ func main() { // in this example domain in on host and evals on device. hostDomain := core.HostSliceFromElements([]bn254.ScalarField{five, thirty}) var deviceImage core.DeviceSlice - _, err := deviceImage.Malloc(five.Size()*hostDomain.Len(), five.Size()) - if err != cr.CudaSuccess { + _, err := deviceImage.Malloc(five.Size(), hostDomain.Len()) + if err != runtime.Success { errorString := fmt.Sprint( "deviceImage allocation failed: ", err) panic(errorString) diff --git a/go.mod b/go.mod index 4aa8d85fe..bf11c0b30 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/ingonyama-zk/icicle/v2 +module github.com/ingonyama-zk/icicle/v3 go 1.20 diff --git a/icicle/src/curves/ffi_extern.cpp b/icicle/src/curves/ffi_extern.cpp index 4e3ff8499..db27a9126 100644 --- a/icicle/src/curves/ffi_extern.cpp +++ b/icicle/src/curves/ffi_extern.cpp @@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, to_affine)(projective_t* point, affine_t* p *point_out = projective_t::to_affine(*point); } +extern "C" void CONCAT_EXPAND(CURVE, from_affine)(affine_t* point, projective_t* point_out) +{ + *point_out = projective_t::from_affine(*point); +} + extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* points, int size) { projective_t::rand_host_many(points, size); @@ -46,6 +51,11 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_to_affine)(g2_projective_t* point, g2_af *point_out = g2_projective_t::to_affine(*point); } +extern "C" void CONCAT_EXPAND(CURVE, g2_from_affine)(g2_affine_t* point, g2_projective_t* point_out) +{ + *point_out = g2_projective_t::from_affine(*point); +} + extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projective_t* points, int size) { g2_projective_t::rand_host_many(points, size); diff --git a/wrappers/golang/README.md b/wrappers/golang/README.md new file mode 100644 index 000000000..c78ab60c2 --- /dev/null +++ b/wrappers/golang/README.md @@ -0,0 +1,89 @@ +# Golang Bindings + +In order to build the 
underlying ICICLE libraries you should run the build script found [here](./build.sh). + +Build script USAGE + +```sh +./build.sh [-curve=<curve>] [-field=<field>] [-hash=<hash>] [-cuda_version=<version>] [-skip_msm] [-skip_ntt] [-skip_g2] [-skip_ecntt] [-skip_fieldext] + +curve - The name of the curve to build or "all" to build all supported curves +field - The name of the field to build or "all" to build all supported fields +-skip_msm - Optional - build with MSM disabled +-skip_ntt - Optional - build with NTT disabled +-skip_g2 - Optional - build with G2 disabled +-skip_ecntt - Optional - build with ECNTT disabled +-skip_fieldext - Optional - build without field extension +-help - Optional - Displays usage information +``` + +To build ICICLE libraries for all supported curves with G2 and ECNTT enabled: + +```sh +./build.sh -curve=all +``` + +If you wish to build for a specific curve, for example bn254, without G2 or ECNTT enabled: + +```sh +./build.sh -curve=bn254 -skip_g2 -skip_ecntt +``` + +## Supported curves, fields and operations + +### Supported curves and operations + +| Operation\Curve | bn254 | bls12_377 | bls12_381 | bw6_761 | grumpkin | +| --- | :---: | :---: | :---: | :---: | :---: | +| MSM | ✅ | ✅ | ✅ | ✅ | ✅ | +| G2 | ✅ | ✅ | ✅ | ✅ | ❌ | +| NTT | ✅ | ✅ | ✅ | ✅ | ❌ | +| ECNTT | ✅ | ✅ | ✅ | ✅ | ❌ | +| VecOps | ✅ | ✅ | ✅ | ✅ | ✅ | +| Polynomials | ✅ | ✅ | ✅ | ✅ | ❌ | + +### Supported fields and operations + +| Operation\Field | babybear | +| --- | :---: | +| VecOps | ✅ | +| Polynomials | ✅ | +| NTT | ✅ | +| Extension Field | ✅ | + +## Running golang tests + +To run the tests for curve bn254: + +```sh +go test ./wrappers/golang/curves/bn254/tests -count=1 -v +``` + +To run all the tests in the golang bindings: + +```sh +go test ./... -count=1 -v +``` + +## How do Golang bindings work? + +The libraries produced from the code compilation are used to bind Golang to ICICLE's code. + +1. 
These libraries (named `libicicle_curve_<curve>.so` and `libicicle_field_<field>.so`) can be imported in your Go project to leverage the accelerated functionalities provided by ICICLE. + +2. In your Go project, you can use `cgo` to link these libraries. Here's a basic example on how you can use `cgo` to link these libraries: + +```go +/* +#cgo LDFLAGS: -L/path/to/shared/libs -licicle_device -lstdc++ -lm -Wl,-rpath=/path/to/shared/libs +#include "icicle.h" // make sure you use the correct header file(s) +*/ +import "C" + +func main() { + // Now you can call the C functions from the ICICLE libraries. + // Note that C function calls are prefixed with 'C.' in Go code. +} +``` + +Replace `/path/to/shared/libs` with the actual path where the shared libraries are located on your system. diff --git a/wrappers/golang/build.sh b/wrappers/golang/build.sh new file mode 100755 index 000000000..2698dd3a2 --- /dev/null +++ b/wrappers/golang/build.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +MSM_DEFINED=ON +NTT_DEFINED=ON +G2_DEFINED=ON +ECNTT_DEFINED=ON +EXT_FIELD=ON + +CUDA_COMPILER_PATH=/usr/local/cuda/bin/nvcc + +DEVMODE=OFF +BUILD_CURVES=( ) +BUILD_FIELDS=( ) + +SUPPORTED_CURVES=("bn254" "bls12_377" "bls12_381" "bw6_761" "grumpkin") +SUPPORTED_FIELDS=("babybear") +CUDA_BACKEND=OFF + +BUILD_DIR="${ICICLE_BUILD_DIR:-$(realpath "$PWD/../../icicle/build")}" + +if [[ $1 == "-help" ]]; then + echo "Build script for building ICICLE cpp libraries" + echo "" + echo "If more than one curve or more than one field is supplied, the last one supplied will be built" + echo "" + echo "USAGE: ./build.sh [OPTION...]" + echo "" + echo "OPTIONS:" + echo " -curve= Specifies the curve to be built. If \"all\" is supplied," + echo " all curves will be built with any additional curve options." + echo "" + echo " -skip_msm Builds the curve library with MSM (multi-scalar multiplication) disabled." + echo "" + echo " -skip_ntt Builds the curve/field library with NTT (number theoretic transform) disabled." 
+ echo "" + echo " -skip_g2 Builds the curve library with G2 (a secondary group) disabled." + echo "" + echo " -skip_ecntt Builds the curve library with ECNTT (elliptic curve NTT) disabled." + echo "" + echo " -field= Specifies the field to be built. If \"all\" is supplied," + echo " all fields will be built with any additional field options." + echo "" + echo " -skip_fieldext Builds the field library with the extension field disabled." + echo "" + echo " -cuda_backend=