# .github/workflows/android.yml

name: Android

# Triggers:
#   - pushes to main and to release/* branches
#   - pull requests that touch any of the Android-related paths listed below
#   - manual runs via workflow_dispatch
on:
  push:
    branches:
      - main
      - 'release/*'
  pull_request:
    paths:
      - '.ci/docker/**'
      - '.github/workflows/android.yml'
      - 'build/*android*.sh'
      - 'install_requirements.sh'
      - 'examples/demo-apps/android/**'
      - 'extension/android/**'
      - 'extension/module/**'
  workflow_dispatch:

# The group key is built from the workflow name, the PR number (falling back to
# the commit SHA when there is no PR), and two booleans telling whether the run
# was triggered by workflow_dispatch or by schedule. A newer run in the same
# group cancels the one in progress. No schedule trigger is declared above, so
# the last component is always false for this workflow.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  # Builds the Android LLM demo once per tokenizer through the shared test-infra
  # Linux job. The build output is published under the `android-apps` artifact
  # name, which the upload-artifacts job downloads.
  build-llm-demo:
    name: build-llm-demo
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        tokenizer: [bpe, tiktoken]
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android
      submodules: 'true'
      # Build the PR head commit for pull requests, otherwise the pushed commit
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      upload-artifact: android-apps
      script: |
        set -eux

        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

        # Build LLM Demo for Android
        bash build/build_android_llm_demo.sh "${{ matrix.tokenizer }}" "${ARTIFACTS_DIR_NAME}"

  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also by TorchChat
  upload-artifacts:
    needs: build-llm-demo
    runs-on: linux.2xlarge
    steps:
      - name: Download the artifacts from GitHub
        # NOTE(review): the major version here presumably has to stay compatible with whatever
        # linux_job.yml uses to upload the artifact - confirm before bumping it.
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: android-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the artifacts
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the artifacts to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          # NOTE: Consuming stale artifacts doesn't make sense for benchmarking, as the goal is
          # always to benchmark models that are as fresh as possible. The 14 retention-days are
          # kept for now for TorchChat until a periodic job can publish the artifacts more often.
          # Ideally this would be reduced to <= 2 days, meaning the benchmark job would run daily.
          retention-days: 14
          # The test-llama-app job fetches the APKs from this S3 prefix, so fail here when there
          # is nothing to upload instead of letting the downstream job hit a missing file.
          if-no-files-found: error
          path: ${{ runner.temp }}/artifacts/

  # Runs the Android emulator directly on the runner instead of inside Docker
  run-emulator:
    # NOTE(review): no step below downloads the build artifacts, so this dependency presumably
    # only gates the emulator run on a successful build - confirm.
    needs: build-llm-demo
    runs-on: amz2023.linux.4xlarge
    env:
      # NOTE(review): not referenced by any step in this job directly; presumably read by
      # install_android.sh, which is run with `sudo -E` - confirm.
      ANDROID_NDK_VERSION: r26c
      # Used for both the AVD cache key and the emulator's api-level input below
      API_LEVEL: 34
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
            This is used to run Android emulators, ANDROID_HOME is installed at /opt/android/sdk

      - uses: actions/checkout@v3
        with:
          submodules: false

      - name: Setup conda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: '3.10'

      - name: Install Android dependencies
        shell: bash
        run: |
          set -eux

          # Reuse the script that install Android on ET Docker image
          sudo -E bash .ci/docker/common/install_android.sh

      - name: Gradle cache
        uses: gradle/actions/setup-gradle@v3

      - name: AVD cache
        uses: actions/cache@v4
        id: avd-cache
        with:
          # Cache the AVD and adb files under ~/.android, keyed by the API level
          path: |
            ~/.android/avd/*
            ~/.android/adb*
          key: avd-${{ env.API_LEVEL }}

      # NB: It takes about 10 minutes to cold boot the emulator here
      - name: Run Android emulator
        env:
          ANDROID_HOME: /opt/android/sdk
        uses: reactivecircus/android-emulator-runner@v2
        with:
          api-level: ${{ env.API_LEVEL }}
          # NB: The x86_64 emulator is slow because of the lack of KVM support on AWS. It
          # seems that a metal instance could be used for that, but it hasn't been tried
          # out yet. Also, the arm64-v8a arch requires an ARM runner
          arch: x86_64
          script: ./build/run_android_emulator.sh
          # NB: This is to boot the emulator faster, following the instructions on
          # https://github.com/ReactiveCircus/android-emulator-runner. The max number
          # of cores we can set is 6; any higher number will be reduced to 6.
          cores: 6
          ram-size: 12288M
          force-avd-creation: false
          disable-animations: true
          emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none
          # Generous boot timeout so that a slow cold boot doesn't make the job fail flakily
          emulator-boot-timeout: 900

  # Runs the Llama demo app on AWS Device Farm through the shared test-infra mobile job.
  # Let's see how expensive this job is; we might want to tone it down by running it periodically
  test-llama-app:
    # The APKs referenced below are fetched from S3, so wait for them to be uploaded first
    needs: upload-artifacts
    # NOTE(review): id-token: write is presumably needed by mobile_job.yml to authenticate
    # to AWS via OIDC - confirm against that workflow.
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
    strategy:
      matrix:
        # https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
        # mentions that tiktoken is only for Llama3. So, it can be exported later in another archive
        # like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
        # updated to run Llama3
        tokenizer: [bpe]
    with:
      device-type: android
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of the ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      # This is the custom Android device pool that only includes Samsung Galaxy S2x
      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
      # Uploaded to S3 by the upload-artifacts job; the name of the app comes from the project itself
      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
      # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
      # Among the inputs, this is the biggest file, so it is cached on AWS to make the test faster. Note that AWS deletes the file after
      # 30 days, and the job automatically re-uploads it when that happens.
      extra-data: https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip