diff --git a/.azuredevops/rocm-ci.yml b/.azuredevops/rocm-ci.yml new file mode 100644 index 000000000..2f388cb76 --- /dev/null +++ b/.azuredevops/rocm-ci.yml @@ -0,0 +1,44 @@ +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + +variables: +- group: common +- template: /.azuredevops/variables-global.yml@pipelines_repo + +trigger: + batch: true + branches: + include: + - develop + paths: + exclude: + - .github + - .jenkins + - docs + - '.*.y*ml' + - '*.md' + - copyright.txt + - LICENSE.txt + +pr: + autoCancel: true + branches: + include: + - develop + paths: + exclude: + - .github + - .jenkins + - docs + - '.*.y*ml' + - '*.md' + - copyright.txt + - LICENSE.txt + drafts: false + +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocAL.yml@pipelines_repo diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index d0056aeaf..55c82bc44 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -2,9 +2,9 @@ name: Linting on: push: - branches: [develop, main] + branches: [master, develop] pull_request: - branches: [develop, main] + branches: [master, develop] jobs: call-workflow-passing-data: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4323a034f..b5dba5a08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,39 +1,107 @@ +

+ # Changelog -rocAL documentation is available at -[https://rocm.docs.amd.com/projects/rocAL/en/latest/](https://rocm.docs.amd.com/projects/rocAL/en/latest/) +## Online Documentation + +[rocAL Documentation](https://github.com/ROCm/rocAL) -## rocAL 1.0.0 (unreleased) +## rocAL 2.0.0 (unreleased) -### Additions +### Added -* +* Packages - dev & tests +* Support for audio loader and decoder, which uses libsndfile library to decode wav files +* C++ rocAL audio unit test and python script to run and compare the outputs +* Python support for audio decoders +* Pytorch iterator for Audio +* Python audio unit test, and support to verify outputs +* rocDecode for HW decode +* Support for Audio augmentation - PreEmphasis filter +* Support for reading from file lists in file reader +* Support for Audio augmentation - Spectrogram +* Support for Audio augmentation - ToDecibels +* Support for downmixing audio channels during decoding +* Support for Audio augmentation - Resample +* Support for TensorTensorAdd and TensorScalarMultiply operations +* Support for Uniform and Normal distribution nodes +* Support for Audio augmentation - NonSilentRegionDetection +* Support for generic augmentation - Slice +* Support for generic augmentation - Normalize +* Support for Audio augmentation - MelFilterBank ### Optimizations -* +* Tests +* Setup Script +* CentOS 7 support +* SLES 15 SP5 support -### Changes +### Changed -* Removed CuPy from `setup.py` +* Image to tensor updates +* ROCm install - use case graphics removed -### Fixes +### Fixed -* +* Tests & readme -### Tested configurations +### Tested Configurations * Linux distribution - + Ubuntu - `20.04` / `22.04` -* ROCm: rocm-core - `5.4.0.50400-72` + * Ubuntu - `20.04` / `22.04` + * CentOS - `7` + * RedHat - `8` / `9` + * SLES - `15-SP5` +* ROCm: rocm-core - `6.1.0.60100-64` +* RPP - `rpp` & `rpp-dev`/`rpp-devel` +* MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel` +* rocDecode - `rocdecode` & 
`rocdecode-dev`/`rocdecode-devel` +* Protobuf - `libprotobuf-dev`/`protobuf-devel` +* RapidJSON - `https://github.com/Tencent/rapidjson` +* Turbo JPEG - [Version 3.0.2](https://libjpeg-turbo.org/) +* PyBind11 - [v2.11.1](https://github.com/pybind/pybind11) +* FFMPEG - `ffmpeg 4` dev package +* OpenCV - `libopencv` / [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) +* libsndfile - [1.0.31](https://github.com/libsndfile/libsndfile/releases/tag/1.0.31) +* rocAL Setup Script - `V2.5.0` +* Dependencies for all the above packages + +### Known issues + +* Requires custom deps install + +## rocAL 1.0.0 + +### Added + +* rocAL Tests + +### Optimizations + +* Image augmentations + +### Changed + +* Deps + +### Fixed + +* minor issues + +### Tested Configurations + +* Linux distribution + * Ubuntu - `20.04` / `22.04` +* ROCm: rocm-core - `6.0.60002-1` * Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4) * OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) +* RPP - [1.4.0](https://github.com/ROCm/rpp/releases/tag/1.4.0) * FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2) -* RPP - `rpp` & `rpp-dev`/`rpp-devel` -* MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel` +* MIVisionX - [master](https://github.com/ROCm/MIVisionX) * Dependencies for all the above packages -* rocAL Setup Script - `V1.1.0` +* rocAL Setup Script - `V1.0.2` ### Known issues -* +* Requires custom version of libturbo-JPEG diff --git a/CMakeLists.txt b/CMakeLists.txt index e4ddb7f99..051ffa34e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
cmake_minimum_required(VERSION 3.5) -set(VERSION "1.0.0") +set(VERSION "2.0.0") # Set Project Version and Language project(rocal VERSION ${VERSION} LANGUAGES CXX) @@ -51,7 +51,8 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) option(ENHANCED_MESSAGE "rocAL Enhanced Message Option" ON) option(GPU_SUPPORT "Build rocAL with GPU Support" ON) option(BUILD_PYPACKAGE "Build rocAL Python Package" ON) -option(BUILD_WITH_AMD_ADVANCE "Build rocAL with Advanced GPU support" OFF) +option(PYTHON_VERSION_SUGGESTED "Python version to build rocal" "") +option(BUILD_WITH_AMD_ADVANCE "Build rocAL for advanced AMD GPU Architecture" OFF) set(DEFAULT_BUILD_TYPE "Release") @@ -117,13 +118,8 @@ else() endif() message("-- ${BoldBlue}rocAL Build Type -- ${CMAKE_BUILD_TYPE}${ColourReset}") -message("-- ${Cyan}rocAL Developer Options${ColourReset}") -message("-- ${Cyan} -D GPU_SUPPORT=${GPU_SUPPORT} [Turn ON/OFF GPU support (default:ON)]${ColourReset}") -message("-- ${Cyan} -D BACKEND=${BACKEND} [Select rocAL Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}") -message("-- ${Cyan} -D BUILD_PYPACKAGE=${BUILD_PYPACKAGE} [rocAL Python Package(default:ON)]${ColourReset}") -message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [rocAL support for advanced GPU(default:OFF)]${ColourReset}") - list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) find_package(HALF QUIET) if(HALF_FOUND) @@ -134,8 +130,19 @@ else() message("-- ${Blue}rocAL Built with float16 Support OFF${ColourReset}") endif() +message("-- ${Cyan}rocAL Developer Options${ColourReset}") +message("-- ${Cyan} -D GPU_SUPPORT=${GPU_SUPPORT} [Turn ON/OFF GPU support (default:ON)]${ColourReset}") +message("-- ${Cyan} -D BACKEND=${BACKEND} [Select rocAL Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}") +message("-- ${Cyan} -D BUILD_PYPACKAGE=${BUILD_PYPACKAGE} [rocAL Python Package(default:ON)]${ColourReset}") +message("-- 
${Cyan} -D PYTHON_VERSION_SUGGESTED=${PYTHON_VERSION_SUGGESTED} [User provided python version to use for rocAL Python Bindings(default:System Version)]${ColourReset}") +message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}") + add_subdirectory(rocAL) -add_subdirectory(rocAL_pybind) +if(BUILD_PYPACKAGE) + add_subdirectory(rocAL_pybind) +else() + message("-- ${Cyan}rocAL Python Module turned OFF by user option -D BUILD_PYPACKAGE=OFF ${ColourReset}") +endif() # install rocAL docs -- {ROCM_PATH}/${CMAKE_INSTALL_DATADIR}/doc/rocal/ install(FILES docs/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/doc/rocal COMPONENT runtime) @@ -143,11 +150,11 @@ install(FILES docs/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/doc/rocal COMP # test package install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/data DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test) -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp_api_tests/ DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test) -# CTest - Needs RPP Installed +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp_api/ DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test) +# CTest - Needs rocAL Installed enable_testing() include(CTest) -add_subdirectory(tests/cpp_api_tests) +add_subdirectory(tests/cpp_api) # set package information set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) @@ -180,8 +187,6 @@ set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan) install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime) 
-install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-dev COMPONENT dev) -install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-test COMPONENT test) if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) @@ -194,8 +199,20 @@ endif() set(ROCAL_DEBIAN_PACKAGE_LIST "rpp, mivisionx") set(ROCAL_RPM_PACKAGE_LIST "rpp, mivisionx") # Set the dev dependent packages -set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "rpp-dev, mivisionx-dev, liblmdb-dev") -set(ROCAL_RPM_DEV_PACKAGE_LIST "rpp-devel, mivisionx-devel, lmdb-devel") +set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "rpp-dev, mivisionx-dev, liblmdb-dev, libprotobuf-dev") +set(ROCAL_RPM_DEV_PACKAGE_LIST "rpp-devel, mivisionx-devel, lmdb-devel, protobuf-devel") + +# Add rocDecode Deps for HW Decode - Exclude centos-7 +if (EXISTS "/etc/os-release") + file(READ "/etc/os-release" OS_RELEASE) + string(REGEX MATCH "CentOS-7" CENTOS_7_FOUND ${OS_RELEASE}) + if(NOT CENTOS_7_FOUND) + set(ROCAL_DEBIAN_PACKAGE_LIST "${ROCAL_DEBIAN_PACKAGE_LIST}, rocdecode") + set(ROCAL_RPM_PACKAGE_LIST "${ROCAL_RPM_PACKAGE_LIST}, rocdecode") + set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "${ROCAL_DEBIAN_DEV_PACKAGE_LIST}, rocdecode-dev") + set(ROCAL_RPM_DEV_PACKAGE_LIST "${ROCAL_RPM_DEV_PACKAGE_LIST}, rocdecode-devel") + endif() +endif() # '%{?dist}' breaks manual builds on debian systems due to empty Provides execute_process( @@ -231,21 +248,17 @@ set(CPACK_RPM_COMPONENT_INSTALL ON) set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${ROCAL_RPM_PACKAGE_LIST}") set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}") -set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}") set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel") set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_RUNTIME_PACKAGE_NAME}, ${ROCAL_RPM_DEV_PACKAGE_LIST}") set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel") 
-set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel") # RPM package - specific variable for ASAN set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${ROCAL_RPM_PACKAGE_LIST}" ) set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan") -set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan") # RPM package specific variable for ASAN set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" ) set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" ) set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test") -set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test") if(NOT ROCM_DEP_ROCMCORE) string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}) @@ -271,10 +284,10 @@ set(CPACK_DEBIAN_PACKAGE_RELEASE "local") set(CPACK_RPM_PACKAGE_RELEASE "local") if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) - set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) endif() if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) - set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) + set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) endif() if(APPLE) diff --git a/README.md b/README.md index 37d2edc5e..4bf9eda92 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ rocAL can be currently used to perform the following operations either with rand - + @@ -24,31 +24,31 @@ rocAL can be currently used to perform the following operations either with rand - + - + - + - + - + @@ -60,91 +60,157 @@ rocAL can be currently used to perform the following operations either with rand + Ubuntu - `20.04` / `22.04` + CentOS - `7` + RedHat - `8` / `9` - + SLES - `15-SP4` -* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) -* Install ROCm with 
[amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32` -* [RPP](https://github.com/ROCm/rpp) -* [AMD OpenVX™](https://github.com/ROCm/MIVisionX/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media` - MIVisionX Components -* [Turbo JPEG](https://libjpeg-turbo.org/) - Version 2.0.6.2 from `https://github.com/rrawther/libjpeg-turbo.git` -* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher -* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher -* [LMBD Library](http://www.lmdb.tech/doc/) -* [RapidJSON](https://github.com/Tencent/rapidjson) -* [PyBind11](https://github.com/pybind/pybind11) -* [HIP](https://github.com/ROCm/HIP) -* OpenMP -* C++17 - -## Build and install instructions - -* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) -* Install ROCm with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32` + + SLES - `15-SP5` -### Package install +* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) +> [!IMPORTANT] +> `gfx908` or higher GPU required -Install rocAL runtime, development, and test packages. 
-* Runtime package - `rocal` only provides the dynamic libraries -* Development package - `rocal-dev`/`rocal-devel` provides the libraries, executables, header files, and samples -* Test package - `rocal-test` provides ctest to verify installation +* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm +> [!IMPORTANT] +> `sudo amdgpu-install --usecase=rocm` -##### On `Ubuntu` +* [HIP](https://github.com/ROCm/HIP) ```shell - sudo apt-get install rocal rocal-dev rocal-test + sudo apt install rocm-hip-runtime-dev ``` -##### On `CentOS`/`RedHat` + +* [RPP](https://github.com/ROCm/rpp) ```shell - sudo yum install rocal rocal-devel rocal-test + sudo apt install rpp-dev ``` -##### On `SLES` + +* MIVisionX Components: [AMD OpenVX™](https://github.com/ROCm/MIVisionX/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media` ```shell - sudo zypper install rocal rocal-devel rocal-test + sudo apt install mivisionx-dev + ``` + +* [rocDecode](https://github.com/ROCm/rocDecode) + ```shell + sudo apt install rocdecode-dev + ``` + +* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher + ```shell + sudo apt install half + ``` + +* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher + ```shell + sudo apt install libprotobuf-dev + ``` + +* [LMDB Library](http://www.lmdb.tech/doc/) + ```shell + sudo apt install liblmdb-dev ``` - **Note:** - * Package install requires `Turbo JPEG`, `PyBind 11 v2.10.4` and `Protobuf V3.12.4` manual install - * `CentOS`/`RedHat`/`SLES` requires `FFMPEG Dev` package manual install +* Python3 and Python3 PIP + ```shell + sudo apt install python3-dev python3-pip + ``` -#### Source build and install +* Python Wheel + ```shell + pip3 install wheel + ``` -### Prerequisites setup script for Linux - rocAL-setup.py +* 
[PyBind11](https://github.com/pybind/pybind11) + * Source: `https://github.com/pybind/pybind11` + * Tag: [v2.11.1](https://github.com/pybind/pybind11/releases/tag/v2.11.1) -For the convenience of the developer, we here provide the setup script which will install all the dependencies required by this project. +* [Turbo JPEG](https://libjpeg-turbo.org/) + * Source: `https://github.com/libjpeg-turbo/libjpeg-turbo.git` + * Tag: [3.0.2](https://github.com/libjpeg-turbo/libjpeg-turbo/releases/tag/3.0.2) -**NOTE:** This script only needs to be executed once. +* [RapidJSON](https://github.com/Tencent/rapidjson) + * Source: `https://github.com/Tencent/rapidjson.git` + * Tag: `master` -### Prerequisites for running the script +* **Optional**: FFMPEG + ```shell + sudo apt install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev + ``` -* Linux distribution - + Ubuntu - `20.04` / `22.04` - + CentOS - `7` - + RedHat - `8` / `9` - + SLES - `15-SP4` -* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) -* Install ROCm with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32` +* **Optional**: OpenCV + ```shell + sudo apt install libopencv-dev + ``` + +> [!IMPORTANT] +> * Compiler features required +> * OpenMP +> * C++17 -**usage:** +>[!NOTE] +> * All package installs are shown with the `apt` package manager. Use the appropriate package manager for your operating system. + +### Prerequisites setup script + +For your convenience, we provide the setup script, [rocAL-setup.py](https://github.com/ROCm/rocAL/blob/develop/rocAL-setup.py), which installs all required dependencies. Run this script only once. 
```shell -python rocAL-setup.py --directory [setup directory - optional (default:~/)] - --opencv [OpenCV Version - optional (default:4.6.0)] - --protobuf [ProtoBuf Version - optional (default:3.12.4)] - --pybind11 [PyBind11 Version - optional (default:v2.10.4)] - --reinstall [Remove previous setup and reinstall (default:OFF)[options:ON/OFF]] - --backend [rocAL Dependency Backend - optional (default:HIP) [options:OCL/HIP]] - --rocm_path [ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required] +python rocAL-setup.py --directory [setup directory - optional (default:~/)] + --rocm_path [ROCm Installation Path - optional (default:/opt/rocm)] + --backend [rocAL Dependency Backend - optional (default:HIP) [options:OCL/HIP]] + --ffmpeg [FFMPEG Installation - optional (default:OFF)[options:ON/OFF]] + --reinstall [Reinstall - optional (default:OFF)[options:ON/OFF]] ``` -**Note:** - * **ROCm upgrade** requires the setup script **rerun**. -### Using rocAL-setup.py - +## Installation instructions + +The installation process uses the following steps: + +* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification + +* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm` + +* Use **either** [package install](#package-install) **or** [source install](#source-install) as described below. + +### Package install + +Install rocAL runtime, development, and test packages. 
+ +* Runtime package - `rocal` only provides the dynamic libraries +* Development package - `rocal-dev`/`rocal-devel` provides the libraries, executables, header files, and samples +* Test package - `rocal-test` provides ctest to verify installation + +#### `Ubuntu` + + ```shell + sudo apt-get install rocal rocal-dev rocal-test + ``` + +#### `CentOS`/`RedHat` + + ```shell + sudo yum install rocal rocal-devel rocal-test + ``` + +#### `SLES` + + ```shell + sudo zypper install rocal rocal-devel rocal-test + ``` + +>[!NOTE] +> * Package install requires `TurboJPEG` and `RapidJSON` manual install +> * `CentOS`/`RedHat`/`SLES` requires additional `FFMPEG Dev` package manual install + +### Source install + +To build rocAL from source and install, follow the steps below: + * Clone rocAL source code ```shell git clone https://github.com/ROCm/rocAL.git ``` + **Note:** rocAL has support for two GPU backends: **OPENCL** and **HIP**: +#### HIP Backend + * Instructions for building rocAL with the **HIP** GPU backend (default GPU backend): + run the setup script to install all the dependencies required by the **HIP** GPU backend: ```shell @@ -161,32 +227,38 @@ git clone https://github.com/ROCm/rocAL.git sudo cmake --build . --target PyPackageInstall sudo make install ``` +>[!NOTE] +> * `PyPackageInstall` used for rocal_pybind installation +> * `sudo` required for pybind installation + +>[!IMPORTANT] +> * Use `-D PYTHON_VERSION_SUGGESTED=3.x` with `cmake` for using a specific Python3 version if required. +> * Use `-D AUDIO_SUPPORT=ON` to enable Audio features, Audio support will be enabled by default with ROCm versions > 6.2 + run tests - [test option instructions](https://github.com/ROCm/MIVisionX/wiki/CTest) ```shell make test ``` - **Note:** - + `PyPackageInstall` used for rocal_pybind installation - + `sudo` required for pybind installation - +>[!NOTE] +> To run tests with verbose option, use `make test ARGS="-VV"`. 
+ +#### OpenCL Backend * Instructions for building rocAL with [**OPENCL** GPU backend](https://github.com/ROCm/rocAL/wiki/OpenCL-Backend) - **Note:** - + rocAL_pybind is not supported on OPENCL backend - + rocAL cannot be installed for both GPU backends in the same default folder (i.e., /opt/rocm/) - + if an app interested in installing rocAL with both GPU backends, then add **-DCMAKE_INSTALL_PREFIX** in the cmake - commands to install rocAL with OPENCL and HIP backends into two separate custom folders. +>[!NOTE] +> + rocAL_pybind is not supported on OPENCL backend +> + rocAL cannot be installed for both GPU backends in the same default folder (i.e., /opt/rocm/) +> + if an app is interested in installing rocAL with both GPU backends, then add **-DCMAKE_INSTALL_PREFIX** in the cmake commands to install rocAL with OPENCL and HIP backends into two separate custom folders. ## Verify installation * The installer will copy - + Executables into `/opt/rocm/bin` - + Libraries into `/opt/rocm/lib` - + Header files into `/opt/rocm/include/rocal` - + Apps, & Samples folder into `/opt/rocm/share/rocal` - + Documents folder into `/opt/rocm/share/doc/rocal` + * Executables into `/opt/rocm/bin` + * Libraries into `/opt/rocm/lib` + * Header files into `/opt/rocm/include/rocal` + * Apps, & Samples folder into `/opt/rocm/share/rocal` + * Documents folder into `/opt/rocm/share/doc/rocal` ### Verify with rocal-test package @@ -197,7 +269,14 @@ mkdir rocAL-test && cd rocAL-test cmake /opt/rocm/share/rocal/test/ ctest -VV ``` - +>[!NOTE] +> * Make sure all rocAL required libraries are in your PATH +> * `RHEL`/`SLES` - Export FFMPEG libraries into your PATH +> + `export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64/:/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH` +> ```shell +> export PATH=$PATH:/opt/rocm/bin +> export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib +> ``` ## Documentation Run the steps below to build documentation locally. 
@@ -209,7 +288,7 @@ pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html ``` * Doxygen -``` +```bash doxygen .Doxyfile ``` @@ -232,17 +311,20 @@ Review all notable [changes](CHANGELOG.md#changelog) with the latest release ### Tested Configurations * Linux distribution - + Ubuntu - `20.04` / `22.04` - + CentOS - `7` - + RedHat - `8` / `9` - + SLES - `15-SP4` -* ROCm: rocm-core - `5.7.0.50700-6` + * Ubuntu - `20.04` / `22.04` + * CentOS - `7` + * RedHat - `8` / `9` + * SLES - `15-SP5` +* ROCm: rocm-core - `6.1.0.60100-64` * RPP - `rpp` & `rpp-dev`/`rpp-devel` * MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel` -* Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4) -* OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) -* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2) -* RapidJSON- [master](https://github.com/Tencent/rapidjson) -* PyBind11 - [v2.10.4](https://github.com/pybind/pybind11) -* rocAL Setup Script - `V1.1.0` +* rocDecode - `rocdecode` & `rocdecode-dev`/`rocdecode-devel` +* Protobuf - `libprotobuf-dev`/`protobuf-devel` +* RapidJSON - `https://github.com/Tencent/rapidjson` +* Turbo JPEG - [Version 3.0.2](https://libjpeg-turbo.org/) +* PyBind11 - [v2.11.1](https://github.com/pybind/pybind11) +* FFMPEG - `ffmpeg` dev package +* OpenCV - `libopencv-dev` / [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0) +* libsndfile - [1.0.31](https://github.com/libsndfile/libsndfile/releases/tag/1.0.31) +* rocAL Setup Script - `V2.5.0` * Dependencies for all the above packages diff --git a/apps/README.md b/apps/README.md deleted file mode 100644 index 4205215fe..000000000 --- a/apps/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Applications - -rocAL has several applications built on top of AMD optimized libraries that can be used as prototypes or used as models to develop products. 
- -## Prerequisites -* [rocAL](https://github.com/ROCm/rocAL) - -## Image Augmentation - -This sample [application](./image_augmentation#image-augmentation-application) demonstrates the basic usage of rocAL's C API to load JPEG images from the disk and modify them in different possible ways and displays the output images. - -

diff --git a/apps/image_augmentation/image_augmentation.cpp b/apps/image_augmentation/image_augmentation.cpp deleted file mode 100644 index 91184f805..000000000 --- a/apps/image_augmentation/image_augmentation.cpp +++ /dev/null @@ -1,296 +0,0 @@ -/* -MIT License - -Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include -#include -#include -#include - -#include -using namespace cv; - -#if USE_OPENCV_4 -#define CV_FONT_HERSHEY_DUPLEX FONT_HERSHEY_DUPLEX -#define CV_WINDOW_AUTOSIZE WINDOW_AUTOSIZE -#define CV_RGB2BGR cv::COLOR_BGR2RGB -#else -#include -#endif - -#include "rocal_api.h" - -#define DISPLAY -using namespace std::chrono; - - -int main(int argc, const char ** argv) -{ - // check command-line usage - const int MIN_ARG_COUNT = 2; - if(argc < MIN_ARG_COUNT) { - printf( "Usage: image_augmentation \ - decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count \n" ); - return -1; - } - int argIdx = 0; - const char * folderPath1 = argv[++argIdx]; - int video_mode = 0;// 0 means no video decode, 1 means hardware, 2 means software decoding - bool display = 1;// Display the images - int aug_depth = 1;// how deep is the augmentation tree - int rgb = 1;// process color images - int decode_width = 0; - int decode_height = 0; - bool processing_device = 1; - size_t shard_count = 2; - int shuffle = 0; - int dec_mode = 0; - - if(argc >= argIdx+MIN_ARG_COUNT) - processing_device = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - decode_width = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - decode_height = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - video_mode = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - rgb = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - display = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - shard_count = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - shuffle = atoi(argv[++argIdx]); - - if(argc >= argIdx+MIN_ARG_COUNT) - dec_mode = atoi(argv[++argIdx]); - - int inputBatchSize = 2; - - std::cout << ">>> Running on " << (processing_device?"GPU":"CPU") << std::endl; - - RocalImageColor color_format = (rgb != 0) ? 
RocalImageColor::ROCAL_COLOR_RGB24 : RocalImageColor::ROCAL_COLOR_U8; - - auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1); - - if(rocalGetStatus(handle) != ROCAL_OK) - { - std::cout << "Could not create the rocAL contex\n"; - return -1; - } - - RocalDecoderType dec_type = (RocalDecoderType) dec_mode; - - /*>>>>>>>>>>>>>>>> Creating rocAL parameters <<<<<<<<<<<<<<<<*/ - - // Creating uniformly distributed random objects to override some of the default augmentation parameters - RocalFloatParam rand_crop_area = rocalCreateFloatUniformRand( 0.3, 0.5 ); - RocalIntParam color_temp_adj = rocalCreateIntParameter(0); - - // Creating a custom random object to set a limited number of values to randomize the rotation angle - const size_t num_values = 3; - float values[num_values] = {0,10,135}; - double frequencies[num_values] = {1, 5, 5}; - - RocalFloatParam rand_angle = rocalCreateFloatRand( values , frequencies, num_values); - - - /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/ - RocalImage input1; - - - if(video_mode != 0) - { - unsigned sequence_length = 3; - unsigned frame_step = 3; - unsigned frame_stride = 1; - if (decode_height <= 0 || decode_width <= 0) - { - std::cout << "Output width and height is needed for video decode\n"; - return -1; - } - input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? 
RocalDecodeDevice::ROCAL_HW_DECODE:RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false); - } - else - { - // The jpeg file loader can automatically select the best size to decode all images to that size - // User can alternatively set the size or change the policy that is used to automatically find the size - if (dec_type == RocalDecoderType::ROCAL_DECODER_OPENCV) std::cout << "Using OpenCV decoder for Jpeg Source\n"; - if(decode_height <= 0 || decode_width <= 0) - input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false); - else - input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type); - - } - - if(rocalGetStatus(handle) != ROCAL_OK) - { - std::cout << "JPEG source could not initialize : "<>>>>>>>>>>>>>>>>>> Diplay using OpenCV <<<<<<<<<<<<<<<<<*/ - //initializations for logos and heading - cv::Mat AMD_Epyc_Black_resize, AMD_ROCm_Black_resize; - AMD_Epyc_Black_resize = cv::imread("../../../samples/images/amd-epyc-black-resize.png"); - AMD_ROCm_Black_resize = cv::imread("../../../samples/images/rocm-black-resize.png"); - int fontFace = CV_FONT_HERSHEY_DUPLEX; - int thickness = 1.3; - std::string bufferName = "rocAL Image Augmentation"; - - int h = rocalGetAugmentationBranchCount(handle) * rocalGetOutputHeight(handle); - int w = rocalGetOutputWidth(handle); - int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24 ) ? 3 : 1); - std::cout << "output width "<< w << " output height "<< h << " color planes "<< p << std::endl; - const unsigned number_of_cols = video_mode ? 1 : 10; - auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24 ) ? 
CV_8UC3 : CV_8UC1); - cv::Mat mat_output(h+AMD_ROCm_Black_resize.rows, w*number_of_cols, cv_color_format); - cv::Mat mat_input(h, w, cv_color_format); - cv::Mat mat_color; - int col_counter = 0; - if (display) - cv::namedWindow( "output", CV_WINDOW_AUTOSIZE ); - - //adding heading to output display - cv::Rect roi = Rect(0,0,w*number_of_cols,AMD_Epyc_Black_resize.rows); - mat_output(roi).setTo(cv::Scalar(128,128,128)); - putText(mat_output, bufferName, Point(250, 70), fontFace, 1.2, cv::Scalar(66,13,9), thickness,5); - - //adding logos to output display - cv::Mat mat_output_ROI = mat_output(cv::Rect(w*number_of_cols - AMD_Epyc_Black_resize.cols,0, AMD_Epyc_Black_resize.cols, AMD_Epyc_Black_resize.rows)); - cv::Mat mat_output_ROI_1 = mat_output(cv::Rect(0,0, AMD_ROCm_Black_resize.cols, AMD_ROCm_Black_resize.rows)); - AMD_Epyc_Black_resize.copyTo(mat_output_ROI); - AMD_ROCm_Black_resize.copyTo(mat_output_ROI_1); - - high_resolution_clock::time_point t1 = high_resolution_clock::now(); - int counter = 0; - int color_temp_increment = 1; - while (!rocalIsEmpty(handle)) - { - if(rocalRun(handle) != 0) - break; - - if(rocalGetIntValue(color_temp_adj) <= -99 || rocalGetIntValue(color_temp_adj)>=99) - color_temp_increment *= -1; - - rocalUpdateIntParameter(rocalGetIntValue(color_temp_adj)+color_temp_increment, color_temp_adj); - - rocalCopyToOutput(handle, mat_input.data, h*w*p); - counter += inputBatchSize; - if(!display) - continue; - - mat_input.copyTo(mat_output(cv::Rect( col_counter*w, AMD_ROCm_Black_resize.rows, w, h))); - if(color_format == RocalImageColor::ROCAL_COLOR_RGB24 ) - { - cv::cvtColor(mat_output, mat_color, CV_RGB2BGR); - cv::imshow("output",mat_color); - } - else - { - cv::imshow("output",mat_output); - } - cv::waitKey(1); - col_counter = (col_counter+1)%number_of_cols; - } - high_resolution_clock::time_point t2 = high_resolution_clock::now(); - auto dur = duration_cast( t2 - t1 ).count(); - auto rocal_timing = rocalGetTimingInfo(handle); - std::cout << 
"Load time "<< rocal_timing.load_time << std::endl; - std::cout << "Decode time "<< rocal_timing.decode_time << std::endl; - std::cout << "Process time "<< rocal_timing.process_time << std::endl; - std::cout << "Transfer time "<< rocal_timing.transfer_time << std::endl; - std::cout << ">>>>> "<< counter << " images/frames Processed. Total Elapsed Time " << dur/1000000 << " sec " << dur%1000000 << " us " << std::endl; - rocalRelease(handle); - mat_input.release(); - mat_output.release(); - return 0; -} diff --git a/cmake/FindMIVisionX.cmake b/cmake/FindMIVisionX.cmake index 52360537d..dcf89e67b 100644 --- a/cmake/FindMIVisionX.cmake +++ b/cmake/FindMIVisionX.cmake @@ -33,11 +33,9 @@ find_path(MIVisionX_INCLUDE_DIRS NAMES vx_ext_amd.h HINTS $ENV{MIVisionX_PATH}/include/mivisionx - $ENV{ROCM_PATH}/include/mivisionx PATHS ${MIVisionX_PATH}/include/mivisionx /usr/include - $ENV{ROCM_PATH}/include/mivisionx ${ROCM_PATH}/include/mivisionx ) mark_as_advanced(MIVisionX_INCLUDE_DIRS) @@ -47,12 +45,9 @@ find_library(OPENVX_LIBRARIES NAMES libopenvx${SHARED_LIB_TYPE} HINTS $ENV{MIVisionX_PATH}/lib - $ENV{MIVisionX_PATH}/lib64 PATHS ${MIVisionX_PATH}/lib - ${MIVisionX_PATH}/lib64 /usr/lib - $ENV{ROCM_PATH}/lib ${ROCM_PATH}/lib ) mark_as_advanced(OPENVX_LIBRARIES) @@ -62,30 +57,13 @@ find_library(VXRPP_LIBRARIES NAMES libvx_rpp${SHARED_LIB_TYPE} HINTS $ENV{MIVisionX_PATH}/lib - $ENV{MIVisionX_PATH}/lib64 PATHS ${MIVisionX_PATH}/lib - ${MIVisionX_PATH}/lib64 /usr/lib - $ENV{ROCM_PATH}/lib ${ROCM_PATH}/lib ) mark_as_advanced(VXRPP_LIBRARIES) -find_path(MIVisionX_LIBRARIES_DIRS - NAMES libopenvx${SHARED_LIB_TYPE} - HINTS - $ENV{MIVisionX_PATH}/lib - $ENV{MIVisionX_PATH}/lib64 - PATHS - ${MIVisionX_PATH}/lib - ${MIVisionX_PATH}/lib64 - /usr/lib - $ENV{ROCM_PATH}/lib - ${ROCM_PATH}/lib -) -mark_as_advanced(MIVisionX_LIBRARIES_DIRS) - if(OPENVX_LIBRARIES AND MIVisionX_INCLUDE_DIRS) set(MIVisionX_FOUND TRUE) endif( ) @@ -95,19 +73,36 @@ find_package_handle_standard_args( MIVisionX 
FOUND_VAR MIVisionX_FOUND REQUIRED_VARS OPENVX_LIBRARIES - VXRPP_LIBRARIES + VXRPP_LIBRARIES MIVisionX_INCLUDE_DIRS - MIVisionX_LIBRARIES_DIRS ) set(MIVisionX_FOUND ${MIVisionX_FOUND} CACHE INTERNAL "") set(OPENVX_LIBRARIES ${OPENVX_LIBRARIES} CACHE INTERNAL "") set(VXRPP_LIBRARIES ${VXRPP_LIBRARIES} CACHE INTERNAL "") set(MIVisionX_INCLUDE_DIRS ${MIVisionX_INCLUDE_DIRS} CACHE INTERNAL "") -set(MIVisionX_LIBRARIES_DIRS ${MIVisionX_LIBRARIES_DIRS} CACHE INTERNAL "") if(MIVisionX_FOUND) - message("-- ${White}Using MIVisionX -- \n\tLibraries:${OPENVX_LIBRARIES} \n\tIncludes:${MIVisionX_INCLUDE_DIRS}${ColourReset}") + if(VXRPP_LIBRARIES) + if(EXISTS "${MIVisionX_INCLUDE_DIRS}/vx_ext_rpp_version.h") + # Find RPP Version + file(READ "${MIVisionX_INCLUDE_DIRS}/vx_ext_rpp_version.h" VX_EXT_RPP_VERSION_FILE) + string(REGEX MATCH "VX_EXT_RPP_VERSION_MAJOR ([0-9]*)" _ ${VX_EXT_RPP_VERSION_FILE}) + set(VX_EXT_RPP_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE INTERNAL "") + string(REGEX MATCH "VX_EXT_RPP_VERSION_MINOR ([0-9]*)" _ ${VX_EXT_RPP_VERSION_FILE}) + set(VX_EXT_RPP_VERSION_MINOR ${CMAKE_MATCH_1} CACHE INTERNAL "") + string(REGEX MATCH "VX_EXT_RPP_VERSION_PATCH ([0-9]*)" _ ${VX_EXT_RPP_VERSION_FILE}) + set(VX_EXT_RPP_VERSION_PATCH ${CMAKE_MATCH_1} CACHE INTERNAL "") + message("-- ${White}Found VX RPP Version: ${VX_EXT_RPP_VERSION_MAJOR}.${VX_EXT_RPP_VERSION_MINOR}.${VX_EXT_RPP_VERSION_PATCH}${ColourReset}") + message("-- ${White}Using MIVisionX -- \n\tLibraries:${OPENVX_LIBRARIES} \n\tIncludes:${MIVisionX_INCLUDE_DIRS}${ColourReset}") + else() + set(VX_EXT_RPP_VERSION_MAJOR 0) + set(VX_EXT_RPP_VERSION_MINOR 0) + set(VX_EXT_RPP_VERSION_PATCH 0) + endif() + else() + message("-- ${Yellow}VX RPP - Not Found${ColourReset}") + endif() else() if(MIVisionX_FIND_REQUIRED) message(FATAL_ERROR "{Red}FindMIVisionX -- NOT FOUND${ColourReset}") diff --git a/cmake/FindSndFile.cmake b/cmake/FindSndFile.cmake new file mode 100644 index 000000000..e8e412d39 --- /dev/null +++ 
b/cmake/FindSndFile.cmake @@ -0,0 +1,69 @@ +################################################################################ +# +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ +find_path(SNDFILE_INCLUDE_DIRS + NAMES sndfile.h + HINTS + $ENV{SNDFILE_PATH}/include + PATHS + /usr/local/include + /usr/include +) +mark_as_advanced(SNDFILE_INCLUDE_DIRS) + +find_library(SNDFILE_LIBRARIES + NAMES sndfile libsndfile + HINTS + $ENV{SNDFILE_PATH}/lib + $ENV{SNDFILE_PATH}/lib64 + PATHS + ${CMAKE_SYSTEM_PREFIX_PATH} + ${SNDFILE_PATH} + /usr/local/ + PATH_SUFFIXES lib lib64 +) +mark_as_advanced(SNDFILE_LIBRARIES) + +if(SNDFILE_LIBRARIES AND SNDFILE_INCLUDE_DIRS) + set(SNDFILE_FOUND TRUE) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(SndFile + FOUND_VAR SNDFILE_FOUND + REQUIRED_VARS + SNDFILE_LIBRARIES + SNDFILE_INCLUDE_DIRS +) + +set(SNDFILE_FOUND ${SNDFILE_FOUND} CACHE INTERNAL "") +set(SNDFILE_LIBRARIES ${SNDFILE_LIBRARIES} CACHE INTERNAL "") +set(SNDFILE_INCLUDE_DIRS ${SNDFILE_INCLUDE_DIRS} CACHE INTERNAL "") + +if(SNDFILE_FOUND) + message("-- ${White}Using SndFile -- \n\tLibraries:${SNDFILE_LIBRARIES} \n\tIncludes:${SNDFILE_INCLUDE_DIRS}${ColourReset}") +else() + message( "-- ${Yellow}NOTE: FindSndFile failed to find -- SndFile${ColourReset}" ) +endif() diff --git a/cmake/FindTurboJpeg.cmake b/cmake/FindTurboJpeg.cmake index 60cd9cf6b..83493049b 100644 --- a/cmake/FindTurboJpeg.cmake +++ b/cmake/FindTurboJpeg.cmake @@ -34,8 +34,8 @@ find_path(TurboJpeg_INCLUDE_DIRS HINTS $ENV{TURBO_JPEG_PATH}/include PATHS - ${TURBO_JPEG_PATH}/include /usr/include + ${TURBO_JPEG_PATH}/include /opt/libjpeg-turbo/include ) mark_as_advanced(TurboJpeg_INCLUDE_DIRS) @@ -46,27 +46,39 @@ find_library(TurboJpeg_LIBRARIES $ENV{TURBO_JPEG_PATH}/lib $ENV{TURBO_JPEG_PATH}/lib64 PATHS + /usr/lib ${TURBO_JPEG_PATH}/lib ${TURBO_JPEG_PATH}/lib64 - /usr/lib /opt/libjpeg-turbo/lib ) mark_as_advanced(TurboJpeg_LIBRARIES) -find_path(TurboJpeg_LIBRARIES_DIRS - NAMES libturbojpeg${SHARED_LIB_TYPE} +# Libjpeg +find_path(LIBJPEG_INCLUDE_DIR + NAMES jpeglib.h + 
HINTS + $ENV{TURBO_JPEG_PATH}/include + PATHS + /usr/include + ${TURBO_JPEG_PATH}/include + /opt/libjpeg-turbo/include +) +mark_as_advanced(LIBJPEG_INCLUDE_DIR) + +find_library(LIBJPEG_LIBRARIES + NAMES libjpeg${SHARED_LIB_TYPE} HINTS $ENV{TURBO_JPEG_PATH}/lib $ENV{TURBO_JPEG_PATH}/lib64 PATHS + /usr/lib ${TURBO_JPEG_PATH}/lib ${TURBO_JPEG_PATH}/lib64 - /usr/lib /opt/libjpeg-turbo/lib ) -mark_as_advanced(TurboJpeg_LIBRARIES_DIRS) +mark_as_advanced(LIBJPEG_LIBRARIES) -if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS) +if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS AND LIBJPEG_INCLUDE_DIR AND LIBJPEG_LIBRARIES) set(TurboJpeg_FOUND TRUE) endif( ) @@ -76,13 +88,15 @@ find_package_handle_standard_args( TurboJpeg REQUIRED_VARS TurboJpeg_LIBRARIES TurboJpeg_INCLUDE_DIRS - TurboJpeg_LIBRARIES_DIRS + LIBJPEG_INCLUDE_DIR + LIBJPEG_LIBRARIES ) set(TurboJpeg_FOUND ${TurboJpeg_FOUND} CACHE INTERNAL "") set(TurboJpeg_LIBRARIES ${TurboJpeg_LIBRARIES} CACHE INTERNAL "") set(TurboJpeg_INCLUDE_DIRS ${TurboJpeg_INCLUDE_DIRS} CACHE INTERNAL "") -set(TurboJpeg_LIBRARIES_DIRS ${TurboJpeg_LIBRARIES_DIRS} CACHE INTERNAL "") +set(LIBJPEG_LIBRARIES ${LIBJPEG_LIBRARIES} CACHE INTERNAL "") +set(LIBJPEG_INCLUDE_DIR ${LIBJPEG_INCLUDE_DIR} CACHE INTERNAL "") if(TurboJpeg_FOUND) message("-- ${White}Using Turbo JPEG -- \n\tLibraries:${TurboJpeg_LIBRARIES} \n\tIncludes:${TurboJpeg_INCLUDE_DIRS}${ColourReset}") diff --git a/docker/README.md b/docker/README.md index 6be6057bb..7275cba3c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,6 +4,23 @@ Docker is a set of platform as a service (PaaS) products that use OS-level virtu ## Build - dockerfiles +```shell +sudo docker build --build-arg {ARG_1_NAME}={ARG_1_VALUE} [--build-arg {ARG_2_NAME}={ARG_2_VALUE}] -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} . ``` -sudo docker build --build-arg {ARG_NAME}={ARG_VALUE} -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} . 
+ +## ARG options + +* Pytorch docker: + ``` +PYTORCH_VERSION: rocm/pytorch docker tag +ROCAL_PYTHON_VERSION_SUGGESTED: Python version if required for rocal_pybind +``` + +* Tensorflow docker: + +``` +TENSORFLOW_VERSION: rocm/tensorflow docker tag +ROCAL_PYTHON_VERSION_SUGGESTED: Python version if required for rocal_pybind +``` + diff --git a/docker/rocal-on-rhel-09.dockerfile b/docker/rocal-on-rhel-09.dockerfile deleted file mode 100644 index a23e0775a..000000000 --- a/docker/rocal-on-rhel-09.dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -FROM compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-rel-5.4:67-rhel-9.x-stg1 - -ENV ROCAL_DEPS_ROOT=/rocAL-deps -WORKDIR $ROCAL_DEPS_ROOT - -RUN sudo yum update -y - -# install rocAL base dependencies -RUN sudo yum -y install gcc g++ cmake pkg-config git kernel-devel - -# install OpenCV -RUN sudo yum install opencv opencv-devel - -# install rocAL neural net dependencies -RUN sudo yum -y install rocblas rocblas-devel miopen-hip miopen-hip-devel migraphx migraphx-devel - - -# install rocAL dependencies -RUN apt-get -y install curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang -RUN mkdir rocAL_deps && cd rocAL_deps && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \ - unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd -RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev && \ - git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ - -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd -RUN apt-get -y install sqlite3 libsqlite3-dev libtool 
build-essential -RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ - ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd -RUN git clone -b 0.99 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \ - cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd - -ENV ROCAL_WORKSPACE=/workspace -WORKDIR $ROCAL_WORKSPACE - -# Install MIVisionX -RUN git clone https://github.com/ROCm/MIVisionX.git && \ - mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install \ No newline at end of file diff --git a/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile b/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile deleted file mode 100644 index 79f9c1894..000000000 --- a/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile +++ /dev/null @@ -1,69 +0,0 @@ -ARG PYTORCH_VERSION=latest -FROM rocm/pytorch:${PYTORCH_VERSION} - -ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4/ubuntu/focal/amdgpu-install_5.4.50400-1_all.deb -ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50400-1_all.deb - -ENV ROCAL_DEPS_ROOT=/rocAL-deps -WORKDIR $ROCAL_DEPS_ROOT - -RUN apt-get update -y - -# install rocAL base dependencies -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git - -# install ROCm for rocAL OpenCL/HIP dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration && \ - sudo apt-get -y clean && dpkg --add-architecture i386 && \ - sudo rm -rf /etc/apt/sources.list.d/amdgpu.list && \ - sudo rm -rf /etc/apt/sources.list.d/rocm.list && \ - wget ${ROCM_INSTALLER_REPO} && \ - sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \ - sudo apt-get update -y && \ - sudo amdgpu-install -y --usecase=graphics,rocm - -# install OpenCV -RUN DEBIAN_FRONTEND=noninteractive 
apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \ - libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip && \ - mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \ - mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd - -# install FFMPEG -ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/" -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \ - libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \ - nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \ - wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \ - ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \ - make -j8 && sudo make install && cd - -# install rocAL neural net dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \ - mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \ - unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd - -# install rocAL dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ - git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ - cmake 
-DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ - -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \ - git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \ - cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \ - git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ - ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd -ENV CUPY_INSTALL_USE_HIP=1 -ENV ROCM_HOME=/opt/rocm -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \ - git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \ - cmake ../ && make -j4 && sudo make install && cd ../../ && \ - pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ - cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \ - git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \ - pip install -e . 
--no-cache-dir -vvvv && pip install numpy==1.21 - -ENV ROCAL_WORKSPACE=/workspace -WORKDIR $ROCAL_WORKSPACE - -# install MIVisionX -RUN git clone https://github.com/ROCm/MIVisionX.git && \ - mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install diff --git a/docker/rocal-on-ubuntu-20.dockerfile b/docker/rocal-on-ubuntu-20.dockerfile index 7e0f57fd6..f6d2bd1e4 100644 --- a/docker/rocal-on-ubuntu-20.dockerfile +++ b/docker/rocal-on-ubuntu-20.dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 -ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4.1/ubuntu/focal/amdgpu-install_5.4.50401-1_all.deb -ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50401-1_all.deb +ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/6.1.1/ubuntu/focal/amdgpu-install_6.1.60101-1_all.deb +ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_6.1.60101-1_all.deb ENV ROCAL_DEPS_ROOT=/rocAL-deps WORKDIR $ROCAL_DEPS_ROOT @@ -13,11 +13,11 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g # install ROCm for rocAL OpenCL/HIP dependency RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration && \ - sudo apt-get -y clean && dpkg --add-architecture i386 && \ + sudo apt-get -y clean && \ wget ${ROCM_INSTALLER_REPO} && \ sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \ sudo apt-get update -y && \ - sudo amdgpu-install -y --usecase=graphics,rocm + sudo amdgpu-install -y --usecase=rocm # install OpenCV RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \ @@ -26,41 +26,41 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0- mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd # install FFMPEG -ENV 
PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/" -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \ - libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \ - nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \ - wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \ - ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \ - make -j8 && sudo make install && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev # install rocAL neural net dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \ - mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \ - unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev # install rocAL dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ - git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ - -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \ - git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir 
build && cd build && \ - cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \ - git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ - ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ + git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \ + git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ + ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../ + ENV CUPY_INSTALL_USE_HIP=1 ENV ROCM_HOME=/opt/rocm RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \ git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \ cmake ../ && make -j4 && sudo make install && cd ../../ && \ - pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ + pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \ - git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \ - pip install -e . 
--no-cache-dir -vvvv && pip install numpy==1.21 + pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \ + env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \ + git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \ + pip install -e . --no-cache-dir -vvvv + +# install MIVisionX +RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \ + mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install + +# install rocDecode +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev ENV ROCAL_WORKSPACE=/workspace WORKDIR $ROCAL_WORKSPACE -# install MIVisionX -RUN git clone https://github.com/ROCm/MIVisionX.git && \ - mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install +# Install rocAL +RUN pip install --upgrade pip +RUN git clone -b develop https://github.com/ROCm/rocAL && \ + mkdir build && cd build && cmake ../rocAL && make -j8 && cmake --build . 
--target PyPackageInstall && make install \ No newline at end of file diff --git a/docker/rocal-on-ubuntu-22.dockerfile b/docker/rocal-on-ubuntu-22.dockerfile index e68d3d4b9..c98d55278 100644 --- a/docker/rocal-on-ubuntu-22.dockerfile +++ b/docker/rocal-on-ubuntu-22.dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:22.04 -ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4.1/ubuntu/jammy/amdgpu-install_5.4.50401-1_all.deb -ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50401-1_all.deb +ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/6.1.1/ubuntu/jammy/amdgpu-install_6.1.60101-1_all.deb +ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_6.1.60101-1_all.deb ENV ROCAL_DEPS_ROOT=/rocAL-deps WORKDIR $ROCAL_DEPS_ROOT @@ -13,11 +13,11 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g # install ROCm for rocAL OpenCL/HIP dependencies RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration libstdc++-12-dev && \ - sudo apt-get -y clean && dpkg --add-architecture i386 && \ + sudo apt-get -y clean && \ wget ${ROCM_INSTALLER_REPO} && \ sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \ sudo apt-get update -y && \ - sudo amdgpu-install -y --usecase=graphics,rocm + sudo amdgpu-install -y --usecase=rocm # install OpenCV RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python3-dev python3-numpy \ @@ -26,43 +26,43 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0- mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd # install FFMPEG -ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/" -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \ - 
libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \ - nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \ - wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \ - ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \ - make -j8 && sudo make install && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev # install rocAL neural net dependencies -RUN apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx +RUN apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev # install rocAL dependencies -RUN apt-get -y install curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang -RUN mkdir rocAL_deps && cd rocAL_deps && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \ - unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd +RUN apt-get -y install rpp-dev curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev && \ - git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ - -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd + git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE 
-DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \ + make -j4 && sudo make install && cd ../../ RUN apt-get -y install sqlite3 libsqlite3-dev libtool build-essential RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd -RUN git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \ - cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ENV CUPY_INSTALL_USE_HIP=1 ENV ROCM_HOME=/opt/rocm RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \ git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \ cmake ../ && make -j4 && sudo make install && cd ../../ && \ - pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ + pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \ - git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \ - pip install -e . --no-cache-dir -vvvv && pip install numpy==1.21 + pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \ + env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \ + git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \ + pip install -e . 
--no-cache-dir -vvvv + +# Install MIVisionX +RUN git clone https://github.com/ROCm/MIVisionX && cd MIVisionX && \ + mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install + +# install rocDecode +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev ENV ROCAL_WORKSPACE=/workspace WORKDIR $ROCAL_WORKSPACE -# Install MIVisionX -RUN git clone https://github.com/ROCm/MIVisionX.git && \ - mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install \ No newline at end of file +# Install rocAL +RUN pip install --upgrade pip +RUN git clone -b develop https://github.com/ROCm/rocAL && \ + mkdir build && cd build && cmake ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install \ No newline at end of file diff --git a/docker/rocal-with-pytorch.dockerfile b/docker/rocal-with-pytorch.dockerfile index 8569747ad..a497ac5fa 100644 --- a/docker/rocal-with-pytorch.dockerfile +++ b/docker/rocal-with-pytorch.dockerfile @@ -1,4 +1,5 @@ ARG PYTORCH_VERSION=latest +ARG ROCAL_PYTHON_VERSION_SUGGESTED=3.9 FROM rocm/pytorch:${PYTORCH_VERSION} ENV ROCAL_DEPS_ROOT=/rocAL-deps @@ -10,47 +11,42 @@ RUN apt-get update -y RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git # install OpenCV -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \ - libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip && \ +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev \ + libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-dev unzip && \ mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \ mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && 
sudo make install && sudo ldconfig && cd # install FFMPEG -ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/" -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \ - libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \ - nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \ - wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \ - ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \ - make -j8 && sudo make install && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev # install rocAL neural net dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \ - mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \ - unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev # install rocAL dependency -RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ - git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ - -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \ - git clone 
-b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \ - cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \ - git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ - ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd -ENV CUPY_INSTALL_USE_HIP=1 -ENV ROCM_HOME=/opt/rocm +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ + git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \ + make -j4 && sudo make install && cd ../../ && \ + git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ + ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../ + RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \ git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \ cmake ../ && make -j4 && sudo make install && cd ../../ && \ - pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ - cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \ - git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \ - pip install -e . 
--no-cache-dir -vvvv && pip install numpy==1.21 + pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ + cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ + +# install MIVisionX +RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \ + mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install && cd + +# install rocDecode +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev ENV ROCAL_WORKSPACE=/workspace WORKDIR $ROCAL_WORKSPACE -# install MIVisionX -RUN git clone https://github.com/ROCm/MIVisionX.git && \ - mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install +# Install rocAL +RUN pip install --upgrade pip +RUN git clone -b develop https://github.com/ROCm/rocAL && \ + mkdir build && cd build && cmake -D PYTHON_VERSION_SUGGESTED=${ROCAL_PYTHON_VERSION_SUGGESTED} ../rocAL && make -j8 && cmake --build . 
--target PyPackageInstall && make install \ No newline at end of file diff --git a/docker/rocal-with-tensorflow.dockerfile b/docker/rocal-with-tensorflow.dockerfile new file mode 100644 index 000000000..09668aede --- /dev/null +++ b/docker/rocal-with-tensorflow.dockerfile @@ -0,0 +1,54 @@ +ARG TENSORFLOW_VERSION=latest +ARG ROCAL_PYTHON_VERSION_SUGGESTED=3.9 +FROM rocm/tensorflow:${TENSORFLOW_VERSION} + +ENV ROCAL_DEPS_ROOT=/rocAL-deps +WORKDIR $ROCAL_DEPS_ROOT + +RUN apt-get update -y + +# install rocAL base dependencies +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git + +# install OpenCV +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev \ + libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-dev unzip && \ + mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \ + mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd + +# install FFMPEG +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev + +# install rocAL neural net dependency +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev + +# install rocAL dependency +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \ + git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \ + make -j4 && sudo make install && cd ../../ && \ + git clone -b v3.21.9 
https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \ + ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../ + +ENV CUPY_INSTALL_USE_HIP=1 +ENV ROCM_HOME=/opt/rocm +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \ + git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \ + cmake ../ && make -j4 && sudo make install && cd ../../ && \ + pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \ + cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ + +# install MIVisionX +RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \ + mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install && cd + +# install rocDecode +RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev + +ENV ROCAL_WORKSPACE=/workspace +WORKDIR $ROCAL_WORKSPACE + +# Install rocAL +RUN pip install --upgrade pip +RUN git clone -b develop https://github.com/ROCm/rocAL && \ + mkdir build && cd build && cmake -D PYTHON_VERSION_SUGGESTED=${ROCAL_PYTHON_VERSION_SUGGESTED} ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install diff --git a/docs/README.md b/docs/README.md index 1b9abccf2..e7988e3f0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,6 +7,7 @@ AMD ROCm Augmentation Library (rocAL) is designed to efficiently do such process These pipelines are programmable by the user using both C++ and Python APIs. 
## User Guide Chapters + * [Chapter 1 - Overview](user_guide/ch1.md) * [Chapter 2 - Architecture Components](user_guide/ch2.md) * [Chapter 3 - Installation](user_guide/ch3.md) @@ -16,11 +17,11 @@ These pipelines are programmable by the user using both C++ and Python APIs. ## Key Components of rocAL -* Full processing pipeline support for data_loading, meta-data loading, augmentations, and data-format conversions for training and inference. -* Being able to do processing on CPU or Radeon GPU (with OpenCL or HIP backend) -* Ease of integration with framework plugins in Python -* Support variety of augmentation operations through AMD’s Radeon Performance Primitives (RPP). -* All available public and open-sourced under ROCm. +* Full processing pipeline support for data_loading, meta-data loading, augmentations, and data-format conversions for training and inference. +* Being able to do processing on CPU or Radeon GPU (with OpenCL or HIP backend) +* Ease of integration with framework plugins in Python +* Support variety of augmentation operations through AMD’s Radeon Performance Primitives (RPP). +* All available public and open-sourced under ROCm. ## Prerequisites @@ -32,37 +33,37 @@ Refer [rocAL build instructions](https://github.com/ROCm/rocAL#build-instruction ## rocAL Python -* rocAL Python package has been created using Pybind11 which enables data transfer between rocAL C++ API and Python API. -* Module imports are made similar to other data loaders like NVidia's DALI. -* rocal_pybind package has both PyTorch and TensorFlow framework support. -* Various reader format support including FileReader, COCOReader, and TFRecordReader. -* example folder contains sample implementations for each reader variation as well as sample training script for PyTorch -* rocAL is integrated into MLPerf Resnet-50 Pytorch classification example on the ImageNet dataset. 
+* rocAL Python package has been created using Pybind11 which enables data transfer between rocAL C++ API and Python API. +* rocAL Python bindings have both PyTorch and TensorFlow framework support. +* Various reader format support including FileReader, COCOReader, and TFRecordReader. +* [examples folder](https://github.com/ROCm/rocAL/tree/develop/docs/examples) has sample implementations for PyTorch and TensorFlow training and inference pipelines. ## rocAL Python API ### amd.rocal.fn -* Contains the image augmentations & file read and decode operations which are linked to rocAL C++ API -* All ops (listed below) are supported for the single input image and batched inputs. - -| Image Augmentation | Reader and Decoder | Geometric Ops | -| :----------------: | :--------------------: | :-----------------: | -| ColorTwist | File Reader | CropMirrorNormalize | -| Brightness | ImageDecoder | Resize | -| Gamma Correction | ImageDecoderRandomCrop | ResizeCrop | -| Snow | COCOReader | WarpAffine | -| Rain | TFRecordReader | FishEye | -| Blur | | LensCorrection | -| Jitter | | Rotate | -| Hue | | | -| Saturation | | | -| Fog | | | -| Contrast | | | -| Vignette | | | -| SNPNoise | | | -| Pixelate | | | -| Blend | | | +* Contains the image augmentations & file read and decode operations which are linked to rocAL C++ API +* All ops (listed below) are supported for the single input image and batched inputs.
+ +| Image Augmentation | Reader and Decoder | Geometric Ops | +| :----------------: | :-----------------------: | :--------------------------: | +| Color Twist | Image File Reader | Crop Mirror Normalize | +| Color Temperature | Caffe Reader | Crop Resize | +| Brightness | Caffe2 Reader | Resize | +| Gamma Correction | CIFAR10 Reader | Random Crop | +| Snow | COCO Reader | Warp Affine | +| Rain | TF Record Reader | Fish Eye | +| Blur | MXNet Record Reader | Lens Correction | +| Jitter | Video File Reader | Rotate | +| Hue | Image Decoder | Crop | +| Saturation | Image Decoder Random Crop | Flip | +| Fog | Video Decoder | Resize Crop Mirror | +| Contrast | | Resize Crop Mirror Normalize | +| Vignette | | | +| SNP Noise | | | +| Pixelate | | | +| Blend | | | +| Exposure | | | ### amd.rocal.pipeline @@ -73,51 +74,24 @@ Refer [rocAL build instructions](https://github.com/ROCm/rocAL#build-instruction ### amd.rocal.types -amd.rocal.types are enums exported from C++ API to python. Some examples include CPU, GPU, FLOAT, FLOAT16, RGB, GRAY, etc.. +amd.rocal.types are enums exported from C++ API to python. Some examples include CPU, GPU, FLOAT, FLOAT16, RGB, GRAY, etc. ### amd.rocal.plugin.pytorch -* Contains ROCALGenericIterator for Pytorch. -* ROCALClassificationIterator class implements iterator for image classification and return images with corresponding labels. -* From the above classes, any hybrid iterator pipeline can be created by adding augmentations. -* see example [PyTorch Simple Example](./examples). Requires PyTorch. 
- -### installing rocAL python plugin (Python 3.6) - -* Build and install RPP -* Build and install MIVisionX which installs rocAL c++ lib -* Go to the [rocal_pybind](https://github.com/ROCm/rocAL/tree/develop/rocAL_pybind) folder -* sudo ./run.sh - -### Steps to run MLPerf Resnet50 classification training with rocAL on a system with MI50+ and ROCm - -* Step 1: Ensure you have downloaded ILSVRC2012_img_val.tar (6.3GB) and ILSVRC2012_img_train.tar (138 GB) files and unzip into train and val folders -* Step 2: Build [MIVisionX Pytorch docker](https://github.com/ROCm/rocAL/blob/master/docker/README.md) -* Step 3: Install rocAL python_pybind plugin as described above -* Step 4: Clone [MLPerf](https://github.com/rrawther/MLPerf-mGPU) branch and checkout mlperf-v1.1-rocal branch - -``` bash -git clone -b mlperf-v1.1-rocal https://github.com/rrawther/MLPerf-mGPU -``` +* Contains ROCALGenericIterator for Pytorch. +* ROCALClassificationIterator class implements iterator for image classification and return images with corresponding labels. +* From the above classes, any hybrid iterator pipeline can be created by adding augmentations. +* See example [PyTorch Simple Example](./examples/pytorch/). Requires PyTorch. -* Step 5: Modify RN50_AMP_LARS_8GPUS_NCHW.sh or RN50_AMP_LARS_8GPUS_NHWC.sh to reflect correct path for imagenet directory -* Step 8: Run RN50_AMP_LARS_8GPUS_NCHC.sh or RN50_AMP_LARS_8GPUS_NHWC.sh +### amd.rocal.plugin.tf -``` bash -./RN50_AMP_LARS_8GPUS_NCHW.sh -(or) -./RN50_AMP_LARS_8GPUS_NHWC.sh -``` +* Contains ROCALIterator for TensorFlow. +* Any hybrid iterator pipeline can be created by adding augmentations. +* See example [Tensorflow Simple Example](./examples/tf/). Requires TensorFlow. 
-### MIVisionX Pytorch Docker +### installing rocAL python plugin (Python 3.9+) -* Refer to the [docker](https://github.com/ROCm/MIVisionX#docker) page for prerequisites and information on building the docker -* Step 1: Run the docker image* - -``` bash -sudo docker run -it -v :/data -v /:/dockerx -w /dockerx --privileged --device=/dev/kfd --device=/dev/dri --group-add video --shm-size=4g --ipc="host" --network=host -``` +* Build and install RPP +* Build and install MIVisionX +* Build and install [rocAL](https://github.com/ROCm/rocAL/) - * Optional: Map localhost directory on the docker image - * option to map the localhost directory with imagenet dataset folder to be accessed on the docker image. - * usage: -v {LOCAL_HOST_DIRECTORY_PATH}:{DOCKER_DIRECTORY_PATH} diff --git a/docs/data/amd-epyc-black-resize.png b/docs/data/amd-epyc-black-resize.png new file mode 100644 index 000000000..a79b38ca9 Binary files /dev/null and b/docs/data/amd-epyc-black-resize.png differ diff --git a/docs/data/rocm-black-resize.png b/docs/data/rocm-black-resize.png new file mode 100644 index 000000000..34829a6ad Binary files /dev/null and b/docs/data/rocm-black-resize.png differ diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 150b8472c..d8f9f5570 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -951,7 +951,16 @@ INPUT = ../../README.md \ ../../rocAL/include/api/rocal_api_meta_data.h \ ../../rocAL/include/api/rocal_api_parameters.h \ ../../rocAL/include/api/rocal_api_types.h \ - ../../rocAL/include/api/rocal_api.h + ../../rocAL/include/api/rocal_api.h \ + ../../rocAL_pybind/amd/rocal/decoders.py \ + ../../rocAL_pybind/amd/rocal/fn.py \ + ../../rocAL_pybind/amd/rocal/pipeline.py \ + ../../rocAL_pybind/amd/rocal/random.py \ + ../../rocAL_pybind/amd/rocal/readers.py \ + ../../rocAL_pybind/amd/rocal/types.py \ + ../../rocAL_pybind/amd/rocal/plugin/generic.py \ + ../../rocAL_pybind/amd/rocal/plugin/pytorch.py \ + ../../rocAL_pybind/amd/rocal/plugin/tf.py # 
This tag can be used to specify the character encoding of the source files diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 000000000..7e9089c38 --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,7 @@ +# Examples + +Use the links below to see more examples: + +- [Image Processing](https://github.com/ROCm/rocAL/tree/master/docs/examples/image_processing) + +- [Pytorch](https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch) diff --git a/docs/examples.rst b/docs/examples.rst index 1baa5e5bf..10607f2a3 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -12,4 +12,6 @@ Use the links below to see more examples: * `Image Processing `_ * `Pytorch `_ +* `Tensorflow `_ +* `Jupyter Notebooks `_ diff --git a/docs/examples/image_processing/inference_pipeline.py b/docs/examples/image_processing/inference_pipeline.py index a7db74e16..f97da7b37 100644 --- a/docs/examples/image_processing/inference_pipeline.py +++ b/docs/examples/image_processing/inference_pipeline.py @@ -31,7 +31,7 @@ seed = 1549361629 -image_dir = "../../../../data/images/AMD-tinyDataSet/" +image_dir = "../../../data/images/AMD-tinyDataSet/" batch_size = 4 gpu_id = 0 diff --git a/docs/examples/notebooks/augmentation_examples.ipynb b/docs/examples/notebooks/augmentation_examples.ipynb new file mode 100644 index 000000000..997f93ca2 --- /dev/null +++ b/docs/examples/notebooks/augmentation_examples.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f777cf2", + "metadata": {}, + "source": [ + "Image Augmentation App" + ] + }, + { + "cell_type": "markdown", + "id": "185d3b2d", + "metadata": {}, + "source": [ + "This application demonstrates a simple rocAL pipeline with different augmentations supported by rocAL." 
+ ] + }, + { + "cell_type": "markdown", + "id": "2ace8c53", + "metadata": {}, + "source": [ + " Common Code " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15c865e2", + "metadata": {}, + "outputs": [], + "source": [ + "from amd.rocal.plugin.generic import ROCALClassificationIterator\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.fn as fn\n", + "import amd.rocal.types as types\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "markdown", + "id": "e0569dcc", + "metadata": {}, + "source": [ + "Configuring rocAL pipeline " + ] + }, + { + "cell_type": "markdown", + "id": "00c2815c", + "metadata": {}, + "source": [ + "Configure the pipeline parameters as required by the user." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f118bb1f", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = \"/media/MIVisionX-data/rocal_data/coco/coco_10_img/train_10images_2017/\"\n", + "rocal_cpu = True\n", + "device = \"cpu\" \n", + "batch_size = 1\n", + "num_threads = 1\n", + "random_seed = 1\n", + "local_rank = 0\n", + "world_size = 1\n", + "display = True " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e069c4d", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu, tensor_layout=types.NHWC , tensor_dtype=types.FLOAT)\n" + ] + }, + { + "cell_type": "markdown", + "id": "df5e6005", + "metadata": {}, + "source": [ + " Image augmentation pipeline " + ] + }, + { + "cell_type": "markdown", + "id": "e1ab279e", + "metadata": {}, + "source": [ + "Here the file reader is used followed by the turbo jpeg decoder. In this pipeline, cascaded augmentations are added on the decoded images.
Multiple augmentation outputs are returned using set_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9e1dd12", + "metadata": {}, + "outputs": [], + "source": [ + "with pipe:\n", + " jpegs, _ = fn.readers.file(file_root=data_path)\n", + " images = fn.decoders.image(jpegs, file_root=data_path, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=True)\n", + " resize_outputs = fn.resize(images, resize_width=300, resize_height=300)\n", + " brightness_outputs = fn.brightness(resize_outputs)\n", + " contrast_outputs = fn.contrast(resize_outputs)\n", + " fisheye_outputs = fn.fish_eye(resize_outputs)\n", + " fog_outputs = fn.fog(resize_outputs)\n", + " exposure_outputs = fn.exposure(resize_outputs)\n", + " color_twist_outputs = fn.color_twist(resize_outputs)\n", + " saturation_outputs = fn.saturation(resize_outputs)\n", + " pipe.set_outputs(resize_outputs, brightness_outputs, contrast_outputs, fisheye_outputs, fog_outputs, exposure_outputs, color_twist_outputs, saturation_outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6be22ee", + "metadata": {}, + "outputs": [], + "source": [ + "pipe.build()\n", + "# Dataloader\n", + "data_loader = ROCALClassificationIterator(pipe)" + ] + }, + { + "cell_type": "markdown", + "id": "401897a3", + "metadata": {}, + "source": [ + "Visualizing outputs" + ] + }, + { + "cell_type": "markdown", + "id": "349bf77c", + "metadata": {}, + "source": [ + "The output of augmented images are displayed using imshow()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67db109e", + "metadata": {}, + "outputs": [], + "source": [ + "cnt = 0\n", + "aug_list = [\"resize\", \"brightness\", \"contrast\", \"fisheye\", \"fog\", \"exposure\", \"colortwist\", \"saturation\"] \n", + "fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15,15))\n", + "row = 0\n", + "col = 0\n", + "for i, it in enumerate(data_loader, 0):\n", + " for img in it[0]:\n", + " if(cnt < 
len(aug_list)):\n", + " axes[row, col].set_title(aug_list[cnt])\n", + " img = (img[0]).astype('uint8')\n", + " axes[row, col].imshow(img)\n", + " cnt += 1\n", + " row += 1\n", + " if(row == 2):\n", + " row = 0\n", + " col += 1\n", + " if(col == 4):\n", + " col = 0\n", + "data_loader.reset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/notebooks/classification_training_flowerdataset.ipynb b/docs/examples/notebooks/classification_training_flowerdataset.ipynb new file mode 100644 index 000000000..b6b785b99 --- /dev/null +++ b/docs/examples/notebooks/classification_training_flowerdataset.ipynb @@ -0,0 +1,667 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "56c9244e", + "metadata": {}, + "source": [ + "## Rocal Classification training \n", + "This example showcases a usecase for rocAL classification training with small dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73bdd89e", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch import nn\n", + "from torch.autograd import Variable\n", + "import torchvision.models as models\n", + "import time\n", + "import math\n", + "import tqdm as tqdm\n", + "import time \n", + "from amd.rocal.plugin.pytorch import ROCALClassificationIterator\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.fn as fn\n", + "import amd.rocal.types as types\n", + "from torch.optim import Optimizer\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "markdown", + "id": "3ce165e2", + "metadata": {}, + "source": [ + "## Setting Dataset path \n", + "Here we are setting the dataset path which will be used in the training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a06c781", + "metadata": {}, + "outputs": [], + "source": [ + "start_time = time.time()\n", + "device = torch.device('cpu')\n", + "data_dir = './Flower102/split_data/' # Run create_classification_flower_dataset.py before running the notebook for dataset creation\n", + "train_dir = data_dir + '/train'\n", + "val_dir = data_dir + '/val'\n", + "test_dir = data_dir + '/test'" + ] + }, + { + "cell_type": "markdown", + "id": "f3e8acc2", + "metadata": {}, + "source": [ + "## Defining the Pipeline\n", + "This is defining a pipeline for a classification task. This pipeline will read images from a directory, decode them, apply augmentations and return (image, label) pairs.This pipeline uses image_random_crop then the output is resized to a dimension of (224,224) followed by normalization." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90041830", + "metadata": {}, + "outputs": [], + "source": [ + "def train_pipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):\n", + " pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, rocal_cpu=rocal_cpu,\n", + " tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, prefetch_queue_depth=6)\n", + " with pipe:\n", + " jpegs, labels = fn.readers.file(file_root=data_path)\n", + " rocal_device = 'cpu' if rocal_cpu else 'gpu'\n", + " decode = fn.decoders.image_random_crop(jpegs, output_type=types.RGB,\n", + " file_root=data_path, num_shards=world_size, random_shuffle=True)\n", + " res = fn.resize(decode, resize_width=224, resize_height=224, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n", + " coin_flip = fn.random.coin_flip(probability=0.5)\n", + " cmnp = fn.crop_mirror_normalize(res,\n", + " output_dtype=types.FLOAT,\n", + " output_layout=types.NCHW,\n", + " crop=(224, 224),\n", + " mirror=coin_flip,\n", + " 
mean=[0, 0, 0], std=[1, 1, 1])\n", + " if (one_hot):\n", + " _ = fn.one_hot(labels, num_classes)\n", + " pipe.set_outputs(cmnp)\n", + " print('rocal \"{0}\" variant'.format(rocal_device))\n", + " return pipe\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60094a08", + "metadata": {}, + "outputs": [], + "source": [ + "def val_pipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):\n", + " pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank + 10, rocal_cpu=rocal_cpu,\n", + " tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, prefetch_queue_depth=2)\n", + " with pipe:\n", + " jpegs, labels = fn.readers.file(file_root=data_path)\n", + " rocal_device = 'cpu' if rocal_cpu else 'gpu'\n", + " decode = fn.decoders.image_random_crop(\n", + " jpegs, output_type=types.RGB, file_root=data_path, num_shards=world_size, random_shuffle=False)\n", + " res = fn.resize(decode, resize_width=224, resize_height=224, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n", + " cmnp = fn.crop_mirror_normalize(res,\n", + " output_dtype=types.FLOAT16 if fp16 else types.FLOAT,\n", + " output_layout=types.NCHW,\n", + " crop=(224, 224),\n", + " mirror=0,\n", + " mean=[0, 0, 0],\n", + " std=[1, 1, 1])\n", + " if (one_hot):\n", + " _ = fn.one_hot(labels, num_classes)\n", + " pipe.set_outputs(cmnp)\n", + " print('rocal \"{0}\" variant'.format(rocal_device))\n", + " return pipe" + ] + }, + { + "cell_type": "markdown", + "id": "538094db", + "metadata": {}, + "source": [ + "## Building the Pipeline\n", + "Here the pipeline is created. In order to use this Pipeline, the pipeline has to be built. 
This is achieved by calling the build function.\n", + "Then iterator object is created with ROCALClassificationIterator(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "838fea17", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = train_pipeline(data_path=train_dir, batch_size=64, num_classes=1, one_hot=0,\n", + " local_rank=1, world_size=1, num_thread=3, crop=10, rocal_cpu='cpu', fp16=False)\n", + "pipe.build()\n", + "trainloader = ROCALClassificationIterator(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c50d4b16", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = val_pipeline(data_path=val_dir, batch_size=64, num_classes=1, one_hot=0, local_rank=1 , world_size=1 , num_thread=3, crop=10, rocal_cpu='cpu', fp16=False)\n", + "pipe.build()\n", + "valloader = ROCALClassificationIterator(pipe)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37e1447e", + "metadata": {}, + "outputs": [], + "source": [ + "def accuracy(output, target, is_test=False):\n", + " global total\n", + " global correct\n", + " batch_size = target.size(0)\n", + " total += batch_size \n", + " _, pred = output.max(dim=1)\n", + " if is_test:\n", + " preds.extend(pred)\n", + " correct += torch.sum(pred == target.data)\n", + " return (correct.float()/total) * 100\n", + "\n", + "def reset():\n", + " global total, correct\n", + " global train_loss, test_loss, best_acc\n", + " global trn_losses, trn_accs, val_losses, val_accs\n", + " total, correct = 0, 0\n", + " train_loss, test_loss, best_acc = 0.0, 0.0, 0.0\n", + " trn_losses, trn_accs, val_losses, val_accs = [], [], [], []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "527e3311", + "metadata": {}, + "outputs": [], + "source": [ + "class AvgStats(object):\n", + " def __init__(self):\n", + " self.reset()\n", + " \n", + " def reset(self):\n", + " self.losses =[]\n", + " self.precs =[]\n", + " self.its = []\n", + " \n", + " def 
append(self, loss, prec, it):\n", + " self.losses.append(loss)\n", + " self.precs.append(prec)\n", + " self.its.append(it)" + ] + }, + { + "cell_type": "markdown", + "id": "0f9795e4", + "metadata": {}, + "source": [ + "## Saving checkpoints\n", + "The checkpoint is saved and updated whenever a new best val_acc is achieved. The saved checkpoint is then loaded by the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fff85f1", + "metadata": {}, + "outputs": [], + "source": [ + "def save_checkpoint(model, is_best, filename='./checkpoint.pth.tar'):\n", + " if is_best:\n", + " torch.save(model.state_dict(), filename) # save checkpoint\n", + " else:\n", + " print (\"=> Validation Accuracy did not improve\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df0c62d9", + "metadata": {}, + "outputs": [], + "source": [ + "def load_checkpoint(model, filename = './checkpoint.pth.tar'):\n", + " sd = torch.load(filename, map_location=lambda storage, loc: storage)\n", + " names = set(model.state_dict().keys())\n", + " for n in list(sd.keys()): \n", + " if n not in names and n+'_raw' in names:\n", + " if n+'_raw' not in sd: sd[n+'_raw'] = sd[n]\n", + " del sd[n]\n", + " model.load_state_dict(sd)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8448f914", + "metadata": {}, + "outputs": [], + "source": [ + "class CLR(object):\n", + " def __init__(self, optim, bn, base_lr=1e-7, max_lr=100):\n", + " self.base_lr = base_lr\n", + " self.max_lr = max_lr\n", + " self.optim = optim\n", + " self.bn = bn - 1\n", + " ratio = self.max_lr/self.base_lr\n", + " self.mult = ratio ** (1/self.bn)\n", + " self.best_loss = 1e9\n", + " self.iteration = 0\n", + " self.lrs = []\n", + " self.losses = []\n", + " \n", + " def calc_lr(self, loss):\n", + " self.iteration +=1\n", + " if math.isnan(loss) or loss > 4 * self.best_loss:\n", + " return -1\n", + " if loss < self.best_loss and self.iteration > 1:\n", + " self.best_loss = 
loss\n", + " \n", + " mult = self.mult ** self.iteration\n", + " lr = self.base_lr * mult\n", + " \n", + " self.lrs.append(lr)\n", + " self.losses.append(loss)\n", + " \n", + " return lr\n", + " \n", + " def plot(self, start=10, end=-5):\n", + " plt.xlabel(\"Learning Rate\")\n", + " plt.ylabel(\"Losses\")\n", + " plt.plot(self.lrs[start:end], self.losses[start:end])\n", + " plt.xscale('log')\n", + " \n", + " \n", + " def plot_lr(self):\n", + " plt.xlabel(\"Iterations\")\n", + " plt.ylabel(\"Learning Rate\")\n", + " plt.plot(self.lrs)\n", + " plt.yscale('log')" + ] + }, + { + "cell_type": "markdown", + "id": "bd494db3", + "metadata": {}, + "source": [ + "## Defining Optimizer\n", + "The optimizer object used in inner loop for fast weight updates. In this example Lookahead optimizer is implemented." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cdb0ca6", + "metadata": {}, + "outputs": [], + "source": [ + "class Lookahead(Optimizer):\n", + " def __init__(self, optimizer, alpha=0.5, k=5):\n", + " assert(0.0 <= alpha <= 1.0)\n", + " assert(k >= 1)\n", + " self.optimizer = optimizer\n", + " self.alpha = alpha\n", + " self.k = k\n", + " self.param_groups = self.optimizer.param_groups\n", + " self.state = defaultdict(dict)\n", + " for group in self.param_groups:\n", + " group['k_counter'] = 0\n", + " self.slow_weights = [[param.clone().detach() for param in group['params']] for group in self.param_groups]\n", + " \n", + " def step(self, closure=None):\n", + " loss = self.optimizer.step(closure)\n", + " for group, slow_Weight in zip(self.param_groups, self.slow_weights):\n", + " group['k_counter'] += 1\n", + " if group['k_counter'] == self.k:\n", + " for param, weight in zip(group['params'], slow_Weight):\n", + " weight.data.add_(param.data - weight.data, alpha=self.alpha)\n", + " param.data.copy_(weight.data)\n", + " group['k_counter'] = 0\n", + "\n", + " return loss\n", + "\n", + " def state_dict(self):\n", + " fast_dict = 
self.optimizer.state_dict()\n", + " fast_state = fast_dict['state']\n", + " param_groups = fast_dict['param_groups']\n", + " slow_state = {(id(k) if isinstance(k, torch.Tensor) else k): v\n", + " for k, v in self.state.items()}\n", + " return {\n", + " 'fast_state': fast_state,\n", + " 'param_groups': param_groups,\n", + " 'slow_state': slow_state\n", + " }\n", + "\n", + " def load_state_dict(self, state_dict):\n", + " fast_dict = {\n", + " 'state': state_dict['fast_state'],\n", + " 'param_groups': state_dict['param_groups']\n", + " }\n", + " slow_dict = {\n", + " 'state': state_dict['slow_state'],\n", + " 'param_groups': state_dict['param_groups']\n", + " }\n", + " super(Lookahead, self).load_state_dict(slow_dict)\n", + " self.optimizer.load_state_dict(fast_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5358b12", + "metadata": {}, + "outputs": [], + "source": [ + "train_loss = 0.0\n", + "test_loss = 0.0\n", + "best_acc = 0.0\n", + "trn_losses = []\n", + "trn_accs = []\n", + "val_losses = []\n", + "val_accs = []\n", + "total = 0\n", + "correct = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8002987", + "metadata": {}, + "outputs": [], + "source": [ + "def update_lr(optimizer, lr):\n", + " for g in optimizer.param_groups:\n", + " g['lr'] = lr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbd95fdd", + "metadata": {}, + "outputs": [], + "source": [ + "def lr_find(clr, model, optimizer=None):\n", + "\n", + " t = tqdm.tqdm(trainloader, leave=False, total=len(trainloader))\n", + " running_loss = 0.\n", + " avg_beta = 0.98\n", + " model.train()\n", + " \n", + " for i,data in enumerate(t):\n", + " input = data[0]\n", + " target = data[1]\n", + " input, target = input.to(device), target.to(device)\n", + " var_ip, var_tg = Variable(input), Variable(target)\n", + " output = model(var_ip)\n", + " loss = criterion(output, var_tg)\n", + " \n", + " running_loss = avg_beta * running_loss + 
(1-avg_beta) *loss.item()\n", + " smoothed_loss = running_loss / (1 - avg_beta**(i+1))\n", + " t.set_postfix(loss=smoothed_loss)\n", + " \n", + " lr = clr.calc_lr(smoothed_loss)\n", + " if lr == -1 :\n", + " break\n", + " update_lr(optimizer, lr) \n", + " \n", + " # compute gradient and do SGD step\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " trainloader.reset()" + ] + }, + { + "cell_type": "markdown", + "id": "55a56d6e", + "metadata": {}, + "source": [ + "## Defining train and test function \n", + "To train the model, the data iterator is looped over, and the inputs are fed to the network and optimized. Then the model is tested with batches of images from the test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "126dc268", + "metadata": {}, + "outputs": [], + "source": [ + "def train(epoch=0, model=None, optimizer=None):\n", + " model.train()\n", + " global best_acc\n", + " global trn_accs, trn_losses\n", + " is_improving = True\n", + " counter = 0\n", + " running_loss = 0.\n", + " avg_beta = 0.98\n", + " \n", + " for i, ([input],target) in enumerate(trainloader):\n", + " bt_start = time.time()\n", + " var_ip, var_tg = Variable(input), Variable(target)\n", + " \n", + " output = model(var_ip)\n", + " loss = criterion(output, var_tg)\n", + " running_loss = avg_beta * running_loss + (1-avg_beta) *loss.item()\n", + " smoothed_loss = running_loss / (1 - avg_beta**(i+1))\n", + " trn_losses.append(smoothed_loss)\n", + " \n", + " # measure accuracy and record loss\n", + " prec = accuracy(output.data, target)\n", + " trn_accs.append(prec)\n", + " train_stats.append(smoothed_loss, prec, time.time()-bt_start)\n", + " if prec > best_acc :\n", + " best_acc = prec\n", + " save_checkpoint(model, True)\n", + "\n", + " # compute gradient and do SGD step\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " trainloader.reset()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "4bb16a6f", + "metadata": {}, + "outputs": [], + "source": [ + "def test(model=None):\n", + " with torch.no_grad():\n", + " model.eval()\n", + " global val_accs, val_losses\n", + " running_loss = 0.\n", + " avg_beta = 0.98\n", + " for i, ([input],target) in enumerate(valloader):\n", + " bt_start = time.time()\n", + " input, target = input.to(device), target.to(device)\n", + " var_ip, var_tg = Variable(input), Variable(target)\n", + " output = model(var_ip)\n", + " loss = criterion(output, var_tg)\n", + " running_loss = avg_beta * running_loss + (1-avg_beta) *loss.item()\n", + " smoothed_loss = running_loss / (1 - avg_beta**(i+1))\n", + "\n", + " # measure accuracy and record loss\n", + " prec = accuracy(output.data, target, is_test=True)\n", + " test_stats.append(loss.item(), prec, time.time()-bt_start)\n", + " val_losses.append(smoothed_loss)\n", + " val_accs.append(prec)\n", + " valloader.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c3334cd", + "metadata": {}, + "outputs": [], + "source": [ + "def fit(model=None, sched=None, optimizer=None):\n", + " print(\"Epoch\\tTrn_loss\\tVal_loss\\tTrn_acc\\t\\tVal_acc\")\n", + " for j in range(epoch):\n", + " train(epoch=j, model=model, optimizer=optimizer)\n", + " \n", + " test(model)\n", + " if sched:\n", + " sched.step(j)\n", + " print(\"{}\\t{:06.8f}\\t{:06.8f}\\t{:06.8f}\\t{:06.8f}\"\n", + " .format(j+1, trn_losses[-1], val_losses[-1], trn_accs[-1], val_accs[-1]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b676d359", + "metadata": {}, + "outputs": [], + "source": [ + "model = models.resnet18(pretrained=True)\n", + "model.fc = nn.Linear(in_features=model.fc.in_features, out_features=102)\n", + "\n", + "\n", + "for param in model.parameters():\n", + " param.requires_grad = False\n", + " \n", + "for param in model.fc.parameters():\n", + " param.requires_grad = True\n", + " \n", + "model = model.to(device)\n", + "\n", + 
"save_checkpoint(model, True, 'before_start_resnet18.pth.tar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bca9db3d", + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()\n", + "optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)\n", + "optimizer = Lookahead(optim)\n", + "\n", + "clr = CLR(optim, len(trainloader))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "886ae8bc", + "metadata": {}, + "outputs": [], + "source": [ + "load_checkpoint(model, 'before_start_resnet18.pth.tar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d93bf209", + "metadata": {}, + "outputs": [], + "source": [ + "preds = []\n", + "epoch = 10\n", + "train_stats = AvgStats()\n", + "test_stats = AvgStats()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92e32e6d", + "metadata": {}, + "outputs": [], + "source": [ + "reset()" + ] + }, + { + "cell_type": "markdown", + "id": "63c43680", + "metadata": {}, + "source": [ + "## Define a Loss function and optimizer\n", + "Here Classification Cross-Entropy loss and SGD with momentum is used as loss function and optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06585899", + "metadata": {}, + "outputs": [], + "source": [ + "criterion = nn.CrossEntropyLoss()\n", + "optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)\n", + "optimizer = Lookahead(optim)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63b70407", + "metadata": {}, + "outputs": [], + "source": [ + "fit(model=model, optimizer=optim)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b91a2bb0", + "metadata": {}, + "outputs": [], + "source": [ + "end_time = time.time()\n", + "print(\"Total_time \",end_time - start_time)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/notebooks/create_classification_flower_dataset.py b/docs/examples/notebooks/create_classification_flower_dataset.py new file mode 100644 index 000000000..6113286e2 --- /dev/null +++ b/docs/examples/notebooks/create_classification_flower_dataset.py @@ -0,0 +1,83 @@ +import csv +import os +import shutil +import sys +import warnings +from scipy.io import loadmat +import pandas as pd +import splitfolders + +path = os.getcwd() +os.system('pip install split_folders') +if not os.path.exists("dataset_flower"): + os.mkdir("dataset_flower") +os.chdir("dataset_flower") +os.system("wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz") +os.system("tar -xvf 102flowers.tgz") + +os.chdir(path) +if not os.path.exists("Flower102"): + os.mkdir("Flower102") + +os.system("wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat") +# get the path/directory +imgs = [] +folder_dir = "./dataset_flower/jpg/" +for image in os.listdir(folder_dir): + imgs.append(image) +imgs.sort() + +warnings.filterwarnings("ignore") +mat_labels = loadmat("./imagelabels.mat") +label = mat_labels["labels"] +label = label[0] +for i in range(len(label)): + label[i] = label[i] - 1 + +if not os.path.exists("./Flower102/map.csv"): + os.system("touch ./Flower102/map.csv") + +fields = ["images", "label"] +with open("./Flower102/map.csv", "w") as csvfile: + csvwriter = csv.writer(csvfile) + # writing the fields + csvwriter.writerow(fields) + for i in range(len(imgs)): + row = [imgs[i], label[i]] + csvwriter.writerow(row) + +labels_map = pd.read_csv(r"./Flower102/map.csv") +train_dir = r"./dataset_flower/jpg" # source 
folder +dest_folder = r"./Flower102/flower/" # destination folder +if not os.path.exists(dest_folder): + os.mkdir(dest_folder) + +for filename, class_name in labels_map.values: + # Create subdirectory with `class_name` + if int(class_name) >= 50: + continue + else: + if not os.path.exists(dest_folder + str(class_name)): + os.mkdir(dest_folder + str(class_name)) + src_path = train_dir + "/" + filename + dst_path = dest_folder + str(class_name) + "/" + filename + try: + shutil.copy(src_path, dst_path) + print("sucessfull") + except IOError as e: + print("Unable to copy file {} to {}".format(src_path, dst_path)) + except: + print( + "When try copy file {} to {}, unexpected error: {}".format( + src_path, dst_path, sys.exc_info() + ) + ) + +input_folder = "./Flower102/flower" +splitfolders.ratio( + input_folder, + output="./Flower102/split_data", + seed=42, + ratio=(0.7, 0.2, 0.1), + group_prefix=None, +) diff --git a/docs/examples/image_processing/decoder_examples.ipynb b/docs/examples/notebooks/decoder_examples.ipynb similarity index 91% rename from docs/examples/image_processing/decoder_examples.ipynb rename to docs/examples/notebooks/decoder_examples.ipynb index 27098f079..1545f0314 100644 --- a/docs/examples/image_processing/decoder_examples.ipynb +++ b/docs/examples/notebooks/decoder_examples.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ "%matplotlib inline\n", "\n", "seed = 1549361629\n", - "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n", + "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n", "batch_size = 4\n", "gpu_id = 0\n", "\n", @@ -61,7 +61,7 @@ " pipe.build()\n", " data_loader = ROCALClassificationIterator(pipe, device, device_id)\n", " images = next(iter(data_loader))\n", - " show_images(images[0], device)\n" + " show_images(images[0][0], device)\n" ] }, { @@ -82,9 +82,9 @@ "source": [ "@pipeline_def(seed=seed)\n", "def 
image_decoder_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", "\n", "pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n", @@ -109,12 +109,13 @@ "source": [ "@pipeline_def(seed=seed)\n", "def image_decoder_random_crop_pipeline(device=\"cpu\"):\n", - " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n", + " jpegs, labels = fn.readers.file(file_root=image_dir)\n", " images = fn.decoders.image_slice(jpegs, file_root=image_dir, \n", - " device=device,\n", " output_type=types.RGB,\n", + " shard_id = 0,\n", + " num_shards = 1,\n", " random_shuffle=True)\n", - " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n", + " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n", " \n", "pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n", " reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n", @@ -184,7 +185,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" }, "vscode": { "interpreter": { diff --git a/docs/examples/notebooks/resize_implementation.ipynb b/docs/examples/notebooks/resize_implementation.ipynb new file mode 100644 index 000000000..0c267762a --- /dev/null +++ 
b/docs/examples/notebooks/resize_implementation.ipynb @@ -0,0 +1,201 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f777cf2", + "metadata": {}, + "source": [ + "Image Augmentation App" + ] + }, + { + "cell_type": "markdown", + "id": "185d3b2d", + "metadata": {}, + "source": [ + "This application demonstrates a simple rocAL pipeline with different interpolation types for resize augmentation supported by rocAL." + ] + }, + { + "cell_type": "markdown", + "id": "2ace8c53", + "metadata": {}, + "source": [ + " Common Code " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15c865e2", + "metadata": {}, + "outputs": [], + "source": [ + "from amd.rocal.plugin.generic import ROCALClassificationIterator\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.fn as fn\n", + "import amd.rocal.types as types\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "e0569dcc", + "metadata": {}, + "source": [ + "Configuring rocAL pipeline " + ] + }, + { + "cell_type": "markdown", + "id": "00c2815c", + "metadata": {}, + "source": [ + "Configure the pipeline parameters as required by the user." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f118bb1f", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = \"/media/MIVisionX-data/rocal_data/coco/coco_10_img/train_10images_2017/\"\n", + "rocal_cpu = True\n", + "device = \"cpu\"\n", + "batch_size = 1\n", + "num_threads = 1\n", + "random_seed = 1\n", + "local_rank = 0\n", + "world_size = 1\n", + "display = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e069c4d", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu, tensor_layout=types.NHWC, tensor_dtype=types.FLOAT)\n" + ] + }, + { + "cell_type": "markdown", + "id": "df5e6005", + "metadata": {}, + "source": [ + " Image augmentation pipeline " + ] + }, + { + "cell_type": "markdown", + "id": "e1ab279e", + "metadata": {}, + "source": [ + "Here the file reader is used followed by the turbo jpeg decoder. In this pipeline, cascaded augmentations are added on the decoded images.
Multiple augmentation outputs are returned using set_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "804d8895", + "metadata": {}, + "outputs": [], + "source": [ + "resize_w = 200\n", + "resize_h = 200\n", + "with pipe:\n", + " jpegs, _ = fn.readers.file(file_root=data_path)\n", + " images = fn.decoders.image(jpegs, file_root=data_path, device=0, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n", + " output_linear = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.LINEAR_INTERPOLATION)\n", + " output_nearest_neighbor = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.NEAREST_NEIGHBOR_INTERPOLATION)\n", + " output_cubic = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.CUBIC_INTERPOLATION)\n", + " output_lanczos = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.LANCZOS_INTERPOLATION)\n", + " output_gaussian = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.GAUSSIAN_INTERPOLATION)\n", + " output_triangular = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n", + " pipe.set_outputs(output_linear, output_nearest_neighbor, output_cubic, output_lanczos, output_gaussian, output_triangular)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6be22ee", + "metadata": {}, + "outputs": [], + "source": [ + "pipe.build()\n", + "# Dataloader\n", + "data_loader = ROCALClassificationIterator(pipe)" + ] + }, + { + "cell_type": "markdown", + "id": "401897a3", + "metadata": {}, + "source": [ + "Visualizing outputs" + ] + }, + { + "cell_type": "markdown", + "id": "349bf77c", + "metadata": {}, + "source": [ + "The output of augmented images are displayed using imshow()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "b7cf8686", + "metadata": {}, + "outputs": [], + "source": [ + "cnt = 0\n", + "aug_list = [\"LINEAR_INTERPOLATION\", \"NEAREST_NEIGHBOR_INTERPOLATION\", \"CUBIC_INTERPOLATION\", \"LANCZOS_INTERPOLATION\", \"GAUSSIAN_INTERPOLATION\", \"TRIANGULAR_INTERPOLATION\"] \n", + "row = 0\n", + "col = 0\n", + "fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))\n", + "for i, it in enumerate(data_loader, 0):\n", + " for img in it[0]:\n", + " if cnt < len(aug_list):\n", + " axes[row, col].set_title(aug_list[cnt])\n", + " img = (img[0]).astype(\"uint8\")\n", + " axes[row, col].imshow(img)\n", + " cnt += 1\n", + " row += 1\n", + " if row == 3:\n", + " row = 0\n", + " col += 1\n", + " if col == 2:\n", + " col = 0\n", + "data_loader.reset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/notebooks/tf_dataloader.ipynb b/docs/examples/notebooks/tf_dataloader.ipynb new file mode 100644 index 000000000..5305cec9a --- /dev/null +++ b/docs/examples/notebooks/tf_dataloader.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "95284add", + "metadata": {}, + "source": [ + "## Data Loading: TensorFlow TFRecord\n", + " This example demonstrates how to utilise rocAL to access data that has been saved in the TensorFlow TFRecord format." 
+ ] + }, + { + "cell_type": "markdown", + "id": "81f6b975", + "metadata": {}, + "source": [ + "## Common Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dae4d589", + "metadata": {}, + "outputs": [], + "source": [ + "from amd.rocal.plugin.tf import ROCALIterator\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.types as types\n", + "import amd.rocal.fn as fn\n", + "import tensorflow as tf\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "d4c70a58", + "metadata": {}, + "source": [ + "## Configuring rocAL pipeline\n", + "\n", + "Configure the pipeline parameters as required by the user." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2baa1c4d", + "metadata": {}, + "outputs": [], + "source": [ + "image_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"tf\", \"classification\")\n", + "rocal_cpu = True\n", + "batch_size = 10\n", + "one_hot_label = 0\n", + "num_threads = 1\n", + "local_rank = 1\n", + "tf_record_reader_type = 0\n", + "featureKeyMap = {\n", + " \"image/encoded\": \"image/encoded\",\n", + " \"image/class/label\": \"image/class/label\",\n", + " \"image/filename\": \"image/filename\",\n", + "}\n", + "path = \"output_folder/tf_reader/classification/\"" + ] + }, + { + "cell_type": "markdown", + "id": "7f5fa309", + "metadata": {}, + "source": [ + "## Defining and Running the Pipeline\n", + " Creating the pipeline using tf.readers for reading the images stored in tf_record format. In this pipeline, resize augmentation is used on the decoded images.
Resize augmentation outputs are returned using set_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d6ef55f", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=2, rocal_cpu=rocal_cpu, tensor_layout = types.NCHW)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f80ad1a5", + "metadata": {}, + "outputs": [], + "source": [ + "with pipe:\n", + " inputs = fn.readers.tfrecord(path=image_path, reader_type=tf_record_reader_type, user_feature_key_map=featureKeyMap,\n", + " features={\n", + " \"image/encoded\": tf.io.FixedLenFeature((), tf.string, \"\"),\n", + " \"image/class/label\": tf.io.FixedLenFeature([1], tf.int64, -1),\n", + " \"image/filename\": tf.io.FixedLenFeature((), tf.string, \"\")\n", + " }\n", + " )\n", + " jpegs = inputs[\"image/encoded\"]\n", + " images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=image_path)\n", + " resized = fn.resize(images, resize_width=300, resize_height=300)\n", + " if(one_hot_label == 1):\n", + " labels = inputs[\"image/class/label\"]\n", + " _ = fn.one_hot(labels, num_classes=1000)\n", + " pipe.set_outputs(resized)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dfa7b3e", + "metadata": {}, + "outputs": [], + "source": [ + "pipe.build()\n", + "# Dataloader\n", + "imageIterator = ROCALIterator(pipe)" + ] + }, + { + "cell_type": "markdown", + "id": "630cda47", + "metadata": {}, + "source": [ + "## Visualizing outputs\n", + "We have displayed the output of augmented images using imshow()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19493e1e", + "metadata": {}, + "outputs": [], + "source": [ + "cnt = 0\n", + "fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(20, 20))\n", + "row = 0\n", + "col = 0\n", + "for i, ([images_array], labels_array) in enumerate(imageIterator, 0):\n", + " for k in 
images_array:\n", + " cnt += 1\n", + " image = k.astype(\"int\")\n", + " axes[row, col].imshow(image)\n", + " row += 1\n", + " if row == 2:\n", + " row = 0\n", + " if col != 4:\n", + " col += 1\n", + "imageIterator.reset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/notebooks/video_decoder.ipynb b/docs/examples/notebooks/video_decoder.ipynb new file mode 100644 index 000000000..56922adb7 --- /dev/null +++ b/docs/examples/notebooks/video_decoder.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d7ff9965", + "metadata": {}, + "source": [ + " ## Video Pipeline Reading From Multiple Files in rocAL\n", + "\n", + "This example presents a simple rocAL video pipeline that loads and decodes video data." + ] + }, + { + "cell_type": "markdown", + "id": "b8b1a3e9", + "metadata": {}, + "source": [ + " ## Common Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ac44489", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.fn as fn\n", + "import amd.rocal.types as types\n", + "import numpy as np\n", + "import os\n", + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "import matplotlib.gridspec as gridspec" + ] + }, + { + "cell_type": "markdown", + "id": "442c364e", + "metadata": {}, + "source": [ + "## Configuring rocAL pipeline\n", + "Configure the pipeline parameters as required by the user." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20307afc", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "video_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"video_and_sequence_samples\", \"labelled_videos\")\n", + "rocal_cpu = True\n", + "batch_size = 2\n", + "display = False\n", + "num_threads = 4\n", + "random_seed = 1\n", + "tensor_format = types.NCHW\n", + "tensor_dtype = types.FLOAT\n", + "local_rank = 1\n", + "sequence_length = 3\n", + "n_iter = 6" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf11975c", + "metadata": {}, + "outputs": [], + "source": [ + "def display_sequence(sequence):\n", + " columns = 3\n", + " rows = (sequence_length + 1) // (columns)\n", + " gs = gridspec.GridSpec(rows, columns)\n", + " for j in range(rows * columns):\n", + " plt.subplot(gs[j])\n", + " plt.axis(\"off\")\n", + " plt.imshow(sequence)" + ] + }, + { + "cell_type": "markdown", + "id": "9076f0f5", + "metadata": {}, + "source": [ + "## Defining and Running the Pipeline\n", + "A custom iterator is created for iterating through the video pipeline outputs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78257053-674e-4bcd-a047-bd23bf775ee5", + "metadata": {}, + "outputs": [], + "source": [ + "class ROCALVideoIterator(object):\n", + " \"\"\"\n", + " ROCALVideoIterator for pyTorch.\n", + "\n", + " Parameters\n", + " ----------\n", + " pipelines : list of amd.rocal.pipeline.Pipeline\n", + " List of pipelines to use\n", + " size : int\n", + " Epoch size.\n", + " \"\"\"\n", + "\n", + " def __init__(self, pipelines, tensor_layout=types.NCHW, reverse_channels=False, multiplier=None, offset=None, tensor_dtype=types.FLOAT, display=False, sequence_length=3):\n", + "\n", + " try:\n", + " assert pipelines is not None, \"Number of provided pipelines has to be at least 1\"\n", + " except Exception as ex:\n", + " print(ex)\n", + "\n", + " self.loader = pipelines\n", + " 
self.tensor_format = tensor_layout\n", + " self.multiplier = multiplier if multiplier else [1.0, 1.0, 1.0]\n", + " self.offset = offset if offset else [0.0, 0.0, 0.0]\n", + " self.reverse_channels = reverse_channels\n", + " self.tensor_dtype = tensor_dtype\n", + " self.batch_size = self.loader._batch_size\n", + " self.rim = self.loader.get_remaining_images()\n", + " self.display = display\n", + " self.iter_num = 0\n", + " self.sequence_length = sequence_length\n", + " self.output = self.dimensions = self.dtype = None\n", + "\n", + " def next(self):\n", + " return self.__next__()\n", + "\n", + " def __next__(self):\n", + " if self.loader.is_empty():\n", + " raise StopIteration\n", + "\n", + " if self.loader.rocal_run() != 0:\n", + " raise StopIteration\n", + " else:\n", + " self.output_tensor_list = self.loader.get_output_tensors()\n", + " self.iter_num += 1\n", + " # Copy output from buffer to numpy array\n", + " if self.output is None:\n", + " self.dimensions = self.output_tensor_list[0].dimensions()\n", + " self.dtype = self.output_tensor_list[0].dtype()\n", + " self.layout = self.output_tensor_list[0].layout()\n", + " self.output = np.empty((self.dimensions[0] * self.dimensions[1], self.dimensions[2], self.dimensions[3], self.dimensions[4]), dtype=self.dtype)\n", + " self.output_tensor_list[0].copy_data(self.output)\n", + " img = torch.from_numpy(self.output)\n", + " return img\n", + "\n", + " def reset(self):\n", + " self.loader.rocal_reset_loaders()\n", + "\n", + " def __iter__(self):\n", + " return self\n", + "\n", + " def __del__(self):\n", + " self.loader.rocal_release()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a8bc652", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu,\n", + " tensor_layout=tensor_format, tensor_dtype=tensor_dtype)" + ] + }, + { + "cell_type": "markdown", + "id": "913c943d", + 
"metadata": {}, + "source": [ + "## Video Pipeline\n", + "Here the video reader is used to read the video data. Then the decoded sequences are passed to CMN. The CMN outputs are returned using set_outputs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09691217", + "metadata": {}, + "outputs": [], + "source": [ + "with pipe:\n", + " images = fn.readers.video(file_root=video_path, sequence_length=sequence_length,\n", + " random_shuffle=False, image_type=types.RGB)\n", + " crop_size = [512, 960]\n", + " output_images = fn.crop_mirror_normalize(images,\n", + " crop=crop_size,\n", + " mean=[0, 0, 0],\n", + " std=[1, 1, 1],\n", + " mirror=0,\n", + " output_dtype=types.UINT8,\n", + " output_layout=types.NFHWC)\n", + " pipe.set_outputs(output_images)" + ] + }, + { + "cell_type": "markdown", + "id": "599c42aa", + "metadata": {}, + "source": [ + "## Building the Pipeline\n", + "Here the pipeline is created. In order to use this Pipeline, the pipeline has to be built. This is achieved by calling the build function. 
Then iterator object is created with ROCALVideoIterator(video_pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "974e212e", + "metadata": {}, + "outputs": [], + "source": [ + "# Build the pipeline\n", + "pipe.build()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20fbd7f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataloader\n", + "data_loader = ROCALVideoIterator(pipe, multiplier=pipe._multiplier, offset=pipe._offset, display=display, sequence_length=sequence_length)" + ] + }, + { + "cell_type": "markdown", + "id": "6b8309c7", + "metadata": {}, + "source": [ + "## Visualizing outputs\n", + "The outputs of the video sequence are plotted using matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c1c7f18", + "metadata": {}, + "outputs": [], + "source": [ + "for i, it in enumerate(data_loader):\n", + " if i == n_iter:\n", + " break\n", + " for sequence in it:\n", + " display_sequence(sequence)\n", + "data_loader.reset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/notebooks/video_reader_with_label.ipynb b/docs/examples/notebooks/video_reader_with_label.ipynb new file mode 100644 index 000000000..9eb2ba032 --- /dev/null +++ b/docs/examples/notebooks/video_reader_with_label.ipynb @@ -0,0 +1,293 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d7ff9965", + "metadata": {}, + "source": [ + " ## Video Pipeline Reading Labelled in rocAL\n", + "\n", + "This example presents a simple rocAL video pipeline that loads and decodes video data along with 
their labels." + ] + }, + { + "cell_type": "markdown", + "id": "b8b1a3e9", + "metadata": {}, + "source": [ + " ## Common Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ac44489", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from amd.rocal.pipeline import Pipeline\n", + "import amd.rocal.fn as fn\n", + "import amd.rocal.types as types\n", + "import numpy as np\n", + "import os\n", + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "import matplotlib.gridspec as gridspec" + ] + }, + { + "cell_type": "markdown", + "id": "442c364e", + "metadata": {}, + "source": [ + "## Configuring rocAL pipeline\n", + "Configure the pipeline parameters as required by the user." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20307afc", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "video_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"video_and_sequence_samples\", \"labelled_videos\")\n", + "rocal_cpu = True\n", + "batch_size = 2\n", + "display = False\n", + "num_threads = 4\n", + "random_seed = 1\n", + "tensor_format = types.NCHW\n", + "tensor_dtype = types.FLOAT\n", + "local_rank = 1\n", + "sequence_length = 3\n", + "n_iter = 1" + ] + }, + { + "cell_type": "markdown", + "id": "9076f0f5", + "metadata": {}, + "source": [ + "## Defining and Running the Pipeline\n", + "A custom iterator is created for iterating through the video pipeline outputs\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3cc73c8-8d31-49c7-92d0-818ef7ef8b1d", + "metadata": {}, + "outputs": [], + "source": [ + "class ROCALVideoIterator(object):\n", + " \"\"\"\n", + " ROCALVideoIterator for pyTorch.\n", + "\n", + " Parameters\n", + " ----------\n", + " pipelines : list of amd.rocal.pipeline.Pipeline\n", + " List of pipelines to use\n", + " size : int\n", + " Epoch size.\n", + " \"\"\"\n", + "\n", + " def __init__(self, pipelines, 
tensor_layout=types.NCHW, reverse_channels=False, multiplier=None, offset=None, tensor_dtype=types.FLOAT, display=False, sequence_length=3):\n", + "\n", + " try:\n", + " assert pipelines is not None, \"Number of provided pipelines has to be at least 1\"\n", + " except Exception as ex:\n", + " print(ex)\n", + "\n", + " self.loader = pipelines\n", + " self.tensor_format = tensor_layout\n", + " self.multiplier = multiplier if multiplier else [1.0, 1.0, 1.0]\n", + " self.offset = offset if offset else [0.0, 0.0, 0.0]\n", + " self.reverse_channels = reverse_channels\n", + " self.tensor_dtype = tensor_dtype\n", + " self.batch_size = self.loader._batch_size\n", + " self.rim = self.loader.get_remaining_images()\n", + " self.display = display\n", + " self.iter_num = 0\n", + " self.sequence_length = sequence_length\n", + " print(\"____________REMAINING IMAGES____________:\", self.rim)\n", + " self.output = self.dimensions = self.dtype = None\n", + "\n", + " def next(self):\n", + " return self.__next__()\n", + "\n", + " def __next__(self):\n", + " if self.loader.is_empty():\n", + " raise StopIteration\n", + "\n", + " if self.loader.rocal_run() != 0:\n", + " raise StopIteration\n", + " else:\n", + " self.output_tensor_list = self.loader.get_output_tensors()\n", + " self.iter_num += 1\n", + " # Copy output from buffer to numpy array\n", + " if self.output is None:\n", + " self.dimensions = self.output_tensor_list[0].dimensions()\n", + " self.dtype = self.output_tensor_list[0].dtype()\n", + " self.layout = self.output_tensor_list[0].layout()\n", + " self.output = np.empty((self.dimensions[0] * self.dimensions[1], self.dimensions[2], self.dimensions[3], self.dimensions[4]), dtype=self.dtype)\n", + " self.output_tensor_list[0].copy_data(self.output)\n", + " img = torch.from_numpy(self.output)\n", + " self.labels = self.loader.get_image_labels()\n", + " return img, self.labels\n", + "\n", + " def reset(self):\n", + " self.loader.rocal_reset_loaders()\n", + "\n", + " def 
__iter__(self):\n", + " return self\n", + "\n", + " def __del__(self):\n", + " self.loader.rocal_release()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a8bc652", + "metadata": {}, + "outputs": [], + "source": [ + "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads,device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu,\n", + " tensor_layout=tensor_format, tensor_dtype=tensor_dtype)" + ] + }, + { + "cell_type": "markdown", + "id": "913c943d", + "metadata": {}, + "source": [ + "## Video Pipeline\n", + "Here the video reader is used to read the video data. Then the decoded sequences are passed to CMN. The CMN outputs are returned using set_outputs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09691217", + "metadata": {}, + "outputs": [], + "source": [ + "with pipe:\n", + " images = fn.readers.video(file_root=video_path, sequence_length=sequence_length,\n", + " random_shuffle=False, image_type=types.RGB)\n", + " crop_size = (512,960)\n", + " output_images = fn.crop_mirror_normalize(images,\n", + " crop=crop_size,\n", + " mean=[0, 0, 0],\n", + " std=[1, 1, 1],\n", + " mirror=0,\n", + " output_dtype=types.UINT8,\n", + " output_layout=types.NFHWC)\n", + " pipe.set_outputs(output_images)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "599c42aa", + "metadata": {}, + "source": [ + "## Building the Pipeline\n", + "Here the pipeline is created. In order to use this Pipeline, the pipeline has to be built. This is achieved by calling the build function. 
Then iterator object is created with ROCALVideoIterator(video_pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "974e212e", + "metadata": {}, + "outputs": [], + "source": [ + "# Build the pipeline\n", + "pipe.build()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20fbd7f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataloader\n", + "data_loader = ROCALVideoIterator(\n", + " pipe, multiplier=pipe._multiplier, offset=pipe._offset, display=display, sequence_length=sequence_length)" + ] + }, + { + "cell_type": "markdown", + "id": "6b8309c7", + "metadata": {}, + "source": [ + "## Visualizing outputs\n", + "The outputs of the video sequence are plotted using matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa664089", + "metadata": {}, + "outputs": [], + "source": [ + "def display_sequence(sequence, labels, count):\n", + " columns = 3\n", + " rows = (sequence_length + 1) // (columns)\n", + " gs = gridspec.GridSpec(rows, columns)\n", + " if(count % 2 == 0):\n", + " plt.suptitle(\"label \" + str(labels[0]), fontsize=30)\n", + " for j in range(rows * columns):\n", + " plt.subplot(gs[j])\n", + " plt.axis(\"off\")\n", + " plt.imshow(sequence)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c1c7f18", + "metadata": {}, + "outputs": [], + "source": [ + "count = 0\n", + "for i, (it, labels) in enumerate(data_loader):\n", + " if i == 0 or i == 112 or i == 244:\n", + " for sequence in it:\n", + " display_sequence(sequence, labels, count)\n", + " count += 1\n", + "data_loader.reset()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/pytorch/README.md b/docs/examples/pytorch/README.md deleted file mode 100644 index 9ae0ded87..000000000 --- a/docs/examples/pytorch/README.md +++ /dev/null @@ -1,10 +0,0 @@ -* This example shows how to run training using pytorch and ToyNet with 2 classes -* Use a dataset with 2 classes - -To run the sample: -* Install rocal_pybind - -``` -python3 test_training.py -``` -* rocal device can be cpu/gpu. diff --git a/docs/examples/pytorch/imagenet_training/README.md b/docs/examples/pytorch/imagenet_training/README.md new file mode 100644 index 000000000..251d51355 --- /dev/null +++ b/docs/examples/pytorch/imagenet_training/README.md @@ -0,0 +1,114 @@ +# ImageNet training in PyTorch + +This example implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset. +This version has been modified to use rocAL. It assumes that the dataset is raw JPEGs from the ImageNet dataset. If offers CPU and GPU based pipeline for rocAL - use `rocal-cpu` argument to enable CPU and use `rocal-gpu` argument to enable GPU. + +## Requirements + +- Install PyTorch for [ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html) +- Install rocAL for running rocAL trainings +- Download the ImageNet dataset from http://www.image-net.org/ and use [the following shell script](https://github.com/pytorch/examples/blob/main/imagenet/extract_ILSVRC.sh) to move and extract the training and validation images to labeled subfolders + +## Training + +To train a model, run `imagenet_training.py` with the desired model architecture and the path to the ImageNet dataset: + +```shell +python imagenet_training.py -a resnet18 [imagenet-folder with train and val folders] +``` + +The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. 
This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG: + +```shell +python imagenet_training.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders] +``` + +To run training with the rocAL data loader, pass `--rocal-cpu` or `--rocal-gpu`: + +```shell +python3 imagenet_training.py -a resnet50 -j$(nproc) --batch-size 1024 --rocal-cpu [imagenet-folder with train and val folders] +``` + +Make sure to remove older checkpoints (`rm *.pth.tar`) saved in the folder if the example has been run before. + +## Use Dummy Data + +The ImageNet dataset is large and time-consuming to download. To get started quickly, run `imagenet_training.py` on dummy data with the `--dummy` flag. This is also useful for benchmarking training speed. Note that the loss and accuracy values are meaningless in this case. + +```shell +python imagenet_training.py -a resnet18 --dummy +``` + +## Multi-processing Distributed Data Parallel Training + +You should always use the NCCL backend for multi-processing distributed training since it currently provides the best distributed training performance. 
+ +### Single node, multiple GPUs + +```shell +python imagenet_training.py -a resnet50 --dist-url 'tcp://127.0.0.1:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 [imagenet-folder with train and val folders] +``` + +### Multiple nodes + +Node 0: + +```shell +python imagenet_training.py -a resnet50 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 0 [imagenet-folder with train and val folders] +``` + +Node 1: + +```shell +python imagenet_training.py -a resnet50 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 1 [imagenet-folder with train and val folders] +``` + +## Usage + +```bash +usage: imagenet_training.py [-h] [-a ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N] [--lr LR] [--momentum M] [--wd W] [-p N] [--resume PATH] [-e] [--pretrained] [--world-size WORLD_SIZE] [--rank RANK] + [--dist-url DIST_URL] [--dist-backend DIST_BACKEND] [--seed SEED] [--gpu GPU] [--multiprocessing-distributed] [--dummy] + [DIR] + +PyTorch ImageNet Training + +positional arguments: + DIR path to dataset (default: imagenet) + +optional arguments: + -h, --help show this help message and exit + -a ARCH, --arch ARCH model architecture: alexnet | convnext_base | convnext_large | convnext_small | convnext_tiny | densenet121 | densenet161 | densenet169 | densenet201 | efficientnet_b0 | + efficientnet_b1 | efficientnet_b2 | efficientnet_b3 | efficientnet_b4 | efficientnet_b5 | efficientnet_b6 | efficientnet_b7 | googlenet | inception_v3 | mnasnet0_5 | mnasnet0_75 | + mnasnet1_0 | mnasnet1_3 | mobilenet_v2 | mobilenet_v3_large | mobilenet_v3_small | regnet_x_16gf | regnet_x_1_6gf | regnet_x_32gf | regnet_x_3_2gf | regnet_x_400mf | regnet_x_800mf | + regnet_x_8gf | regnet_y_128gf | regnet_y_16gf | regnet_y_1_6gf | regnet_y_32gf | regnet_y_3_2gf | regnet_y_400mf | regnet_y_800mf | regnet_y_8gf | resnet101 | resnet152 | resnet18 | 
+ resnet34 | resnet50 | resnext101_32x8d | resnext50_32x4d | shufflenet_v2_x0_5 | shufflenet_v2_x1_0 | shufflenet_v2_x1_5 | shufflenet_v2_x2_0 | squeezenet1_0 | squeezenet1_1 | vgg11 | + vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19 | vgg19_bn | vit_b_16 | vit_b_32 | vit_l_16 | vit_l_32 | wide_resnet101_2 | wide_resnet50_2 (default: resnet18) + -j N, --workers N number of data loading workers (default: 4) + --rocal-cpu use rocAL CPU dataloader + --rocal-gpu use rocAL GPU dataloader + --epochs N number of total epochs to run + --start-epoch N manual epoch number (useful on restarts) + -b N, --batch-size N mini-batch size (default: 256), this is the total batch size of all GPUs on the current node when using Data Parallel or Distributed Data Parallel + --lr LR, --learning-rate LR + initial learning rate + --momentum M momentum + --wd W, --weight-decay W + weight decay (default: 1e-4) + -p N, --print-freq N print frequency (default: 10) + --resume PATH path to latest checkpoint (default: none) + -e, --evaluate evaluate model on validation set + --pretrained use pre-trained model + --world-size WORLD_SIZE + number of nodes for distributed training + --rank RANK node rank for distributed training + --dist-url DIST_URL url used to set up distributed training + --dist-backend DIST_BACKEND + distributed backend + --seed SEED seed for initializing training. + --gpu GPU GPU id to use. + --multiprocessing-distributed + Use multi-processing distributed training to launch N processes per node, which has N GPUs. 
This is the fastest way to use PyTorch for either single node or multi node data parallel + training + --dummy use fake data to benchmark + +``` diff --git a/docs/examples/pytorch/imagenet_training/imagenet_training.py b/docs/examples/pytorch/imagenet_training/imagenet_training.py new file mode 100644 index 000000000..4fe15a03e --- /dev/null +++ b/docs/examples/pytorch/imagenet_training/imagenet_training.py @@ -0,0 +1,687 @@ +import argparse +import os +import random +import shutil +import time +import warnings +from enum import Enum + +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.nn as nn +import torch.nn.parallel +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +from torch.optim.lr_scheduler import StepLR +from torch.utils.data import Subset + +try: + from amd.rocal.plugin.pytorch import ROCALClassificationIterator + from amd.rocal.pipeline import Pipeline + import amd.rocal.fn as fn + import amd.rocal.types as types +except ImportError: + print('Install rocAL for running rocAL trainings') + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet', + help='path to dataset (default: imagenet)') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total 
epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. 
def train_pipeline(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):
    """Assemble the rocAL graph used for training and return it un-built.

    The graph reads files from ``data_path``, decodes them with the slice
    decoder (sharded by ``local_rank`` / ``world_size`` and shuffled), resizes
    to 224x224, then applies crop-mirror-normalize with a coin-flip mirror and
    the ImageNet mean/std scaled to the 0-255 range.

    Args:
        data_path: Root directory handed to the file reader and the decoder.
        batch_size: Batch size of the pipeline.
        local_rank: Used as the device id, the shard id and (plus 10) the seed.
        world_size: Number of shards.
        num_thread: Number of pipeline threads.
        crop: Side length of the square crop taken by crop-mirror-normalize.
        rocal_cpu: True selects the CPU backend and host output memory,
            False selects device output memory.
        fp16: True requests FLOAT16 as the pipeline tensor dtype.

    Returns:
        The configured ``Pipeline``; the caller is expected to call ``build()``.
    """
    rocal_device = 'cpu' if rocal_cpu else 'gpu'

    # ImageNet statistics expressed on the 0-255 pixel scale.
    mean_255 = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std_255 = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    if fp16:
        pipeline_dtype = types.FLOAT16
    else:
        pipeline_dtype = types.FLOAT

    if rocal_cpu:
        memory_kind = types.HOST_MEMORY
    else:
        memory_kind = types.DEVICE_MEMORY

    pipe = Pipeline(
        batch_size=batch_size,
        num_threads=num_thread,
        device_id=local_rank,
        seed=local_rank + 10,
        rocal_cpu=rocal_cpu,
        tensor_dtype=pipeline_dtype,
        tensor_layout=types.NCHW,
        prefetch_queue_depth=6,
        mean=list(mean_255),
        std=list(std_255),
        output_memory_type=memory_kind,
    )

    with pipe:
        jpegs, labels = fn.readers.file(file_root=data_path)
        decoded = fn.decoders.image_slice(
            jpegs,
            output_type=types.RGB,
            file_root=data_path,
            shard_id=local_rank,
            num_shards=world_size,
            random_shuffle=True,
        )
        resized = fn.resize(
            decoded,
            resize_width=224,
            resize_height=224,
            output_layout=types.NHWC,
            output_dtype=types.UINT8,
            interpolation_type=types.TRIANGULAR_INTERPOLATION,
        )
        mirror_flag = fn.random.coin_flip(probability=0.5)
        normalized = fn.crop_mirror_normalize(
            resized,
            output_layout=types.NCHW,
            output_dtype=types.FLOAT,
            crop=(crop, crop),
            mirror=mirror_flag,
            mean=list(mean_255),
            std=list(std_255),
        )
        pipe.set_outputs(normalized)

    print('rocal "{0}" variant'.format(rocal_device))
    return pipe
def get_rocal_train_loader(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16=False):
    """Build the rocAL training pipeline and wrap it for the training loop.

    The training images are read from the ``train`` sub-directory of
    ``data_path``. The built pipeline is handed to
    ``ROCALClassificationIterator`` and the iterator is wrapped in a
    ``Prefetcher``, which is what this function returns.

    Args:
        data_path: Dataset root containing a ``train`` directory.
        batch_size: Batch size, forwarded to the pipeline and the prefetcher.
        local_rank: Forwarded to the pipeline and used as the iterator's device id.
        world_size: Forwarded to the pipeline.
        num_thread: Forwarded to the pipeline.
        crop: Forwarded to the pipeline.
        rocal_cpu: True requests the ``"cpu"`` iterator device, False ``"cuda"``.
        fp16: Forwarded to the pipeline.
    """
    train_root = os.path.join(data_path, 'train')

    pipeline = train_pipeline(train_root, batch_size, local_rank,
                              world_size, num_thread, crop, rocal_cpu, fp16)
    pipeline.build()

    if rocal_cpu:
        iterator_device = "cpu"
    else:
        iterator_device = "cuda"

    iterator = ROCALClassificationIterator(
        pipeline, device=iterator_device, device_id=local_rank)
    return Prefetcher(iterator, rocal_cpu, batch_size)
class Prefetcher:
    """Iterator adapter that unpacks rocAL batches into ``(images, targets)``.

    The wrapped loader must yield ``([images], targets)`` pairs. When
    ``rocal_cpu`` is true and a CUDA device is present, each batch is copied
    to the GPU on a dedicated side stream; otherwise batches are passed
    through unchanged.

    Args:
        data_loader: Iterable yielding ``([images], targets)``; it must also
            provide ``reset()`` (and ``__len__`` if ``len()`` is used).
        rocal_cpu: True when the loader produces host tensors.
        batch_size: Divisor used by ``__len__``.
    """

    def __init__(self, data_loader, rocal_cpu, batch_size):
        self.data_loader = iter(data_loader)
        self.rocal_cpu = rocal_cpu
        self.bs = batch_size
        self.images = None
        self.targets = None
        self.done = False
        # A CUDA stream can only be created when a CUDA device exists. The
        # training script forces rocal_cpu=True on CPU-only hosts, so creating
        # the stream unconditionally would crash exactly in that configuration.
        self.loader_stream = None
        if self.rocal_cpu and torch.cuda.is_available():
            self.loader_stream = torch.cuda.Stream()

    def __iter__(self):
        return self

    def __len__(self):
        return len(self.data_loader) // self.bs

    def prefetch(self):
        """Pull one batch from the loader; set ``done`` once it is exhausted."""
        try:
            if self.loader_stream is not None:
                with torch.cuda.stream(self.loader_stream):
                    [host_images], host_targets = next(self.data_loader)
                    self.images = host_images.cuda(non_blocking=True)
                    self.targets = host_targets.cuda(non_blocking=True)
            else:
                [self.images], self.targets = next(self.data_loader)
        except StopIteration:
            self.images, self.targets = None, None
            self.done = True

    def reset(self):
        """Rewind the wrapped loader and clear any buffered batch."""
        self.data_loader.reset()
        self.images, self.targets = None, None
        self.done = False

    def __next__(self):
        if self.images is None and not self.done:
            self.prefetch()
        if self.done:
            raise StopIteration()

        if self.loader_stream is not None:
            # The host-to-device copies were enqueued on the side stream by
            # prefetch(); the consumer stream must wait for them *after* they
            # were issued, otherwise it may read incomplete data.
            consumer_stream = torch.cuda.current_stream()
            consumer_stream.wait_stream(self.loader_stream)
            # Tell the caching allocator the tensors are used on the consumer
            # stream so their memory is not recycled by the side stream early.
            self.images.record_stream(consumer_stream)
            self.targets.record_stream(consumer_stream)

        images, targets = self.images, self.targets
        self.images, self.targets = None, None
        return images, targets
' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + if torch.cuda.is_available(): + ngpus_per_node = torch.cuda.device_count() + if ngpus_per_node == 1 and args.dist_backend == "nccl": + warnings.warn( + "nccl backend >= 2.5 requires GPU count > 1, perhaps use 'gloo'") + else: + ngpus_per_node = 1 + + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, + args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + 
else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + + if not torch.cuda.is_available() and not torch.backends.mps.is_available(): + if args.rocal_gpu: + args.rocal_gpu = False + args.rocal_cpu = True + print('using CPU, this will be slow') + elif args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. + if torch.cuda.is_available(): + if args.gpu is not None: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs of the current node. + args.batch_size = int(args.batch_size / ngpus_per_node) + args.workers = int( + (args.workers + ngpus_per_node - 1) / ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[args.gpu]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None and torch.cuda.is_available(): + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + elif torch.backends.mps.is_available(): + device = torch.device("mps") + model = model.to(device) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + if torch.cuda.is_available(): + if args.gpu: + device = torch.device('cuda:{}'.format(args.gpu)) + else: + device = torch.device("cuda") + elif torch.backends.mps.is_available(): + device = torch.device("mps") + else: + device = torch.device("cpu") + # define loss function 
(criterion), optimizer, and learning rate scheduler + criterion = nn.CrossEntropyLoss().to(device) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + scheduler = StepLR(optimizer, step_size=30, gamma=0.1) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + elif torch.cuda.is_available(): + # Map model to be loaded to specified single gpu. + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + scheduler.load_state_dict(checkpoint['scheduler']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + # Data loading code + if args.dummy: + print("=> Dummy data is used!") + train_dataset = datasets.FakeData( + 1281167, (3, 224, 224), 1000, transforms.ToTensor()) + val_dataset = datasets.FakeData( + 50000, (3, 224, 224), 1000, transforms.ToTensor()) + if args.rocal_gpu or args.rocal_cpu: + get_train_loader = get_rocal_train_loader + get_val_loader = get_rocal_val_loader + local_rank = 0 + world_size = 1 + + crop_size = 224 + if args.distributed or args.gpu: + local_rank = args.rank if args.distributed else args.gpu + if args.world_size != -1: + world_size = args.world_size + if local_rank == None: + local_rank = 0 + train_loader = get_train_loader(data_path=args.data, batch_size=args.batch_size, 
local_rank=local_rank, world_size=world_size, + num_thread=args.workers, crop=crop_size, rocal_cpu=False if args.rocal_gpu else True, fp16=False) + + val_loader = get_val_loader(data_path=args.data, batch_size=args.batch_size, local_rank=local_rank, world_size=world_size, + num_thread=args.workers, crop=crop_size, rocal_cpu=False if args.rocal_gpu else True, fp16=False) + else: + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_dataset) + val_sampler = torch.utils.data.distributed.DistributedSampler( + val_dataset, shuffle=False, drop_last=True) + else: + train_sampler = None + val_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=( + train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True, sampler=val_sampler) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed and not (args.rocal_gpu or args.rocal_cpu): + train_sampler.set_epoch(epoch) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, device, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + 
def train(train_loader, model, criterion, optimizer, epoch, device, args):
    """Run one training epoch over ``train_loader``.

    For each batch the data is moved to ``device``, the forward pass and the
    loss are computed, top-1/top-5 accuracy is recorded, and one optimizer
    step is applied.  A progress line is printed for every batch whose
    zero-based index is a multiple of ``args.print_freq``.  When a rocAL
    loader is in use (``args.rocal_cpu`` or ``args.rocal_gpu``) the loader is
    reset once the epoch is finished.
    """
    timer_batch = AverageMeter('Time', ':6.3f')
    timer_data = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc1_meter = AverageMeter('Acc@1', ':6.2f')
    acc5_meter = AverageMeter('Acc@5', ':6.2f')
    reporter = ProgressMeter(
        len(train_loader),
        [timer_batch, timer_data, loss_meter, acc1_meter, acc5_meter],
        prefix="Epoch: [{}]".format(epoch))

    # Put the model in training mode before iterating.
    model.train()

    tick = time.time()
    for step, (images, target) in enumerate(train_loader, start=1):
        # Time spent waiting for the loader to hand over this batch.
        timer_data.update(time.time() - tick)

        # Inputs must live on the same device as the model.
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # Forward pass.
        output = model(images)
        loss = criterion(output, target)

        # Book-keeping: loss and accuracy, weighted by the batch size.
        top1_acc, top5_acc = accuracy(output, target, topk=(1, 5))
        loss_meter.update(loss.item(), images.size(0))
        acc1_meter.update(top1_acc[0], images.size(0))
        acc5_meter.update(top5_acc[0], images.size(0))

        # Backward pass and SGD step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Wall-clock time for the whole iteration, then restart the clock.
        timer_batch.update(time.time() - tick)
        tick = time.time()

        # ``step`` is one-based; the print cadence is keyed on the
        # zero-based batch index.
        if (step - 1) % args.print_freq == 0:
            reporter.display(step)

    # rocAL loaders are rewound explicitly here before the next epoch.
    if args.rocal_cpu or args.rocal_gpu:
        train_loader.reset()
enumerate(loader): + i = base_progress + i + if args.gpu is not None and torch.cuda.is_available(): + images = images.cuda(args.gpu, non_blocking=True) + if torch.backends.mps.is_available(): + images = images.to('mps') + target = target.to('mps') + if torch.cuda.is_available(): + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i + 1) + + batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + progress = ProgressMeter( + len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < + len(val_loader.dataset))) if not (args.rocal_cpu or args.rocal_gpu) else len(val_loader), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + run_validate(val_loader) + if args.distributed: + top1.all_reduce() + top5.all_reduce() + + if not (args.rocal_cpu or args.rocal_gpu): + if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)): + aux_val_dataset = Subset(val_loader.dataset, + range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset))) + aux_val_loader = torch.utils.data.DataLoader( + aux_val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + run_validate(aux_val_loader, len(val_loader)) + + progress.display_summary() + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + 
class Summary(Enum):
    """Selects which statistic an ``AverageMeter`` prints in the summary line."""
    NONE = 0
    AVERAGE = 1
    SUM = 2
    COUNT = 3


class AverageMeter(object):
    """Computes and stores the average and current value of a metric.

    Attributes set by ``reset``/``update``:
        val:   the most recent value passed to ``update``.
        sum:   running sum of ``val * n``.
        count: running sum of ``n``.
        avg:   ``sum / count``, or 0 while ``count`` is still 0.
    """

    def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
        self.name = name
        self.fmt = fmt
        self.summary_type = summary_type
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def _mean(self):
        # A meter that has seen no samples reports 0 (the same value
        # ``reset`` assigns) instead of raising ZeroDivisionError.
        return self.sum / self.count if self.count else 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self._mean()

    def all_reduce(self):
        """Sum ``sum`` and ``count`` across all processes and recompute ``avg``.

        The reduction tensor is placed on CUDA when available, otherwise on
        MPS, otherwise on the CPU.  After the call ``sum`` and ``count`` are
        Python floats (they come back through ``Tensor.tolist``).
        """
        if torch.cuda.is_available():
            device = torch.device("cuda")
        elif torch.backends.mps.is_available():
            device = torch.device("mps")
        else:
            device = torch.device("cpu")
        total = torch.tensor([self.sum, self.count],
                             dtype=torch.float32, device=device)
        dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
        self.sum, self.count = total.tolist()
        # Guarded: every rank may have processed zero samples.
        self.avg = self._mean()

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

    def summary(self):
        """Return the summary text selected by ``summary_type``.

        Raises:
            ValueError: if ``summary_type`` is not a ``Summary`` member.
        """
        if self.summary_type is Summary.NONE:
            fmtstr = ''
        elif self.summary_type is Summary.AVERAGE:
            fmtstr = '{name} {avg:.3f}'
        elif self.summary_type is Summary.SUM:
            fmtstr = '{name} {sum:.3f}'
        elif self.summary_type is Summary.COUNT:
            fmtstr = '{name} {count:.3f}'
        else:
            raise ValueError('invalid summary type %r' % self.summary_type)

        return fmtstr.format(**self.__dict__)
def accuracy(output, target, topk=(1,)):
    """Return top-k accuracy, in percent, for every k listed in ``topk``.

    ``output`` is ranked along dimension 1 and compared against ``target``.
    The result is a list with one single-element tensor per requested k, in
    the same order as ``topk``.
    """
    with torch.no_grad():
        largest_k = max(topk)
        num_samples = target.size(0)

        # Indices of the ``largest_k`` highest scores per row, best first,
        # transposed so that row r holds every sample's rank-r prediction.
        ranked = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        percent_per_hit = 100.0 / num_samples
        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(percent_per_hit)
            for k in topk
        ]
+ +### To run the sample + +* Install rocal_pybind + +```shell +python3 train.py +``` + diff --git a/docs/examples/pytorch/test_training.py b/docs/examples/pytorch/toynet_training/train.py similarity index 100% rename from docs/examples/pytorch/test_training.py rename to docs/examples/pytorch/toynet_training/train.py diff --git a/docs/examples/tf/pets_training/README.md b/docs/examples/tf/pets_training/README.md new file mode 100644 index 000000000..87bef65f1 --- /dev/null +++ b/docs/examples/tf/pets_training/README.md @@ -0,0 +1,20 @@ +## Running Pets Training Example + +### Building the required TF Rocm docker + +* Use the instructions in the [docker section](https://github.com/ROCm/rocAL/docker) to build the required [Tensorflow docker](https://github.com/ROCm/rocAL/docker/rocal-with-tensorflow.dockerfile) +* Upgrade pip to the latest version. + +### Running the training + +* To setup dataset, run + +```shell +bash download_and_preprocess_dataset.sh +``` + +* To run this example, just execute: + +```shell +python3 train.py +``` diff --git a/docs/examples/tf/pets_training/create_pet_tf_record.py b/docs/examples/tf/pets_training/create_pet_tf_record.py new file mode 100644 index 000000000..79476a013 --- /dev/null +++ b/docs/examples/tf/pets_training/create_pet_tf_record.py @@ -0,0 +1,314 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +r"""Convert the Oxford pet dataset to TFRecord for object_detection. + +See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar + Cats and Dogs + IEEE Conference on Computer Vision and Pattern Recognition, 2012 + http://www.robots.ox.ac.uk/~vgg/data/pets/ + +Example usage: + python object_detection/dataset_tools/create_pet_tf_record.py \ + --data_dir=/home/user/pet \ + --output_dir=/home/user/pet/output +""" + +import hashlib +import io +import logging +import os +import random +import re + +import contextlib2 +from lxml import etree +import PIL.Image +from six.moves import range +import tensorflow.compat.v1 as tf + +flags = tf.app.flags +flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.') +flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.') +flags.DEFINE_string('label_map_path', 'pet_label_map.pbtxt', + 'Path to label map proto') +flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards') + +FLAGS = flags.FLAGS + + +def open_sharded_output_tfrecords(exit_stack, base_path, num_shards): + """Opens all TFRecord shards for writing and adds them to an exit stack. + + Args: + exit_stack: A context2.ExitStack used to automatically closed the TFRecords + opened in this function. + base_path: The base path for all shards + num_shards: The number of shards + + Returns: + The list of opened TFRecords. Position k in the list corresponds to shard k. 
def get_class_name_from_filename(file_name):
    """Gets the class name from a file.

    The name must look like ``<class>_<digits>.jpg`` (matched
    case-insensitively, anchored at the start of the string); the class part
    may itself contain letters and underscores.

    Args:
      file_name: The file name to get the class name from.
                 ie. "american_pit_bull_terrier_105.jpg"

    Returns:
      A string of the class name.

    Raises:
      ValueError: if ``file_name`` does not follow the expected pattern.
    """
    match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
    if match is None:
        # re.match yields None on a miss; raise ValueError explicitly rather
        # than let ``None.groups()`` surface as an AttributeError.
        raise ValueError(
            'Cannot derive class name from file name: %r' % (file_name,))
    return match.group(1)
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory):
    """Build a tf.Example for one image from its parsed annotation dict.

    The image bytes are read from ``image_subdirectory/data['filename']`` and
    stored as-is together with their SHA-256 hex digest, the width and height
    taken from ``data['size']``, and (when the annotation has an ``object``
    entry) a single class derived from the file name.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running recursive_parse_xml_to_dict)
      mask_path: String path to PNG encoded mask. Not read by this function.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying the directory holding the actual
        image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    file_name = data['filename']
    image_path = os.path.join(image_subdirectory, file_name)
    with tf.gfile.GFile(image_path, 'rb') as jpeg_file:
        raw_jpeg = jpeg_file.read()

    # PIL is used only to confirm the payload really is a JPEG.
    decoded = PIL.Image.open(io.BytesIO(raw_jpeg))
    if decoded.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    digest = hashlib.sha256(raw_jpeg).hexdigest()

    size = data['size']
    width = int(size['width'])
    height = int(size['height'])

    label_ids = []
    label_names = []
    if 'object' in data:
        # One label per image, taken from the file name.
        breed = get_class_name_from_filename(file_name)
        label_names.append(breed.encode('utf8'))
        label_ids.append(label_map_dict[breed])

    features = {}
    features['image/height'] = int64_feature(height)
    features['image/width'] = int64_feature(width)
    features['image/filename'] = bytes_feature(file_name.encode('utf8'))
    features['image/source_id'] = bytes_feature(file_name.encode('utf8'))
    features['image/key/sha256'] = bytes_feature(digest.encode('utf8'))
    features['image/encoded'] = bytes_feature(raw_jpeg)
    features['image/format'] = bytes_feature('jpeg'.encode('utf8'))
    features['image/object/class/text'] = bytes_list_feature(label_names)
    features['image/object/class/label'] = int64_list_feature(label_ids)

    return tf.train.Example(features=tf.train.Features(feature=features))
def main(_):
    """Split the Oxford-IIIT Pet ``trainval`` list 70/30 and write both splits.

    Reads ``<data_dir>/annotations/trainval.txt``, shuffles it with a fixed
    seed, and writes sharded ``pet_faces_train.record`` /
    ``pet_faces_val.record`` files under ``FLAGS.output_dir``.
    """
    # Breed names in label-id order; ids start at 1.
    # NOTE(review): the map is hard-coded here, FLAGS.label_map_path is not read.
    breeds = (
        'Abyssinian', 'american_bulldog', 'american_pit_bull_terrier',
        'basset_hound', 'beagle', 'Bengal', 'Birman', 'Bombay', 'boxer',
        'British_Shorthair', 'chihuahua', 'Egyptian_Mau',
        'english_cocker_spaniel', 'english_setter', 'german_shorthaired',
        'great_pyrenees', 'havanese', 'japanese_chin', 'keeshond',
        'leonberger', 'Maine_Coon', 'miniature_pinscher', 'newfoundland',
        'Persian', 'pomeranian', 'pug', 'Ragdoll', 'Russian_Blue',
        'saint_bernard', 'samoyed', 'scottish_terrier', 'shiba_inu',
        'Siamese', 'Sphynx', 'staffordshire_bull_terrier', 'wheaten_terrier',
        'yorkshire_terrier',
    )
    root = FLAGS.data_dir
    label_map_dict = {breed: label_id
                      for label_id, breed in enumerate(breeds, start=1)}

    logging.info('Reading from Pet dataset.')
    image_dir = os.path.join(root, 'images')
    annotations_dir = os.path.join(root, 'annotations')
    examples_list = read_examples_list(
        os.path.join(annotations_dir, 'trainval.txt'))

    # Test images are not included in the downloaded data set, so the
    # train/validation split is made here, reproducibly.
    random.seed(42)
    random.shuffle(examples_list)
    split_at = int(0.7 * len(examples_list))
    train_examples = examples_list[:split_at]
    val_examples = examples_list[split_at:]
    logging.info('%d training and %d validation examples.',
                 len(train_examples), len(val_examples))

    outputs = (
        (os.path.join(FLAGS.output_dir, 'pet_faces_train.record'), train_examples),
        (os.path.join(FLAGS.output_dir, 'pet_faces_val.record'), val_examples),
    )
    for record_path, subset in outputs:
        create_tf_record(
            record_path,
            FLAGS.num_shards,
            label_map_dict,
            annotations_dir,
            image_dir,
            subset)
tar xzvf images.tar.gz tar xzvf annotations.tar.gz mkdir tf_pets_records -git clone https://github.com/swetha097/rocALmodels.git -cd rocALmodels -git checkout TF_V2 -cd ../ -python3 rocALmodels/models/research/object_detection/dataset_tools/create_pet_tf_record.py --data_dir=./ --output_dir=tf_pets_records/ --label_map_path=rocALmodels/models/research/object_detection/data/pet_label_map.pbtxt +python3 create_pet_tf_record.py --data_dir=./ --output_dir=tf_pets_records/ --label_map_path=pet_label_map.pbtxt cd tf_pets_records mkdir train mv pet_faces_train.record-0000* train diff --git a/docs/examples/tf/pets_training/pet_label_map.pbtxt b/docs/examples/tf/pets_training/pet_label_map.pbtxt new file mode 100644 index 000000000..7a91455ef --- /dev/null +++ b/docs/examples/tf/pets_training/pet_label_map.pbtxt @@ -0,0 +1,184 @@ +item { + id: 1 + name: 'Abyssinian' +} + +item { + id: 2 + name: 'american_bulldog' +} + +item { + id: 3 + name: 'american_pit_bull_terrier' +} + +item { + id: 4 + name: 'basset_hound' +} + +item { + id: 5 + name: 'beagle' +} + +item { + id: 6 + name: 'Bengal' +} + +item { + id: 7 + name: 'Birman' +} + +item { + id: 8 + name: 'Bombay' +} + +item { + id: 9 + name: 'boxer' +} + +item { + id: 10 + name: 'British_Shorthair' +} + +item { + id: 11 + name: 'chihuahua' +} + +item { + id: 12 + name: 'Egyptian_Mau' +} + +item { + id: 13 + name: 'english_cocker_spaniel' +} + +item { + id: 14 + name: 'english_setter' +} + +item { + id: 15 + name: 'german_shorthaired' +} + +item { + id: 16 + name: 'great_pyrenees' +} + +item { + id: 17 + name: 'havanese' +} + +item { + id: 18 + name: 'japanese_chin' +} + +item { + id: 19 + name: 'keeshond' +} + +item { + id: 20 + name: 'leonberger' +} + +item { + id: 21 + name: 'Maine_Coon' +} + +item { + id: 22 + name: 'miniature_pinscher' +} + +item { + id: 23 + name: 'newfoundland' +} + +item { + id: 24 + name: 'Persian' +} + +item { + id: 25 + name: 'pomeranian' +} + +item { + id: 26 + name: 'pug' +} + +item { + id: 27 + 
name: 'Ragdoll' +} + +item { + id: 28 + name: 'Russian_Blue' +} + +item { + id: 29 + name: 'saint_bernard' +} + +item { + id: 30 + name: 'samoyed' +} + +item { + id: 31 + name: 'scottish_terrier' +} + +item { + id: 32 + name: 'shiba_inu' +} + +item { + id: 33 + name: 'Siamese' +} + +item { + id: 34 + name: 'Sphynx' +} + +item { + id: 35 + name: 'staffordshire_bull_terrier' +} + +item { + id: 36 + name: 'wheaten_terrier' +} + +item { + id: 37 + name: 'yorkshire_terrier' +} \ No newline at end of file diff --git a/docs/examples/tf/pets_training/train.py b/docs/examples/tf/pets_training/train.py new file mode 100755 index 000000000..c804ecc64 --- /dev/null +++ b/docs/examples/tf/pets_training/train.py @@ -0,0 +1,184 @@ +# Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
def _build_pipeline(records_dir, image_size, feature_key_map, reader_type,
                    **pipeline_kwargs):
    """Create and build one rocAL pipeline reading TFRecords from ``records_dir``.

    The graph is: TFRecord reader -> image decoder (RGB) -> resize to
    ``image_size`` -> crop/mirror/normalize with a 0.5-probability coin-flip
    mirror.  ``pipeline_kwargs`` are forwarded to ``Pipeline`` on top of the
    arguments shared by the train and validation pipelines.
    """
    pipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=8,
                    rocal_cpu=RUN_ON_HOST, tensor_layout=types.NHWC,
                    tensor_dtype=types.FLOAT, **pipeline_kwargs)
    with pipe:
        inputs = fn.readers.tfrecord(
            path=records_dir, reader_type=reader_type,
            user_feature_key_map=feature_key_map,
            features={
                'image/encoded': tf.io.FixedLenFeature((), tf.string, ""),
                'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
                'image/filename': tf.io.FixedLenFeature((), tf.string, "")
            }
        )
        jpegs = inputs["image/encoded"]
        # Looked up as in the original script but not passed to set_outputs;
        # presumably the iterator supplies labels itself — TODO confirm.
        labels = inputs["image/class/label"]
        images = fn.decoders.image(
            jpegs, user_feature_key_map=feature_key_map,
            output_type=types.RGB, path=records_dir)
        resized = fn.resize(
            images, resize_width=image_size[0], resize_height=image_size[1])
        # NOTE(review): the same random mirror is applied to validation data
        # as to training data — confirm this is intended.
        flip_coin = fn.random.coin_flip(probability=0.5)
        cmn_images = fn.crop_mirror_normalize(
            resized, crop=(image_size[1], image_size[0]),
            mean=[127.5, 127.5, 127.5],
            std=[127.5, 127.5, 127.5],
            mirror=flip_coin,
            output_dtype=types.FLOAT,
            output_layout=types.NHWC)
        pipe.set_outputs(cmn_images)
    pipe.build()
    return pipe


def main():
    """Train a MobileNetV2-based classifier on the pet TFRecords for 10 epochs.

    The MobileNetV2 base is frozen and topped with global average pooling and
    a ``NUM_CLASSES``-way dense layer.  Data comes from two rocAL pipelines
    (train / val); each epoch runs a manual gradient-tape training pass and a
    validation pass, reporting loss and accuracy on progress bars.
    """
    print("\n-----------------------------------------------------------------------------------------")
    print('TF records (train) are located in %s' % TRAIN_RECORDS_DIR)
    print('TF records (val) are located in %s' % VAL_RECORDS_DIR)
    print("-----------------------------------------------------------------------------------------\n")

    image_size = [128, 128, 3]
    base_model = tf.keras.applications.MobileNetV2(input_shape=image_size,
                                                   include_top=False,
                                                   weights='imagenet')
    base_model.trainable = False
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(NUM_CLASSES)
    ])

    model.summary()
    optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=['acc'])

    TFRecordReaderType = 0
    # Maps the keys used in this script to the keys stored in the records.
    featureKeyMap = {
        'image/encoded': 'image/encoded',
        'image/class/label': 'image/object/class/label',
        'image/filename': 'image/filename'
    }

    # Only the training pipeline passes explicit mean/std to Pipeline.
    trainPipe = _build_pipeline(TRAIN_RECORDS_DIR, image_size, featureKeyMap,
                                TFRecordReaderType,
                                mean=[0, 0, 0], std=[255, 255, 255])
    valPipe = _build_pipeline(VAL_RECORDS_DIR, image_size, featureKeyMap,
                              TFRecordReaderType)

    trainIterator = ROCALIterator(trainPipe)
    valIterator = ROCALIterator(valPipe)

    # One metric object is shared by the train and validation passes; it is
    # reset before each of them.
    accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_acc')
    train_batches = math.ceil(len(trainIterator) / TRAIN_BATCH_SIZE)
    val_batches = math.ceil(len(valIterator) / TRAIN_BATCH_SIZE)

    for epoch in range(10):
        print('Epoch :', epoch + 1)

        # ---- training pass ----
        accuracy_metric.reset_states()
        pbar = tf.keras.utils.Progbar(target=train_batches, stateful_metrics=[])
        step = 0
        # Defined up front so the final pbar.update cannot hit an unbound
        # name when the iterator yields no batch.
        results = {}
        for ([train_image_ndArray], train_label_ndArray) in trainIterator:
            # Record labels start at 1; the loss expects zero-based classes.
            train_label_ndArray = train_label_ndArray - 1
            with tf.GradientTape() as tape:
                prediction = model(train_image_ndArray, training=True)
                loss = loss_fn(train_label_ndArray, prediction)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            accuracy_metric.update_state(train_label_ndArray, prediction)
            results = {'loss': loss, 'train_acc': accuracy_metric.result()}
            step += 1
            pbar.update(step, values=results.items(), finalize=False)
        pbar.update(step, values=results.items(), finalize=True)
        trainIterator.reset()

        # ---- validation pass ----
        accuracy_metric.reset_states()
        pbar = tf.keras.utils.Progbar(target=val_batches, stateful_metrics=[])
        step = 0
        results = {}
        for ([val_image_ndArray], val_label_ndArray) in valIterator:
            val_label_ndArray = val_label_ndArray - 1
            prediction = model(val_image_ndArray, training=False)
            accuracy_metric.update_state(val_label_ndArray, prediction)
            results = {'val_acc': accuracy_metric.result()}
            step += 1
            pbar.update(step, values=results.items(), finalize=False)
        pbar.update(step, values=results.items(), finalize=True)
        valIterator.reset()
__resnet50: -Run MLPerf Resnet50 classification training with rocAL +Run Resnet50 classification training with rocAL ======================================================= #. Ensure you have downloaded ``ILSVRC2012_img_val.tar`` (6.3GB) and ``ILSVRC2012_img_train.tar`` (138 GB) files and unzip into ``train`` and ``val`` folders -#. Build `MIVisionX Pytorch docker `_ +#. Build `rocAL Pytorch docker `_ * Run the docker image @@ -208,7 +208,7 @@ Run MLPerf Resnet50 classification training with rocAL sudo docker run -it -v :/data -v /:/dockerx -w /dockerx --privileged --device=/dev/kfd --device=/dev/dri --group-add video --shm-size=4g --ipc="host" --network=host .. note:: - Refer to the `docker `_ page for prerequisites and information on building the docker image. + Refer to the `docker `_ page for prerequisites and information on building the docker image. Optional: Map localhost directory on the docker image @@ -217,16 +217,4 @@ Run MLPerf Resnet50 classification training with rocAL #. To see and run a sample training script, refer to `rocAL Imagenet example `_. -.. code-block:: shell - - git clone -b mlperf-v1.1-rocal https://github.com/rrawther/MLPerf-mGPU - -#. Modify ``RN50_AMP_LARS_8GPUS_NCHW.sh`` or ``RN50_AMP_LARS_8GPUS_NHWC.sh`` to reflect correct path for imagenet directory -#. Run appropriate script as needed: - -.. code-block:: shell - - ./RN50_AMP_LARS_8GPUS_NCHW.sh - (or) - ./RN50_AMP_LARS_8GPUS_NHWC.sh diff --git a/docs/how-to/overview.rst b/docs/how-to/overview.rst index fc3d674ad..11e108189 100644 --- a/docs/how-to/overview.rst +++ b/docs/how-to/overview.rst @@ -109,4 +109,4 @@ Decoders Description ====================== ======================================== To see examples demonstrating the usage of decoders and readers, see -`MIVisionX rocAL Python Binding Examples `_. +`rocAL Python Examples `_. 
diff --git a/docs/how-to/using-with-cpp.rst b/docs/how-to/using-with-cpp.rst index 50704c60b..cb3a02188 100644 --- a/docs/how-to/using-with-cpp.rst +++ b/docs/how-to/using-with-cpp.rst @@ -92,7 +92,7 @@ See `rocalRelease example `_. +The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing `C++ API `_. .. code-block:: cpp :caption: Example Image Augmentation @@ -131,4 +131,4 @@ The example below shows how to create a pipeline, read JPEG images, perform cert } -To see a sample image augmentation application in C++, see `Image Augmentation `_. +To see a sample image augmentation application in C++, see `Image Augmentation `_. diff --git a/docs/how-to/using-with-python.rst b/docs/how-to/using-with-python.rst index 9252f1316..46ec1209f 100644 --- a/docs/how-to/using-with-python.rst +++ b/docs/how-to/using-with-python.rst @@ -176,7 +176,7 @@ Building the Pipeline Building the pipeline ensures that all operators are validated with the corresponding inputs and outputs. -To build the pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L166` +To build the pipeline, see `https://github.com/ROCm/rocAL/blob/master/tests/python_api/unit_test.py#L166` .. code-block:: python :caption: Build the Pipeline @@ -245,6 +245,7 @@ To run the pipeline, see: images, labels = pipe_out show_images(images) + rocAL Data Types ========================= diff --git a/docs/install/install.rst b/docs/install/install.rst index 920a0640f..c51546b24 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -227,3 +227,4 @@ Test package will install ctest module to test rocAL. 
Follow below steps to test mkdir rocAL-test && cd rocAL-test cmake /opt/rocm/share/rocal/test/ ctest -VV + diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in index b1eb21341..c316de276 100644 --- a/docs/sphinx/requirements.in +++ b/docs/sphinx/requirements.in @@ -1 +1 @@ -rocm-docs-core[api_reference]==1.4.1 +rocm-docs-core[api_reference]==1.5.1 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt index b0d156078..cae8bc379 100644 --- a/docs/sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -112,7 +112,7 @@ requests==2.32.2 # via # pygithub # sphinx -rocm-docs-core[api-reference]==1.4.1 +rocm-docs-core[api-reference]==1.5.1 # via -r requirements.in smmap==5.0.1 # via gitdb diff --git a/docs/user_guide/ch1.md b/docs/user_guide/ch1.md new file mode 100644 index 000000000..c552757b1 --- /dev/null +++ b/docs/user_guide/ch1.md @@ -0,0 +1,83 @@ +# Chapter 1: Overview + +## 1.1 Overview + +The performance of Deep Learning applications depends upon the efficiency of performance pipelines that can load and preprocess data efficiently to provide a high throughput. The pipelines are typically used to perform tasks such as loading and decoding data, perform a variety of augmentations, perform color-format conversions, etc., before passing the data for training or inference. The Deep Learning frameworks also require the pipelines to support multiple data formats and augmentations to adapt to a variety of datasets and models. + +This can be achieved by creating processing pipelines that fully utilize the underlying hardware capabilities. + +ROCm™ Augmentation Library (rocAL™) allows the user to create hybrid pipelines to maximize the throughput for Machine Learning applications. It helps to create pipelines that can efficiently process images, videos, and a variety of storage formats. The user can program these pipelines using C or Python API. rocAL significantly accelerates data processing on AMD processors. 
+ +To optimize the preprocessing pipeline, rocAL utilizes the following features: + +- Prefetching: Loads the data for the next batch while the existing batch is under process. This parallelization allows more batches to be processed in less time. +- Hybrid execution: Utilizes both the CPU and GPU simultaneously. For example, decoding the data on the CPU while running the training on the GPU.  +- Hardware decoding: Uses the AMD VCN and VA-API to efficiently decode data on the hardware.  +- Batch processing: Groups and processes the data together as a batch. + +![The Role of Pipelines in Deep Learning Applications](../data/ch1_pipelines.png) + +Figure 1. The Role of Pipelines in Deep Learning Applications + +## 1.2 Key Components + +- CPU- or GPU-based implementation for each augmentation and data_loader nodes +- Python and C APIs for easy integration and testing +- Multiple framework support and portable on PyTorch, TensorFlow, and MXNet +- Flexible graphs to help the user create custom pipelines +- Multicore host and multi-GPU execution for the graph +- Support for various augmentations such as fish-eye, water, jitter, non-linear blend, etc., using the AMD ROCm Performance Primitive (RPP) library +- Support for classification, object detection, segmentation, and keypoint data pipelines + +## 1.3 Third-party Integration + +rocAL provides support for many operators. The module imports are designed like other available data loaders for a smooth integration with training frameworks. The rocal_pybind package provides support for integrating with PyTorch, TensorFlow, and Caffe2. rocAL also supports many data formats such as FileReader, COCO Reader, TFRecordReader, and Lightning Memory-Mapped Database (LMDB), thus offering a unified approach to framework integration. + +## 1.4 rocAL Operators + +rocAL operators offer the flexibility to run on CPU or GPU for building hybrid pipelines. They also support classification and object detection on the workload.
Some of the useful operators supported by rocAL are listed below: + +- Augmentations: These are used to enhance the data set by adding effects to the original images. To use the augmentations, import the instance of amd.rocal.fn. into the Python script. These augmentation APIs further call the RPP kernels underneath (HIP/HOST) depending on the backend used to build RPP and rocAL.  + +### Table 1. Augmentations Available through rocAL + +| Color Augmentations | Effects Augmentations | Geometry Augmentations | +| ------------------- | --------------------- | ------------------------------------- | +| Blend | Fog | Crop | +| Blur | Jitter | Crop Mirror Normalization | +| Brightness | Pixelization | Crop Resize | +| Color Temperature | Raindrops | Fisheye Lens | +| Color Twist | Snowflakes | Flip (Horizontal, Vertical, and Both) | +| Contrast | Salt and Pepper Noise | Lens Correction | +| Exposure | | Random Crop | +| Gamma | | Resize | +| Hue | | Resize Crop Mirror | +| Saturation | | Rotation | +| Vignette | | Warp Affine | + +- Readers: These are used to read and understand the different types of datasets and their metadata. Some examples of readers are list of files with folders, LMDB, TFRecord, and JSON file for metadata. To use the readers, import the instance of amd.rocal.readers into the Python script. + +### Table 2. 
Readers Available through rocAL + +| Readers | Description | +| --------------------------------------- | --------------------------------------------------- | +| File Reader | Reads images from a list of files in a folder(s) | +| Video Reader | Reads videos from a list of files in a folder(s) | +| Caffe LMDB Reader | Reads (key, value) pairs from Caffe LMDB | +| Caffe2 LMDB Reader | Reads (key, value) pairs from Caffe2 LMDB | +| COCO Reader – file source and keypoints | Reads images and JSON annotations from COCO dataset | +| TFRecord Reader | Reads from a TFRecord dataset | +| MXNet Reader | Reads from a RecordIO dataset | + +- Decoders: These are used to support different input formats of images and videos. Decoders extract data from the datasets that are in compressed formats such as JPEG, MP4, etc. To use the decoders, import the instance of amd.rocal.decoders into the Python script. + +### Table 3. Decoders Available through rocAL + +| Decoders | Description | +| ----------------- | -------------------------------------- | +| Image | Decodes JPEG images | +| Image_raw | Decodes images in raw format | +| Image_random_crop | Decodes and randomly crops JPEG images | +| Image_slice | Decodes and slices JPEG images | + +To see examples demonstrating the usage of decoders and readers, [click here](https://github.com/ROCm/rocAL/tree/master/docs/examples) diff --git a/docs/user_guide/ch2.md b/docs/user_guide/ch2.md new file mode 100644 index 000000000..b54eecbde --- /dev/null +++ b/docs/user_guide/ch2.md @@ -0,0 +1,21 @@ +# Chapter 2: Architecture Components + +The rocAL architecture comprises rocAL Master-Graph and RPP as major components. + +## 2.1 rocAL Master-Graph + +The rocAL pipeline is built on top of rocAL Master-Graph. 
The architectural components of rocAL Master-Graph are described below: + +- Loader and Processing Modules: The rocAL Master-Graph consists of two main architectural components, a loader module to load data and a processing module to process data. The loader module is clearly separated from the processing module for a seamless execution without any blockages. The Prefetch queue helps to load data ahead of time and can be configured with user-defined parameters. The Output routine runs in parallel with the load routine, as both have separate queues for storing the result. + +![rocAL Master-Graph Architecture](../data/ch2_arch.png) + +Figure 2. rocAL Master-Graph Architecture + +- rocAL Pipeline: The rocAL pipeline holds great significance, as it contains all the information required to create a rocAL graph with data loader, augmentation nodes, and the output format. Once a rocAL pipeline is created, the user can build, run, and call an iterator to get the next batch of data into the pipeline. The user can install the rocAL pipeline using the rocAL Python package. It supports many operators for data loading and data augmentation. + +## 2.2 ROCm Performance Primitive Library + +RPP is a comprehensive high-performance computer vision library optimized for the AMD CPU and GPU with HIP and OpenCL backends. It is available under the AMD ROCm software platform. It provides low-level functionality for all rocAL operators for single, image, and tensor datatypes. RPP provides an extensive library for vision augmentations that includes vision functions, color augmentations, filter augmentations, geometric distortions, and a few more features. + +For more information on RPP along with the list of supported kernels, see https://github.com/ROCm/rpp. 
diff --git a/docs/user_guide/ch3.md b/docs/user_guide/ch3.md new file mode 100644 index 000000000..e74ad45e4 --- /dev/null +++ b/docs/user_guide/ch3.md @@ -0,0 +1,42 @@ +# Chapter 3: Installation + +This chapter provides information about the installation of rocAL and related packages. + +## 3.1 Prerequisites + +* Linux distribution +* [AMD RPP](https://github.com/ROCm/rpp) +* [AMD OpenVX™](https://github.com/ROCm/rocAL/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media` +* [Turbo JPEG](https://libjpeg-turbo.org/) - Version `3.0` or higher +* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher +* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher +* [LMDB Library](http://www.lmdb.tech/doc/) +* [RapidJSON](https://github.com/Tencent/rapidjson) +* [PyBind11](https://github.com/pybind/pybind11) + +## 3.2 Platform Support + +To see the list of supported platforms for rocAL, see the [ROCm Installation Guide](https://rocm.docs.amd.com). + +## 3.3 Installing rocAL + +To build and install the rocAL library, follow the instructions given [here](https://github.com/ROCm/rocAL#build-instructions) + +## 3.4 Installing rocAL Python Package + +The rocAL Python package (rocal_pybind) is a separate redistributable wheel. rocal_pybind, which is created using Pybind11, enables data transfer between rocAL C++ API and Python API. With the help of rocal_pybind.so wrapper library, the rocAL functionality, which is primarily in C/C++, can be effectively used in Python. +The Python package supports PyTorch, TensorFlow, Caffe2, and data readers available for various formats such as FileReader, COCO Reader, TFRecord Reader, and CaffeReader.
+ +To build and install the Python package, follow the PyPackageInstall instructions [here](https://github.com/ROCm/rocAL#build-instructions) + +## 3.5 Installing rocAL Using Framework Dockers + +To test the rocAL Python APIs using PyTorch or TensorFlow, we recommend building a docker with rocAL and ROCm using any of the links below: + +* [rocAL PyTorch docker](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-pytorch.dockerfile) +* [rocAL TensorFlow docker](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-tensorflow.dockerfile) + +To use rocAL on Ubuntu, use the following dockers: + +* [rocAL on Ubuntu20](https://github.com/ROCm/rocAL/blob/master/docker/rocAL-on-ubuntu20.dockerfile) +* [rocAL on Ubuntu22](https://github.com/ROCm/rocAL/blob/master/docker/rocAL-on-ubuntu22.dockerfile) diff --git a/docs/user_guide/ch5.md b/docs/user_guide/ch5.md new file mode 100644 index 000000000..2c0f325ff --- /dev/null +++ b/docs/user_guide/ch5.md @@ -0,0 +1,164 @@ +# Chapter 5: Framework Integration + +rocAL improves the pipeline efficiency by preprocessing the data and parallelizing the data loading on the CPU and running trainings on the GPU. To separate the data loading from the training, rocAL provides TensorFlow and PyTorch iterators and readers as a plugin. The integration process with PyTorch and TensorFlow is described in the sections below. + +## 5.1 PyTorch Integration + +This section demonstrates how to use rocAL with PyTorch for training. Follow the steps below to get started. + +## 5.1.1 Build PyTorch Docker + +Build a rocAL PyTorch docker by following the steps here. + +## 5.1.2 Create Data-loading Pipeline + +Follow these steps: + +1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L28). + +```python +from amd.rocal.plugin.pytorch import ROCALClassificationIterator +from amd.rocal.pipeline import Pipeline +import amd.rocal.fn as fn +import amd.rocal.types as types +``` + +2.
See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the [pipeline](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L38). + +```python +def trainPipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16): + pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, + rocal_cpu=rocal_cpu, tensor_dtype = types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, + prefetch_queue_depth = 7) + with pipe: + jpegs, labels = fn.readers.file(file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + rocal_device = 'cpu' if rocal_cpu else 'gpu' + # decode = fn.decoders.image(jpegs, output_type=types.RGB,file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + decode = fn.decoders.image_slice(jpegs, output_type=types.RGB, + file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True) + res = fn.resize(decode, resize_x=224, resize_y=224) + flip_coin = fn.random.coin_flip(probability=0.5) + cmnp = fn.crop_mirror_normalize(res, device="gpu", + output_dtype=types.FLOAT, + output_layout=types.NCHW, + crop=(crop, crop), + mirror=flip_coin, + image_type=types.RGB, + mean=[0.485,0.456,0.406], + std=[0.229,0.224,0.225]) + if(one_hot): + _ = fn.one_hot(labels, num_classes) + pipe.set_outputs(cmnp) + print('rocal "{0}" variant'.format(rocal_device)) + return pipe +``` + +3. Import libraries for PyTorch. + +```python +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +``` + +4. Call the training pipeline with rocAL classification data [loader](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L78). 
+ +```python +def get_pytorch_train_loader(self): + print("in get_pytorch_train_loader function") + pipe_train = trainPipeline(self.data_path, self.batch_size, self.num_classes, self.one_hot, self.local_rank, + self.world_size, self.num_thread, self.crop, self.rocal_cpu, self.fp16) + pipe_train.build() + train_loader = ROCALClassificationIterator(pipe_train, device="cpu" if self.rocal_cpu else "cuda", device_id = self.local_rank) +``` + +5. Run the [training](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L179). + +```python +# Training loop + for epoch in range(10): # loop over the dataset multiple times + print("\n epoch:: ",epoch) + running_loss = 0.0 + + for i, (inputs,labels) in enumerate(train_loader, 0): + + sys.stdout.write("\r Mini-batch " + str(i)) + # print("Images",inputs) + # print("Labels",labels) + inputs, labels = inputs.to(device), labels.to(device) +``` + +6. Run the training as shown [here](https://github.com/ROCm/rocAL/tree/develop/rocAL/docs/examples/pytorch/toynet_training). + +To see a sample training script, click [here](https://github.com/ROCm/rocAL/tree/develop/rocAL/docs/examples/pytorch/toynet_training). + +## 5.2 TensorFlow Integration + +This section demonstrates how to use rocAL with TensorFlow for training. Follow the steps below to get started. + +## 5.2.1 Build TensorFlow Docker + +Build a rocAL TensorFlow docker by following the steps here. + +## 5.2.2 Create Data-loading Pipeline + +Follow these steps: + +1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/docs/examples/tf/pets_training/train.py#L22). + +```python +from amd.rocal.plugin.tf import ROCALIterator +from amd.rocal.pipeline import Pipeline +import amd.rocal.fn as fn +import amd.rocal.types as types +``` + +2. See a rocAL pipeline for TensorFlow below.
It reads data from the TFRecords using TFRecord Reader and uses fn.decoders.image to decode the raw [images](https://github.com/ROCm/rocAL/blob/master/examples/tf/pets_training/train.py#L128). + +```python +trainPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=1, rocal_cpu=RUN_ON_HOST, tensor_layout = types.NHWC) + with trainPipe: + inputs = fn.readers.tfrecord(path=TRAIN_RECORDS_DIR, index_path = "", reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap, + features={ + 'image/encoded':tf.io.FixedLenFeature((), tf.string, ""), + 'image/class/label':tf.io.FixedLenFeature([1], tf.int64, -1), + 'image/filename':tf.io.FixedLenFeature((), tf.string, "") + } + ) + jpegs = inputs["image/encoded"] + images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=TRAIN_RECORDS_DIR) + resized = fn.resize(images, resize_x=crop_size[0], resize_y=crop_size[1]) + flip_coin = fn.random.coin_flip(probability=0.5) + cmn_images = fn.crop_mirror_normalize(resized, crop=(crop_size[1], crop_size[0]), + mean=[0,0,0], + std=[255,255,255], + mirror=flip_coin, + output_dtype=types.FLOAT, + output_layout=types.NHWC, + pad_output=False) + trainPipe.set_outputs(cmn_images) +trainPipe.build() +``` + +3. Import libraries for [TensorFlow](https://github.com/ROCm/rocAL/blob/master/examples/tf/pets_training/train.py#L174). + +```python +import tensorflow.compat.v1 as tf +tf.compat.v1.disable_v2_behavior() +import tensorflow_hub as hub +# Call the train pipeline + trainIterator = ROCALIterator(trainPipe) +# Run the training session + i = 0 + with tf.compat.v1.Session(graph = train_graph) as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + while i < NUM_TRAIN_STEPS: + + + for t, (train_image_ndArray, train_label_ndArray) in enumerate(trainIterator, 0): + train_label_one_hot_list = get_label_one_hot(train_label_ndArray) +``` + +4.
Run the training as shown [here](https://github.com/ROCm/rocAL/tree/master/rocAL/examples/tf/pets_training/). + +To see a sample training script, click [here](https://github.com/ROCm/rocAL/tree/master/rocAL/examples/tf/pets_training/). diff --git a/docs/user_guide/ch6.md b/docs/user_guide/ch6.md new file mode 100644 index 000000000..d85d9586e --- /dev/null +++ b/docs/user_guide/ch6.md @@ -0,0 +1,109 @@ +# Chapter 6: Using with C++ API + +This chapter explains how to create a pipeline and add augmentations using C++ APIs directly. The Python APIs also call these C++ APIs internally using the Python pybind utility as explained in the section Installing rocAL Python Package. + +## 6.1 C++ Common APIs + +The following sections list the commonly used C++ APIs. + +## 6.1.1 rocalCreate + +Use: To create the pipeline + +Returns: The context for the pipeline + +Arguments: + +* RocalProcessMode: Defines whether rocal data loading should be on the CPU or [GPU](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L91) + +```c++ +RocalProcessMode::ROCAL_PROCESS_GPU +RocalProcessMode::ROCAL_PROCESS_CPU +``` + +* RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or [FP16](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L124) + +```c++ +RocalTensorOutputType::ROCAL_FP32 +RocalTensorOutputType::ROCAL_FP16 +``` + +[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L41): + +```c++ +extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); +``` + +## 6.1.2 rocalVerify + +Use: To verify the graph for all the inputs and outputs + +Returns: A status code indicating the success or failure + +[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L47): + 
+```c++ +extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); +``` + +## 6.1.3 rocalRun + +Use: To process and run the built and verified graph + +Returns: A status code indicating the success or failure + +[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L52): + +```c++ +extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); +``` + +## 6.1.4 rocalRelease + +Use: To free all the resources allocated during the graph creation process + +Returns: A status code indicating the success or failure + +[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L57): + +```c++ +extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context); +``` + +## 6.1.5 Image Augmentation Using C++ API + +The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing C++ [APIs](https://github.com/ROCm/rocAL/blob/develop/tests/cpp_api/image_augmentation/image_augmentation.cpp#L103). + +```c++ +auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1); +input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type); + +image0 = rocalResize(handle, input1, resize_w, resize_h, true); + +RocalImage image1 = rocalRain(handle, image0, false); + +RocalImage image11 = rocalFishEye(handle, image1, false); + + +rocalRotate(handle, image11, true, rand_angle); + + +// Creating successive blur nodes to simulate a deep branch of augmentations +RocalImage image2 = rocalCropResize(handle, image0, resize_w, resize_h, false, rand_crop_area); +for(int i = 0 ; i < aug_depth; i++) { + image2 = rocalBlurFixed(handle, image2, 17.25, (i == (aug_depth -1)) ?
true:false ); +} +// Calling the API to verify and build the augmentation graph +if(rocalVerify(handle) != ROCAL_OK) +{ + std::cout << "Could not verify the augmentation graph" << std::endl; + return -1; +} + +while (!rocalIsEmpty(handle)) { + if(rocalRun(handle) != 0) + break; +} +``` + +To see a sample image augmentation application in C++, click [here](https://github.com/ROCm/rocAL/tree/develop/tests/cpp_api/image_augmentation). diff --git a/rocAL-setup.py b/rocAL-setup.py index da7eac015..7b412d27c 100644 --- a/rocAL-setup.py +++ b/rocAL-setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2022 - 2024 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,66 +22,81 @@ import sys import argparse import platform +import traceback if sys.version_info[0] < 3: import commands else: import subprocess -__author__ = "Kiriti Nagesh Gowda" -__copyright__ = "Copyright 2022 - 2023, AMD ROCm Augmentation Library" +__copyright__ = "Copyright 2022 - 2024, AMD ROCm Augmentation Library" __license__ = "MIT" -__version__ = "1.1.0" -__maintainer__ = "Kiriti Nagesh Gowda" +__version__ = "2.5.0" __email__ = "mivisionx.support@amd.com" __status__ = "Shipping" +# error check calls +def ERROR_CHECK(call): + status = call + if(status != 0): + print('ERROR_CHECK failed with status:'+str(status)) + traceback.print_stack() + exit(status) + # Arguments parser = argparse.ArgumentParser() parser.add_argument('--directory', type=str, default='~/rocal-deps', help='Setup home directory - optional (default:~/)') -parser.add_argument('--opencv', type=str, default='4.6.0', - help='OpenCV Version - optional (default:4.6.0)') -parser.add_argument('--protobuf', type=str, default='3.12.4', - help='ProtoBuf Version - optional (default:3.12.4)') 
-parser.add_argument('--pybind11', type=str, default='v2.10.4', - help='PyBind11 Version - optional (default:v2.10.4)') -parser.add_argument('--reinstall', type=str, default='ON', - help='Remove previous setup and reinstall - optional (default:ON) [options:OFF/ON]') -parser.add_argument('--backend', type=str, default='HIP', - help='rocAL Dependency Backend - optional (default:HIP) [options:CPU/OCL/HIP]') parser.add_argument('--rocm_path', type=str, default='/opt/rocm', help='ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required') +parser.add_argument('--backend', type=str, default='HIP', + help='rocAL Dependency Backend - optional (default:HIP) [options:CPU/OCL/HIP]') +parser.add_argument('--ffmpeg', type=str, default='OFF', + help='FFMPEG Installation - optional (default:OFF) [options:ON/OFF]') +parser.add_argument('--reinstall', type=str, default='OFF', + help='Remove previous setup and reinstall - optional (default:OFF) [options:ON/OFF]') args = parser.parse_args() setupDir = args.directory -opencvVersion = args.opencv -ProtoBufVersion = args.protobuf -pybind11Version = args.pybind11 -reinstall = args.reinstall.upper() -backend = args.backend.upper() ROCM_PATH = args.rocm_path +backend = args.backend.upper() +ffmpegInstall = args.ffmpeg.upper() +reinstall = args.reinstall.upper() +# override default path if env path set if "ROCM_PATH" in os.environ: ROCM_PATH = os.environ.get('ROCM_PATH') print("\nROCm PATH set to -- "+ROCM_PATH+"\n") -if reinstall not in ('ON', 'OFF'): +# check developer inputs +if backend not in ('OCL', 'HIP', 'CPU'): print( - "ERROR: Re-Install Option Not Supported - [Supported Options: ON or OFF]") + "ERROR: Backend Option Not Supported - [Supported Options: CPU or OCL or HIP]\n") + parser.print_help() exit() -if backend not in ('OCL', 'HIP', 'CPU'): +if ffmpegInstall not in ('OFF', 'ON'): print( - "ERROR: Backend Option Not Supported - [Supported Options: CPU or OCL or HIP]") + "ERROR: FFMPEG Install Option Not 
Supported - [Supported Options: OFF or ON]\n") + parser.print_help() + exit() +if reinstall not in ('OFF', 'ON'): + print( + "ERROR: Re-Install Option Not Supported - [Supported Options: OFF or ON]\n") + parser.print_help() exit() # check ROCm installation -if os.path.exists(ROCM_PATH): +if os.path.exists(ROCM_PATH) and backend != 'CPU': print("\nROCm Installation Found -- "+ROCM_PATH+"\n") os.system('echo ROCm Info -- && '+ROCM_PATH+'/bin/rocminfo') else: - print("\nWARNING: ROCm Not Found at -- "+ROCM_PATH+"\n") - print( - "WARNING: Set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n") + if backend != 'CPU': + print("\nWARNING: ROCm Not Found at -- "+ROCM_PATH+"\n") + print( + "WARNING: If ROCm installed, set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n") + print("WARNING: Limited dependencies will be installed\n") + backend = 'CPU' + else: + print("\nSTATUS: CPU Backend Install\n") # get platfrom info platfromInfo = platform.platform() @@ -108,303 +123,316 @@ deps_dir = os.path.expanduser(setupDir_deps) deps_dir = os.path.abspath(deps_dir) +# check os version +os_info_data = 'NOT Supported' +if os.path.exists('/etc/os-release'): + with open('/etc/os-release', 'r') as os_file: + os_info_data = os_file.read().replace('\n', ' ') + os_info_data = os_info_data.replace('"', '') + # setup for Linux linuxSystemInstall = '' linuxCMake = 'cmake' linuxSystemInstall_check = '' linuxFlag = '' -if "centos" in platfromInfo or "redhat" in platfromInfo or os.path.exists('/usr/bin/yum'): +sudoValidate = 'sudo -v' +if "centos" in os_info_data or "redhat" in os_info_data or "Oracle" in os_info_data: linuxSystemInstall = 'yum -y' linuxSystemInstall_check = '--nogpgcheck' - if "centos-7" in platfromInfo or "redhat-7" in platfromInfo: + if "VERSION_ID=7" in os_info_data: linuxCMake = 'cmake3' - os.system(linuxSystemInstall+' install cmake3') - if not "centos" in platfromInfo or not "redhat" in platfromInfo: - 
platfromInfo = platfromInfo+'-redhat' -elif "Ubuntu" in platfromInfo or os.path.exists('/usr/bin/apt-get'): + sudoValidate = 'sudo -k' + platfromInfo = platfromInfo+'-redhat-7' + elif "VERSION_ID=8" in os_info_data: + platfromInfo = platfromInfo+'-redhat-8' + elif "VERSION_ID=9" in os_info_data: + platfromInfo = platfromInfo+'-redhat-9' + else: + platfromInfo = platfromInfo+'-redhat-centos-undefined-version' +elif "Ubuntu" in os_info_data: linuxSystemInstall = 'apt-get -y' linuxSystemInstall_check = '--allow-unauthenticated' linuxFlag = '-S' - if not "Ubuntu" in platfromInfo: - platfromInfo = platfromInfo+'-Ubuntu' -elif os.path.exists('/usr/bin/zypper'): + if "VERSION_ID=20" in os_info_data: + platfromInfo = platfromInfo+'-Ubuntu-20' + elif "VERSION_ID=22" in os_info_data: + platfromInfo = platfromInfo+'-Ubuntu-22' + elif "VERSION_ID=24" in os_info_data: + platfromInfo = platfromInfo+'-Ubuntu-24' + else: + platfromInfo = platfromInfo+'-Ubuntu-undefined-version' +elif "SLES" in os_info_data: linuxSystemInstall = 'zypper -n' linuxSystemInstall_check = '--no-gpg-checks' platfromInfo = platfromInfo+'-SLES' +elif "Mariner" in os_info_data: + linuxSystemInstall = 'tdnf -y' + linuxSystemInstall_check = '--nogpgcheck' + platfromInfo = platfromInfo+'-Mariner' else: print("\nrocAL Setup on "+platfromInfo+" is unsupported\n") - print("\nrocAL Setup Supported on: Ubuntu 20/22; CentOS 7/8; RedHat 7/8; & SLES 15-SP4\n") + print("\nrocAL Setup Supported on: Ubuntu 20/22, RedHat 8/9, & SLES 15\n") exit() # rocAL Setup print("\nrocAL Setup on: "+platfromInfo+"\n") if userName == 'root': - os.system(linuxSystemInstall+' update') - os.system(linuxSystemInstall+' install sudo') + ERROR_CHECK(os.system(linuxSystemInstall+' update')) + ERROR_CHECK(os.system(linuxSystemInstall+' install sudo')) # Delete previous install if os.path.exists(deps_dir) and reinstall == 'ON': - os.system('sudo -v') - os.system('sudo rm -rf '+deps_dir) + ERROR_CHECK(os.system(sudoValidate)) + 
ERROR_CHECK(os.system('sudo rm -rf '+deps_dir)) print("\nrocAL Setup: Removing Previous Install -- "+deps_dir+"\n") -# Re-Install -if os.path.exists(deps_dir): - print("\nrocAL Setup: Re-Installing Libraries from -- "+deps_dir+"\n") - # opencv - if os.path.exists(deps_dir+'/build/OpenCV'): - os.system('sudo -v') - os.system('(cd '+deps_dir+'/build/OpenCV; sudo ' + - linuxFlag+' make install -j8)') - - # ProtoBuf - if os.path.exists(deps_dir+'/protobuf-'+ProtoBufVersion): - os.system('sudo -v') - os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion + - '; sudo '+linuxFlag+' make install -j8)') - - # FFMPEG - if os.path.exists(deps_dir+'/FFmpeg-n4.4.2'): - os.system('sudo -v') - os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo ' + - linuxFlag+' make install -j8)') - - # MIVisionX - if os.path.exists(deps_dir+'/MIVisionX/build-'+backend): - os.system('sudo -v') - os.system('(cd '+deps_dir+'/MIVisionX/build-'+backend+'; sudo ' + - linuxFlag+' make install -j8)') - - print("\nrocAL Dependencies Re-Installed with rocAL-setup.py V-"+__version__+"\n") +# Core package dependencies +libpkgConfig = "pkg-config" +if "centos" in os_info_data and "VERSION_ID=7" in os_info_data: + libpkgConfig = "pkgconfig" +commonPackages = [ + 'gcc', + 'cmake', + 'git', + 'wget', + 'unzip', + str(libpkgConfig) +] + +rocmDebianPackages = [ + 'half', + 'rpp', + 'rpp-dev', + 'mivisionx', + 'mivisionx-dev' +] +rocmRPMPackages = [ + 'half', + 'rpp', + 'rpp-devel', + 'mivisionx', + 'mivisionx-devel' +] + +rocdecodeDebianPackages = [ + 'rocdecode', + 'rocdecode-dev' +] + +rocdecodeRPMPackages = [ + 'rocdecode', + 'rocdecode-devel' +] + +opencvDebianPackages = [ + 'build-essential', + 'pkg-config', + 'libgtk2.0-dev', + 'libavcodec-dev', + 'libavformat-dev', + 'libswscale-dev', + 'libtbb2', + 'libtbb-dev', + 'libjpeg-dev', + 'libpng-dev', + 'libtiff-dev', + 'libdc1394-dev', + 'unzip' +] + +opencvRPMPackages = [ + 'gtk2-devel', + 'libjpeg-devel', + 'libpng-devel', + 'libtiff-devel', + 'libavc1394', + 
'unzip' +] + +coreDebianPackages = [ + 'nasm', + 'yasm', + 'liblmdb-dev', + #'rapidjson-dev', + 'python3-dev', + 'python3-pip', + 'python3-protobuf', + 'libprotobuf-dev', + 'libprotoc-dev', + 'protobuf-compiler' +] + +libPythonProto = "python3-protobuf" +libProtoCompiler = "protobuf-compiler" +if "centos" in os_info_data and "VERSION_ID=7" in os_info_data: + libPythonProto = "protobuf-python" +if "SLES" in os_info_data: + libProtoCompiler = "libprotobuf-c-devel" +coreRPMPackages = [ + 'nasm', + 'yasm', + 'lmdb-devel', + 'jsoncpp-devel', + #'rapidjson-devel', + 'python3-devel', + 'python3-pip', + str(libPythonProto), + 'protobuf-devel', + str(libProtoCompiler) +] + +pip3Packages = [ + 'pytest==7.0.0', + 'wheel==0.37.0' +] + +debianOptionalPackages = [ + 'ffmpeg', + 'libavcodec-dev', + 'libavformat-dev', + 'libavutil-dev', + 'libswscale-dev', + 'libopencv-dev' +] + +# Install +ERROR_CHECK(os.system(sudoValidate)) +if os.path.exists(deps_dir): + print("\nrocAL Setup: install found -- "+deps_dir) + print("\nrocAL Setup: use option --reinstall ON to reinstall all dependencies") + print("\nrocAL Dependencies Installed with rocAL-setup.py on "+platfromInfo+"\n") + exit(0) # Clean Install else: print("\nrocAL Dependencies Installation with rocAL-setup.py V-"+__version__+"\n") - os.system('mkdir '+deps_dir) + ERROR_CHECK(os.system('mkdir '+deps_dir)) # Create Build folder - os.system('(cd '+deps_dir+'; mkdir build )') - # install pre-reqs - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install gcc cmake git wget unzip pkg-config inxi mivisionx python3 python3-pip') - - # Get Installation Source - os.system( - '(cd '+deps_dir+'; wget https://github.com/opencv/opencv/archive/'+opencvVersion+'.zip )') - os.system('(cd '+deps_dir+'; unzip '+opencvVersion+'.zip )') - os.system( - '(cd '+deps_dir+'; wget https://github.com/protocolbuffers/protobuf/archive/v'+ProtoBufVersion+'.zip )') - os.system('(cd '+deps_dir+'; 
unzip v'+ProtoBufVersion+'.zip )') - os.system( - '(cd '+deps_dir+'; wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip )') - - # Install - # package dependencies - os.system('sudo -v') - if "centos" in platfromInfo or "redhat" in platfromInfo: - if "centos-7" in platfromInfo or "redhat-7" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check + - ' install kernel-devel libsqlite3x-devel bzip2-devel openssl-devel python3-devel autoconf automake libtool curl make g++ unzip') - elif "centos-8" in platfromInfo or "redhat-8" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check + - ' install kernel-devel libsqlite3x-devel bzip2-devel openssl-devel python3-devel autoconf automake libtool curl make gcc-c++ unzip') - elif "Ubuntu" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install sqlite3 libsqlite3-dev libbz2-dev libssl-dev python3-dev autoconf automake libtool') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install curl make g++ unzip libomp-dev libpthread-stubs0-dev') - elif "SLES" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install sqlite3 sqlite3-devel libbz2-devel libopenssl-devel python3-devel autoconf automake libtool curl make gcc-c++ unzip') - # Install half.hpp - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install -y half') - # Install ProtoBuf - os.system('(cd '+deps_dir+'/protobuf-' + - ProtoBufVersion+'; ./autogen.sh )') - os.system('(cd '+deps_dir+'/protobuf-' + - ProtoBufVersion+'; ./configure )') - os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion+'; make -j8 )') - os.system('(cd '+deps_dir+'/protobuf-' + - ProtoBufVersion+'; make check -j8 )') - os.system('sudo -v') - os.system('(cd 
'+deps_dir+'/protobuf-'+ProtoBufVersion + - '; sudo '+linuxFlag+' make install )') - os.system('sudo -v') - os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion + - '; sudo '+linuxFlag+' ldconfig )') - - # Install OpenCV - os.system('(cd '+deps_dir+'/build; mkdir OpenCV )') - # Install pre-reqs - os.system('sudo -v') - if "Ubuntu" in platfromInfo: - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy ') - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip') - elif "centos" in platfromInfo or "redhat" in platfromInfo: - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' groupinstall \'Development Tools\'') - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install gtk2-devel libjpeg-devel libpng-devel libtiff-devel libavc1394 wget unzip') - elif "SLES" in platfromInfo: - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install -t pattern devel_basis') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install gtk2-devel libjpeg-devel libpng-devel libtiff-devel libavc1394 wget unzip') - # OpenCV 4.6.0 - os.system('(cd '+deps_dir+'/build/OpenCV; '+linuxCMake + - ' -D WITH_GTK=ON -D WITH_JPEG=ON -D BUILD_JPEG=ON -D WITH_OPENCL=OFF -D WITH_OPENCLAMDFFT=OFF -D WITH_OPENCLAMDBLAS=OFF -D WITH_VA_INTEL=OFF -D WITH_OPENCL_SVM=OFF -D CMAKE_INSTALL_PREFIX=/usr/local ../../opencv-'+opencvVersion+' )') - os.system('(cd '+deps_dir+'/build/OpenCV; make -j8 )') - os.system('sudo -v') - os.system('(cd '+deps_dir+'/build/OpenCV; sudo '+linuxFlag+' make install )') - 
os.system('sudo -v') - os.system('(cd '+deps_dir+'/build/OpenCV; sudo '+linuxFlag+' ldconfig )') + ERROR_CHECK(os.system('(cd '+deps_dir+'; mkdir build )')) + # update + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +' '+linuxSystemInstall_check+' update')) + # common packages + for i in range(len(commonPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ commonPackages[i])) + if "redhat-7" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install cmake3')) + # ROCm Packages if "Ubuntu" in platfromInfo: - # Install Packages for rocAL - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install libgflags-dev libgoogle-glog-dev liblmdb-dev') - # Yasm/Nasm for TurboJPEG - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install nasm yasm') - # clang - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install clang') - elif "redhat" in platfromInfo or "SLES" in platfromInfo: - # Nasm & Yasm - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install nasm yasm') - # JSON-cpp - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install jsoncpp-devel') - # lmbd - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + - linuxSystemInstall_check+' install lmdb-devel') - - # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2 - os.system( - '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )') - os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake + - ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE 
-DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )') - # RPP - os.system('sudo -v') - if "Ubuntu" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install -y rpp rpp-dev') + for i in range(len(rocmDebianPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ rocmDebianPackages[i])) else: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install -y rpp rpp-devel') - # RapidJSON - os.system('sudo -v') + for i in range(len(rocmRPMPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ rocmRPMPackages[i])) + + # rocDecode if "Ubuntu" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + ' ' + - linuxSystemInstall_check+' install -y rapidjson-dev') - else: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + ' ' + - linuxSystemInstall_check+' install -y rapidjson-devel') - os.system('(cd '+deps_dir+'; git clone https://github.com/Tencent/rapidjson.git; cd rapidjson; mkdir build; cd build; ' + - linuxCMake+' ../; make -j4; sudo make install)') - # PyBind11 - os.system('sudo -v') - os.system('pip install pytest==7.3.1') - os.system('(cd '+deps_dir+'; git clone -b '+pybind11Version+' https://github.com/pybind/pybind11; cd pybind11; mkdir build; cd build; ' + - linuxCMake+' -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../; make -j4; sudo make install)') - - # Install ffmpeg + for i in range(len(rocdecodeDebianPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ rocdecodeDebianPackages[i])) + elif "redhat-7" not in platfromInfo: + for i in range(len(rocdecodeRPMPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ rocdecodeRPMPackages[i])) + + 
ERROR_CHECK(os.system(sudoValidate)) + # rocAL Core Packages if "Ubuntu" in platfromInfo: - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install autoconf automake build-essential git-core libass-dev libfreetype6-dev') - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install libsdl2-dev libtool libva-dev libvdpau-dev libvorbis-dev libxcb1-dev') - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo zlib1g-dev') - os.system('sudo -v') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev') + for i in range(len(coreDebianPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ coreDebianPackages[i])) else: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install autoconf automake bzip2 bzip2-devel freetype-devel') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install gcc-c++ libtool make pkgconfig zlib-devel') - # Nasm - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install nasm') - if "centos-7" in platfromInfo or "redhat-7" in platfromInfo: - # Yasm - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' --enablerepo=extras install epel-release') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install yasm') - # libx264 & libx265 - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' 
'+linuxSystemInstall_check + - ' install libx264-devel libx265-devel') - # libfdk_aac - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install https://forensics.cert.org/cert-forensics-tools-release-el7.rpm') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' --enablerepo=forensics install fdk-aac') - # libASS - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install libass-devel') - elif "centos-8" in platfromInfo or "redhat-8" in platfromInfo: - # el8 x86_64 packages - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-8.noarch.rpm') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install http://mirror.centos.org/centos/8/PowerTools/x86_64/os/Packages/SDL2-2.0.10-2.el8.x86_64.rpm') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install ffmpeg ffmpeg-devel') - elif "SLES" in platfromInfo: - # FFMPEG-4 packages - os.system( - 'sudo zypper ar -cfp 90 \'https://ftp.gwdg.de/pub/linux/misc/packman/suse/openSUSE_Leap_$releasever/Essentials\' packman-essentials') - os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + - ' install ffmpeg-4') - - # FFMPEG 4 from source -- for Ubuntu, CentOS 7, & RedHat 7 - if "Ubuntu" in platfromInfo or "centos-7" in platfromInfo or "redhat-7" in platfromInfo: - os.system('sudo -v') - os.system( - '(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo '+linuxFlag+' ldconfig )') - os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"; ./configure 
--enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree)') - os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; make -j8 )') - os.system('sudo -v') - os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo ' + - linuxFlag+' make install )') - - # MIVisionX - os.system('sudo -v') + for i in range(len(coreRPMPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ coreRPMPackages[i])) + + #pip3 packages + for i in range(len(pip3Packages)): + ERROR_CHECK(os.system('pip3 install '+ pip3Packages[i])) + + # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.2 + turboJpegVersion = '3.0.2' + ERROR_CHECK(os.system( + '(cd '+deps_dir+'; git clone -b '+turboJpegVersion+' https://github.com/libjpeg-turbo/libjpeg-turbo.git )')) + ERROR_CHECK(os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake + + ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j$(nproc); sudo make install )')) + + # PyBind11 - https://github.com/pybind/pybind11 -- v2.11.1 + pybind11Version = 'v2.11.1' + ERROR_CHECK(os.system('(cd '+deps_dir+'; git clone -b '+pybind11Version+' https://github.com/pybind/pybind11; cd pybind11; mkdir build; cd build; ' + + linuxCMake+' -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../; make -j$(nproc); sudo make install)')) + + # RapidJSON - Source TBD: Package install of RapidJSON has compile issues - https://github.com/Tencent/rapidjson.git -- master + os.system('(cd '+deps_dir+'; git clone https://github.com/Tencent/rapidjson.git; cd rapidjson; mkdir build; cd build; ' + + linuxCMake+' ../; make -j$(nproc); sudo make install)') + + # Optional Deps if "Ubuntu" in platfromInfo: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install -y mivisionx mivisionx-dev') + for i in 
range(len(debianOptionalPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ debianOptionalPackages[i])) else: - os.system('sudo '+linuxFlag+' '+linuxSystemInstall + - ' '+linuxSystemInstall_check+' install -y mivisionx mivisionx-devel') - # TBD: Need source install as rocm-6.0 mivisionx is missing vx_rpp - os.system('sudo -v') - os.system('(cd '+deps_dir+'; git clone https://github.com/ROCm/MIVisionX.git; cd MIVisionX; mkdir build-'+backend+'; cd build-'+backend+'; ' + - linuxCMake+' -DBACKEND='+backend+' -D ROCAL=OFF ../; make -j4; sudo make install)') - - print("\nrocAL Dependencies Installed with rocAL-setup.py V-"+__version__+"\n") + # Install ffmpeg + if ffmpegInstall == 'ON': + if "redhat-7" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install epel-release')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' localinstall --nogpgcheck https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install ffmpeg ffmpeg-devel')) + elif "redhat-8" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-8.noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install http://mirror.centos.org/centos/8/PowerTools/x86_64/os/Packages/SDL2-2.0.10-2.el8.x86_64.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' 
'+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install ffmpeg ffmpeg-devel')) + elif "redhat-9" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install https://dl.fedoraproject.org/pub/epel/epel-next-release-latest-9.noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install --nogpgcheck https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-$(rpm -E %rhel).noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install https://mirrors.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-$(rpm -E %rhel).noarch.rpm')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install ffmpeg ffmpeg-free-devel')) + elif "SLES" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install ffmpeg-4 ffmpeg-4-libavcodec-devel ffmpeg-4-libavformat-devel')) + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' install ffmpeg-4-libavutil-devel ffmpeg-4-libswscale-devel')) + + # Install OpenCV -- TBD cleanup + opencvVersion = '4.6.0' + ERROR_CHECK(os.system('(cd '+deps_dir+'/build; mkdir OpenCV )')) + # Install pre-reqs + ERROR_CHECK(os.system(sudoValidate)) + if "redhat" in platfromInfo: + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check + + ' groupinstall \'Development Tools\'')) + for i in range(len(opencvRPMPackages)): + ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall + + ' '+linuxSystemInstall_check+' install -y '+ opencvRPMPackages[i])) + # OpenCV 4.6.0 + # Get Installation Source + 
ERROR_CHECK(os.system( + '(cd '+deps_dir+'; wget https://github.com/opencv/opencv/archive/'+opencvVersion+'.zip )')) + ERROR_CHECK(os.system('(cd '+deps_dir+'; unzip '+opencvVersion+'.zip )')) + ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; '+linuxCMake + + ' -D WITH_EIGEN=OFF -D WITH_GTK=ON -D WITH_JPEG=ON -D BUILD_JPEG=ON -D WITH_OPENCL=OFF -D WITH_OPENCLAMDFFT=OFF -D WITH_OPENCLAMDBLAS=OFF -D WITH_VA_INTEL=OFF -D WITH_OPENCL_SVM=OFF -D CMAKE_INSTALL_PREFIX=/usr/local ../../opencv-'+opencvVersion+' )')) + ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; make -j$(nproc))')) + ERROR_CHECK(os.system(sudoValidate)) + ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; sudo make install)')) + ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; sudo ldconfig)')) + +print("\nrocAL Dependencies Installed with rocAL-setup.py V-"+__version__+" on "+platfromInfo+"\n") diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt index f7219f683..4973940e6 100644 --- a/rocAL/CMakeLists.txt +++ b/rocAL/CMakeLists.txt @@ -21,7 +21,7 @@ # SOFTWARE. 
cmake_minimum_required(VERSION 3.5) -set(VERSION "1.0.0") +set(VERSION "2.0.0") # Set Project Version and Language project(rocal VERSION ${VERSION} LANGUAGES CXX) @@ -44,6 +44,7 @@ find_package(LMDB QUIET) find_package(RapidJSON QUIET) find_package(StdFilesystem QUIET) find_package(HALF QUIET) +find_package(SndFile QUIET) # HIP Backend if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP") @@ -55,11 +56,21 @@ if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP") endif() endif() list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip) - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1031;gfx1032;gfx1100") - if(BUILD_WITH_AMD_ADVANCE) - set(DEFAULT_AMDGPU_TARGETS ${DEFAULT_AMDGPU_TARGETS} "gfx941,gfx942") + # Set supported GPU Targets + set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102") + if (BUILD_WITH_AMD_ADVANCE) + set(DEFAULT_AMDGPU_TARGETS ${DEFAULT_AMDGPU_TARGETS} "gfx1200;gfx1201") endif() - set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") + + # Set AMDGPU_TARGETS + if(DEFINED ENV{AMDGPU_TARGETS}) + set(AMDGPU_TARGETS $ENV{AMDGPU_TARGETS} CACHE STRING "List of specific machine types for library to target") + elseif(AMDGPU_TARGETS) + message("-- ${White}${PROJECT_NAME} -- AMDGPU_TARGETS set with -D option${ColourReset}") + else() + set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") + endif() + message("-- ${White}${PROJECT_NAME} -- AMDGPU_TARGETS: ${AMDGPU_TARGETS}${ColourReset}") find_package(HIP QUIET) if(HIP_FOUND) SET(BUILD_ROCAL true) @@ -78,10 +89,10 @@ if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "OPENCL") set(BUILD_ROCAL true) message("-- ${White}rocAL -- Using OpenCL -- Path:" ${OpenCL_LIBRARIES} "\tVersion:" ${CL_TARGET_OpenCL_VERSION}) else() - set(BACKEND "CPU") - message("-- ${Yellow}NOTE: rocAL library requires OpenCL for 
BACKEND=OpenCL, Not Found ${ColourReset}") - message("-- ${Yellow}NOTE: rocAL Backend set to CPU ${ColourReset}") - endif() + set(BACKEND "CPU") + message("-- ${Yellow}NOTE: rocAL library requires OpenCL for BACKEND=OpenCL, Not Found ${ColourReset}") + message("-- ${Yellow}NOTE: rocAL Backend set to CPU ${ColourReset}") + endif() endif() # Dependency Check # AMD RPP @@ -168,20 +179,20 @@ if(NOT Threads_FOUND) set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires Threads, Not Found ${ColourReset}") endif() -if(NOT LMDB_FOUND) - set(BUILD_ROCAL false) +if(NOT LMDB_FOUND) + set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires LMDB, Not Found ${ColourReset}") -endif() -if(NOT RapidJSON_FOUND) - set(BUILD_ROCAL false) +endif() +if(NOT RapidJSON_FOUND) + set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires RapidJSON, Not Found ${ColourReset}") endif() if(NOT FILESYSTEM_FOUND) set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires FileSystem, Not Found ${ColourReset}") endif() -if(NOT HALF_FOUND) - set(BUILD_ROCAL false) +if(NOT HALF_FOUND) + set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires HALF, Not Found ${ColourReset}") endif() @@ -204,10 +215,10 @@ if(${BUILD_ROCAL}) # Protobuf include_directories(${PROTOBUF_INCLUDE_DIRS}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${PROTOBUF_LIBRARIES}) - # LMDB + # LMDB include_directories(${LMDB_INCLUDE_DIRS}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LMDB_LIBRARIES}) - # RapidJSON + # RapidJSON include_directories(${RapidJSON_INCLUDE_DIRS}) # Filesystem include_directories(${FILESYSTEM_INCLUDE_DIRS}) @@ -221,22 +232,8 @@ if(${BUILD_ROCAL}) include_directories( ${CMAKE_CURRENT_BINARY_DIR} - include/api/ - include/augmentations/ - include/augmentations/color_augmentations/ - include/augmentations/effects_augmentations/ - include/augmentations/geometry_augmentations/ - include/decoders/image/ - include/decoders/video/ - 
include/device/ - include/loaders/ - include/loaders/image/ - include/loaders/video/ - include/meta_data/ - include/parameters/ - include/pipeline/ - include/readers/image/ - include/readers/video/ + include + include/api ) link_directories(${ROCM_PATH}/${CMAKE_INSTALL_LIBDIR}) @@ -287,7 +284,7 @@ if(${BUILD_ROCAL}) if(${OpenCV_VERSION_MAJOR} EQUAL 4) target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=1) else() - target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=0) + target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=0) endif() else() target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_OPENCV=0) @@ -309,6 +306,29 @@ if(${BUILD_ROCAL}) else() message(FATAL_ERROR "No filesystem library found.") endif() + + # Audio features for rocAL + if(${VX_EXT_RPP_VERSION_MAJOR} VERSION_GREATER_EQUAL "3" AND ${VX_EXT_RPP_VERSION_MINOR} VERSION_GREATER_EQUAL "0" AND ${VX_EXT_RPP_VERSION_PATCH} VERSION_GREATER "0") + if(NOT SNDFILE_FOUND) + message("-- ${Yellow}NOTE: rocAL built without SndFile - Audio Functionalities will not be supported${ColourReset}") + set(AUDIO_SUPPORT FALSE) + else() + set(AUDIO_SUPPORT TRUE) + endif() + else() + set(AUDIO_SUPPORT FALSE) + message("-- ${Yellow}rocAL -- Requires MIVisionX vx_rpp version > 3.0.0 for audio${ColourReset}") + endif() + + # SndFile + if(AUDIO_SUPPORT) + include_directories(${SNDFILE_INCLUDE_DIRS}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${SNDFILE_LIBRARIES}) + message("-- ${White}rocAL built with Audio Functionality${ColourReset}") + target_compile_definitions(${PROJECT_NAME} PUBLIC -DROCAL_AUDIO) + else() + message("-- ${Yellow}NOTE: rocAL built without Audio support - Audio Functionalities will not be enabled${ColourReset}") + endif() # -Wall -- Enable most warning messages # -mavx2 -- Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation # -mfma -- Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions 
and code generation @@ -336,6 +356,7 @@ if(${BUILD_ROCAL}) include/api/rocal_api_data_transfer.h include/api/rocal_api_parameters.h include/api/rocal_api_meta_data.h + include/api/rocal_api_tensor.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocal COMPONENT dev) else() message(FATAL_ERROR "-- ${Red}rocAL dependencies not satisfied${ColourReset}") diff --git a/rocAL/include/api/rocal_api.h b/rocAL/include/api/rocal_api.h index 484ef8c4b..78c0acb17 100644 --- a/rocAL/include/api/rocal_api.h +++ b/rocAL/include/api/rocal_api.h @@ -1,4 +1,5 @@ /* +MIT License Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,6 +25,7 @@ THE SOFTWARE. #define ROCAL_H #include "rocal_api_types.h" +#include "rocal_api_tensor.h" #include "rocal_api_parameters.h" #include "rocal_api_data_loaders.h" #include "rocal_api_augmentation.h" @@ -42,27 +44,21 @@ THE SOFTWARE. /*! * \brief rocalCreate creates the context for a new augmentation pipeline. Initializes all the required internals for the pipeline * \ingroup group_rocal - * - * \param [in] batch_size + * \param [in] batch_size batch size * \param [in] affinity RocalProcessMode: Defines whether rocal data loading should be on the CPU or GPU. - * \param [in] gpu_id - * \param [in] cpu_thread_count - * \param [in] prefetch_queue_depth + * \param [in] gpu_id GPU id + * \param [in] cpu_thread_count number of cpu threads + * \param [in] prefetch_queue_depth The depth of the prefetch queue. * \param [in] output_tensor_data_type RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or FP16. 
* \return A \ref RocalContext - The context for the pipeline */ -extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, - RocalProcessMode affinity, - int gpu_id = 0, - size_t cpu_thread_count = 1, - size_t prefetch_queue_depth = 3, - RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); +extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); /*! * \brief rocalVerify function to verify the graph for all the inputs and outputs * \ingroup group_rocal * - * \param [in] context + * \param [in] context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); @@ -71,7 +67,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); * \brief rocalRun function to process and run the built and verified graph. * \ingroup group_rocal * - * \param [in] context + * \param [in] context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); @@ -80,7 +76,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); * \brief rocalRelease function to free all the resources allocated during the graph creation process. * \ingroup group_rocal * - * \param [in] context + * \param [in] rocal_context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure.
*/ extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context); diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index d397e8002..110e7a85d 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -33,725 +33,1295 @@ THE SOFTWARE. */ /*! - * \brief Rearranges the order of the frames in the sequences with respect to new_order. - * new_order can have values in the range [0, sequence_length). - * Frames can be repeated or dropped in the new_order. + * \brief Rearranges the order of the frames in the sequences with respect to new_order. new_order can have values in the range [0, sequence_length). Frames can be repeated or dropped in the new_order. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] p_context context for the pipeline. + * \param [in] p_input Input Rocal Tensor + * \param [in] new_order represents the new order of the frames in the sequence + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceRearrange(RocalContext p_context, RocalTensor p_input, + std::vector &new_order, + bool is_output); + +/*! \brief Resize images. + * \note Accepts U8 and RGB24 input. * \ingroup group_rocal_augmentations * \note: Accepts U8 and RGB24 input. - * \param context context for the pipeline. 
- * \param input - * \param new_order - * \param new_sequence_length - * \param sequence_length - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceRearrange(RocalContext context, RocalImage input, - unsigned int *new_order, unsigned int new_sequence_length, - unsigned int sequence_length, bool is_output); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param scaling_mode The resize scaling_mode to resize the image. - * \param max_size Limits the size of the resized image. - * \param resize_shorter The length of the shorter dimension of the image. - * \param resize_longer The length of the larger dimension of the image. - * \param interpolation_type The type of interpolation to be used for resize. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalResize(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, + * \param [in] context context for the pipeline. + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] scaling_mode The resize scaling_mode to resize the image. + * \param [in] max_size Limits the size of the resized image. + * \param [in] resize_shorter The length of the shorter dimension of the image. + * \param [in] resize_longer The length of the larger dimension of the image. + * \param [in] interpolation_type The type of interpolation to be used for resize. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResize(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, normalize and flip on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] mean The channel mean values + * \param [in] std_dev The channel standard deviation values + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] scaling_mode The resize scaling_mode to resize the image. + * \param [in] max_size Limits the size of the resized image. + * \param [in] resize_shorter The length of the shorter dimension of the image. + * \param [in] resize_longer The length of the larger dimension of the image. + * \param [in] interpolation_type The type of interpolation to be used for resize. + * \param [in] mirror Parameter to enable horizontal flip for output image. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalTensor p_input, unsigned dest_width, + unsigned dest_height, std::vector &mean, std::vector &std_dev, + bool is_output, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, + std::vector max_size = {}, unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs crop and resize on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. 
+ * \param [in] area Target area for the crop + * \param [in] aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image + * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropResize(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + RocalFloatParam area = NULL, + RocalFloatParam aspect_ratio = NULL, + RocalFloatParam x_center_drift = NULL, + RocalFloatParam y_center_drift = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs crop and resize on images with fixed crop coordinates. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. 
+ * \param [in] area Target area for the crop + * \param [in] aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image + * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + float area, float aspect_ratio, + float x_center_drift, float y_center_drift, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Rotates images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] is_output True: the output tensor is needed by user and will be copied to output buffers using the data transfer API calls. False: the output tensor is just an intermediate tensor, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] angle Rocal parameter defining the rotation angle value in degrees. + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] interpolation_type The type of interpolation to be used for rotate. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRotate(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam angle = NULL, unsigned dest_width = 0, + unsigned dest_height = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Rotates images with fixed angle value. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] angle The rotation angle value in degrees. + * \param [in] interpolation_type The type of interpolation to be used for rotate. + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalTensor input, float angle, + bool is_output, unsigned dest_width = 0, unsigned dest_height = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts brightness of the image. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha controls contrast of the image + * \param [in] beta controls brightness of the image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBrightness(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts brightness of the image with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha controls contrast of the image + * \param [in] beta controls brightness of the image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalTensor input, + float alpha, float beta, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies gamma correction on image. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] gamma gamma value for the image. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalGamma(RocalContext context, RocalTensor input, bool is_output, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context Rocal context - * \param input Input Rocal Image - * \param dest_width The output width - * \param dest_height The output height - * \param mean The channel mean values - * \param std_dev The channel standard deviation values - * \param is_output True: the output image is needed by user and will be copied to output buffers using the data - * transfer API calls. False: the output image is just an intermediate image, user is not interested in - * using it directly. This option allows certain optimizations to be achieved. - * \param p_mirror Parameter to enable horizontal flip for output image. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalImage p_input, - unsigned dest_width, unsigned dest_height, - std::vector &mean, std::vector &std_dev, - bool is_output, RocalIntParam p_mirror = NULL); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param area - * \param x_center_drift - * \param y_center_drift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropResize(RocalContext context, RocalImage input, unsigned dest_width, - unsigned dest_height, bool is_output, - RocalFloatParam area = NULL, - RocalFloatParam aspect_ratio = NULL, - RocalFloatParam x_center_drift = NULL, - RocalFloatParam y_center_drift = NULL); - -/*! \brief Accepts U8 and RGB24 input. Crops the input image to a new area and same aspect ratio. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param area - * \param x_center_drift - * \param y_center_drift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalImage input, unsigned dest_width, - unsigned dest_height, bool is_output, float area, float aspect_ratio, - float x_center_drift, float y_center_drift); - -/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit, - * otherwise; the image is cropped to fit the result. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. Rocal context - * \param input Input Rocal Image - * \param is_output True: the output image is needed by user and will be copied to output buffers using the data - * transfer API calls. False: the output image is just an intermediate image, user is not interested in - * using it directly. This option allows certain optimizations to be achieved. - * \param angle Rocal parameter defining the rotation angle value in degrees. - * \param dest_width The output width - * \param dest_height The output height - * \return Returns a new image that keeps the result. 
- */ -extern "C" RocalImage ROCAL_API_CALL rocalRotate(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam angle = NULL, unsigned dest_width = 0, - unsigned dest_height = 0); - -/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit, - * otherwise; the image is cropped to fit the result. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. Rocal context - * \param input Input Rocal Image - * \param dest_width The output width - * \param dest_height The output height - * \param is_output Is the output image part of the graph output - * \param angle The rotation angle value in degrees. - * \return Returns a new image that keeps the result. - */ -extern "C" RocalImage ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalImage input, float angle, - bool is_output, unsigned dest_width = 0, unsigned dest_height = 0); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param alpha - * \param beta - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBrightness(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param alpha - * \param beta - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalImage input, - float alpha, float beta, - bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param alpha - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalGamma(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam alpha = NULL); + RocalFloatParam gamma = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs +/*! \brief Applies gamma correction on image with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param alpha - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] gamma gamma value for the image. + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalImage input, float alpha, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalTensor input, + float gamma, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Adjusts contrast of the image. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param min - * \param max - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation + * \param [in] contrast_center parameter representing the contrast center for the contrast operation + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalContrast(RocalContext context, RocalImage input, bool is_output, - RocalIntParam min = NULL, RocalIntParam max = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalContrast(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam contrast_factor = NULL, RocalFloatParam contrast_center = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Adjusts contrast of the image with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param min - * \param max - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation + * \param [in] contrast_center parameter representing the contrast center for the contrast operation + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalImage input, - unsigned min, unsigned max, - bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalTensor input, + float contrast_factor, float contrast_center, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! \brief Flip images horizontally and/or vertically based on inputs. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param axis - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally + * \param [in] vertical_flag determines whether the input tensor should be flipped vertically + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFlip(RocalContext context, RocalImage input, bool is_output, - RocalIntParam flip_axis = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalFlip(RocalContext context, RocalTensor input, bool is_output, + RocalIntParam horizonal_flag = NULL, RocalIntParam vertical_flag = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! \brief Flip images horizontally and/or vertically with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param axis - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally + * \param [in] vertical_flag determines whether the input tensor should be flipped vertically + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalImage input, int flip_axis, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalTensor input, + int horizonal_flag, int vertical_flag, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs +/*! \brief Applies blur effect to images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size size of the kernel used for blurring + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlur(RocalContext context, RocalImage input, bool is_output, - RocalIntParam sdev = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalBlur(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam kernel_size = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! 
\brief Applies blur effect to images with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size size of the kernel used for blurring + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalImage input, int sdev, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalTensor input, + int kernel_size, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); /*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio) * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input1 - * \param input2 - * \param is_output - * \param ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0. 
- * \return + * \param [in] context Rocal context + * \param [in] input1 Input1 Rocal tensor + * \param [in] input2 Input2 Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0 + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlend(RocalContext context, RocalImage input1, RocalImage input2, bool is_output, - RocalFloatParam ratio = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalBlend(RocalContext context, RocalTensor input1, RocalTensor input2, + bool is_output, + RocalFloatParam ratio = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio) +/*! \brief Blends two input images given the fixed ratio: output = input1*ratio + input2*(1-ratio) * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input1 - * \param input2 - * \param ratio Float value defining the blending ratio, should be between 0.0 and 1.0. - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalImage input1, RocalImage input2, - float ratio, - bool is_output); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param x0 - * \param x1 - * \param y0 - * \param y1 - * \param o0 - * \param o1 - * \param dest_height - * \param dest_width - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalImage input, bool is_output, - unsigned dest_height = 0, unsigned dest_width = 0, - RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL, - RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL, - RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param x0 - * \param x1 - * \param y0 - * \param y1 - * \param o0 - * \param o1 - * \param is_output - * \param dest_height - * \param dest_width - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalImage input, float x0, float x1, - float y0, float y1, float o0, float o1, bool is_output, - unsigned int dest_height = 0, unsigned int dest_width = 0); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFishEye(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVignette(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam sdev = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalImage input, float sdev, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param min - * \param max - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalJitter(RocalContext context, RocalImage input, bool is_output, - RocalIntParam kernel_size = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param min - * \param max - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalImage input, - int kernel_size, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam sdev = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalImage input, float sdev, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnow(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam shift = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalImage input, float shift, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param rain_value - * \param rain_width - * \param rain_heigth - * \param rain_transparency - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRain(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam rain_value = NULL, - RocalIntParam rain_width = NULL, - RocalIntParam rain_height = NULL, - RocalFloatParam rain_transparency = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param rain_value - * \param rain_width - * \param rain_heigth - * \param rain_transparency - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalImage input, - float rain_value, - int rain_width, - int rain_height, - float rain_transparency, - bool is_output); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param adjustment - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalImage input, bool is_output, - RocalIntParam adjustment = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param adjustment - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalImage input, int adjustment, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param fog_value - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFog(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam fog_value = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param fog_value - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalImage input, float fog_value, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param strength - * \param zoom - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam strength = NULL, - RocalFloatParam zoom = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param strength - * \param zoom - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalImage input, - float strength, float zoom, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalPixelate(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param shift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalExposure(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam shift = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param shift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalImage input, float shift, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalHue(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam hue = NULL); - -/*! \brief + * \param [in] context Rocal context + * \param [in] input1 Input1 Rocal tensor + * \param [in] input2 Input2 Rocal tensor + * \param [in] ratio Float value defining the blending ratio, should be between 0.0 and 1.0. + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalTensor input1, RocalTensor input2, + float ratio, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies affine transformation to images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] x0 float parameter representing the coefficient of affine tensor matrix + * \param [in] x1 float parameter representing the coefficient of affine tensor matrix + * \param [in] y0 float parameter representing the coefficient of affine tensor matrix + * \param [in] y1 float parameter representing the coefficient of affine tensor matrix + * \param [in] o0 float parameter representing the coefficient of affine tensor matrix + * \param [in] o1 float parameter representing the coefficient of affine tensor matrix + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] interpolation_type The type of interpolation to be used for warp affine. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalTensor input, bool is_output, + unsigned dest_height = 0, unsigned dest_width = 0, + RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL, + RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL, + RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies affine transformation to images with fixed affine matrix. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param hue - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] x0 float parameter representing the coefficient of affine tensor matrix + * \param [in] x1 float parameter representing the coefficient of affine tensor matrix + * \param [in] y0 float parameter representing the coefficient of affine tensor matrix + * \param [in] y1 float parameter representing the coefficient of affine tensor matrix + * \param [in] o0 float parameter representing the coefficient of affine tensor matrix + * \param [in] o1 float parameter representing the coefficient of affine tensor matrix + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] interpolation_type The type of interpolation to be used for warp affine. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalImage input, - float hue, - bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalTensor input, float x0, float x1, + float y0, float y1, float o0, float o1, bool is_output, + unsigned int dest_height = 0, unsigned int dest_width = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Applies fish eye effect on images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param min - * \param max - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalSaturation(RocalContext context, - RocalImage input, +extern "C" RocalTensor ROCAL_API_CALL rocalFishEye(RocalContext context, RocalTensor input, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies vignette effect on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. 
+ * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] sdev standard deviation for the vignette effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVignette(RocalContext context, RocalTensor input, + bool is_output, RocalFloatParam sdev = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies vignette effect on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] sdev standard deviation for the vignette effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalTensor input, + float sdev, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies jitter effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size kernel size used for the jitter effect + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJitter(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam kernel_size = NULL, + int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies jitter effect on images with fixed kernel size. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size kernel size used for the jitter effect + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalTensor input, + int kernel_size, bool is_output, int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies salt and pepper noise effect on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] noise_prob probability of applying the Salt and Pepper noise. 
+ * \param [in] salt_prob probability of applying salt noise + * \param [in] salt_val specifies the value of the salt noise + * \param [in] pepper_val specifies the value of the pepper noise + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam noise_prob = NULL, RocalFloatParam salt_prob = NULL, + RocalFloatParam salt_val = NULL, RocalFloatParam pepper_val = NULL, + int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies salt and pepper noise on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] noise_prob probability of applying the Salt and Pepper noise. + * \param [in] salt_prob probability of applying salt noise + * \param [in] salt_val specifies the value of the salt noise + * \param [in] pepper_val specifies the value of the pepper noise + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalTensor input, + float noise_prob, float salt_prob, + float salt_val, float pepper_val, + bool is_output, int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies snow effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] snow Float param representing the intensity of snow effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnow(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam snow = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies snow effect on images with fixed parameter. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] snow Float param representing the intensity of snow effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalTensor input, + float snow, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies rain effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] rain_value parameter represents the intensity of rain effect + * \param [in] rain_width parameter represents the width of the rain effect + * \param [in] rain_height parameter represents the height of the rain effect + * \param [in] rain_transparency parameter represents the transparency of the rain effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRain(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam rain_value = NULL, + RocalIntParam rain_width = NULL, + RocalIntParam rain_height = NULL, + RocalFloatParam rain_transparency = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies rain effect on images with fixed parameter. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] rain_value parameter represents the intensity of rain effect + * \param [in] rain_width parameter represents the width of the rain effect + * \param [in] rain_height parameter represents the height of the rain effect + * \param [in] rain_transparency parameter represents the transparency of the rain effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalTensor input, + float rain_value, + int rain_width, + int rain_height, + float rain_transparency, bool is_output, - RocalFloatParam sat = NULL); - -/*! \brief rocalSaturationFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalImage input, float sat, - bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param min - * \param max - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCopy(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalNop(RocalContext context, RocalImage input, bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param alpha - * \param beta - * \param hue - * \param sat - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam alpha = NULL, - RocalFloatParam beta = NULL, - RocalFloatParam hue = NULL, - RocalFloatParam sat = NULL); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param alpha - * \param beta - * \param hue - * \param sat - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalImage input, - float alpha, - float beta, - float hue, - float sat, - bool is_output); -/*! \brief rocalCropMirrorNormalize - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalImage input, - unsigned crop_depth, - unsigned crop_height, - unsigned crop_width, - float start_x, - float start_y, - float start_z, - std::vector<float> &mean, - std::vector<float> &std_dev, - bool is_output, - RocalIntParam mirror = NULL); + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the color temperature in images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] adjustment color temperature adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam adjustment = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the color temperature in images with fixed value. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] adjustment color temperature adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalTensor input, + int adjustment, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief rocalCrop +/*! \brief Applies fog effect on images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
+ * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] fog_value parameter representing the intensity of fog effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalCrop(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam crop_width = NULL, - RocalFloatParam crop_height = NULL, - RocalFloatParam crop_depth = NULL, - RocalFloatParam crop_pox_x = NULL, - RocalFloatParam crop_pos_y = NULL, - RocalFloatParam crop_pos_z = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalFog(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam fog_value = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief rocalCropFixed +/*! \brief Applies fog effect on images with fixed parameter. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] fog_value parameter representing the intensity of fog effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalImage input, - unsigned crop_width, - unsigned crop_height, - unsigned crop_depth, +extern "C" RocalTensor ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalTensor input, + float fog_value, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies lens correction effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] strength parameter representing the strength of the lens correction. + * \param [in] zoom parameter representing the zoom factor of the lens correction. + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam strength = NULL, + RocalFloatParam zoom = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies lens correction effect on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] strength parameter representing the strength of the lens correction. + * \param [in] zoom parameter representing the zoom factor of the lens correction. + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalTensor input, + float strength, float zoom, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies pixelate effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalPixelate(RocalContext context, RocalTensor input, bool is_output, - float crop_pox_x, - float crop_pos_y, - float crop_pos_z); -/*! \brief rocalCropCenterFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param crop_width - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalImage input, - unsigned crop_width, - unsigned crop_height, - unsigned crop_depth, - bool output); -/*! \brief rocalResizeCropMirrorFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, - bool is_output, - unsigned crop_h, - unsigned crop_w, - RocalIntParam mirror); -/*! \brief rocalResizeCropMirror - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, - bool is_output, RocalFloatParam crop_height = NULL, - RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL); + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the exposure in images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] exposure_factor exposure adjustment factor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalExposure(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam exposure_factor = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the exposure in images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] exposure_factor exposure adjustment factor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalTensor input, + float exposure_factor, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the hue in images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] hue hue adjustment value in degrees + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalHue(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam hue = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the hue in images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] hue hue adjustment value in degrees + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalTensor input, + float hue, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the saturation in images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] saturation saturation adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSaturation(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam saturation = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the saturation in images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] saturation saturation adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalTensor input, + float saturation, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Copies input tensor to output tensor. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCopy(RocalContext context, RocalTensor input, bool is_output); + +/*! \brief Performs no operation. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNop(RocalContext context, RocalTensor input, bool is_output); + +/*! \brief Adjusts the brightness, hue and saturation of the images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha parameter that controls the brightness of an image + * \param [in] beta parameter that helps in tuning the color balance of an image + * \param [in] hue parameter that adjusts the hue of an image + * \param [in] sat parameter that controls the intensity of colors + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam alpha = NULL, + RocalFloatParam beta = NULL, + RocalFloatParam hue = NULL, + RocalFloatParam sat = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the brightness, hue and saturation of the images with fixed parameters. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] alpha parameter that controls the brightness of an image + * \param [in] beta parameter that helps in tuning the color balance of an image + * \param [in] hue parameter that adjusts the hue of an image + * \param [in] sat parameter that controls the intensity of colors + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalTensor input, + float alpha, + float beta, + float hue, + float sat, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs crop, normalize and flip on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] start_x x-coordinate, start of the input tensor to be cropped + * \param [in] start_y y-coordinate, start of the input tensor to be cropped + * \param [in] mean mean value (specified for each channel) for tensor normalization + * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalTensor input, + unsigned crop_height, + unsigned
crop_width, + float start_x, + float start_y, + std::vector &mean, + std::vector &std_dev, + bool is_output, + RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels +/*! \brief Crops images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param num_of_attmpts - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalImage input, +extern "C" RocalTensor ROCAL_API_CALL rocalCrop(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam crop_width = NULL, + RocalFloatParam crop_height = NULL, + RocalFloatParam crop_depth = NULL, + RocalFloatParam crop_pox_x = NULL, + RocalFloatParam crop_pos_y = NULL, + RocalFloatParam crop_pos_z = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images with fixed coordinates. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalTensor input, + unsigned crop_width, + unsigned crop_height, + unsigned crop_depth, bool is_output, - RocalFloatParam crop_area_factor = NULL, - RocalFloatParam crop_aspect_ratio = NULL, - RocalFloatParam crop_pos_x = NULL, - RocalFloatParam crop_pos_y = NULL, - int num_of_attempts = 20); - -/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param IOU_threshold - * \param num_of_attmpts - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam threshold = NULL, - RocalFloatParam crop_area_factor = NULL, - RocalFloatParam crop_aspect_ratio = NULL, - RocalFloatParam crop_pos_x = NULL, - RocalFloatParam crop_pos_y = NULL, - int num_of_attempts = 20); - -#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H + float crop_pox_x, + float crop_pos_y, + float crop_pos_z, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! 
\brief Crops images at the center with fixed coordinates. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalTensor input, + unsigned crop_width, + unsigned crop_height, + unsigned crop_depth, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, crop and flip on images with fixed crop. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] crop_h crop height of the tensor + * \param [in] crop_w crop width of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + unsigned crop_h, + unsigned crop_w, + RocalIntParam mirror, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, crop and flip on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, RocalFloatParam crop_height = NULL, + RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images randomly. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] num_of_attempts maximum number of attempts the function will make to find a valid crop + * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region + * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] crop_pos_x specifies a specific horizontal position for the crop + * \param [in] crop_pos_y specifies a specific vertical position for the crop + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam crop_area_factor = NULL, + RocalFloatParam crop_aspect_ratio = NULL, + RocalFloatParam crop_pos_x = NULL, + RocalFloatParam crop_pos_y = NULL, + int num_of_attempts = 20, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images randomly used for SSD training. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] threshold the threshold parameter for crop operation + * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region + * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] crop_pos_x specifies a specific horizontal position for the crop + * \param [in] crop_pos_y specifies a specific vertical position for the crop + * \param [in] num_of_attempts the maximum number of attempts the function will make to find a valid crop + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam threshold = NULL, + RocalFloatParam crop_area_factor = NULL, + RocalFloatParam crop_aspect_ratio = NULL, + RocalFloatParam crop_pos_x = NULL, + RocalFloatParam crop_pos_y = NULL, + int num_of_attempts = 20, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies preemphasis filter to the input data. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output Sets to True if the output tensor is part of the graph output + * \param [in] preemph_coeff Preemphasis coefficient + * \param [in] preemph_border_type Border value policy. Possible values are "zero", "clamp", "reflect". 
+ * \param [in] output_datatype The data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalPreEmphasisFilter(RocalContext context, + RocalTensor input, + bool is_output, + RocalFloatParam preemph_coeff = NULL, + RocalAudioBorderType preemph_border_type = RocalAudioBorderType::ROCAL_CLAMP, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! \brief Produces a spectrogram from a 1D audio signal. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] window_fn values of the window function + * \param [in] center_windows boolean value to specify whether extracted windows should be padded so that the window function is centered at multiples of window_step + * \param [in] reflect_padding Indicates the padding policy when sampling outside the bounds of the audio data + * \param [in] spectrogram_layout output spectrogram layout + * \param [in] power Exponent of the magnitude of the spectrum + * \param [in] nfft Size of the Fast Fourier transform (FFT) + * \param [in] window_length Window size in the number of samples + * \param [in] window_step Step between the Short-time Fourier transform (STFT) windows in number of samples + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSpectrogram(RocalContext context, + RocalTensor input, + bool is_output, + std::vector &window_fn, + bool center_windows, + bool reflect_padding, + int power, + int nfft, + int window_length = 512, + int window_step = 256, + RocalTensorLayout output_layout = ROCAL_NFT, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! 
\brief Converts the magnitudes in the input tensor to the decibel scale. + * \ingroup group_rocal_augmentations + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] cutoff_db minimum or cut-off ratio in dB + * \param [in] multiplier factor by which the logarithm is multiplied + * \param [in] reference_magnitude Reference magnitude which if not provided uses maximum value of input as reference + * \param [in] rocal_tensor_output_type the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalToDecibels(RocalContext p_context, + RocalTensor p_input, + bool is_output, + float cutoff_db, + float multiplier, + float reference_magnitude, + RocalTensorOutputType rocal_tensor_output_type); + +/*! \brief Applies resample augmentation to input tensors + * \ingroup group_rocal_augmentations + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] p_output_resample_rate the output resample rate for a batch of audio samples + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] sample_hint sample_hint value is the value required to allocate the max memory for output tensor wrt resample_rate and the samples + * \param [in] quality The resampling is achieved by applying a sinc filter with Hann window with an extent controlled by the quality argument + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResample(RocalContext p_context, + RocalTensor p_input, + RocalTensor p_output_resample_rate, + bool is_output, + float sample_hint, + float quality = 50.0, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! 
\brief Creates and returns a rocALTensor generated from a uniform distribution + * \ingroup group_rocal_augmentations + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] range The range for generating uniform distribution + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalUniformDistribution(RocalContext p_context, + RocalTensor p_input, + bool is_output, + std::vector &range); + +/*! \brief Creates and returns a rocALTensor generated from a normal distribution + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] mean The mean value for generating the normal distribution + * \param [in] stddev The stddev value for generating the normal distribution + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNormalDistribution(RocalContext p_context, + RocalTensor p_input, + bool is_output, + float mean = 0.0, + float stddev = 0.0); + +/*! \brief Multiplies a tensor and a scalar and returns the output + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] scalar The scalar value to be multiplied with the input tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalTensorMulScalar(RocalContext p_context, + RocalTensor p_input, + bool is_output, + float scalar = 0.0, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! \brief Adds two tensors and returns the output. 
+ * \param [in] p_context Rocal context + * \param [in] p_input1 Input Rocal tensor1 + * \param [in] p_input2 Input Rocal tensor2 + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalTensorAddTensor(RocalContext p_context, + RocalTensor p_input1, + RocalTensor p_input2, + bool is_output, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! \brief Performs silence detection in the input audio tensor + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] cutoff_db threshold(dB) below which the signal is considered silent + * \param [in] reference_power reference power that is used to convert the signal to dB + * \param [in] reset_interval number of samples after which the moving mean average is recalculated to avoid loss of precision + * \param [in] window_length size of the sliding window used to calculate of the short-term power of the signal + * \return std::pair + */ +extern "C" std::pair ROCAL_API_CALL rocalNonSilentRegionDetection(RocalContext context, + RocalTensor input, + bool is_output, + float cutoff_db, + float reference_power, + int reset_interval, + int window_length); + +/*! 
\brief Extracts the sub-tensor from a given input tensor + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] anchor anchor values used for specifying the starting indices of slice + * \param [in] shape shape values used for specifying the length of slice + * \param [in] fill_values fill values based on out of Bound policy + * \param [in] policy + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSlice(RocalContext context, + RocalTensor input, + bool is_output, + RocalTensor anchor, + RocalTensor shape, + std::vector fill_values, + RocalOutOfBoundsPolicy policy = RocalOutOfBoundsPolicy::ROCAL_ERROR, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! \brief Performs mean-stddev normalization on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] axes axes list for tensor normalization + * \param [in] mean mean value (specified for each channel) for tensor normalization + * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization + * \param [in] is_output is the output tensor part of the graph output + * \param [in] scale scale value (specified for each channel) for tensor normalization + * \param [in] shift shift value (specified for each channel) for tensor normalization + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input, + std::vector &axes, + std::vector &mean, + std::vector &std_dev, + bool is_output, + float scale = 1.0, float shift = 0.0, + RocalTensorOutputType output_datatype = ROCAL_FP32); + +/*! 
\brief Applies mel-filter bank augmentation on the given input tensor + * \ingroup group_rocal_augmentations + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] freq_high maximum frequency + * \param [in] freq_low minimum frequency + * \param [in] mel_formula formula used to convert frequencies from hertz to mel and from mel to hertz + * \param [in] nfilter number of mel filters + * \param [in] normalize boolean variable that determine whether to normalize weights / not + * \param [in] sample_rate sampling rate of the audio data + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ + +extern "C" RocalTensor ROCAL_API_CALL rocalMelFilterBank(RocalContext p_context, + RocalTensor p_input, + bool is_output, + float freq_high, + float freq_low, + RocalMelScaleFormula mel_formula, + int nfilter, + bool normalize, + float sample_rate, + RocalTensorOutputType output_datatype); + +#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h index 567dca1a9..eec0c9a64 100644 --- a/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/include/api/rocal_api_data_loaders.h @@ -23,7 +23,6 @@ THE SOFTWARE. #ifndef MIVISIONX_ROCAL_API_DATA_LOADERS_H #define MIVISIONX_ROCAL_API_DATA_LOADERS_H #include "rocal_api_types.h" -#include /*! * \file @@ -33,310 +32,258 @@ THE SOFTWARE. * \brief The AMD rocAL data loader functions. */ -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. 
- * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. 
It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded tensors to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images in a sequence will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. - * \param sequence_length: The number of frames in a sequence. - * \param is_output Determines if the user wants the loaded sequences to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the sequences or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \return Reference to the output image. - */ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReader(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a singe shard only. +/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images in a sequence will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param sequence_length: The number of frames in a sequence. 
- * \param is_output Determines if the user wants the loaded sequences to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded tensor to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. 
- * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images in a sequence will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the sequences or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor. 
*/ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context, - const char *source_path, - const char *json_path, - RocalImageColor color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReader(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. 
- * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images in a sequence will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context, - const char *source_path, - const char *json_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! \brief JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. 
- * \param shard_id Shard id for this loader - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context, - const char *source_path, - const char *json_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief \param rocal_context Rocal context +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context, + const char* source_path, + const char* json_path, + RocalImageColor color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. 
It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. 
Default 10 + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context, - const char *source_path, - const char *json_path, - RocalImageColor color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context, + const char* source_path, + const char* json_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. 
- * If images are not Jpeg compressed they will be ignored. +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. 
Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context, + const char* source_path, + const char* json_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + std::pair 
last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +/*! \brief Creates JPEG image reader. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] color_format The color format the images will be decoded to. 
+ * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. 
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context, + const char* source_path, + const char* json_path, + RocalImageColor color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context, + const char* source_path, RocalImageColor rocal_color_format, unsigned internal_shard_count, bool is_output, @@ -344,27 +291,28 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContex bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored on the Caffe2 LMDB Records. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. * \ingroup group_rocal_data_loaders - * \param p_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, @@ -373,177 +321,114 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard( bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored on the MXNet records. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param p_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored and Crops t +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored on the Caffe2 LMDB Records. It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to.
- * \param num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned num_threads, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); - -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. 
- * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); - -/*! - * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel is user wants. 
If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. 
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context, + const char* source_path, RocalImageColor rocal_color_format, unsigned internal_shard_count, bool is_output, - const char *user_key_for_encoded, - const char *user_key_for_filename, bool shuffle = false, bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored on the MXNet records. 
It accepts external sharding information to load a singe shard. only * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, @@ -552,300 +437,490 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalCon bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. 
If images are not Jpeg compressed they will be ignored and Crops t * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle: to shuffle dataset - * \param loop: repeat data loading - * \param out_width The output_width of raw image - * \param out_height The output height of raw image - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context, - const char *source_path, - const char *user_key_for_raw, - const char *user_key_for_filename, - RocalImageColor rocal_color_format, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned out_width = 0, unsigned out_height = 0, - const char *record_name_prefix = ""); + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned num_threads, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. 
- * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param shuffle: to shuffle dataset - * \param loop: repeat data loading - * \param out_width The output_width of raw image - * \param out_height The output height of raw image - * \param record_name_prefix : if nonempty reader will only read records with certain prefix - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned out_width = 0, unsigned out_height = 0, - const char *record_name_prefix = ""); + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. +/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. - * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. - * \param sequence_length: The number of frames in a sequence. - * \param file_names_list List of input video filenames - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. 
- * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSource(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - const std::vector& file_names_list, - bool is_output = false, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); - -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a singe shard only. + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + const char* user_key_for_encoded, + const char* user_key_for_filename, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. - * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
- * \param shard_id Shard id for this loader. - * \param shard_count Total shard count. - * \param sequence_length: The number of frames in a sequence. - * \param file_names_list List of input video filenames - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - const std::vector& file_names_list, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); - -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. - * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. 
- * \param sequence_length: The number of frames in a sequence. - * \param dest_width The output width of frames. - * \param dest_height The output height of frames. - * \param file_names_list List of input video filenames - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResize(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - const std::vector& file_names_list, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] shuffle: to shuffle dataset + * \param [in] loop: repeat data loading + * \param [in] out_width The output_width of raw image + * \param [in] out_height The output height of raw image + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context, + const char* source_path, + const char* user_key_for_raw, + const char* user_key_for_filename, + RocalImageColor rocal_color_format, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned out_width = 0, unsigned out_height = 0, + const char* record_name_prefix = "", + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a singe shard only. +/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. - * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param shard_id Shard id for this loader. - * \param shard_count Total shard count. - * \param sequence_length: The number of frames in a sequence. 
- * \param dest_width The output width of frames. - * \param dest_height The output height of frames. - * \param file_names_list List of input video filenames - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - const std::vector& file_names_list, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. 
+ * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] shuffle: to shuffle dataset + * \param [in] loop: repeat data loading + * \param [in] out_width The output_width of raw image + * \param [in] out_height The output height of raw image + * \param [in] record_name_prefix : if nonempty reader will only read records with certain prefix + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned out_width = 0, unsigned out_height = 0, + const char* record_name_prefix = "", + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); /*! - * \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems. + * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. 
- * \param out_width ; output width - * \param out_height ; output_height - * \param filename_prefix ; if set loader will only load files with the given prefix name - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSource(RocalContext context, + const char* source_path, RocalImageColor color_format, - bool is_output, - unsigned out_width, unsigned out_height, const char *filename_prefix = "", - bool loop = false); - -/*! - * \brief + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output = false, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param context - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] shard_id Shard id for this loader. + * \param [in] shard_count Total shard count. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. 
+ * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor */ -extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. + * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. 
+ * \param [in] dest_width The output width of frames. + * \param [in] dest_height The output height of frames. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResize(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a single shard only. 
+ * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] shard_id Shard id for this loader. + * \param [in] shard_count Total shard count. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] dest_width The output width of frames. + * \param [in] dest_height The output height of frames. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems. + * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] out_width output width + * \param [in] out_height output_height + * \param [in] filename_prefix if set loader will only load files with the given prefix name + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context, + const char* source_path, + RocalImageColor color_format, + bool is_output, + unsigned out_width, unsigned out_height, const char* filename_prefix = "", + bool loop = false); -/*! - * \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +/*! \brief reset Loaders * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] context Rocal Context + * \return Rocal status value */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); +extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context); -/*! - * \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe22 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +/*! \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 to 1. + * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loop through images or not. + * \param [in] decode_size_policy The RocalImageSizeEvaluationPolicy used for decoding + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, + std::vector& area_factor, + std::vector& aspect_ratio, unsigned num_attempts, bool shuffle = false, bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); + unsigned max_width = 0, unsigned max_height = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); -#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H +/*! \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants. + * \ingroup group_rocal_data_loaders + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. + * \param [in] aspect_ratio Determines the aspect ratio of the crop. + * \param [in] num_attempts Maximum number of attempts to generate crop. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loop through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); +/*! \brief Creates JPEG external source image reader. + * \ingroup group_rocal_data_loaders + * \param [in] rocal_context Rocal context + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy is the RocalImageSizeEvaluationPolicy for decoding + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \param [in] external_source_mode Determines the mode of the source passed from the user - file_names / uncompressed data / compressed data + * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data). + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJpegExternalFileSource(RocalContext p_context, + RocalImageColor rocal_color_format, + bool is_output = false, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, + RocalExternalSourceMode external_source_mode = RocalExternalSourceMode::ROCAL_EXTSOURCE_FNAME, + std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); + +/*! Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file systems. It has internal sharding capability to load/decode in parallel if user wants. 
+ * If the files are not in standard audio compression formats they will be ignored. Currently the wav format is supported. + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] source_file_list_path A char string pointing to the file list location on the disk + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Boolean variable to enable the audio to be part of the output. + * \param [in] shuffle Boolean variable to shuffle the dataset. + * \param [in] loop Boolean variable to indefinitely loop through audio. + * \param [in] downmix Boolean variable to downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio. + * \return Reference to the output audio + */ +extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSource(RocalContext context, + const char* source_path, + const char* source_file_list_path, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + bool downmix = false); + +/*! Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file systems. It has internal sharding capability to load/decode in parallel if the user wants. + * If the files are not in standard audio compression formats they will be ignored. 
+ * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] source_file_list_path A char string pointing to the file list location on the disk + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Boolean variable to enable the audio to be part of the output. + * \param [in] shuffle Boolean variable to shuffle the dataset. + * \param [in] loop Boolean variable to indefinitely loop through audio. + * \param [in] downmix Boolean variable to downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio. + * \return Reference to the output audio + */ +extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSourceSingleShard(RocalContext p_context, + const char* source_path, + const char* source_file_list_path, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + bool downmix = false); + +#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H diff --git a/rocAL/include/api/rocal_api_data_transfer.h b/rocAL/include/api/rocal_api_data_transfer.h index b31b8911a..966ee6bdf 100644 --- a/rocAL/include/api/rocal_api_data_transfer.h +++ b/rocAL/include/api/rocal_api_data_transfer.h @@ -33,59 +33,75 @@ THE SOFTWARE. */ /*! 
- * \brief TBD + * \brief copies data to output buffer * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] context Rocal context + * \param [in] out_ptr pointer to output buffer + * \param [in] out_size size of output buffer + * \return Rocal status indicating success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalCopyToOutput(RocalContext context, unsigned char *out_ptr, size_t out_size); /*! - * \brief TBD + * \brief converts data to a tensor * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] rocal_context Rocal context + * \param [in] out_ptr pointer to output buffer + * \param [in] tensor_format the layout of the tensor data + * \param [in] tensor_output_type the output type of the tensor data + * \param [in] multiplier0 the multiplier for channel 0 + * \param [in] multiplier1 the multiplier for channel 1 + * \param [in] multiplier2 the multiplier for channel 2 + * \param [in] offset0 the offset for channel 0 + * \param [in] offset1 the offset for channel 1 + * \param [in] offset2 the offset for channel 2 + * \param [in] reverse_channels flag to reverse the channel orders + * \param [in] output_mem_type the memory type of output tensor buffer + * \return Rocal status indicating success or failure */ -extern "C" RocalStatus ROCAL_API_CALL rocalToTensor32(RocalContext rocal_context, float *out_ptr, - RocalTensorLayout tensor_format, float multiplier0, - float multiplier1, float multiplier2, float offset0, - float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); +extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, void *out_ptr, + RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, + float multiplier0, float multiplier1, float multiplier2, float 
offset0, + float offset1, float offset2, + bool reverse_channels, RocalOutputMemType output_mem_type, int max_roi_height = 0, int max_roi_width = 0); /*! - * \brief TBD + * \brief Sets the output images in the RocalContext * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] p_context Rocal context + * \param [in] num_of_outputs number of output images + * \param [in] output_images output images */ -extern "C" RocalStatus ROCAL_API_CALL rocalToTensor16(RocalContext rocal_context, half *out_ptr, - RocalTensorLayout tensor_format, float multiplier0, - float multiplier1, float multiplier2, float offset0, - float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); +extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images); /*! - * \brief TBD + * \brief gives the list of output tensors from rocal context * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] p_context Rocal Context + * \return A RocalTensorList containing the list of output tensors */ -extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, void *out_ptr, - RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, - float multiplier0, float multiplier1, float multiplier2, float offset0, - float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetOutputTensors(RocalContext p_context); + /*! 
- * \brief TBD + * \brief Creates ExternalSourceFeedInput for data transfer * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param p_context Rocal context + * \param input_images_names Strings pointing to the location on the disk + * \param is_labels Boolean flag for labels passed by the user using an external source (presumably true when labels are supplied - confirm against the implementation) + * \param input_buffer Compressed or uncompressed input buffer + * \param roi_xywh The roi of the images (presumably x / y / width / height per image - confirm) + * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param channels The number of channels for the image + * \param mode Determines the mode of the source passed from the user - file_names / uncompressed data / compressed data + * \param layout Determines the layout of the images - NCHW / NHWC + * \param eos Boolean flag (presumably marks the end of the external source input - confirm against the implementation) + * \return Rocal status indicating success or failure */ -extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images); +extern "C" RocalStatus ROCAL_API_CALL rocalExternalSourceFeedInput(RocalContext p_context, const std::vector& input_images_names, + bool is_labels, const std::vector& input_buffer, + const std::vector& roi_xywh, + unsigned int max_width, unsigned int max_height, unsigned int channels, + RocalExternalSourceMode mode, RocalTensorLayout layout, bool eos); -#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H +#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H diff --git a/rocAL/include/api/rocal_api_info.h b/rocAL/include/api/rocal_api_info.h index e00d5e4f7..9ea36fbb8 100644 --- a/rocAL/include/api/rocal_api_info.h +++ b/rocAL/include/api/rocal_api_info.h @@ -33,115 +33,108 @@ THE SOFTWARE. */ /*! - * \brief rocalGetOutputWidth + * \brief Retrieves the width of the output. 
* \ingroup group_rocal_info - * - * \param [in] context - * \return The width of the ROCAL's output image in pixels + * \param [in] rocal_context The RocalContext + * \return The width of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputWidth(RocalContext rocal_context); /*! - * \brief rocalGetOutputHeight + * \brief Retrieves the height of the output. * \ingroup group_rocal_info - * - * \param [in] context - * \return The height of the ROCAL's output image in pixels. It includes all images in the batch. + * \param [in] rocal_context The RocalContext + * \return The height of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputHeight(RocalContext rocal_context); /*! - * \brief rocalGetOutputColorFormat + * \brief Retrieves the color format of the output. * \ingroup group_rocal_info - * - * \param [in] context - * \return The color format of the ROCAL's output. It's equivalent of what's passed to the loaders as input color format. + * \param [in] rocal_context The RocalContext. + * \return The color format of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputColorFormat(RocalContext rocal_context); /*! - * \brief rocalGetRemainingImages + * \brief Retrieves the number of remaining images. * \ingroup group_rocal_info - * - * \param [in] context - * \return The number of images yet to be processed + * \param [in] rocal_context The RocalContext. + * \return The number of remaining images yet to be processed. */ + extern "C" size_t ROCAL_API_CALL rocalGetRemainingImages(RocalContext rocal_context); /*! - * \brief rocalGetImageWidth + * \brief Retrieves the width of the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Width of the graph output image - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. + * \return The width of the image. 
*/ -extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalTensor image); /*! - * \brief rocalGetImageHeight + * \brief Retrieves the height of the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Height of the pipeline output image, includes all images in the batch - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. + * \return The height of the image. */ -extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalTensor image); /*! - * \brief rocalGetImagePlanes + * \brief Retrieves the number of planes (channels) in the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Color format of the pipeline output image. - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. + * \return The number of planes (channels) in the image. */ -extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalTensor image); /*! - * \brief rocalIsEmpty + * \brief Checks if the RocalContext is empty. * \ingroup group_rocal_info - * - * \param [in] context - * \return 1 if all images have been processed, otherwise 0 - * \note Returned value valid only after rocalVerify is called + * \param [in] rocal_context The RocalContext + * \return return if RocalContext is empty or not. */ extern "C" size_t ROCAL_API_CALL rocalIsEmpty(RocalContext rocal_context); /*! - * \brief rocalGetAugmentationBranchCount + * \brief Retrieves the number of augmentation branches. * \ingroup group_rocal_info - * - * \param [in] context + * \param [in] rocal_context The RocalContext * \return Number of augmentation graph branches. Defined by number of calls to the augmentation API's with the is_output flag set to true. 
*/ extern "C" size_t ROCAL_API_CALL rocalGetAugmentationBranchCount(RocalContext rocal_context); /*! - * \brief rocalGetStatus - * \ingroup group_rocal_info - * - * \param [in] context + * \brief Retrieves the status. + * \ingroup group_rocal_info + * \param [in] rocal_context The RocalContext from which to retrieve the status. * \return The status of tha last API call */ extern "C" RocalStatus ROCAL_API_CALL rocalGetStatus(RocalContext rocal_context); /*! - * \brief rocalGetErrorMessage + * \brief Retrieves the error message. * \ingroup group_rocal_info - * - * \param [in] context - * \return The last error message generated by call to rocal API + * \param [in] rocal_context The RocalContext + * \return A pointer to the error message string. */ -extern "C" const char *ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context); +extern "C" const char* ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context); /*! - * \brief rocalGetTimingInfo + * \brief Retrieves timing information. * \ingroup group_rocal_info - * - * \param [in] context + * \param [in] rocal_context The RocalContext * \return The timing info associated with recent execution. */ extern "C" TimingInfo ROCAL_API_CALL rocalGetTimingInfo(RocalContext rocal_context); -#endif // MIVISIONX_ROCAL_API_INFO_H +/*! + * \brief Retrieves the information about the size of the last batch. + * \ingroup group_rocal_info + * \param rocal_context + * \return The number of samples that were padded in the last batch in adherence with last_batch_policy and last_batch_padded + */ +extern "C" size_t ROCAL_API_CALL rocalGetLastBatchPaddedSize(RocalContext rocal_context); + +#endif // MIVISIONX_ROCAL_API_INFO_H diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index edee9dda4..9907427bb 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -32,272 +32,288 @@ THE SOFTWARE. * \brief The AMD rocAL meta data functions. 
*/ -/*! - * \brief rocalCreateLabelReader +/*! \brief creates label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the folder that contains the dataset or metadata file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the folder that contains the dataset or metadata file + * \param file_list_path is the path to file list that contains the file names and its corresponding labels * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char* source_path, const char* file_list_path = ""); -/*! - * \brief rocalCreateVideoLabelReader - * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the folder that contains the dataset or metadata file - * \param sequence_length The number of frames in a sequence. - * \param frame_step Frame interval between each sequence. - * \param frame_stride Frame interval between frames in a sequence. - * \param file_names_list List of input video filenames - * \param labels List of labels corresponding to each video filename in filenames_list - * \param file_list_frame_num True : when the inputs from text file is to be considered as frame numbers. - * False : when the inputs from text file is to considered as timestamps. +/*! \brief creates video label reader + * \ingroup group_rocal_meta_data + * \param [in] rocal_context rocal context + * \param [in] source_path path to the folder that contains the dataset or metadata file + * \param [in] sequence_length The number of frames in a sequence. + * \param [in] frame_step Frame interval between each sequence. + * \param [in] frame_stride Frame interval between frames in a sequence. 
+ * \param [in] file_list_frame_num True : when the inputs from text file is to be considered as frame numbers. False : when the inputs from text file is to considered as timestamps. * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char *source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, const std::vector& file_names_list, const std::vector& labels, bool file_list_frame_num = true); - +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char* source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); -/*! - * \brief rocalCreateTFReader +/*! \brief create tf reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char *source_path, bool is_output, - const char *user_key_for_label, const char *user_key_for_filename); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char* source_path, bool is_output, + const char* user_key_for_label, const char* user_key_for_filename); -/*! - * \brief rocalCreateTFReaderDetection +/*! 
\brief create tf reader detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context + * \param [in] source_path path to the coco json file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output, - const char *user_key_for_label, const char *user_key_for_text, - const char *user_key_for_xmin, const char *user_key_for_ymin, const char *user_key_for_xmax, const char *user_key_for_ymax, - const char *user_key_for_filename); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output, + const char* user_key_for_label, const char* user_key_for_text, + const char* user_key_for_xmin, const char* user_key_for_ymin, const char* user_key_for_xmax, const char* user_key_for_ymax, + const char* user_key_for_filename); -/*! - * \brief rocalCreateCOCOReader +/*! \brief create coco reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file + * \param [in] mask enable polygon masks + * \param [in] ltrb If set to True, bboxes are returned as [left, top, right, bottom]. If set to False, the bboxes are returned as [x, y, width, height] + * \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors + * \param [in] avoid_class_remapping If set to True, classes are returned directly. 
Otherwise, classes are mapped to consecutive values + * \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned + * \param [in] is_box_iou_matcher If set to True, box iou matcher which returns matched indices is enabled in the pipeline * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false); -/*! - * \brief rocalCreateCOCOReaderKeyPoints +/*! \brief create coco reader key points * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file - * \param sigma sigma used for gaussian distribution (needed for HRNet Pose estimation) - * \param pose_output_width output image width (needed for HRNet Pose estimation) - * \param pose_output_width output image height (needed for HRNet Pose estimation) + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file + * \param [in] sigma sigma used for gaussian distribution (needed for HRNet Pose estimation) + * \param [in] pose_output_width output image width (needed for HRNet Pose estimation) + * \param [in] pose_output_height output image height (needed for HRNet Pose estimation) * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char *source_path, bool is_output, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); +extern "C" 
RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char* source_path, bool is_output, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); -/*! - * \brief rocalCreateTextFileBasedLabelReader +/*! \brief create text file based label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the file that contains the metadata file + * \param [in] rocal_context + * \param [in] source_path path to the file that contains the metadata file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffeLMDBLabelReader +/*! \brief create caffe LMDB label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe LMDB records for Classification + * \param [in] rocal_context + * \param [in] source_path path to the Caffe LMDB records for Classification * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffeLMDBReaderDetection +/*! 
\brief create caffe LMDB label reader for object detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe LMDB records for Object Detection + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe LMDB records for Object Detection * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffe2LMDBLabelReader +/*! \brief create caffe2 LMDB label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe2LMDB records for Classification + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe2LMDB records for Classification * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalCreateCaffe2LMDBReaderDetection +/*! 
\brief create caffe2 LMDB label reader for object detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe2LMDB records for Object Detection - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe2LMDB records for Object Detection + * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalCreateMXNetReader +/*! \brief create MXNet reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the MXNet recordio files for Classification - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [in] source_path path to the MXNet recordio files for Classification + * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalGetImageName +/*! \brief get image name * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf user buffer provided to be filled with output image names for images in the output batch. 
+ * \param [in] rocal_context rocal context + * \param [out] buf user buffer provided to be filled with output image names for images in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char *buf); +extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char* buf); -/*! - * \brief rocalGetImageNameLen +/*! \brief get image name lengths * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf userbuffer provided to be filled with the length of the image names in the output batch + * \param [in] rocal_context rocal context + * \param [out] buf userbuffer provided to be filled with the length of the image names in the output batch * \return The size of the buffer needs to be provided by user to get the image names of the output batch */ -extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int* buf); -/*! - * \brief rocalGetImageLabels +/*! \brief get image labels * \ingroup group_rocal_meta_data - * \param meta_data RocalMetaData object that contains info about the images and labels - * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. + * \param [in] meta_data RocalMetaData object that contains info about the images and labels + * \param [out] buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. + * \return RocalTensorList of labels associated with image */ -extern "C" void ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context, void *buf, RocalOutputMemType output_mem_type = RocalOutputMemType::ROCAL_MEMCPY_HOST); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context); -/*! - * \brief rocalGetBoundingBoxCount +/*! 
\brief get bounding box count * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with number of object in the images. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with number of object in the images. * \return The size of the buffer needs to be provided by user to get bounding box info for all images in the output batch. */ -extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context); -/*! - * \brief rocalGetBoundingBoxLabel +/*! \brief get mask count * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with bounding box label info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCount + * \param [in] rocal_context rocal context + * \param [out] buf the imageIdx in the output batch + * \return The size of the buffer needs to be provided by user to get mask box info associated with image_idx in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetMaskCount(RocalContext p_context, int* buf); -/*! - * \brief rocalGetBoundingBoxCords +/*! \brief get mask coordinates * \ingroup group_rocal_meta_data - * \param rocal_context + * \param [in] rocal_context rocal context + * \param [out] bufcount The user's buffer that will be filled with polygon size for the mask info + * \return The tensorlist with the mask coordinates */ -extern "C" void ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context, float *buf); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMaskCoordinates(RocalContext p_context, int* bufcount); -/*! - * \brief rocalGetImageSizes +/*! 
\brief get bounding box label * \ingroup group_rocal_meta_data - * \param rocal_context + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with bounding box label info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCount + * \return RocalTensorList of labels associated with bounding box coordinates */ -extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int *buf); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context); -/*! - * \brief rocalCreateTextCifar10LabelReader +/*! \brief get bounding box coordinates * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the file that contains the metadata file - * \param filename_prefix: look only files with prefix ( needed for cifar10) - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with bounding box coords info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCords + * \return RocalTensorList of bounding box co-ordinates */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char *source_path, const char *file_prefix); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context); -/*! - * \brief rocalGetOneHotImageLabels +/*! \brief get image sizes * \ingroup group_rocal_meta_data - * \param meta_data RocalMetaData object that contains info about the images and labels - * \param numOfClasses the number of classes for a image dataset - * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. 
- * \param dest destination can be host=0 / device=1 + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with images sizes info for the images in the output batch */ -extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void *buf, int numOfClasses, int dest); +extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int* buf); -/*! - * \brief rocalRandomBBoxCrop +/*! \brief get ROI image sizes * \ingroup group_rocal_meta_data - * \param rocal_context - * */ + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with ROI image size info for the images in the output batch + */ +extern "C" void ROCAL_API_CALL rocalGetROIImageSizes(RocalContext rocal_context, int* buf); + +/*! \brief create text cifar10 label reader + * \ingroup group_rocal_meta_data + * \param [in] rocal_context rocal context + * \param [in] source_path path to the file that contains the metadata file + * \param [in] filename_prefix: look only files with prefix ( needed for cifar10) + * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + */ +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char* source_path, const char* file_prefix); + +/*! \brief get one hot image labels + * \ingroup group_rocal_meta_data + * \param [in] meta_data RocalMetaData object that contains info about the images and labels + * \param [in] numOfClasses the number of classes for a image dataset + * \param [out] buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. 
+ * \param [in] dest destination can be host=0 / device=1 + */ +extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void* buf, int numOfClasses, RocalOutputMemType output_mem_type); + extern "C" void ROCAL_API_CALL rocalRandomBBoxCrop(RocalContext p_context, bool all_boxes_overlap, bool no_crop, RocalFloatParam aspect_ratio = NULL, bool has_shape = false, int crop_width = 0, int crop_height = 0, int num_attempts = 1, RocalFloatParam scaling = NULL, int total_num_attempts = 0, int64_t seed = 0); -/*! - * \brief rocalGetSequenceStartFrameNumber +/*! \brief get sequence starting frame number * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with starting frame numbers of the output batch sequences. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with starting frame numbers of the output batch sequences. */ -extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int *buf); +extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int* buf); -/*! - * \brief rocalGetSequenceFrameTimestamps +/*! \brief get sequence time stamps * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences. */ -extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float *buf); +extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float* buf); -/*! - * \brief rocalBoxEncoder +/*! 
\brief rocal box encoder * \ingroup group_rocal_meta_data - * \param anchors Anchors to be used for encoding, as the array of floats is in the ltrb format. - * \param criteria Threshold IoU for matching bounding boxes with anchors. - * The value needs to be between 0 and 1. - * \param offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments - * \param means [x y w h] mean values for normalization. - * \param stds [x y w h] standard deviations for offset normalization. - * \param scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values). + * \param [in] anchors Anchors to be used for encoding, as the array of floats is in the ltrb format. + * \param [in] criteria Threshold IoU for matching bounding boxes with anchors. The value needs to be between 0 and 1. + * \param [in] offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments + * \param [in] means [x y w h] mean values for normalization. + * \param [in] stds [x y w h] standard deviations for offset normalization. + * \param [in] scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values). */ -extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector &anchors, float criteria, - std::vector &means, std::vector &stds, bool offset = false, float scale = 1.0); +extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, + std::vector& means, std::vector& stds, bool offset = false, float scale = 1.0); -/*! - * \brief rocalCopyEncodedBoxesAndLables +/*! \brief copy encoded boxes and labels * \ingroup group_rocal_meta_data - * \param boxes_buf user's buffer that will be filled with encoded bounding boxes . 
Its needs to be at least of size batch_size. - * \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size. + * \param [in] p_context rocal context + * \param [out] boxes_buf user's buffer that will be filled with encoded bounding boxes. It needs to be at least of size batch_size. + * \param [out] labels_buf user's buffer that will be filled with encoded labels. It needs to be at least of size batch_size. */ -extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float *boxes_buf, int *labels_buf); +extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float* boxes_buf, int* labels_buf); -/*! - * \brief rocalGetEncodedBoxesAndLables +/*! \brief get encoded boxes and labels * \ingroup group_rocal_meta_data * \param boxes_buf ptr to user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size. * \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size. */ -extern "C" void ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, float **boxes_buf_ptr, int **labels_buf_ptr, int num_encoded_boxes); +extern "C" RocalMetaData ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, int num_encoded_boxes); -/*! - * \brief rocalGetImageId +/*! \brief get image id * \ingroup group_rocal_meta_data - * \param rocal_context + * \param rocal_context rocal context * \param buf The user's buffer that will be filled with image id info for the images in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int *buf); +extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int* buf); -/*! - * \brief rocalGetJointsDataPtr +/*! 
\brief get joints data pointer + * \ingroup group_rocal_meta_data + * \param [in] rocal_context rocal context + * \param [out] joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer + */ +extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData** joints_data); + +/*! \brief API to enable box IOU matcher and pass required params to pipeline + * \ingroup group_rocal_meta_data + * \param [in] p_context rocAL context + * \param [in] anchors The anchors / ground truth bounding box coordinates + * \param [in] high_threshold The max threshold for IOU + * \param [in] low_threshold The min threshold for IOU + * \param [in] allow_low_quality_matches bool value when set to true allows low quality matches + */ +extern "C" void ROCAL_API_CALL rocalBoxIouMatcher(RocalContext p_context, std::vector& anchors, + float high_threshold, float low_threshold, bool allow_low_quality_matches = true); + +/*! \brief API to return the matched indices for the bounding box and anchors * \ingroup group_rocal_meta_data - * \param rocal_context - * \param joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer + * \param [in] p_context rocAL context + * \return RocalTensorList of matched indices */ -extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData **joints_data); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext p_context); -#endif // MIVISIONX_ROCAL_API_META_DATA_H +#endif // MIVISIONX_ROCAL_API_META_DATA_H diff --git a/rocAL/include/api/rocal_api_parameters.h b/rocAL/include/api/rocal_api_parameters.h index bc2e5907f..d79abc49b 100644 --- a/rocAL/include/api/rocal_api_parameters.h +++ b/rocAL/include/api/rocal_api_parameters.h @@ -32,163 +32,132 @@ THE SOFTWARE. * \brief The AMD rocAL Parameters. */ -/*! - * \brief rocalSetSeed +/*! 
\brief set seed for random number generation * \ingroup group_rocal_parameters - * - * \param seed + * \param [in] seed seed for the random number generation */ extern "C" void ROCAL_API_CALL rocalSetSeed(unsigned seed); -/*! - * \brief rocalGetSeed +/*! \brief gets the seed value * \ingroup group_rocal_parameters - * - * \return + * \return seed value */ extern "C" unsigned ROCAL_API_CALL rocalGetSeed(); -/*! - * \brief rocalCreateIntUniformRand +/*! \brief Creates a new uniform random integer parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \return + * \param start start value of the integer range + * \param end end value of the integer range + * \return RocalIntParam representing the uniform random integer parameter. */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntUniformRand(int start, int end); -/*! - * \brief rocalUpdateIntUniformRand +/*! \brief updates uniform random integer parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \param input_obj - * \return + * \param start start value of the integer range + * \param end end value of the integer range + * \param updating_obj RocalIntParam to be updated. + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntUniformRand(int start, int end, RocalIntParam updating_obj); -/*! - * \brief rocalGetIntValue +/*! \brief gets the value of a RocalIntParam. * \ingroup group_rocal_parameters - * - * \param obj - * \return + * \param [in] obj The RocalIntParam from which to retrieve the value. + * \return integer value of the RocalIntParam. */ extern "C" int ROCAL_API_CALL rocalGetIntValue(RocalIntParam obj); -/*! - * \brief rocalGetFloatValue +/*! \brief gets the value of a RocalFloatParam. * \ingroup group_rocal_parameters - * - * \param obj - * \return + * \param [in] obj The RocalFloatParam from which to retrieve the value. 
+ * \return float value of the RocalFloatParam. */ extern "C" float ROCAL_API_CALL rocalGetFloatValue(RocalFloatParam obj); -/*! - * \brief rocalCreateFloatUniformRand +/*! \brief Creates a new uniform random float parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \return + * \param start start value of the float range + * \param end end value of the float range + * \return RocalFloatParam representing the uniform random float parameter. */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatUniformRand(float start, float end); -/*! - * \brief rocalCreateFloatParameter +/*! \brief Creates a new float parameter with a specified value. * \ingroup group_rocal_parameters - * - * \param val - * \return + * \param [in] val value to create float param + * \return A new RocalFloatParam representing the float parameter. */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatParameter(float val); -/*! - * \brief rocalCreateIntParameter +/*! \brief Creates a new int parameter with a specified value. * \ingroup group_rocal_parameters - * - * \param val - * \return + * \param [in] val value to create integer param + * \return A new RocalIntParam representing the integer parameter. */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntParameter(int val); -/*! - * \brief rocalUpdateFloatParameter +/*! \brief Updates a float parameter with a new value. * \ingroup group_rocal_parameters - * - * \param new_val - * \param input_obj - * \return + * \param[in] new_val The new value to update the float parameter. + * \param[in] input_obj The RocalFloatParam to be updated. + * \return RocalStatus value. */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatParameter(float new_val, RocalFloatParam input_obj); -/*! - * \brief rocalUpdateIntParameter +/*! \brief Updates an integer parameter with a new value. 
* \ingroup group_rocal_parameters - * - * \param new_val - * \param input_obj - * \return + * \param[in] new_val The new value to update the integer parameter. + * \param[in] input_obj The RocalIntParam to be updated. + * \return RocalStatus value. */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntParameter(int new_val, RocalIntParam input_obj); -/*! - * \brief rocalUpdateFloatUniformRand +/*! \brief updates uniform random float parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \param input_obj - * \return + * \param start start value of the float range + * \param end end value of the float range + * \param updating_obj RocalFloatParam to be updated. + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatUniformRand(float start, float end, RocalFloatParam updating_obj); -/*! - * \brief rocalCreateIntRand +/*! \brief Sets the parameters for a new or existing RocalIntRandGen object * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \return + * \param [in] values random int values + * \param [in] frequencies frequencies of the values + * \param size size of the array + * \return random int parameter */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntRand(const int *values, const double *frequencies, unsigned size); -/*! - * \brief rocalUpdateIntRand +/*! \brief update the int random value * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \param updating_obj - * \return + * \param [in] values random int values + * \param [in] frequencies frequencies of the values + * \param [in] size size of the array + * \param [in] updating_obj Rocal int Param to update + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntRand(const int *values, const double *frequencies, unsigned size, RocalIntParam updating_obj); -/*! 
- * \brief Sets the parameters for a new or existing RocalFloatRandGen object +/*! \brief Sets the parameters for a new or existing RocalFloatRandGen object * \ingroup group_rocal_parameters - * \param values - * \param frequencies - * \param size - * \return + * \param [in] values random float values + * \param [in] frequencies frequencies of the values + * \param size size of the array + * \return random float parameter */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatRand(const float *values, const double *frequencies, unsigned size); -/*! - * \brief rocalUpdateFloatRand +/*! \brief update the float random value * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \param updating_obj - * \return + * \param [in] values random float values + * \param [in] frequencies frequencies of the values + * \param [in] size size of the array + * \param [in] updating_obj Rocal Float Param to update + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatRand(const float *values, const double *frequencies, unsigned size, RocalFloatParam updating_obj); -#endif // MIVISIONX_ROCAL_API_PARAMETERS_H +#endif // MIVISIONX_ROCAL_API_PARAMETERS_H diff --git a/rocAL/include/api/rocal_api_tensor.h b/rocAL/include/api/rocal_api_tensor.h new file mode 100644 index 000000000..5faf0e0ee --- /dev/null +++ b/rocAL/include/api/rocal_api_tensor.h @@ -0,0 +1,73 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef MIVISIONX_ROCAL_API_TENSOR_H +#define MIVISIONX_ROCAL_API_TENSOR_H +#include "rocal_api_types.h" + +/*! + * \file + * \brief The AMD rocAL Library - Tensor + * + * \defgroup group_rocal_tensor API: AMD rocAL - Tensor API + * \brief The AMD rocAL Tensor. + */ + +/*! 
+ * \brief class representing rocal tensor + */ +class rocalTensor { + public: + virtual ~rocalTensor() = default; + virtual void* buffer() = 0; + virtual unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type = ROCAL_MEMCPY_HOST) = 0; + virtual unsigned num_of_dims() = 0; + virtual unsigned batch_size() = 0; + virtual std::vector dims() = 0; + virtual std::vector strides() = 0; + virtual RocalTensorLayout layout() = 0; + virtual RocalTensorBackend backend() = 0; + virtual RocalTensorOutputType data_type() = 0; + virtual size_t data_size() = 0; + virtual RocalROICordsType roi_type() = 0; + virtual size_t get_roi_dims_size() = 0; + virtual void copy_roi(void* roi_buffer) = 0; + virtual std::vector shape() = 0; + virtual void set_dims(std::vector dims) = 0; + virtual void set_mem_handle(void* buffer) = 0; +}; + +/*! + * \brief class representing rocal tensor list + */ +class rocalTensorList { + public: + virtual uint64_t size() = 0; + virtual rocalTensor* at(size_t index) = 0; + // isDenseTensor +}; + +typedef rocalTensor* RocalTensor; +typedef rocalTensorList* RocalTensorList; +typedef std::vector RocalMetaData; + +#endif // MIVISIONX_ROCAL_API_TENSOR_H diff --git a/rocAL/include/api/rocal_api_types.h b/rocAL/include/api/rocal_api_types.h index 13af3671c..e63a5a915 100644 --- a/rocAL/include/api/rocal_api_types.h +++ b/rocAL/include/api/rocal_api_types.h @@ -50,23 +50,17 @@ using half_float::half; /*! \brief typedef void* Float Param * \ingroup group_rocal_types */ -typedef void *RocalFloatParam; +typedef void* RocalFloatParam; + /*! \brief typedef void* rocAL Int Param * \ingroup group_rocal_types */ -typedef void *RocalIntParam; +typedef void* RocalIntParam; + /*! \brief typedef void* rocAL Context * \ingroup group_rocal_types */ -typedef void *RocalContext; -/*! \brief typedef void* rocAL Image - * \ingroup group_rocal_types - */ -typedef void *RocalImage; -/*! 
\brief typedef void* rocAL Meta Data - * \ingroup group_rocal_types - */ -typedef void *RocalMetaData; +typedef void* RocalContext; /*! \brief typedef std::vectors * \ingroup group_rocal_types @@ -82,19 +76,18 @@ typedef std::vector>> JointsBatch, JointsVisibili /*! \brief Timing Info struct * \ingroup group_rocal_types */ -struct TimingInfo -{ +struct TimingInfo { long long unsigned load_time; long long unsigned decode_time; long long unsigned process_time; long long unsigned transfer_time; }; +// HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user /*! \brief rocAL Joints Data struct - HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user * \ingroup group_rocal_types */ -struct RocalJointsData -{ +struct RocalJointsData { ImageIDBatch image_id_batch; AnnotationIDBatch annotation_id_batch; ImagePathBatch image_path_batch; @@ -106,11 +99,17 @@ struct RocalJointsData RotationBatch rotation_batch; }; +struct ROIxywh { + unsigned x; + unsigned y; + unsigned w; + unsigned h; +}; + /*! \brief rocAL Status enum * \ingroup group_rocal_types */ -enum RocalStatus -{ +enum RocalStatus { /*! \brief AMD ROCAL_OK */ ROCAL_OK = 0, @@ -131,8 +130,7 @@ enum RocalStatus /*! \brief rocAL Image Color enum * \ingroup group_rocal_types */ -enum RocalImageColor -{ +enum RocalImageColor { /*! \brief AMD ROCAL_COLOR_RGB24 */ ROCAL_COLOR_RGB24 = 0, @@ -150,8 +148,7 @@ enum RocalImageColor /*! \brief rocAL Process Mode enum * \ingroup group_rocal_types */ -enum RocalProcessMode -{ +enum RocalProcessMode { /*! \brief AMD ROCAL_PROCESS_GPU */ ROCAL_PROCESS_GPU = 0, @@ -163,8 +160,7 @@ enum RocalProcessMode /*! \brief rocAL Flip Axis enum * \ingroup group_rocal_types */ -enum RocalFlipAxis -{ +enum RocalFlipAxis { /*! \brief AMD ROCAL_FLIP_HORIZONTAL */ ROCAL_FLIP_HORIZONTAL = 0, @@ -176,8 +172,7 @@ enum RocalFlipAxis /*! 
\brief rocAL Image Size Evaluation Policy enum * \ingroup group_rocal_types */ -enum RocalImageSizeEvaluationPolicy -{ +enum RocalImageSizeEvaluationPolicy { /*! \brief AMD ROCAL_USE_MAX_SIZE */ ROCAL_USE_MAX_SIZE = 0, @@ -189,17 +184,16 @@ enum RocalImageSizeEvaluationPolicy ROCAL_USE_MOST_FREQUENT_SIZE = 2, /*! \brief Use the given size only if the actual decoded size is greater than the given size */ - ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3, + ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3, // use the given size only if the actual decoded size is greater than the given size /*! \brief Use max size if the actual decoded size is greater than max */ - ROCAL_USE_MAX_SIZE_RESTRICTED = 4, + ROCAL_USE_MAX_SIZE_RESTRICTED = 4, // use max size if the actual decoded size is greater than max }; /*! \brief rocAL Decode Device enum * \ingroup group_rocal_types */ -enum RocalDecodeDevice -{ +enum RocalDecodeDevice { /*! \brief AMD ROCAL_HW_DECODE */ ROCAL_HW_DECODE = 0, @@ -211,37 +205,63 @@ enum RocalDecodeDevice /*! \brief rocAL Tensor Layout enum * \ingroup group_rocal_types */ -enum RocalTensorLayout -{ +enum RocalTensorLayout { /*! \brief AMD ROCAL_NHWC */ ROCAL_NHWC = 0, /*! \brief AMD ROCAL_NCHW */ - ROCAL_NCHW = 1 + ROCAL_NCHW = 1, + /*! \brief AMD ROCAL_NFHWc + */ + ROCAL_NFHWC = 2, + /*! \brief AMD ROCAL_NFCHW + */ + ROCAL_NFCHW = 3, + /*! \brief AMD ROCAL_NHW + */ + ROCAL_NHW = 4, + /*! \brief AMD ROCAL_NFT + * Spectrogram Layout FT + */ + ROCAL_NFT = 5, + /*! \brief AMD ROCAL_NTF + * Spectrogram Layout TF + */ + ROCAL_NTF = 6, + /*! \brief AMD ROCAL_NONE + */ + ROCAL_NONE = 7 // Layout for generic tensors (Non-Image or Non-Video) }; /*! \brief rocAL Tensor Output Type enum * \ingroup group_rocal_types */ -enum RocalTensorOutputType -{ +enum RocalTensorOutputType { /*! \brief AMD ROCAL_FP32 */ ROCAL_FP32 = 0, /*! \brief AMD ROCAL_FP16 */ ROCAL_FP16 = 1, - /*! \brief AMD ROCAL_U8 + /*! \brief AMD ROCAL_UINT8 + */ + ROCAL_UINT8 = 2, + /*! 
\brief AMD ROCAL_INT8 */ - ROCAL_U8 = 2, + ROCAL_INT8 = 3, + /*! \brief AMD ROCAL_UINT32 + */ + ROCAL_UINT32 = 4, + /*! \brief AMD ROCAL_INT32 + */ + ROCAL_INT32 = 5 }; /*! \brief rocAL Decoder Type enum * \ingroup group_rocal_types */ -enum RocalDecoderType -{ +enum RocalDecoderType { /*! \brief AMD ROCAL_DECODER_TJPEG */ ROCAL_DECODER_TJPEG = 0, @@ -256,14 +276,14 @@ enum RocalDecoderType ROCAL_DECODER_VIDEO_FFMPEG_SW = 3, /*! \brief AMD ROCAL_DECODER_VIDEO_FFMPEG_HW */ - ROCAL_DECODER_VIDEO_FFMPEG_HW = 4 + ROCAL_DECODER_VIDEO_FFMPEG_HW = 4, + /*! \brief AMD ROCAL_DECODER_AUDIO_GENERIC + * Uses SndFile library to read audio files + */ + ROCAL_DECODER_AUDIO_GENERIC = 5 }; -/*! \brief rocAL Output Mem Type enum - * \ingroup group_rocal_types - */ -enum RocalOutputMemType -{ +enum RocalOutputMemType { /*! \brief AMD ROCAL_MEMCPY_HOST */ ROCAL_MEMCPY_HOST = 0, @@ -275,24 +295,24 @@ enum RocalOutputMemType ROCAL_MEMCPY_PINNED = 2 }; +// rocal external memcpy flags /*! \brief AMD rocAL external memcpy flags - force copy to user provided host memory * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_TO_HOST 1 +#define ROCAL_MEMCPY_TO_HOST 1 // force copy to user provided host memory /*! \brief AMD rocAL external memcpy flags - force copy to user provided device memory (gpu) * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_TO_DEVICE 2 +#define ROCAL_MEMCPY_TO_DEVICE 2 // force copy to user provided device memory (gpu) /*! \brief AMD rocAL external memcpy flags - for future use * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_IS_PINNED 4 +#define ROCAL_MEMCPY_IS_PINNED 4 // for future use /*! \brief rocAL Resize Scaling Mode enum * \ingroup group_rocal_types */ -enum RocalResizeScalingMode -{ +enum RocalResizeScalingMode { /*! 
\brief scales wrt specified size, if only resize width/height is provided the other dimension is scaled according to aspect ratio */ ROCAL_SCALING_MODE_DEFAULT = 0, @@ -304,7 +324,10 @@ enum RocalResizeScalingMode ROCAL_SCALING_MODE_NOT_SMALLER = 2, /*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified size */ - ROCAL_SCALING_MODE_NOT_LARGER = 3 + ROCAL_SCALING_MODE_NOT_LARGER = 3, + /*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified min and max size + */ + ROCAL_SCALING_MODE_MIN_MAX = 4 }; /*! \brief rocAL Resize Interpolation Type enum @@ -332,4 +355,102 @@ enum RocalResizeInterpolationType ROCAL_TRIANGULAR_INTERPOLATION = 5 }; -#endif // MIVISIONX_ROCAL_API_TYPES_H +/*! \brief Tensor Backend + * \ingroup group_rocal_types + */ +enum RocalTensorBackend { + /*! \brief ROCAL_CPU + */ + ROCAL_CPU = 0, + /*! \brief ROCAL_GPU + */ + ROCAL_GPU = 1 +}; + +/*! \brief Tensor ROI type + * \ingroup group_rocal_types + */ +enum class RocalROICordsType { + /*! \brief ROCAL_LTRB + */ + ROCAL_LTRB = 0, + /*! \brief ROCAL_XYWH + */ + ROCAL_XYWH = 1 +}; + +/*! \brief RocalExternalSourceMode struct + * \ingroup group_rocal_types + */ +enum RocalExternalSourceMode { + /*! \brief list of filename passed as input + */ + ROCAL_EXTSOURCE_FNAME = 0, + /*! \brief compressed raw buffer passed as input + */ + ROCAL_EXTSOURCE_RAW_COMPRESSED = 1, + /*! \brief uncompressed raw buffer passed as input + */ + ROCAL_EXTSOURCE_RAW_UNCOMPRESSED = 2, +}; + +/*! \brief rocAL Audio Border Type enum + * \ingroup group_rocal_types + */ +enum RocalAudioBorderType { + /*! \brief AMD ROCAL_ZERO + */ + ROCAL_ZERO = 0, + /*! \brief AMD ROCAL_CLAMP + */ + ROCAL_CLAMP = 1, + /*! \brief AMD ROCAL_REFLECT + */ + ROCAL_REFLECT = 2 +}; + +/*! \brief rocAL Out Of Bounds Policy Type enum + * \ingroup group_rocal_types + */ +enum RocalOutOfBoundsPolicy { + /*! \brief Pad + */ + ROCAL_PAD = 0, + /*! 
\brief Trimtoshape + */ + ROCAL_TRIMTOSHAPE, + /*! \brief Error + */ + ROCAL_ERROR +}; + +/*! \brief rocAL MelScale formula enum + * \ingroup group_rocal_types + */ +enum RocalMelScaleFormula { + /*! \brief Slaney + * Follows Slaney’s MATLAB Auditory Modelling Work behavior + */ + ROCAL_MELSCALE_SLANEY = 0, + /*! \brief HTK + * Follows O’Shaughnessy’s book formula, consistent with Hidden Markov Toolkit(HTK), m = 2595 * log10(1 + (f/700)) + */ + ROCAL_MELSCALE_HTK +}; + +/*! \brief Tensor Last Batch Policies + * \ingroup group_rocal_types + */ +enum RocalLastBatchPolicy { + /*! \brief ROCAL_LAST_BATCH_FILL - The last batch is filled by either repeating the last sample or by wrapping up the data set. + */ + ROCAL_LAST_BATCH_FILL = 0, + /*! \brief ROCAL_LAST_BATCH_DROP - The last batch is dropped if there are not enough samples from the current epoch. + */ + ROCAL_LAST_BATCH_DROP = 1, + /*! \brief ROCAL_LAST_BATCH_PARTIAL - The last batch is partially filled with the remaining data from the current epoch, keeping the rest of the samples empty. (currently this policy works similar to FILL in rocAL, PARTIAL policy needs to be handled from python end) + */ + ROCAL_LAST_BATCH_PARTIAL = 2 +}; + +#endif // MIVISIONX_ROCAL_API_TYPES_H diff --git a/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h new file mode 100644 index 000000000..ec62cddc8 --- /dev/null +++ b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h @@ -0,0 +1,36 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "rocal_api_types.h" + +class TensorAddTensorNode : public Node { + public: + TensorAddTensorNode(const std::vector &inputs, const std::vector &outputs); + TensorAddTensorNode() = delete; + + protected: + void create_node() override; + void update_node() override; +}; diff --git a/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h new file mode 100644 index 000000000..49d79c89b --- /dev/null +++ b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h @@ -0,0 +1,40 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "rocal_api_types.h" + +class TensorMulScalarNode : public Node { + public: + TensorMulScalarNode(const std::vector &inputs, const std::vector &outputs); + TensorMulScalarNode() = delete; + void init(float scalar); + + protected: + void create_node() override; + void update_node() override; + + private: + float _scalar; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_downmix.h b/rocAL/include/augmentations/audio_augmentations/node_downmix.h new file mode 100644 index 000000000..19bb40cc2 --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_downmix.h @@ -0,0 +1,34 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +class DownmixNode : public Node { + public: + DownmixNode(const std::vector &inputs, const std::vector &outputs); + DownmixNode() = delete; + + protected: + void create_node() override; + void update_node() override; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h b/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h new file mode 100644 index 000000000..995392edc --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h @@ -0,0 +1,45 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "rocal_api_types.h" + +class MelFilterBankNode : public Node { + public: + MelFilterBankNode(const std::vector &inputs, const std::vector &outputs); + MelFilterBankNode() = delete; + void init(float freq_high, float freq_low, RocalMelScaleFormula mel_formula, int nfilter, bool normalize, float sample_rate); + + protected: + void create_node() override; + void update_node() override; + + private: + float _freq_high = 0; + float _freq_low = 0; + int _mel_formula = 0; + int _nfilter = 128; + float _sample_rate = 44100; + bool _normalize = true; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h b/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h new file mode 100644 index 000000000..7bbe7182f --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" + +class NonSilentRegionDetectionNode : public Node { + public: + NonSilentRegionDetectionNode(const std::vector &inputs, const std::vector &outputs); + NonSilentRegionDetectionNode() = delete; + void init(float cutoff_db, float reference_power, int reset_interval, int window_length); + + protected: + void create_node() override; + void update_node() override; + + private: + float _cutoff_db = -60.0; + float _reference_power = 0.0; + int _window_length = 2048; + int _reset_interval = 8192; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h b/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h new file mode 100644 index 000000000..e3ba6c516 --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" +#include "rocal_api_types.h" + +class PreemphasisFilterNode : public Node { + public: + PreemphasisFilterNode(const std::vector &inputs, const std::vector &outputs); + PreemphasisFilterNode() = delete; + void init(FloatParam *preemph_coeff, RocalAudioBorderType preemph_border); + + protected: + void create_node() override; + void update_node() override; + + private: + ParameterVX _preemph_coeff; + constexpr static float PREEMPH_COEFF_RANGE[2] = {0.97, 0.97}; + RocalAudioBorderType _preemph_border; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_resample.h b/rocAL/include/augmentations/audio_augmentations/node_resample.h new file mode 100644 index 000000000..32163b473 --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_resample.h @@ -0,0 +1,42 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "rocal_api_types.h" + +class ResampleNode : public Node { + public: + ResampleNode(const std::vector &inputs, const std::vector &outputs); + ResampleNode() = delete; + void init(Tensor *resample_rate, float quality); + + protected: + void create_node() override; + void update_node() override; + + private: + Tensor *_output_resample_rate; + float _quality; + vx_array _src_sample_rate_array; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h b/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h new file mode 100644 index 000000000..d79576ff6 --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h @@ -0,0 +1,60 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "rocal_api_types.h" + +/// @brief Generates hann window for spectrogram +/// @param output +/// @param window_size +inline void hann_window(float *output, int window_size) { + if (window_size <= 0) + THROW("Invalid window size, for Hann window") + double a = (2.0 * M_PI) / window_size; + for (int t = 0; t < window_size; t++) { + double phase = a * (t + 0.5); + output[t] = (0.5 * (1.0 - std::cos(phase))); + } +} + +class SpectrogramNode : public Node { + public: + SpectrogramNode(const std::vector &inputs, const std::vector &outputs); + SpectrogramNode() = delete; + void init(bool is_center_windows, bool is_reflect_padding, int power, int nfft, + int window_length, int window_step, std::vector &window_fn); + + protected: + void create_node() override; + void update_node() override; + + private: + std::vector _window_fn; + int _power = 2; + int _nfft = 2048; + int _window_length = 512; + int _window_step = 256; + bool _is_center_windows = true; + bool _is_reflect_padding = true; +}; diff --git a/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h b/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h new file mode 100644 index 000000000..e6a5b28af --- /dev/null +++ b/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h @@ -0,0 +1,41 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" + +class ToDecibelsNode : public Node { + public: + ToDecibelsNode(const std::vector &inputs, const std::vector &outputs); + ToDecibelsNode() = delete; + void init(float cutoff_db, float multiplier, float reference_magnitude); + + protected: + void create_node() override; + void update_node() override; + + private: + float _cutoff_db = -200.0; + float _multiplier = 10.0; + float _reference_magnitude = 0.0; +}; diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h index 34bc1d6a8..6f9def1c0 100644 --- a/rocAL/include/augmentations/augmentations_nodes.h +++ b/rocAL/include/augmentations/augmentations_nodes.h @@ -22,36 +22,48 @@ THE SOFTWARE. 
#pragma once -#include "node_warp_affine.h" -#include "node_exposure.h" -#include "node_vignette.h" -#include "node_jitter.h" -#include "node_snp_noise.h" -#include "node_snow.h" -#include "node_rain.h" -#include "node_color_temperature.h" -#include "node_fog.h" -#include "node_pixelate.h" -#include "node_lens_correction.h" -#include "node_gamma.h" -#include "node_flip.h" -#include "node_crop_resize.h" -#include "node_brightness.h" -#include "node_contrast.h" -#include "node_blur.h" -#include "node_fisheye.h" -#include "node_blend.h" -#include "node_resize.h" -#include "node_rotate.h" -#include "node_color_twist.h" -#include "node_hue.h" -#include "node_saturation.h" -#include "node_crop_mirror_normalize.h" -#include "node_resize_mirror_normalize.h" -#include "node_resize_crop_mirror.h" -#include "node_ssd_random_crop.h" -#include "node_crop.h" -#include "node_random_crop.h" -#include "node_copy.h" -#include "node_nop.h" -#include "node_sequence_rearrange.h" +#include "augmentations/geometry_augmentations/node_warp_affine.h" +#include "augmentations/color_augmentations/node_exposure.h" +#include "augmentations/color_augmentations/node_vignette.h" +#include "augmentations/effects_augmentations/node_jitter.h" +#include "augmentations/effects_augmentations/node_snp_noise.h" +#include "augmentations/effects_augmentations/node_snow.h" +#include "augmentations/effects_augmentations/node_rain.h" +#include "augmentations/color_augmentations/node_color_temperature.h" +#include "augmentations/effects_augmentations/node_fog.h" +#include "augmentations/effects_augmentations/node_pixelate.h" +#include "augmentations/geometry_augmentations/node_lens_correction.h" +#include "augmentations/color_augmentations/node_gamma.h" +#include "augmentations/geometry_augmentations/node_flip.h" +#include "augmentations/geometry_augmentations/node_crop_resize.h" +#include "augmentations/color_augmentations/node_brightness.h" +#include "augmentations/color_augmentations/node_contrast.h" 
+#include "augmentations/color_augmentations/node_blur.h" +#include "augmentations/geometry_augmentations/node_fisheye.h" +#include "augmentations/color_augmentations/node_blend.h" +#include "augmentations/geometry_augmentations/node_resize.h" +#include "augmentations/geometry_augmentations/node_rotate.h" +#include "augmentations/color_augmentations/node_color_twist.h" +#include "augmentations/color_augmentations/node_hue.h" +#include "augmentations/color_augmentations/node_saturation.h" +#include "augmentations/geometry_augmentations/node_crop_mirror_normalize.h" +#include "augmentations/geometry_augmentations/node_resize_mirror_normalize.h" +#include "augmentations/geometry_augmentations/node_resize_crop_mirror.h" +#include "augmentations/node_ssd_random_crop.h" +#include "augmentations/geometry_augmentations/node_crop.h" +#include "augmentations/geometry_augmentations/node_random_crop.h" +#include "augmentations/node_copy.h" +#include "augmentations/node_nop.h" +#include "augmentations/node_sequence_rearrange.h" +#include "augmentations/audio_augmentations/node_preemphasis_filter.h" +#include "augmentations/audio_augmentations/node_spectrogram.h" +#include "augmentations/audio_augmentations/node_to_decibels.h" +#include "augmentations/audio_augmentations/node_resample.h" +#include "augmentations/node_uniform_distribution.h" +#include "augmentations/node_normal_distribution.h" +#include "augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h" +#include "augmentations/arithmetic_augmentations/node_tensor_add_tensor.h" +#include "augmentations/audio_augmentations/node_non_silent_region_detection.h" +#include "augmentations/geometry_augmentations/node_slice.h" +#include "augmentations/effects_augmentations/node_normalize.h" +#include "augmentations/audio_augmentations/node_mel_filter_bank.h" diff --git a/rocAL/include/augmentations/color_augmentations/node_blend.h b/rocAL/include/augmentations/color_augmentations/node_blend.h index cee6ac60f..46d8d94e2 
100644 --- a/rocAL/include/augmentations/color_augmentations/node_blend.h +++ b/rocAL/include/augmentations/color_augmentations/node_blend.h @@ -21,23 +21,23 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" - -class BlendNode : public Node -{ -public: - explicit BlendNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class BlendNode : public Node { + public: + explicit BlendNode(const std::vector &inputs, const std::vector &outputs); BlendNode() = delete; void init(float ratio); - void init(FloatParam* ratio); + void init(FloatParam *ratio); -protected: + protected: void update_node() override; void create_node() override; -private: + + private: ParameterVX _ratio; - constexpr static float RATIO_RANGE [2] = {0.1, 0.9}; + constexpr static float RATIO_RANGE[2] = {0.1, 0.9}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_blur.h b/rocAL/include/augmentations/color_augmentations/node_blur.h index 341c109ff..d26751fc8 100644 --- a/rocAL/include/augmentations/color_augmentations/node_blur.h +++ b/rocAL/include/augmentations/color_augmentations/node_blur.h @@ -21,24 +21,23 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class BlurNode : public Node -{ -public: - BlurNode(const std::vector &inputs, const std::vector &outputs); +class BlurNode : public Node { + public: + BlurNode(const std::vector &inputs, const std::vector &outputs); BlurNode() = delete; - void init(int sdev); - void init(IntParam *sdev); + void init(int kernel_size); + void init(IntParam *kernel_size_param); -protected: + protected: void update_node() override; void create_node() override; -private: - ParameterVX _sdev; - constexpr static int SDEV_RANGE [2] = {3, 9}; + private: + ParameterVX _kernel_size; + constexpr static int KERNEL_SIZE_RANGE[2] = {3, 9}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_brightness.h b/rocAL/include/augmentations/color_augmentations/node_brightness.h index b672c81eb..5825fb8ad 100644 --- a/rocAL/include/augmentations/color_augmentations/node_brightness.h +++ b/rocAL/include/augmentations/color_augmentations/node_brightness.h @@ -21,27 +21,26 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" - -class BrightnessNode : public Node -{ -public: - BrightnessNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class BrightnessNode : public Node { + public: + BrightnessNode(const std::vector &inputs, const std::vector &outputs); BrightnessNode() = delete; - void init( float alpha, float beta); - void init( FloatParam* alpha_param, FloatParam* beta_param); + void init(float alpha, float beta); + void init(FloatParam *alpha_param, FloatParam *beta_param); -protected: - void create_node() override ; + protected: + void create_node() override; void update_node() override; -private: + private: ParameterVX _alpha; ParameterVX _beta; - constexpr static float ALPHA_RANGE [2] = {0.1, 1.95}; - constexpr static float BETA_RANGE [2] = {0, 25}; + constexpr static float ALPHA_RANGE[2] = {0.1, 1.95}; + constexpr static float BETA_RANGE[2] = {0, 25}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h index ab1bd172f..19f393ba0 100644 --- a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h +++ b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h @@ -21,24 +21,24 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class ColorTemperatureNode : public Node -{ -public: - ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs); +class ColorTemperatureNode : public Node { + public: + ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs); ColorTemperatureNode() = delete; void init(int adjustment); void init(IntParam *adjustment); -protected: - void create_node() override ; + protected: + void create_node() override; void update_node() override; -private: + + private: ParameterVX _adj_value_param; - constexpr static int ADJUSTMENT_RANGE [2] = {-99, 99}; + constexpr static int ADJUSTMENT_RANGE[2] = {-99, 99}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_color_twist.h b/rocAL/include/augmentations/color_augmentations/node_color_twist.h index eddbeb4f6..265abef36 100644 --- a/rocAL/include/augmentations/color_augmentations/node_color_twist.h +++ b/rocAL/include/augmentations/color_augmentations/node_color_twist.h @@ -21,31 +21,28 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" - -class ColorTwistBatchNode : public Node -{ -public: - ColorTwistBatchNode(const std::vector &inputs, const std::vector &outputs); - ColorTwistBatchNode() = delete; +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class ColorTwistNode : public Node { + public: + ColorTwistNode(const std::vector &inputs, const std::vector &outputs); + ColorTwistNode() = delete; void init(float alpha, float beta, float hue, float sat); - void init(FloatParam *alpha, FloatParam *beta, FloatParam *hue, FloatParam *sat); + void init(FloatParam *alpha_param, FloatParam *beta_param, FloatParam *hue_param, FloatParam *sat_param); -protected: + protected: void create_node() override; void update_node() override; -private: + private: ParameterVX _alpha; ParameterVX _beta; ParameterVX _hue; ParameterVX _sat; - - constexpr static float ALPHA_RANGE [2] = {0.1, 1.95}; - constexpr static float BETA_RANGE [2] = {0.1, 25.0}; - constexpr static float HUE_RANGE [2] = {5.0, 170.0}; - constexpr static float SAT_RANGE [2] = {0.1, 0.4}; -}; \ No newline at end of file + constexpr static float ALPHA_RANGE[2] = {0.1, 1.95}; + constexpr static float BETA_RANGE[2] = {0.1, 25.0}; + constexpr static float HUE_RANGE[2] = {5.0, 170.0}; + constexpr static float SAT_RANGE[2] = {0.1, 0.4}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_contrast.h b/rocAL/include/augmentations/color_augmentations/node_contrast.h index 075673c90..e5e096dd7 100644 --- a/rocAL/include/augmentations/color_augmentations/node_contrast.h +++ b/rocAL/include/augmentations/color_augmentations/node_contrast.h @@ -22,25 +22,24 @@ THE SOFTWARE. 
#pragma once #include -#include "node.h" -#include "parameter_vx.h" -#include "graph.h" - -class RocalContrastNode : public Node -{ -public: - RocalContrastNode(const std::vector &inputs, const std::vector &outputs); - RocalContrastNode() = delete; - void init(int min, int max); - void init(IntParam *min, IntParam * max); - -protected: - void create_node() override ; + +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_vx.h" + +class ContrastNode : public Node { + public: + ContrastNode(const std::vector &inputs, const std::vector &outputs); + ContrastNode() = delete; + void init(float contrast_factor, float contrast_center); + void init(FloatParam *contrast_factor_param, FloatParam *contrast_center_param); + + protected: + void create_node() override; void update_node() override; -private: - ParameterVX _min; - ParameterVX _max; - constexpr static int CONTRAST_MIN_RANGE [2] = {0, 30}; - constexpr static int CONTRAST_MAX_RANGE [2] = {60, 90}; -}; \ No newline at end of file + private: + ParameterVX _factor, _center; + constexpr static float CONTRAST_FACTOR_RANGE[2] = {0.1, 1.95}; + constexpr static float CONTRAST_CENTER_RANGE[2] = {60, 90}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_exposure.h b/rocAL/include/augmentations/color_augmentations/node_exposure.h index 4f1cb95f4..825edfa57 100644 --- a/rocAL/include/augmentations/color_augmentations/node_exposure.h +++ b/rocAL/include/augmentations/color_augmentations/node_exposure.h @@ -21,23 +21,23 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class ExposureNode : public Node -{ -public: - ExposureNode(const std::vector &inputs, const std::vector &outputs); +class ExposureNode : public Node { + public: + ExposureNode(const std::vector &inputs, const std::vector &outputs); ExposureNode() = delete; - void init(float shift); - void init(FloatParam *shift); -protected: + void init(float exposure_factor); + void init(FloatParam *exposure_factor_param); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _shift; - vx_array _width_array ,_height_array; - constexpr static float SHIFT_RANGE [2] = {0.15, 0.95}; -}; \ No newline at end of file + + private: + ParameterVX _exposure_factor; + constexpr static float EXPOSURE_FACTOR_RANGE[2] = {0.15, 0.95}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_gamma.h b/rocAL/include/augmentations/color_augmentations/node_gamma.h index b113fd96b..3fd62417e 100644 --- a/rocAL/include/augmentations/color_augmentations/node_gamma.h +++ b/rocAL/include/augmentations/color_augmentations/node_gamma.h @@ -21,23 +21,22 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" - -class GammaNode : public Node -{ -public: - GammaNode(const std::vector &inputs, const std::vector &outputs); +class GammaNode : public Node { + public: + GammaNode(const std::vector &inputs, const std::vector &outputs); GammaNode() = delete; - void init(float shift); - void init(FloatParam *shift); + void init(float gamma); + void init(FloatParam *gamma_param); -protected: + protected: void update_node() override; void create_node() override; -private: - ParameterVX _shift; - constexpr static float SHIFT_RANGE [2] = {0.3, 7.00}; + + private: + ParameterVX _gamma; + constexpr static float GAMMA_RANGE[2] = {0.3, 7.00}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_hue.h b/rocAL/include/augmentations/color_augmentations/node_hue.h index 79f1639bd..9d76e9dc5 100644 --- a/rocAL/include/augmentations/color_augmentations/node_hue.h +++ b/rocAL/include/augmentations/color_augmentations/node_hue.h @@ -21,22 +21,22 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" - -class HueNode : public Node -{ -public: - HueNode(const std::vector &inputs, const std::vector &outputs); +class HueNode : public Node { + public: + HueNode(const std::vector &inputs, const std::vector &outputs); HueNode() = delete; void init(float hue); void init(FloatParam *hue); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _hue; - constexpr static float HUE_RANGE [2] = {-359.0, 359.0}; + constexpr static float HUE_RANGE[2] = {-359.0, 359.0}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_saturation.h b/rocAL/include/augmentations/color_augmentations/node_saturation.h index a503c4c85..e8a085032 100644 --- a/rocAL/include/augmentations/color_augmentations/node_saturation.h +++ b/rocAL/include/augmentations/color_augmentations/node_saturation.h @@ -21,22 +21,22 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" - -class SatNode : public Node -{ -public: - SatNode(const std::vector &inputs, const std::vector &outputs); - SatNode() = delete; +class SaturationNode : public Node { + public: + SaturationNode(const std::vector &inputs, const std::vector &outputs); + SaturationNode() = delete; void init(float sat); void init(FloatParam *sat); -protected: + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _sat; // For saturation - constexpr static float SAT_RANGE [2] = {-0.5, 0.5}; + + private: + ParameterVX _saturation; + constexpr static float SAT_RANGE[2] = {-0.5, 0.5}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_vignette.h b/rocAL/include/augmentations/color_augmentations/node_vignette.h index 9af231b09..0a93f759f 100644 --- a/rocAL/include/augmentations/color_augmentations/node_vignette.h +++ b/rocAL/include/augmentations/color_augmentations/node_vignette.h @@ -21,22 +21,23 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class VignetteNode : public Node -{ -public: - VignetteNode(const std::vector &inputs, const std::vector &outputs); - VignetteNode () = delete; +class VignetteNode : public Node { + public: + VignetteNode(const std::vector &inputs, const std::vector &outputs); + VignetteNode() = delete; void init(float sdev); void init(FloatParam *sdev); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _sdev; - constexpr static float SDEV_RANGE [2] = {40 , 60}; + constexpr static float SDEV_RANGE[2] = {40, 60}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_fog.h b/rocAL/include/augmentations/effects_augmentations/node_fog.h index ea0309dc8..58554adf2 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_fog.h +++ b/rocAL/include/augmentations/effects_augmentations/node_fog.h @@ -21,22 +21,22 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -class FogNode : public Node -{ -public: - FogNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class FogNode : public Node { + public: + FogNode(const std::vector &inputs, const std::vector &outputs); FogNode() = delete; void init(float fog_param); void init(FloatParam *fog_param); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _fog_param; - constexpr static float FOG_VALUE_RANGE [2] = {0.2, 0.8}; + constexpr static float FOG_VALUE_RANGE[2] = {0.2, 0.8}; }; - - diff --git a/rocAL/include/augmentations/effects_augmentations/node_jitter.h b/rocAL/include/augmentations/effects_augmentations/node_jitter.h index 2ddc58645..971d51d95 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_jitter.h +++ b/rocAL/include/augmentations/effects_augmentations/node_jitter.h @@ -21,22 +21,23 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" - -class JitterNode : public Node -{ -public: - JitterNode(const std::vector &inputs, const std::vector &outputs); +class JitterNode : public Node { + public: + JitterNode(const std::vector &inputs, const std::vector &outputs); JitterNode() = delete; - void init(int kernel_size); - void init(IntParam *kernel_size); -protected: + void init(int kernel_size, int seed); + void init(IntParam *kernel_size, int seed); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _kernel_size; - constexpr static int KERNEL_SIZE [2] = {2, 5}; + int _seed; + constexpr static int KERNEL_SIZE[2] = {2, 5}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_normalize.h b/rocAL/include/augmentations/effects_augmentations/node_normalize.h new file mode 100644 index 000000000..a4b76d99c --- /dev/null +++ b/rocAL/include/augmentations/effects_augmentations/node_normalize.h @@ -0,0 +1,51 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_vx.h" + +class NormalizeNode : public Node { + public: + NormalizeNode(const std::vector &inputs, const std::vector &outputs); + NormalizeNode() = delete; + void init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift); + + protected: + void create_node() override; + void update_node() override {}; + + private: + int _axis_mask = 0; + vx_array _mean_vx_array, _stddev_vx_array; + std::vector _axes; + std::vector _mean, _std_dev; + float _scale, _shift; + std::vector> _normalize_roi; + enum NormalizeModes { + DO_NOT_COMPUTE = 0, // Mean and Stddev values are passed from user + COMPUTE_MEAN = 1, // Compute mean from specified axes of input + COMPUTE_STDDEV = 2, // Compute stddev from specified axes of input + COMPUTE_MEAN_STDDEV = 3 // Compute both mean and stddev from specified axes of input + }; +}; diff --git a/rocAL/include/augmentations/effects_augmentations/node_pixelate.h b/rocAL/include/augmentations/effects_augmentations/node_pixelate.h index ab3b965dc..700960966 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_pixelate.h +++ b/rocAL/include/augmentations/effects_augmentations/node_pixelate.h @@ -22,18 +22,18 @@ THE SOFTWARE. 
#pragma once -#include "node.h" -#include "parameter_factory.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" - -class PixelateNode : public Node -{ -public: - PixelateNode(const std::vector &inputs, const std::vector &outputs); +class PixelateNode : public Node { + public: + PixelateNode(const std::vector &inputs, const std::vector &outputs); PixelateNode() = delete; -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_rain.h b/rocAL/include/augmentations/effects_augmentations/node_rain.h index 9b0cf80aa..15019a219 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_rain.h +++ b/rocAL/include/augmentations/effects_augmentations/node_rain.h @@ -22,27 +22,28 @@ THE SOFTWARE. #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" - -class RainNode : public Node -{ -public: - RainNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class RainNode : public Node { + public: + RainNode(const std::vector &inputs, const std::vector &outputs); RainNode() = delete; void init(float rain_value, int rain_width, int rain_height, float rain_transparency); - void init(FloatParam *rain_value, IntParam *rain_width, IntParam *rain_height, FloatParam *rain_transparency); -protected: + void init(FloatParam *rain_value, IntParam *rain_width, IntParam *rain_height, FloatParam *rain_transparency); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _rain_value; ParameterVX _rain_width; ParameterVX _rain_height; ParameterVX _rain_transparency; - constexpr static float RAIN_VALUE_RANGE [2] = {0.15, 0.95}; - constexpr static int RAIN_WIDTH_RANGE [2] = {1, 2}; - constexpr 
static int RAIN_HEIGHT_RANGE [2] = {15, 17}; - constexpr static float RAIN_TRANSPARENCY_RANGE [2] = {0.2, 0.3}; + constexpr static float RAIN_VALUE_RANGE[2] = {0.15, 0.95}; + constexpr static int RAIN_WIDTH_RANGE[2] = {1, 2}; + constexpr static int RAIN_HEIGHT_RANGE[2] = {15, 17}; + constexpr static float RAIN_TRANSPARENCY_RANGE[2] = {0.2, 0.3}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/effects_augmentations/node_snow.h b/rocAL/include/augmentations/effects_augmentations/node_snow.h index ba0687f04..316f82728 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_snow.h +++ b/rocAL/include/augmentations/effects_augmentations/node_snow.h @@ -22,21 +22,22 @@ THE SOFTWARE. #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" - -class SnowNode : public Node -{ -public: - SnowNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class SnowNode : public Node { + public: + SnowNode(const std::vector &inputs, const std::vector &outputs); SnowNode() = delete; - void init(float shift); - void init(FloatParam *shift); -protected: + void init(float snow_value); + void init(FloatParam *snow_value_param); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _shift; - constexpr static float SNOW_VALUE_RANGE [2] = {0.1, 0.8}; + + private: + ParameterVX _snow_value; + constexpr static float SNOW_VALUE_RANGE[2] = {0.1, 0.8}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h b/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h index 0599ea611..53725e818 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h +++ b/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h @@ -22,23 +22,27 @@ THE SOFTWARE. 
#pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" - -class SnPNoiseNode : public Node -{ -public: - SnPNoiseNode(const std::vector &inputs, const std::vector &outputs); +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class SnPNoiseNode : public Node { + public: + SnPNoiseNode(const std::vector &inputs, const std::vector &outputs); SnPNoiseNode() = delete; - void init(float sdev); - void init(FloatParam *sdev); -protected: + void init(float noise_prob, float salt_prob, float salt_value, float pepper_value, int seed); + void init(FloatParam *noise_prob_param, FloatParam *salt_prob_param, FloatParam *salt_value_param, FloatParam *pepper_value_param, int seed); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _sdev; - constexpr static float SDEV_RANGE [2] = {0.1, 0.15}; -}; + private: + ParameterVX _noise_prob, _salt_prob, _salt_value, _pepper_value; + constexpr static float NOISE_PROB_RANGE[2] = {0.1, 1}; + constexpr static float SALT_PROB_RANGE[2] = {0.1, 1}; + constexpr static float SALT_RANGE[2] = {0.1, 1}; + constexpr static float PEPPER_RANGE[2] = {0, 0.5}; + int _seed; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop.h b/rocAL/include/augmentations/geometry_augmentations/node_crop.h index 66ffd4139..e2ccff486 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_crop.h @@ -21,29 +21,30 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" -#include "parameter_rocal_crop.h" +#include "pipeline/node.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_rocal_crop.h" -class CropNode : public Node -{ -public: - CropNode(const std::vector &inputs, const std::vector &outputs); +class CropNode : public Node { + public: + CropNode(const std::vector &inputs, const std::vector &outputs); CropNode() = delete; + ~CropNode(); void init(unsigned int crop_h, unsigned int crop_w, float x_drift, float y_drift); void init(unsigned int crop_h, unsigned int crop_w); - void init( FloatParam *crop_h_factor, FloatParam *crop_w_factor, FloatParam * x_drift, FloatParam * y_drift); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(FloatParam *crop_h_factor, FloatParam *crop_w_factor, FloatParam *x_drift, FloatParam *y_drift); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } -protected: - void create_node() override ; + + protected: + void create_node() override; void update_node() override; -private: + void create_crop_tensor(); + void *_crop_coordinates = nullptr; + vx_tensor _crop_tensor = nullptr; - size_t _dest_width; - size_t _dest_height; + private: std::shared_ptr _crop_param; }; - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h b/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h index 870232d53..ae211c2e2 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h @@ -21,30 +21,28 
@@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" -#include "parameter_vx.h" -class CropMirrorNormalizeNode : public Node -{ -public: - CropMirrorNormalizeNode(const std::vector &inputs, - const std::vector &outputs); +#include "augmentations/geometry_augmentations/node_crop.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" + +class CropMirrorNormalizeNode : public CropNode { + public: + CropMirrorNormalizeNode(const std::vector &inputs, + const std::vector &outputs); CropMirrorNormalizeNode() = delete; - void init(int crop_h, int crop_w, float start_x, float start_y, float mean, float std_dev, IntParam *mirror); - vx_array return_mirror(){ return _mirror.default_array(); } + void init(int crop_h, int crop_w, float start_x, float start_y, std::vector &mean, std::vector &std_dev, IntParam *mirror); + vx_array return_mirror() { return _mirror.default_array(); } std::shared_ptr return_crop_param() { return _crop_param; } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } -protected: - void create_node() override ; + + protected: + void create_node() override; void update_node() override; -private: + + private: std::shared_ptr _crop_param; - std::vector _mean_vx, _std_dev_vx; - vx_array _mean_array, _std_dev_array; - float _mean; - float _std_dev; + vx_array _multiplier_vx_array, _offset_vx_array; + std::vector _mean, _std_dev; ParameterVX _mirror; - constexpr static int MIRROR_RANGE [2] = {0, 1}; + constexpr static int MIRROR_RANGE[2] = {0, 1}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h b/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h index f5ce3399a..815015fe0 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h +++ 
b/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h @@ -21,30 +21,25 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" +#include "augmentations/geometry_augmentations/node_crop.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" -class CropResizeNode : public Node -{ -public: - CropResizeNode(const std::vector &inputs, const std::vector &outputs); +class CropResizeNode : public CropNode { + public: + CropResizeNode(const std::vector &inputs, const std::vector &outputs); CropResizeNode() = delete; void init(float area, float aspect_ratio, float x_center_drift, float y_center_drift); - void init(FloatParam* area, FloatParam *aspect_ratio, FloatParam * x_drift_factor, FloatParam * y_drift_factor); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(FloatParam *area, FloatParam *aspect_ratio, FloatParam *x_drift_factor, FloatParam *y_drift_factor); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } -protected: + + protected: void create_node() override; void update_node() override; -private: - size_t _dest_width; - size_t _dest_height; + private: std::shared_ptr _crop_param; - vx_array _dst_roi_width ,_dst_roi_height; + vx_array _dst_roi_width, _dst_roi_height; }; - - - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h b/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h index 31e6c5173..483450a10 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h @@ -22,18 +22,17 @@ THE SOFTWARE. 
#pragma once -#include "node.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" - -class FisheyeNode : public Node -{ -public: - FisheyeNode(const std::vector &inputs, const std::vector &outputs); +class FisheyeNode : public Node { + public: + FisheyeNode(const std::vector &inputs, const std::vector &outputs); FisheyeNode() = delete; -protected: + protected: void create_node() override; void update_node() override; -private: + + private: }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_flip.h b/rocAL/include/augmentations/geometry_augmentations/node_flip.h index d46d8a33d..eb49cc820 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_flip.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_flip.h @@ -21,27 +21,25 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_vx.h" -#include "parameter_factory.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class FlipNode : public Node -{ -public: - FlipNode(const std::vector &inputs, const std::vector &outputs); +class FlipNode : public Node { + public: + FlipNode(const std::vector &inputs, const std::vector &outputs); FlipNode() = delete; - void init(int flip_axis); - void init(IntParam *flip_axis); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } - vx_array get_flip_axis() { return _flip_axis.default_array(); } -protected: + void init(int h_flag, int v_flag); + void init(IntParam *h_flag_param, IntParam *v_flag_param); + vx_array get_horizontal_flip() { return _horizontal.default_array(); } + vx_array get_vertical_flip() { return _vertical.default_array(); } + + protected: void create_node() override; void update_node() override; -private: - int _axis; 
- ParameterVX _flip_axis; - constexpr static int FLIP_SIZE [2] = {0, 2}; -}; \ No newline at end of file + + private: + ParameterVX _horizontal, _vertical; + constexpr static int HORIZONTAL_RANGE[2] = {0, 1}; + constexpr static int VERTICAL_RANGE[2] = {0, 1}; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h b/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h index 4e5cfb3e6..50d8e29c1 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h @@ -21,25 +21,24 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" - - -class LensCorrectionNode : public Node -{ -public: - LensCorrectionNode(const std::vector &inputs, const std::vector &outputs); +class LensCorrectionNode : public Node { + public: + LensCorrectionNode(const std::vector &inputs, const std::vector &outputs); LensCorrectionNode() = delete; void init(float strength, float zoom); void init(FloatParam *strength, FloatParam *zoom); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _strength; ParameterVX _zoom; - constexpr static float STRENGTH_RANGE [2] = {0.05, 3.0}; - constexpr static float ZOOM_RANGE [2] = {1.0, 1.3}; + constexpr static float STRENGTH_RANGE[2] = {0.05, 3.0}; + constexpr static float ZOOM_RANGE[2] = {1.0, 1.3}; }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h b/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h index 17eac5ff5..8b3362b76 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h @@ -21,30 +21,25 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" +#include "augmentations/geometry_augmentations/node_crop.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" -class RandomCropNode : public Node -{ -public: - RandomCropNode(const std::vector &inputs, const std::vector &outputs); +class RandomCropNode : public CropNode { + public: + RandomCropNode(const std::vector &inputs, const std::vector &outputs); RandomCropNode() = delete; - void init(float area, float aspect_ratio, float x_drift, float y_drift); void init(FloatParam *crop_area_factor, FloatParam *crop_aspect_ratio, FloatParam *x_drift, FloatParam *y_drift, int num_of_attempts); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } - int get_num_of_attempts(){return _num_of_attempts;} + int get_num_of_attempts() { return _num_of_attempts; } -protected: + protected: void create_node() override; void update_node() override; -private: - size_t _dest_width; - size_t _dest_height; - int _num_of_attempts = 20; + private: + int _num_of_attempts = 20; std::shared_ptr _crop_param; }; - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_resize.h b/rocAL/include/augmentations/geometry_augmentations/node_resize.h index f62e92c08..f20bffd6e 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize.h @@ -21,17 +21,13 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" +#include "pipeline/node.h" #include "rocal_api_types.h" class ResizeNode : public Node { public: - ResizeNode(const std::vector &inputs, const std::vector &outputs); + ResizeNode(const std::vector &inputs, const std::vector &outputs); ResizeNode() = delete; - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } void init(unsigned dest_width, unsigned dest_height, RocalResizeScalingMode scaling_mode, const std::vector& max_size, RocalResizeInterpolationType interpolation_type); void adjust_out_roi_size(); diff --git a/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h b/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h index 464d042b4..10cd2a6eb 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h @@ -21,31 +21,34 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_vx.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" +#include "augmentations/geometry_augmentations/node_crop.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" +#include "rocal_api_types.h" -class CropParam; - -class ResizeCropMirrorNode : public Node -{ -public: - ResizeCropMirrorNode(const std::vector &inputs, const std::vector &outputs); +class ResizeCropMirrorNode : public CropNode { + public: + ResizeCropMirrorNode(const std::vector &inputs, const std::vector &outputs); ResizeCropMirrorNode() = delete; - void init(unsigned int crop_h, unsigned int crop_w, IntParam *mirror); - void init( FloatParam *crop_h_factor, FloatParam *crop_w_factor, IntParam *mirror); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(unsigned int crop_h, unsigned int crop_w, IntParam *mirror, + RocalResizeInterpolationType interpolation_type = RocalResizeInterpolationType::ROCAL_LINEAR_INTERPOLATION); + void init(FloatParam *crop_h_factor, FloatParam *crop_w_factor, IntParam *mirror, + RocalResizeInterpolationType interpolation_type = RocalResizeInterpolationType::ROCAL_LINEAR_INTERPOLATION); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } vx_array get_mirror() { return _mirror.default_array(); } -protected: + void adjust_out_roi_size(); + + protected: void create_node() override; void update_node() override; -private: + + private: std::shared_ptr _crop_param; - vx_array _dst_roi_width ,_dst_roi_height; + vx_array _dst_roi_width, _dst_roi_height; ParameterVX _mirror; - constexpr static int MIRROR_RANGE [2] = {0, 1}; + constexpr static int MIRROR_RANGE[2] 
= {0, 1}; + int _interpolation_type; }; - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h b/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h index bafd5f6d5..376996865 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h @@ -21,30 +21,31 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class ResizeMirrorNormalizeNode : public Node -{ -public: - ResizeMirrorNormalizeNode(const std::vector &inputs, const std::vector &outputs); +class ResizeMirrorNormalizeNode : public Node { + public: + ResizeMirrorNormalizeNode(const std::vector &inputs, const std::vector &outputs); ResizeMirrorNormalizeNode() = delete; - void init(std::vector& mean, std::vector& std_dev, IntParam *mirror); - vx_array get_dst_width() { return _dst_roi_width; } - vx_array get_dst_height() { return _dst_roi_height;} - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } - vx_array return_mirror(){ return _mirror.default_array(); } -protected: + void init(unsigned dest_width, unsigned dest_height, RocalResizeScalingMode scaling_mode, std::vector max_size, + RocalResizeInterpolationType interpolation_type, std::vector &mean, std::vector &std_dev, IntParam *mirror); + void adjust_out_roi_size(); + vx_array get_mirror() { return _mirror.default_array(); } + + protected: void create_node() override; void update_node() override; -private: - vx_array _dst_roi_width, _dst_roi_height; - std::vector _dest_width_val, _dest_height_val; - vx_array _mean_array, _std_dev_array; - std::vector _mean; - std::vector _std_dev; + + private: + vx_array _mean_vx_array, _std_dev_vx_array, 
_mirror_vx_array, _dst_roi_width, _dst_roi_height; + std::vector _mean, _std_dev; + int _interpolation_type; ParameterVX _mirror; constexpr static int _mirror_range[2] = {0, 1}; + RocalResizeScalingMode _scaling_mode; + unsigned _src_width, _src_height, _dst_width, _dst_height, _out_width, _out_height; + unsigned _max_width = 0, _max_height = 0; + std::vector _dst_roi_width_vec, _dst_roi_height_vec; }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_rotate.h b/rocAL/include/augmentations/geometry_augmentations/node_rotate.h index f2abab09b..56adfefef 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_rotate.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_rotate.h @@ -21,30 +21,28 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" +#include "rocal_api_types.h" -class RotateNode : public Node -{ -public: - RotateNode(const std::vector &inputs, const std::vector &outputs); +class RotateNode : public Node { + public: + RotateNode(const std::vector &inputs, const std::vector &outputs); RotateNode() = delete; - void init(float angle); - void init(FloatParam *angle); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } + void init(float angle, RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + void init(FloatParam *angle_param, RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } vx_array 
get_angle() { return _angle.default_array(); } -protected: + protected: void create_node() override; void update_node() override; -private: - ParameterVX _angle; - vx_array _dst_roi_width,_dst_roi_height; - constexpr static float ROTATE_ANGLE_RANGE [2] = {0, 180}; -}; \ No newline at end of file + private: + ParameterVX _angle; + int _interpolation_type; + constexpr static float ROTATE_ANGLE_RANGE[2] = {0, 180}; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_slice.h b/rocAL/include/augmentations/geometry_augmentations/node_slice.h new file mode 100644 index 000000000..7077a8da5 --- /dev/null +++ b/rocAL/include/augmentations/geometry_augmentations/node_slice.h @@ -0,0 +1,44 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" +#include "rocal_api_types.h" + +class SliceNode : public Node { + public: + SliceNode(const std::vector &inputs, const std::vector &outputs); + SliceNode() = delete; + void init(Tensor *anchor_param, Tensor *shape_param, std::vector &fill_values_param, RocalOutOfBoundsPolicy policy); + + protected: + void create_node() override; + void update_node() override; + + private: + Tensor *_anchor, *_shape; + std::vector _fill_values, _fill_values_vec; + RocalOutOfBoundsPolicy _policy = RocalOutOfBoundsPolicy::ROCAL_ERROR; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h b/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h index 476cbacd2..63bf9613f 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h @@ -21,34 +21,35 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class WarpAffineNode : public Node -{ -public: - WarpAffineNode(const std::vector &inputs, const std::vector &outputs); +class WarpAffineNode : public Node { + public: + WarpAffineNode(const std::vector &inputs, const std::vector &outputs); WarpAffineNode() = delete; - void init(float x0, float x1, float y0, float y1, float o0, float o1); - void init(FloatParam* x0, FloatParam* x1, FloatParam* y0, FloatParam* y1, FloatParam* o0, FloatParam* o1); -protected: + void init(float x0, float x1, float y0, float y1, float o0, float o1, RocalResizeInterpolationType interpolation_type); + void init(FloatParam *x0, FloatParam *x1, FloatParam *y0, FloatParam *y1, + FloatParam *o0, FloatParam *o1, RocalResizeInterpolationType interpolation_type); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _x0; ParameterVX _x1; ParameterVX _y0; ParameterVX _y1; ParameterVX _o0; ParameterVX _o1; - std::vector _affine; - vx_array _dst_roi_width,_dst_roi_height; vx_array _affine_array; - constexpr static float COEFFICIENT_RANGE_0 [2] = {-0.35, 0.35}; - constexpr static float COEFFICIENT_RANGE_1 [2] = {0.65, 1.35}; - constexpr static float COEFFICIENT_RANGE_OFFSET [2] = {-10.0, 10.0}; + constexpr static float COEFFICIENT_RANGE_0[2] = {-0.35, 0.35}; + constexpr static float COEFFICIENT_RANGE_1[2] = {0.65, 1.35}; + constexpr static float COEFFICIENT_RANGE_OFFSET[2] = {-10.0, 10.0}; void update_affine_array(); + int _interpolation_type; }; diff --git a/rocAL/include/augmentations/node_copy.h b/rocAL/include/augmentations/node_copy.h index 2cd00d1bf..680f20d4b 100644 --- a/rocAL/include/augmentations/node_copy.h +++ b/rocAL/include/augmentations/node_copy.h @@ -21,16 +21,15 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" -class CopyNode : public Node -{ -public: - CopyNode(const std::vector &inputs, const std::vector &outputs); +class CopyNode : public Node { + public: + CopyNode(const std::vector &inputs, const std::vector &outputs); CopyNode() = delete; -protected: + protected: void create_node() override; - void update_node() override {}; + void update_node() override{}; }; diff --git a/rocAL/include/augmentations/node_nop.h b/rocAL/include/augmentations/node_nop.h index 3876a2572..b1d226cbf 100644 --- a/rocAL/include/augmentations/node_nop.h +++ b/rocAL/include/augmentations/node_nop.h @@ -21,15 +21,15 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" -class NopNode : public Node -{ -public: - NopNode(const std::vector &inputs, const std::vector &outputs); +class NopNode : public Node { + public: + NopNode(const std::vector &inputs, const std::vector &outputs); NopNode() = delete; -protected: + + protected: void create_node() override; void update_node() override; }; diff --git a/rocAL/include/augmentations/node_normal_distribution.h b/rocAL/include/augmentations/node_normal_distribution.h new file mode 100644 index 000000000..a3597da1c --- /dev/null +++ b/rocAL/include/augmentations/node_normal_distribution.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include + +#include "pipeline/commons.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" + +class NormalDistributionNode : public Node { + public: + NormalDistributionNode(const std::vector &inputs, const std::vector &outputs); + NormalDistributionNode() = delete; + void init(float mean, float stddev); + void update_param(); + + protected: + void create_node() override; + void update_node() override; + + private: + float _mean, _std_dev; + std::normal_distribution _dist_normal; + std::vector _normal_distribution_array; + BatchRNG _rngs = {89, 2}; // Random Seed & BatchSize for initialization +}; diff --git a/rocAL/include/augmentations/node_sequence_rearrange.h b/rocAL/include/augmentations/node_sequence_rearrange.h index 247b7728d..6497e52d7 100644 --- a/rocAL/include/augmentations/node_sequence_rearrange.h +++ b/rocAL/include/augmentations/node_sequence_rearrange.h @@ -21,22 +21,21 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_vx.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_vx.h" -class SequenceRearrangeNode : public Node -{ -public: - SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); +class SequenceRearrangeNode : public Node { + public: + SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); SequenceRearrangeNode() = delete; - void init(unsigned int* new_order, unsigned int new_sequence_length, unsigned int sequence_length, unsigned int sequence_count); -protected: + void init(std::vector &new_order); + + protected: void create_node() override; void update_node() override; -private: + + private: std::vector _new_order; - unsigned int _new_sequence_length, _sequence_length, _sequence_count; - vx_array _sequence_array; }; diff --git a/rocAL/include/augmentations/node_ssd_random_crop.h b/rocAL/include/augmentations/node_ssd_random_crop.h index 63a5bd042..224068f4d 100644 --- a/rocAL/include/augmentations/node_ssd_random_crop.h +++ b/rocAL/include/augmentations/node_ssd_random_crop.h @@ -20,76 +20,70 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" -#include "parameter_crop_factory.h" - +#include "augmentations/geometry_augmentations/node_crop.h" +#include "parameters/parameter_crop_factory.h" +#include "parameters/parameter_factory.h" // todo:: move this to common header -template +template class SeededRNG { - /* - * @param batch_size How many RNGs to store - * @param state_size How many seed are used to initialize one RNG. Used to lower probablity of - * collisions between seeds used to initialize RNGs in different operators. 
- */ -public: - SeededRNG (int batch_size = 128) { - std::random_device source; - _batch_size = batch_size; - std::size_t _random_data_size = state_size * batch_size ; - std::vector random_data(_random_data_size); - std::generate(random_data.begin(), random_data.end(), std::ref(source)); - _rngs.reserve(batch_size); - for (int i=0; i < (int)(_batch_size*state_size); i += state_size) { - std::seed_seq seeds(std::begin(random_data) + i, std::begin(random_data)+ i +state_size); - _rngs.emplace_back(T(seeds)); - } - } + /* + * @param batch_size How many RNGs to store + * @param state_size How many seeds are used to initialize one RNG. Used to lower probability of + * collisions between seeds used to initialize RNGs in different operators. + */ + public: + SeededRNG(int batch_size = 128) { + std::random_device source; + _batch_size = batch_size; + std::size_t _random_data_size = state_size * batch_size; + std::vector random_data(_random_data_size); + std::generate(random_data.begin(), random_data.end(), std::ref(source)); + _rngs.reserve(batch_size); + for (int i = 0; i < (int)(_batch_size * state_size); i += state_size) { + std::seed_seq seeds(std::begin(random_data) + i, std::begin(random_data) + i + state_size); + _rngs.emplace_back(T(seeds)); + } + } - /** - * Returns engine corresponding to given sample ID - */ - T &operator[](int sample) noexcept { - return _rngs[sample % _batch_size]; - } + /** + * Returns engine corresponding to given sample ID + */ + T &operator[](int sample) noexcept { + return _rngs[sample % _batch_size]; + } -private: + private: std::vector _rngs; int _batch_size; }; -class SSDRandomCropNode : public Node -{ -public: - SSDRandomCropNode(const std::vector &inputs, const std::vector &outputs); +class SSDRandomCropNode : public CropNode { + public: + SSDRandomCropNode(const std::vector &inputs, const std::vector &outputs); SSDRandomCropNode() = delete; void init(FloatParam *crop_area_factor, FloatParam *crop_aspect_ratio, FloatParam *x_drift, 
FloatParam *y_drift, int num_of_attempts); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } - float get_threshold(){return _threshold;} - std::vector> get_iou_range(){return _iou_range;} - bool is_entire_iou(){return _entire_iou;} + float get_threshold() { return _threshold; } + std::vector> get_iou_range() { return _iou_range; } + bool is_entire_iou() { return _entire_iou; } void set_meta_data_batch() {} -protected: + protected: void create_node() override; void update_node() override; -private: + private: std::shared_ptr _meta_crop_param; - vx_array _crop_width, _crop_height, _x1, _y1, _x2, _y2; - std::vector _crop_width_val, _crop_height_val, _x1_val, _y1_val, _x2_val, _y2_val; - // unsigned int _dst_width, _dst_height; - std::vector in_width, in_height; + std::vector _x1_val, _y1_val, _crop_width_val, _crop_height_val; size_t _dest_width; size_t _dest_height; - float _threshold = 0.05; - std::vector> _iou_range; + float _threshold = 0.05; + std::vector> _iou_range; int _num_of_attempts = 20; bool _entire_iou = false; std::shared_ptr _crop_param; - SeededRNG _rngs; // setting the state_size to 4 for 4 random parameters. - -}; \ No newline at end of file + SeededRNG _rngs; // setting the state_size to 4 for 4 random parameters. +}; diff --git a/rocAL/include/augmentations/node_uniform_distribution.h b/rocAL/include/augmentations/node_uniform_distribution.h new file mode 100644 index 000000000..eaa00b80f --- /dev/null +++ b/rocAL/include/augmentations/node_uniform_distribution.h @@ -0,0 +1,47 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include + +#include "pipeline/commons.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" + +class UniformDistributionNode : public Node { + public: + UniformDistributionNode(const std::vector &inputs, + const std::vector &outputs); + UniformDistributionNode() = delete; + void init(std::vector &range); + void update_param(); + + protected: + void create_node() override; + void update_node() override; + + private: + float _min, _max; + std::uniform_real_distribution _dist_uniform; // uniform distribution + std::vector _uniform_distribution_array; + BatchRNG _rngs = {89, 2}; // Random Seed, Random BatchSize for initialization +}; diff --git a/rocAL/include/decoders/audio/audio_decoder.h b/rocAL/include/decoders/audio/audio_decoder.h new file mode 100644 index 000000000..9175d9cd1 --- /dev/null +++ b/rocAL/include/decoders/audio/audio_decoder.h @@ -0,0 +1,51 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +#ifdef ROCAL_AUDIO +#include "sndfile.h" + +class AudioDecoder { + public: + enum class Status { + OK = 0, + HEADER_DECODE_FAILED, + CONTENT_DECODE_FAILED, + UNSUPPORTED, + FAILED, + NO_MEMORY + }; + virtual AudioDecoder::Status Initialize(const char* src_filename) = 0; + virtual AudioDecoder::Status Decode(float* buffer) = 0; + virtual AudioDecoder::Status DecodeInfo(int* samples, int* channels, float* sample_rates) = 0; + virtual void Release() = 0; + virtual ~AudioDecoder() = default; + + protected: + SF_INFO _sfinfo; + SNDFILE* _sf_ptr; +}; +#endif diff --git a/rocAL/include/decoders/audio/audio_decoder_factory.hpp b/rocAL/include/decoders/audio/audio_decoder_factory.hpp new file mode 100644 index 000000000..b882e79a1 --- /dev/null +++ b/rocAL/include/decoders/audio/audio_decoder_factory.hpp @@ -0,0 +1,37 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "decoders/audio/audio_decoder.h" +#include "decoders/audio/generic_audio_decoder.h" + +#ifdef ROCAL_AUDIO +static std::shared_ptr create_audio_decoder(DecoderConfig config) { + switch (config.type()) { + case DecoderType::AUDIO_SOFTWARE_DECODE: + return std::make_shared(); + default: + THROW("Unsupported decoder type " + TOSTR(config.type())); + } +} +#endif diff --git a/rocAL/include/decoders/audio/generic_audio_decoder.h b/rocAL/include/decoders/audio/generic_audio_decoder.h new file mode 100644 index 000000000..d54a1460e --- /dev/null +++ b/rocAL/include/decoders/audio/generic_audio_decoder.h @@ -0,0 +1,38 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "decoders/audio/audio_decoder.h" + +#ifdef ROCAL_AUDIO +class GenericAudioDecoder : public AudioDecoder { + public: + //! Default constructor + GenericAudioDecoder(); + AudioDecoder::Status Initialize(const char* src_filename) override; + AudioDecoder::Status Decode(float* buffer) override; + AudioDecoder::Status DecodeInfo(int* samples, int* channels, float* sample_rates) override; + void Release() override; + ~GenericAudioDecoder() override; +}; +#endif diff --git a/rocAL/include/decoders/image/decoder.h b/rocAL/include/decoders/image/decoder.h index aea8f6c86..fbbe25d04 100644 --- a/rocAL/include/decoders/image/decoder.h +++ b/rocAL/include/decoders/image/decoder.h @@ -25,26 +25,26 @@ THE SOFTWARE. 
#include #include #include -#include "parameter_factory.h" -#include "parameter_random_crop_decoder.h" +#include "parameters/parameter_factory.h" +#include "parameters/parameter_random_crop_decoder.h" -enum class DecoderType -{ - TURBO_JPEG = 0,//!< Can only decode - FUSED_TURBO_JPEG = 1, //!< FOR PARTIAL DECODING - OPENCV_DEC = 2, //!< for back_up decoding - HW_JPEG_DEC = 3, - SKIP_DECODE = 4, //!< For skipping decoding in case of uncompressed data from reader - OVX_FFMPEG,//!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously +enum class DecoderType { + TURBO_JPEG = 0, //!< Can only decode + FUSED_TURBO_JPEG = 1, //!< FOR PARTIAL DECODING + OPENCV_DEC = 2, //!< for back_up decoding + HW_JPEG_DEC = 3, + SKIP_DECODE = 4, //!< For skipping decoding in case of uncompressed data from reader + OVX_FFMPEG = 5, //!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously + FFMPEG_SOFTWARE_DECODE = 6, + FFMPEG_HARDWARE_DECODE = 7, + AUDIO_SOFTWARE_DECODE = 8 //!< Uses sndfile to decode audio files }; - -class DecoderConfig -{ -public: +class DecoderConfig { + public: DecoderConfig() {} - explicit DecoderConfig(DecoderType type):_type(type){} - virtual DecoderType type() {return _type; }; + explicit DecoderConfig(DecoderType type) : _type(type) {} + virtual DecoderType type() { return _type; }; DecoderType _type = DecoderType::TURBO_JPEG; void set_random_area(std::vector &random_area) { _random_area = std::move(random_area); } void set_random_aspect_ratio(std::vector &random_aspect_ratio) { _random_aspect_ratio = std::move(random_aspect_ratio); } @@ -54,17 +54,15 @@ class DecoderConfig unsigned get_num_attempts() { return _num_attempts; } void set_seed(int seed) { _seed = seed; } int get_seed() { return _seed; } -private: + + private: std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts = 10; - int _seed = std::time(0); //seed for decoder random crop + int _seed = std::time(0); // seed for 
decoder random crop }; - -class Decoder -{ -public: - +class Decoder { + public: enum class Status { OK = 0, HEADER_DECODE_FAILED, @@ -86,11 +84,11 @@ class Decoder \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - virtual Status decode_info(unsigned char* input_buffer, - size_t input_size, - int* width, - int* height, - int* color_comps) = 0; + virtual Status decode_info(unsigned char *input_buffer, + size_t input_size, + int *width, + int *height, + int *color_comps) = 0; // TODO: Extend the decode API if needed, color format and order can be passed to the function //! Decodes the actual image data @@ -111,7 +109,7 @@ class Decoder virtual ~Decoder() = default; virtual void initialize(int device_id) = 0; virtual bool is_partial_decoder() = 0; - virtual void set_bbox_coords(std::vector bbox_coords) = 0; - virtual std::vector get_bbox_coords() = 0; + virtual void set_bbox_coords(std::vector bbox_coords) = 0; + virtual std::vector get_bbox_coords() = 0; virtual void set_crop_window(CropWindow &crop_window) = 0; }; diff --git a/rocAL/include/decoders/image/decoder_factory.h b/rocAL/include/decoders/image/decoder_factory.h index 978145c6c..e4b382af9 100644 --- a/rocAL/include/decoders/image/decoder_factory.h +++ b/rocAL/include/decoders/image/decoder_factory.h @@ -22,5 +22,6 @@ THE SOFTWARE. #pragma once #include -#include "decoder.h" + +#include "decoders/image/decoder.h" std::shared_ptr create_decoder(DecoderConfig config); \ No newline at end of file diff --git a/rocAL/include/decoders/image/fused_crop_decoder.h b/rocAL/include/decoders/image/fused_crop_decoder.h index 247681191..5e691cc1b 100644 --- a/rocAL/include/decoders/image/fused_crop_decoder.h +++ b/rocAL/include/decoders/image/fused_crop_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,10 +21,10 @@ THE SOFTWARE. */ #pragma once -#include "decoder.h" +#include "decoders/image/decoder.h" #include class FusedCropTJDecoder : public Decoder { -public: + public: //! Default constructor FusedCropTJDecoder(); //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image @@ -35,7 +35,7 @@ class FusedCropTJDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! 
@@ -51,38 +51,36 @@ class FusedCropTJDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; - + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~FusedCropTJDecoder() override; - void initialize(int device_id) override {}; + void initialize(int device_id) override{}; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } - std::vector get_bbox_coords() override { return _bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + std::vector get_bbox_coords() override { return _bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } -private: + private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; + const static unsigned SCALING_FACTORS_COUNT = 16; const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - { 2, 1 }, - { 15, 8 }, - { 7, 4 }, - { 13, 8 }, - { 3, 2 }, - { 11, 8 }, - { 5, 4 }, - { 9, 8 }, - { 1, 1 }, - { 7, 8 }, - { 3, 4 }, - { 5, 8 }, - { 1, 2 }, - { 3, 8 }, - { 1, 4 }, - { 1, 8 } - }; + {2, 1}, + {15, 8}, + {7, 4}, + {13, 8}, + {3, 2}, + {11, 8}, + {5, 4}, + {9, 8}, + {1, 1}, + {7, 8}, + {3, 4}, + {5, 8}, + {1, 2}, + {3, 8}, + {1, 4}, + {1, 8}}; bool _is_partial_decoder = true; - std::vector _bbox_coord; + std::vector _bbox_coord; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/image/hw_jpeg_decoder.h b/rocAL/include/decoders/image/hw_jpeg_decoder.h index 090f2813c..25acca4fb 100644 --- a/rocAL/include/decoders/image/hw_jpeg_decoder.h +++ b/rocAL/include/decoders/image/hw_jpeg_decoder.h 
@@ -22,24 +22,23 @@ THE SOFTWARE. #pragma once -#include "decoder.h" +#include "decoders/image/decoder.h" #ifdef ROCAL_VIDEO -extern "C" -{ + +extern "C" { #include #include -#include -#include -#include #include +#include #include +#include +#include } - class HWJpegDecoder : public Decoder { -public: + public: //! Default constructor - HWJpegDecoder() {}; + HWJpegDecoder(){}; //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image /*! \param input_buffer User provided buffer containig the encoded image @@ -48,7 +47,7 @@ class HWJpegDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! 
@@ -64,16 +63,16 @@ class HWJpegDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~HWJpegDecoder() override; - void initialize(int device_id=0); + void initialize(int device_id = 0); bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord;} - void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window;} - std::vector get_bbox_coords() override { return _bbox_coord;} + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } + std::vector get_bbox_coords() override { return _bbox_coord; } -private: + private: void release(); const char *_src_filename = NULL; AVHWDeviceType _hw_type = AV_HWDEVICE_TYPE_NONE; @@ -88,7 +87,7 @@ class HWJpegDecoder : public Decoder { size_t _codec_width, _codec_height; bool _is_partial_decoder = false; - std::vector _bbox_coord; + std::vector _bbox_coord; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/image/open_cv_decoder.h b/rocAL/include/decoders/image/open_cv_decoder.h index 74ff8f986..dee165bc5 100644 --- a/rocAL/include/decoders/image/open_cv_decoder.h +++ b/rocAL/include/decoders/image/open_cv_decoder.h @@ -22,7 +22,7 @@ THE SOFTWARE. #pragma once -#include "decoder.h" +#include "decoders/image/decoder.h" #if ENABLE_OPENCV #include @@ -33,22 +33,22 @@ using namespace cv; #endif class CVDecoder : public Decoder { -public: + public: //! Default constructor CVDecoder(); //! 
Decodes the header of the Jpeg compressed data and returns basic info about the compressed image /*! \param input_buffer User provided buffer containig the encoded image \param input_size Size of the compressed data provided in the input_buffer - \param width pointer to the user's buffer to write the width of the compressed image to - \param height pointer to the user's buffer to write the height of the compressed image to - \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to + \param width pointer to the user's buffer to write the width of the compressed image to + \param height pointer to the user's buffer to write the height of the compressed image to + \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; - + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; + //! Decodes the actual image data //! Decodes the actual image data - /*! + /*! \param input_buffer User provided buffer containig the encoded image \param output_buffer User provided buffer used to write the decoded image into \param input_size Size of the compressed data provided in the input_buffer @@ -58,25 +58,25 @@ class CVDecoder : public Decoder { \param original_image_height The actual height of the compressed image. 
decoded height will be equal to this if this is smaller than max_decoded_height */ Status decode(unsigned char *input_buffer, size_t input_size, unsigned char *output_buffer, - size_t max_decoded_width, size_t max_decoded_height, - size_t original_image_width, size_t original_image_height, - size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + size_t max_decoded_width, size_t max_decoded_height, + size_t original_image_width, size_t original_image_height, + size_t &actual_decoded_width, size_t &actual_decoded_height, + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } - std::vector get_bbox_coords() override { return _bbox_coord; } - //virtual Status decode(unsigned char* input_buffer, size_t input_size, unsigned char* output_buffer,int desired_width, int desired_height, ColorFormat desired_color); - void initialize(int device_id) override {}; + std::vector get_bbox_coords() override { return _bbox_coord; } + // virtual Status decode(unsigned char* input_buffer, size_t input_size, unsigned char* output_buffer,int desired_width, int desired_height, ColorFormat desired_color); + void initialize(int device_id) override{}; ~CVDecoder() override; -private: - //cv::Mat m_mat_compressed; - cv::Mat m_mat_scaled; - cv::Mat m_mat_orig; - bool _is_partial_decoder = false; - std::vector _bbox_coord; - CropWindow _crop_window; + private: + // cv::Mat m_mat_compressed; + cv::Mat m_mat_scaled; + cv::Mat m_mat_orig; + bool _is_partial_decoder = false; + std::vector 
_bbox_coord; + CropWindow _crop_window; }; #endif diff --git a/rocAL/include/decoders/image/turbo_jpeg_decoder.h b/rocAL/include/decoders/image/turbo_jpeg_decoder.h index c5df6d460..3deaffb98 100644 --- a/rocAL/include/decoders/image/turbo_jpeg_decoder.h +++ b/rocAL/include/decoders/image/turbo_jpeg_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,11 +22,11 @@ THE SOFTWARE. #pragma once -#include "decoder.h" +#include "decoders/image/decoder.h" #include class TJDecoder : public Decoder { -public: + public: //! Default constructor TJDecoder(); //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image @@ -37,7 +37,7 @@ class TJDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! 
@@ -53,37 +53,21 @@ class TJDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~TJDecoder() override; - void initialize(int device_id) override {}; + void initialize(int device_id) override{}; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } - std::vector get_bbox_coords() override { return _bbox_coord; } -private: + std::vector get_bbox_coords() override { return _bbox_coord; } + + private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; - const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - { 2, 1 }, - { 15, 8 }, - { 7, 4 }, - { 13, 8 }, - { 3, 2 }, - { 11, 8 }, - { 5, 4 }, - { 9, 8 }, - { 1, 1 }, - { 7, 8 }, - { 3, 4 }, - { 5, 8 }, - { 1, 2 }, - { 3, 8 }, - { 1, 4 }, - { 1, 8 } - }; + tjscalingfactor *_scaling_factors = nullptr; + int _num_scaling_factors = 0; bool _is_partial_decoder = false; - std::vector _bbox_coord; + std::vector _bbox_coord; const static unsigned _max_scaling_factor = 8; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/libjpeg/libjpeg_extra.h b/rocAL/include/decoders/libjpeg/libjpeg_extra.h new file mode 100644 index 000000000..69db1028a --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_extra.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include +#include +#include +#include +#include "libjpeg_utils.h" + +extern "C" { + +//! extra apis for rocal to support partial decoding + +//! * Helper function to set the source +//! * This function doesn't scale the decoded image + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function doesn't scale the decoded image + +/*! 
+ \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param pixelFormat pixel format of the image + \param crop_x_diff, crop_width_diff Actual crop_x and crop_w (adjusted to MB boundary) + \param x1, y1, crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff, + unsigned int x1, unsigned int y1, unsigned int crop_width, unsigned int crop_height); + + +//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image. +//! * This function scales the decoded image to fit the output dims +/*! + \param handle TJPeg handle + \param jpegBuf compressed jpeg image buffer + \param jpegSize Size of the compressed data provided in the input_buffer + \param dstBuf user provided output buffer + \param width, pitch, height width, stride and height of the allocated buffer + \param flags TJPEG flags + \param crop_width, crop_height requested crop window +*/ + +int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags, unsigned int crop_width, unsigned int crop_height); +} \ No newline at end of file diff --git a/rocAL/include/decoders/libjpeg/libjpeg_utils.h b/rocAL/include/decoders/libjpeg/libjpeg_utils.h new file mode 100644 index 000000000..9cdbab283 --- /dev/null +++ b/rocAL/include/decoders/libjpeg/libjpeg_utils.h @@ -0,0 +1,29 @@ +/* +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +//! turbojpeg includes + +extern "C" { +#include "jerror.h" +#include "jpeglib.h" +} diff --git a/rocAL/include/decoders/video/ffmpeg_video_decoder.h b/rocAL/include/decoders/video/ffmpeg_video_decoder.h index 3a46d06c8..e66ee9f7b 100644 --- a/rocAL/include/decoders/video/ffmpeg_video_decoder.h +++ b/rocAL/include/decoders/video/ffmpeg_video_decoder.h @@ -25,9 +25,8 @@ THE SOFTWARE. #include "video_decoder.h" #ifdef ROCAL_VIDEO -class FFmpegVideoDecoder : public VideoDecoder -{ -public: +class FFmpegVideoDecoder : public VideoDecoder { + public: //! 
Default constructor FFmpegVideoDecoder(); VideoDecoder::Status Initialize(const char *src_filename) override; @@ -35,7 +34,8 @@ class FFmpegVideoDecoder : public VideoDecoder int seek_frame(AVRational avg_frame_rate, AVRational time_base, unsigned frame_number) override; void release() override; ~FFmpegVideoDecoder() override; -private: + + private: const char *_src_filename = NULL; AVFormatContext *_fmt_ctx = NULL; AVCodecContext *_video_dec_ctx = NULL; diff --git a/rocAL/include/decoders/video/hardware_video_decoder.h b/rocAL/include/decoders/video/hardware_video_decoder.h index fe23224a5..dad64e02b 100644 --- a/rocAL/include/decoders/video/hardware_video_decoder.h +++ b/rocAL/include/decoders/video/hardware_video_decoder.h @@ -25,9 +25,8 @@ THE SOFTWARE. #include "video_decoder.h" #ifdef ROCAL_VIDEO -class HardWareVideoDecoder : public VideoDecoder -{ -public: +class HardWareVideoDecoder : public VideoDecoder { + public: //! Default constructor HardWareVideoDecoder(); VideoDecoder::Status Initialize(const char *src_filename) override; @@ -35,7 +34,8 @@ class HardWareVideoDecoder : public VideoDecoder int seek_frame(AVRational avg_frame_rate, AVRational time_base, unsigned frame_number) override; void release() override; ~HardWareVideoDecoder() override; -private: + + private: const char *_src_filename = NULL; AVFormatContext *_fmt_ctx = NULL; AVCodecContext *_video_dec_ctx = NULL; diff --git a/rocAL/include/decoders/video/video_decoder.h b/rocAL/include/decoders/video/video_decoder.h index e6549140f..74c2b247f 100644 --- a/rocAL/include/decoders/video/video_decoder.h +++ b/rocAL/include/decoders/video/video_decoder.h @@ -26,45 +26,27 @@ THE SOFTWARE. 
#include #include #ifdef ROCAL_VIDEO -extern "C" -{ -#include -#include -#include -#include + +extern "C" { #include +#include +#include #include -#include -#include #include -#include -#include #include +#include +#include +#include +#include +#include } #endif -#include "parameter_factory.h" - -enum class VideoDecoderType -{ - FFMPEG_SOFTWARE_DECODE = 0, - FFMPEG_HARDWARE_DECODE = 1, -}; - -class VideoDecoderConfig -{ -public: - VideoDecoderConfig() {} - explicit VideoDecoderConfig(VideoDecoderType type) : _type(type) {} - virtual VideoDecoderType type() { return _type; }; - VideoDecoderType _type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; -}; +#include "parameters/parameter_factory.h" #ifdef ROCAL_VIDEO -class VideoDecoder -{ -public: - enum class Status - { +class VideoDecoder { + public: + enum class Status { OK = 0, HEADER_DECODE_FAILED, CONTENT_DECODE_FAILED, @@ -72,8 +54,7 @@ class VideoDecoder FAILED, NO_MEMORY }; - enum class ColorFormat - { + enum class ColorFormat { GRAY = 0, RGB, BGR diff --git a/rocAL/include/decoders/video/video_decoder_factory.h b/rocAL/include/decoders/video/video_decoder_factory.h index 0b108e465..36fe88bd3 100644 --- a/rocAL/include/decoders/video/video_decoder_factory.h +++ b/rocAL/include/decoders/video/video_decoder_factory.h @@ -23,7 +23,8 @@ THE SOFTWARE. #pragma once #include #include "video_decoder.h" +#include "decoders/image/decoder.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_decoder(VideoDecoderConfig config); +std::shared_ptr create_video_decoder(DecoderConfig config); #endif diff --git a/rocAL/include/device/device_code.h b/rocAL/include/device/device_code.h index 28e9f308f..550c731c2 100644 --- a/rocAL/include/device/device_code.h +++ b/rocAL/include/device/device_code.h @@ -22,20 +22,18 @@ THE SOFTWARE. 
#pragma once -#include #include - - +#include class DeviceCode { -public: - explicit DeviceCode(const std::string& source_code, const std::string& program_name, const std::vector& kernel_list ): - m_source_code(source_code), m_prog_name(program_name), m_kernel_list(kernel_list) {} + public: + explicit DeviceCode(const std::string& source_code, const std::string& program_name, const std::vector& kernel_list) : m_source_code(source_code), m_prog_name(program_name), m_kernel_list(kernel_list) {} const std::string& getSourceCode() const { return m_source_code; } const std::string& getName() const { return m_prog_name; } const std::vector& getKernelList() const { return m_kernel_list; } -private: + + private: const std::string m_source_code; const std::string m_prog_name; - const std::vector m_kernel_list; + const std::vector m_kernel_list; }; diff --git a/rocAL/include/device/device_data_transfer_code.h b/rocAL/include/device/device_data_transfer_code.h index a752c6a1c..0d081397c 100644 --- a/rocAL/include/device/device_data_transfer_code.h +++ b/rocAL/include/device/device_data_transfer_code.h @@ -26,50 +26,48 @@ THE SOFTWARE. 
const static std::string data_transfer_program_name = "utility"; -const static std::vector data_transfer_kernel_names = {"copyInt8ToNHWC","copyInt8ToNCHW"}; +const static std::vector data_transfer_kernel_names = {"copyInt8ToNHWC", "copyInt8ToNCHW"}; const static std::string data_transfer_source = -"__kernel void copyInt8ToNHWC(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" -" if(c > 3 || c < 1) return;" -" int i = get_global_id(0);" -" unsigned channel_size = h*w;" -" unsigned size = channel_size*c;" -" if(i >= size) return;" -" unsigned channel_idx = i % c;" -" unsigned pixel_idx = i % channel_size;" -" float out_val = 0;" -" float multiplier[3] = {multiplier0, multiplier1, multiplier2};" -" float offset[3] = {offset0, offset1, offset2};" -" if(reverse_channels) {" -" out_val = multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " -" } else {" -" out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " -" }" -" out [out_offset + c*pixel_idx + channel_idx] = out_val;}" -"" -"" -"__kernel void copyInt8ToNCHW(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" -" if(c > 3 || c < 1) return;" -" int i = get_global_id(0);" -" unsigned channel_size = h*w;" -" unsigned size = channel_size*c;" -" if(i >= size) return; " -" unsigned channel_idx = i % c;" -" unsigned pixel_idx = i % channel_size;" -" float out_val = 0;" -" float multiplier[3] = {multiplier0, multiplier1, multiplier2};" -" float offset[3] = {offset0, offset1, offset2};" -" if(reverse_channels) {" -" out_val = 
multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " -" } else {" -" out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " -" }" -" out [out_offset + channel_idx*channel_size + pixel_idx] = out_val;}"; + "__kernel void copyInt8ToNHWC(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" + " if(c > 3 || c < 1) return;" + " int i = get_global_id(0);" + " unsigned channel_size = h*w;" + " unsigned size = channel_size*c;" + " if(i >= size) return;" + " unsigned channel_idx = i % c;" + " unsigned pixel_idx = i % channel_size;" + " float out_val = 0;" + " float multiplier[3] = {multiplier0, multiplier1, multiplier2};" + " float offset[3] = {offset0, offset1, offset2};" + " if(reverse_channels) {" + " out_val = multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " + " } else {" + " out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " + " }" + " out [out_offset + c*pixel_idx + channel_idx] = out_val;}" + "" + "" + "__kernel void copyInt8ToNCHW(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" + " if(c > 3 || c < 1) return;" + " int i = get_global_id(0);" + " unsigned channel_size = h*w;" + " unsigned size = channel_size*c;" + " if(i >= size) return; " + " unsigned channel_idx = i % c;" + " unsigned pixel_idx = i % channel_size;" + " float out_val = 0;" + " float multiplier[3] = {multiplier0, multiplier1, multiplier2};" + " float offset[3] = {offset0, offset1, offset2};" + " if(reverse_channels) {" + " out_val = 
multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " + " } else {" + " out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " + " }" + " out [out_offset + channel_idx*channel_size + pixel_idx] = out_val;}"; class OCLUtility : public DeviceCode { - public: - OCLUtility(): DeviceCode(data_transfer_source, data_transfer_program_name, data_transfer_kernel_names){} + public: + OCLUtility() : DeviceCode(data_transfer_source, data_transfer_program_name, data_transfer_kernel_names) {} // TODO : delete other implicit constructors }; - - diff --git a/rocAL/include/device/device_manager.h b/rocAL/include/device/device_manager.h index 357d87d12..bc64d5f78 100644 --- a/rocAL/include/device/device_manager.h +++ b/rocAL/include/device/device_manager.h @@ -35,25 +35,28 @@ struct DeviceResources { cl_context context; cl_device_id device_id; cl_command_queue cmd_queue; - DeviceResources() { cmd_queue = nullptr; context = nullptr; device_id = nullptr; } + DeviceResources() { + cmd_queue = nullptr; + context = nullptr; + device_id = nullptr; + } }; - class CLProgram { -public: - CLProgram(const DeviceResources* ocl, const DeviceCode& ocl_code): m_ocl(ocl), m_code(ocl_code) {} + public: + CLProgram(const DeviceResources* ocl, const DeviceCode& ocl_code) : m_ocl(ocl), m_code(ocl_code) {} ~CLProgram(); - cl_int runKernel(const std::string& kernel_name, const std::vector& args, const std::vector& argSize, const std::vector& globalWorkSize, const std::vector& localWorkSize); + cl_int runKernel(const std::string& kernel_name, const std::vector& args, const std::vector& argSize, const std::vector& globalWorkSize, const std::vector& localWorkSize); cl_int buildAll(); - const cl_kernel& operator[](const std::string& kernel_name) const ; + const cl_kernel& operator[](const std::string& kernel_name) const; std::string getProgramName(); -private: + private: const DeviceResources* m_ocl; const DeviceCode& m_code; 
@@ -61,17 +64,15 @@ class CLProgram { cl_program m_prog; std::map m_kernels; - }; - class DeviceManager { -public: + public: DeviceManager(){}; cl_int initialize(); - DeviceResources *resources(); + DeviceResources* resources(); const CLProgram& operator[](const std::string& prog_name); @@ -79,8 +80,7 @@ class DeviceManager { ~DeviceManager(); -private: - + private: DeviceResources _resources; std::map m_programs; diff --git a/rocAL/include/device/device_manager_hip.h b/rocAL/include/device/device_manager_hip.h index 6402598b0..2f9adce8c 100644 --- a/rocAL/include/device/device_manager_hip.h +++ b/rocAL/include/device/device_manager_hip.h @@ -33,11 +33,14 @@ struct DeviceResourcesHip { hipStream_t hip_stream; int device_id; hipDeviceProp_t dev_prop; - DeviceResourcesHip() { hip_stream = nullptr; device_id = -1;} + DeviceResourcesHip() { + hip_stream = nullptr; + device_id = -1; + } }; class DeviceManagerHip { -public: + public: DeviceManagerHip(){}; hipError_t initialize(); @@ -48,10 +51,8 @@ class DeviceManagerHip { ~DeviceManagerHip(); -private: - + private: DeviceResourcesHip _resources; - }; using pRocalHip = std::shared_ptr; diff --git a/rocAL/include/device/ocl_setup.h b/rocAL/include/device/ocl_setup.h index b8ff2516e..db1e7bc56 100644 --- a/rocAL/include/device/ocl_setup.h +++ b/rocAL/include/device/ocl_setup.h @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#pragma once #if ENABLE_OPENCL #include diff --git a/rocAL/include/loaders/audio/audio_loader.h b/rocAL/include/loaders/audio/audio_loader.h new file mode 100644 index 000000000..8fa87ee48 --- /dev/null +++ b/rocAL/include/loaders/audio/audio_loader.h @@ -0,0 +1,86 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include + +#include "loaders/audio/audio_read_and_decode.h" +#include "loaders/circular_buffer.h" +#include "pipeline/commons.h" +#include "meta_data/meta_data_reader.h" + +#ifdef ROCAL_AUDIO + +// AudioLoader runs an internal thread for loading and decoding of audios asynchronously +// It uses a circular buffer to store decoded audios for the user +class AudioLoader : public LoaderModule { + public: + explicit AudioLoader(void* dev_resources); + ~AudioLoader() override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_audio) override; + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning of the media + Timing timing() override; + void start_loading() override; + LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); + LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); + std::vector get_id() override; + DecodedDataInfo get_decode_data_info() override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void set_gpu_device_id(int device_id); + void shut_down() override; + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, + ExternalSourceFileMode mode, bool eos) override { THROW("external source feed is not supported in audio loader") } + + private: + bool is_out_of_data(); + void de_init(); + void stop_internal_thread(); + LoaderModuleStatus update_output_audio(); + LoaderModuleStatus load_routine(); + std::shared_ptr _audio_loader; + Tensor* _output_tensor; + std::vector _output_names; // audio file name/ids that are stored in the _output_tensor + 
MetaDataBatch* _meta_data = nullptr; // The output of the meta_data_graph + bool _internal_thread_running; + size_t _output_mem_size, _batch_size, _max_decoded_samples, _max_decoded_channels; + std::thread _load_thread; + RocalMemType _mem_type; + DecodedDataInfo _decoded_audio_info; + DecodedDataInfo _output_decoded_audio_info; + CircularBuffer _circ_buff; + TimingDbg _swap_handle_time; + bool _is_initialized; + bool _stopped = false; + bool _loop; // If true the reader will wrap around at the end of the media (files/audios/...) and wouldn't stop + size_t _prefetch_queue_depth = 0; // Used for circular buffer's internal buffer allocation + size_t _audio_counter = 0; // How many audios have been loaded already + size_t _remaining_audio_count; // How many audios are there yet to be loaded + int _device_id; +}; +#endif diff --git a/rocAL/include/loaders/audio/audio_loader_sharded.h b/rocAL/include/loaders/audio/audio_loader_sharded.h new file mode 100644 index 000000000..76eb63cad --- /dev/null +++ b/rocAL/include/loaders/audio/audio_loader_sharded.h @@ -0,0 +1,60 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "loaders/audio/audio_loader.h" + +#ifdef ROCAL_AUDIO + +// AudioLoaderSharded can be used to run load and decode in multiple shards, each shard by a single loader instance. +// It improves load and decode performance since each loader loads the audios in parallel using an internal thread +class AudioLoaderSharded : public LoaderModule { + public: + explicit AudioLoaderSharded(void* dev_resources); + ~AudioLoaderSharded() override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_audio) override; + size_t remaining_count() override; + void reset() override; + void start_loading() override; + std::vector get_id() override; + DecodedDataInfo get_decode_data_info() override; + Timing timing() override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void shut_down() override; + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, + ExternalSourceFileMode mode, bool eos) override { THROW("external source feed is not supported in audio loader") } + + private: + void increment_loader_idx(); + void fast_forward_through_empty_loaders(); + void* _dev_resources; + bool _initialized = false; + std::vector> _loaders; + size_t _loader_idx; + size_t _shard_count = 1; + size_t _prefetch_queue_depth = 0; + Tensor* _output_tensor = nullptr; +}; +#endif diff --git a/rocAL/include/loaders/audio/audio_read_and_decode.h 
b/rocAL/include/loaders/audio/audio_read_and_decode.h new file mode 100644 index 000000000..d121869e6 --- /dev/null +++ b/rocAL/include/loaders/audio/audio_read_and_decode.h @@ -0,0 +1,77 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include +#include + +#include "decoders/audio/audio_decoder.h" +#include "pipeline/commons.h" +#include "loaders/loader_module.h" +#include "readers/image/reader_factory.h" +#include "decoders/audio/generic_audio_decoder.h" +#include "pipeline/timing_debug.h" + +#ifdef ROCAL_AUDIO + +// Contains all the meta info for the audio file +struct AudioMetaInfo { + std::string file_name; // Name of audio file + std::string file_path; // Absolute path to the audio file + size_t samples; + size_t channels; + float sample_rate; +}; + +class AudioReadAndDecode { + public: + AudioReadAndDecode(); + ~AudioReadAndDecode(); + size_t Count(); + void Reset(); + void Create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id = 0); + /*! + \brief Loads a decompressed batch of audios into the buffer indicated by audio_buffer + \param audio_buffer User's buffer provided to be filled with decoded audio samples + \param audio_info DecodedDataInfo to be filled with name, samples, channels and sample rate of the decoded audio files + \param max_decoded_samples User's buffer maximum samples per decoded audio. + \param max_decoded_channels User's buffer maximum channels per decoded audio. 
+ \return status of decoding the audio files in the batch + */ + LoaderModuleStatus Load( + float *audio_buffer, + DecodedDataInfo& audio_info, + const size_t max_decoded_samples, + const size_t max_decoded_channels); + // returns timing info or other status information + Timing GetTiming(); + + private: + std::vector> _decoder; + std::shared_ptr _reader; + std::vector _decompressed_buff_ptrs; + std::vector _audio_meta_info; + TimingDbg _file_load_time, _decode_time; + size_t _batch_size, _num_threads; + DecoderConfig _decoder_config; +}; +#endif diff --git a/rocAL/include/loaders/audio/audio_source_evaluator.h b/rocAL/include/loaders/audio/audio_source_evaluator.h new file mode 100644 index 000000000..8be1fd805 --- /dev/null +++ b/rocAL/include/loaders/audio/audio_source_evaluator.h @@ -0,0 +1,50 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include +#include + +#include "loaders/loader_module.h" +#include "decoders/audio/generic_audio_decoder.h" + +#ifdef ROCAL_AUDIO + +enum class AudioSourceEvaluatorStatus { + OK = 0, + UNSUPPORTED_DECODER_TYPE, + UNSUPPORTED_STORAGE_TYPE +}; + +class AudioSourceEvaluator { + public: + AudioSourceEvaluatorStatus Create(ReaderConfig reader_cfg, DecoderConfig decoder_cfg); + void FindMaxDimension(); + size_t GetMaxSamples(); + size_t GetMaxChannels(); + + private: + int _samples_max = 0, _channels_max = 0; + std::shared_ptr _decoder; + std::shared_ptr _reader; +}; +#endif diff --git a/rocAL/include/loaders/audio/node_audio_loader.h b/rocAL/include/loaders/audio/node_audio_loader.h new file mode 100644 index 000000000..b3d04b68c --- /dev/null +++ b/rocAL/include/loaders/audio/node_audio_loader.h @@ -0,0 +1,61 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "loaders/audio/audio_loader_sharded.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" + +#ifdef ROCAL_AUDIO + +class AudioLoaderNode : public Node { + public: + /// \param device_resources Opaque pointer to the device resources used by the loader + /// internal_shard_count loaders/decoders are created (see Init), and each shard is loaded and decoded using separate and independent resources, increasing the parallelism and performance. + AudioLoaderNode(Tensor *output, void *device_resources); + ~AudioLoaderNode() override; + AudioLoaderNode() = delete; + /// \param internal_shard_count Defines the amount of parallelism user wants for the load and decode process to be handled internally. + /// \param source_path Defines the path that includes the Audio dataset + /// \param file_list_path Defines the path that contains the file list + /// \param storage_type Determines the storage type + /// \param decoder_type Determines the decoder_type + /// \param shuffle Determines if the user wants to shuffle the dataset or not. + /// \param loop Determines if the user wants to indefinitely loop through audios or not. + /// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size. + /// \param mem_type Memory type, host or device + /// \param meta_data_reader Determines the meta-data information + /// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count, + /// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available. 
+ void Init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, + const std::string &file_list_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, + size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader); + std::shared_ptr GetLoaderModule(); + + protected: + void create_node() override{}; + void update_node() override{}; + + private: + std::shared_ptr _loader_module = nullptr; +}; +#endif diff --git a/rocAL/include/loaders/audio/node_audio_loader_single_shard.h b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h new file mode 100644 index 000000000..851a4eb39 --- /dev/null +++ b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "loaders/audio/audio_loader_sharded.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" + +#ifdef ROCAL_AUDIO + +class AudioLoaderSingleShardNode : public Node { + public: + AudioLoaderSingleShardNode(Tensor *output, void *device_resources); + ~AudioLoaderSingleShardNode() override; + /// \param shard_count shard count from user + /// \param shard_id shard id from user + /// \param source_path Defines the path that includes the Audio dataset + /// \param file_list_path Defines the path that contains the file list + /// \param storage_type Determines the storage type + /// \param decoder_type Determines the decoder_type + /// \param shuffle Determines if the user wants to shuffle the dataset or not. + /// \param loop Determines if the user wants to indefinitely loop through audios or not. + /// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size. + /// \param mem_type Memory type, host or device + /// \param meta_data_reader Determines the meta-data information + /// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count, + /// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available. 
+ void Init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, + const std::string &file_list_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, + bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader); + std::shared_ptr GetLoaderModule(); + + protected: + void create_node() override{}; + void update_node() override{}; + + private: + std::shared_ptr _loader_module = nullptr; +}; +#endif diff --git a/rocAL/include/loaders/circular_buffer.h b/rocAL/include/loaders/circular_buffer.h index 0d97e2159..d39601ea9 100644 --- a/rocAL/include/loaders/circular_buffer.h +++ b/rocAL/include/loaders/circular_buffer.h @@ -21,69 +21,66 @@ THE SOFTWARE. */ #pragma once -#include #include +#include #if ENABLE_OPENCL - #include +#include #endif #include -#include "device_manager.h" -#include "device_manager_hip.h" -#include "commons.h" -struct decoded_image_info -{ - std::vector _image_names; + +#include "pipeline/commons.h" +#include "device/device_manager.h" +#include "device/device_manager_hip.h" +struct DecodedDataInfo { + std::vector _data_names; std::vector _roi_width; std::vector _roi_height; std::vector _original_width; std::vector _original_height; + std::vector _audio_samples; //! Number of samples in each decoded audio signal + std::vector _audio_channels; //! Number of audio channels in an audio signal + std::vector _audio_sample_rates; //! 
The number of samples of audio carried per second }; -struct crop_image_info -{ - //Batch of Image Crop Coordinates in "xywh" format +struct CropImageInfo { + // Batch of Image Crop Coordinates in "xywh" format std::vector> _crop_image_coords; }; -class CircularBuffer -{ -public: +class CircularBuffer { + public: CircularBuffer(void* devres); ~CircularBuffer(); void init(RocalMemType output_mem_type, size_t output_mem_size, size_t buff_depth); - void release(); // release resources - void sync();// Syncs device buffers with host - void unblock_reader();// Unblocks the thread currently waiting on a call to get_read_buffer - void unblock_writer();// Unblocks the thread currently waiting on get_write_buffer - void push();// The latest write goes through, effectively adds one element to the buffer - void pop();// The oldest write will be erased and overwritten in upcoming writes - void set_image_info(const decoded_image_info& info) { _last_image_info = info; } - void set_crop_image_info(const crop_image_info& info) { _last_crop_image_info = info; } - decoded_image_info& get_image_info(); - crop_image_info& get_cropped_image_info(); + void release(); // release resources + void sync(); // Syncs device buffers with host + void unblock_reader(); // Unblocks the thread currently waiting on a call to get_read_buffer + void unblock_writer(); // Unblocks the thread currently waiting on get_write_buffer + void push(); // The latest write goes through, effectively adds one element to the buffer + void pop(); // The oldest write will be erased and overwritten in upcoming writes + void set_decoded_data_info(const DecodedDataInfo& info) { _last_data_info = info; } + void set_crop_image_info(const CropImageInfo& info) { _last_crop_image_info = info; } + DecodedDataInfo& get_decoded_data_info(); + CropImageInfo& get_cropped_image_info(); bool random_bbox_crop_flag = false; void* get_read_buffer_dev(); - unsigned char* get_read_buffer_host();// blocks the caller if the buffer is 
empty - unsigned char* get_write_buffer(); // blocks the caller if the buffer is full - size_t level();// Returns the number of elements stored - void reset();// sets the buffer level to 0 - void block_if_empty();// blocks the caller if the buffer is empty - void block_if_full();// blocks the caller if the buffer is full + unsigned char* get_read_buffer_host(); // blocks the caller if the buffer is empty + unsigned char* get_write_buffer(); // blocks the caller if the buffer is full + size_t level(); // Returns the number of elements stored + void reset(); // sets the buffer level to 0 + void block_if_empty(); // blocks the caller if the buffer is empty + void block_if_full(); // blocks the caller if the buffer is full -private: + private: void increment_read_ptr(); void increment_write_ptr(); bool full(); bool empty(); size_t _buff_depth; - decoded_image_info _last_image_info; - std::queue _circ_image_info;//!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) - crop_image_info _last_crop_image_info; // for Random BBox crop coordinates - std::queue _circ_crop_image_info;//!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) + DecodedDataInfo _last_data_info; + std::queue _circ_buff_data_info; //!< Stores the loaded data names, decoded_width and decoded_height(data is stored in the _circ_buff) + CropImageInfo _last_crop_image_info; // for Random BBox crop coordinates + std::queue _circ_crop_image_info; //!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) std::mutex _names_buff_lock; - /* - * Pinned memory allocated on the host used for fast host to device memory transactions, - * or the regular host memory buffers in the host processing case. 
- */ #if ENABLE_HIP hipStream_t _hip_stream; int _hip_device_id, _hip_canMapHostMemory; @@ -92,9 +89,8 @@ class CircularBuffer cl_context _cl_context = nullptr; cl_device_id _device_id = nullptr; #endif - std::vector _dev_buffer;// Actual memory allocated on the device (in the case of GPU affinity) + std::vector _dev_buffer; // Actual memory allocated on the device (in the case of GPU affinity) std::vector _host_buffer_ptrs; - std::vector> _actual_host_buffers; std::condition_variable _wait_for_load; std::condition_variable _wait_for_unload; std::mutex _lock; diff --git a/rocAL/include/loaders/image/cifar10_data_loader.h b/rocAL/include/loaders/image/cifar10_data_loader.h index 83fad27aa..426bde1c5 100644 --- a/rocAL/include/loaders/image/cifar10_data_loader.h +++ b/rocAL/include/loaders/image/cifar10_data_loader.h @@ -22,31 +22,35 @@ THE SOFTWARE. #pragma once #include -#include "image_loader.h" -#include "reader_factory.h" -#include "timing_debug.h" -#include "cifar10_data_reader.h" -class CIFAR10DataLoader : public LoaderModule -{ -public: +#include "readers/image/cifar10_data_reader.h" +#include "loaders/image/image_loader.h" +#include "readers/image/reader_factory.h" +#include "pipeline/timing_debug.h" + +class CIFAR10DataLoader : public LoaderModule { + public: explicit CIFAR10DataLoader(void *dev_resources); ~CIFAR10DataLoader() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=true) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = true) override; + void set_output(Tensor *output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; size_t remaining_count() override; void reset() override; void start_loading() override; 
std::vector get_id() override; - decoded_image_info get_decode_image_info() override; - crop_image_info get_crop_image_info() override; + DecodedDataInfo get_decode_data_info() override; + CropImageInfo get_crop_image_info() override; Timing timing() override; - void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; + std::vector> &get_batch_random_bbox_crop_coords(); + void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} -private: + private: void increment_loader_idx(); bool is_out_of_data(); void de_init(); @@ -55,8 +59,6 @@ class CIFAR10DataLoader : public LoaderModule LoaderModuleStatus load_routine(); std::shared_ptr _reader; void *_dev_resources; - decoded_image_info _raw_img_info; // image info to store the names. 
In this case the ID of image is stored in _roi_width field - decoded_image_info _output_decoded_img_info; bool _initialized = false; RocalMemType _mem_type; size_t _output_mem_size; @@ -69,15 +71,18 @@ class CIFAR10DataLoader : public LoaderModule std::vector _output_names; CircularBuffer _circ_buff; size_t _prefetch_queue_depth; - TimingDBG _file_load_time, _swap_handle_time; + TimingDbg _file_load_time, _swap_handle_time; size_t _loader_idx; size_t _shard_count = 1; void fast_forward_through_empty_loaders(); bool _is_initialized; bool _stopped = false; - bool _loop;// _randombboxcrop_meta_data_reader = nullptr; -}; \ No newline at end of file + std::vector> _bbox_coords, _crop_coords_batch; + CropImageInfo _crop_image_info; + CropImageInfo _output_cropped_image_info; +}; diff --git a/rocAL/include/loaders/image/image_loader.h b/rocAL/include/loaders/image/image_loader.h index 91a1d2b9d..3b112a4b7 100644 --- a/rocAL/include/loaders/image/image_loader.h +++ b/rocAL/include/loaders/image/image_loader.h @@ -25,34 +25,39 @@ THE SOFTWARE. 
#include #include #include -#include "commons.h" -#include "circular_buffer.h" + +#include "loaders/circular_buffer.h" +#include "pipeline/commons.h" #include "image_read_and_decode.h" -#include "meta_data_reader.h" +#include "meta_data/meta_data_reader.h" // // ImageLoader runs an internal thread for loading an decoding of images asynchronously // it uses a circular buffer to store decoded frames and images for the user class ImageLoader : public LoaderModule { -public: - explicit ImageLoader(void *dev_resources); + public: + explicit ImageLoader(void* dev_resources); ~ImageLoader() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=false) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; - size_t remaining_count() override; // returns number of remaining items to be loaded - void reset() override; // Resets the loader to load from the beginning of the media + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning of the media Timing timing() override; void start_loading() override; LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); void set_gpu_device_id(int device_id); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; - crop_image_info get_crop_image_info() override; - void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + DecodedDataInfo get_decode_data_info() override; + CropImageInfo 
get_crop_image_info() override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; -private: + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override; + size_t last_batch_padded_size() override; + + private: bool is_out_of_data(); void de_init(); void stop_internal_thread(); @@ -61,28 +66,30 @@ class ImageLoader : public LoaderModule { LoaderModuleStatus load_routine(); std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; - Image* _output_image; - std::vector _output_names;//!< image name/ids that are stores in the _output_image + Tensor* _output_tensor; + std::vector _output_names; //!< image name/ids that are stores in the _output_image size_t _output_mem_size; - MetaDataBatch* _meta_data = nullptr;//!< The output of the meta_data_graph, - std::vector> _bbox_coords; + MetaDataBatch* _meta_data = nullptr; //!< The output of the meta_data_graph, + std::vector> _bbox_coords; bool _internal_thread_running; size_t _batch_size; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; - crop_image_info _crop_image_info; - decoded_image_info _output_decoded_img_info; - crop_image_info _output_cropped_img_info; + CropImageInfo _crop_image_info; + CropImageInfo _output_cropped_img_info; CircularBuffer _circ_buff; - TimingDBG _swap_handle_time; + TimingDbg _swap_handle_time; bool _is_initialized; bool _stopped = false; - bool _loop;// + #include "image_loader.h" // // ImageLoaderSharded Can be used to run load and decode in multiple shards, each shard by a single loader instance, // It improves load and decode performance since each loader loads the images in parallel using an internal thread // -class ImageLoaderSharded : public LoaderModule -{ -public: +class ImageLoaderSharded : public 
LoaderModule { + public: explicit ImageLoaderSharded(void *dev_resources); ~ImageLoaderSharded() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=false) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor *output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; size_t remaining_count() override; void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; - crop_image_info get_crop_image_info() override; + DecodedDataInfo get_decode_data_info() override; + CropImageInfo get_crop_image_info() override; Timing timing() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; -private: + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override; + size_t last_batch_padded_size() override; + + private: void increment_loader_idx(); void *_dev_resources; bool _initialized = false; @@ -55,6 +59,6 @@ class ImageLoaderSharded : public LoaderModule void fast_forward_through_empty_loaders(); size_t _prefetch_queue_depth; - Image *_output_image; + Tensor *_output_tensor; std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/image_read_and_decode.h b/rocAL/include/loaders/image/image_read_and_decode.h index 3286e4a95..77883d361 100644 --- a/rocAL/include/loaders/image/image_read_and_decode.h +++ 
b/rocAL/include/loaders/image/image_read_and_decode.h @@ -22,39 +22,33 @@ THE SOFTWARE. #pragma once #include -#include + #include -#include "commons.h" -#include "turbo_jpeg_decoder.h" -#include "reader_factory.h" -#include "timing_debug.h" -#include "loader_module.h" -#include "parameter_random_crop_decoder.h" +#include -/** - * Compute the scaled value of dimension using the given scaling - * factor. This macro performs the integer equivalent of ceil(dimension * - * scalingFactor). - */ -#define TJSCALED(dimension, scalingFactor) \ - ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ - scalingFactor.denom) +#include "pipeline/commons.h" +#include "loaders/loader_module.h" +#include "parameters/parameter_random_crop_decoder.h" +#include "readers/image/reader_factory.h" +#include "pipeline/timing_debug.h" +#include "decoders/image/turbo_jpeg_decoder.h" -class ImageReadAndDecode -{ -public: + +class ImageReadAndDecode { + public: ImageReadAndDecode(); ~ImageReadAndDecode(); size_t count(); void reset(); - void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id=0); - void set_bbox_vector(std::vector> bbox_coords) { _bbox_coords = bbox_coords;}; + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id = 0); + void set_bbox_vector(std::vector> bbox_coords) { _bbox_coords = bbox_coords; }; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader); - std::vector> get_batch_random_bbox_crop_coords(); - void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); - + std::vector> &get_batch_random_bbox_crop_coords(); + void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos); //! 
Loads a decompressed batch of images into the buffer indicated by buff - /// \param buff User's buffer provided to be filled with decoded image samples + /// \param buff User's buffer provided to be filled with decoded image data /// \param names User's buffer provided to be filled with name of the images decoded /// \param max_decoded_width User's buffer maximum width per decoded image. User expects the decoder to downscale the image if image's original width is bigger than max_width /// \param max_decoded_height user's buffer maximum height per decoded image. User expects the decoder to downscale the image if image's original height is bigger than max_height @@ -62,40 +56,41 @@ class ImageReadAndDecode /// \param roi_height is set by the load() function tp the width of the region that decoded image is located.It's less than max_height and is either equal to the original image height if original image height is smaller than max_height or downscaled if necessary to fit the max_height criterion. /// \param output_color_format defines what color format user expects decoder to decode images into if capable of doing so supported is LoaderModuleStatus load( - unsigned char* buff, - std::vector& names, - const size_t max_decoded_width, - const size_t max_decoded_height, - std::vector &roi_width, - std::vector &roi_height, - std::vector &actual_width, - std::vector &actual_height, - RocalColorFormat output_color_format, - bool decoder_keep_original=false); + unsigned char *buff, + std::vector &names, + const size_t max_decoded_width, + const size_t max_decoded_height, + std::vector &roi_width, + std::vector &roi_height, + std::vector &actual_width, + std::vector &actual_height, + RocalColorFormat output_color_format, + bool decoder_keep_original = false); //! 
returns timing info or other status information Timing timing(); + size_t last_batch_padded_size(); -private: + private: std::vector> _decoder; std::shared_ptr _reader; std::vector> _compressed_buff; std::vector _actual_read_size; std::vector _image_names; std::vector _compressed_image_size; - std::vector _decompressed_buff_ptrs; + std::vector _decompressed_buff_ptrs; std::vector _actual_decoded_width; std::vector _actual_decoded_height; std::vector _original_width; std::vector _original_height; - static const size_t MAX_COMPRESSED_SIZE = 1*1024*1024; // 1 Meg - TimingDBG _file_load_time, _decode_time; + static const size_t MAX_COMPRESSED_SIZE = 1 * 1024 * 1024; // 1 Meg + TimingDbg _file_load_time, _decode_time; size_t _batch_size, _shard_count, _num_threads; DecoderConfig _decoder_config; bool decoder_keep_original; - std::vector> _bbox_coords, _crop_coords_batch; + std::vector> _bbox_coords, _crop_coords_batch; std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; pCropCord _CropCord; RocalRandomCropDecParam *_random_crop_dec_param = nullptr; + bool _is_external_source = false; }; - diff --git a/rocAL/include/loaders/image/loader_module.h b/rocAL/include/loaders/image/loader_module.h deleted file mode 100644 index e0ad2c451..000000000 --- a/rocAL/include/loaders/image/loader_module.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once -#include -#include "image_reader.h" -#include "decoder.h" -#include "commons.h" -#include "image.h" -#include "circular_buffer.h" -#include "meta_data_reader.h" -#include "meta_data_graph.h" - -enum class LoaderModuleStatus -{ - OK = 0, - DEVICE_BUFFER_SWAP_FAILED, - HOST_BUFFER_SWAP_FAILED, - NO_FILES_TO_READ, - DECODE_FAILED, - NO_MORE_DATA_TO_READ, - NOT_INITIALIZED -}; - -/*! \class LoaderModule The interface defining the API and requirements of loader modules*/ -class LoaderModule -{ -public: - virtual void initialize(ReaderConfig reader_config, DecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image* output_image) = 0; - virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output_image - virtual void reset() = 0; // Resets the loader to load from the beginning of the media - virtual size_t remaining_count() = 0; // Returns the number of available images to be loaded - virtual ~LoaderModule()= default; - virtual Timing timing() = 0;// Returns timing info - virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded - virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual crop_image_info get_crop_image_info() = 0; - virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; - // introduce meta data 
reader - virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) = 0; - virtual void shut_down() = 0; -}; - -using pLoaderModule = std::shared_ptr; \ No newline at end of file diff --git a/rocAL/include/loaders/image/node_cifar10_loader.h b/rocAL/include/loaders/image/node_cifar10_loader.h index ffe701c0c..a6bd31e4d 100644 --- a/rocAL/include/loaders/image/node_cifar10_loader.h +++ b/rocAL/include/loaders/image/node_cifar10_loader.h @@ -21,17 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" #include "cifar10_data_loader.h" -#include "graph.h" +#include "pipeline/graph.h" +#include "pipeline/node.h" -class Cifar10LoaderNode: public Node -{ -public: +class Cifar10LoaderNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. - Cifar10LoaderNode(Image *output, void *device_resources); + Cifar10LoaderNode(Tensor *output, void *device_resources); ~Cifar10LoaderNode() override; Cifar10LoaderNode() = delete; /// @@ -40,12 +39,14 @@ class Cifar10LoaderNode: public Node /// \param load_batch_count Defines the quantum count of the images to be loaded. It's usually equal to the user's batch size. /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
- void init( const std::string &source_path, const std::string &json_path, StorageType storage_type, bool loop, size_t load_batch_count, RocalMemType mem_type, const std::string &file_prefix); + void init(const std::string &source_path, const std::string &json_path, StorageType storage_type, bool loop, size_t load_batch_count, RocalMemType mem_type, const std::string &file_prefix); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/node_fused_jpeg_crop.h b/rocAL/include/loaders/image/node_fused_jpeg_crop.h index fa4558882..841b7f091 100644 --- a/rocAL/include/loaders/image/node_fused_jpeg_crop.h +++ b/rocAL/include/loaders/image/node_fused_jpeg_crop.h @@ -21,18 +21,17 @@ THE SOFTWARE. */ #pragma once -#include "node.h" +#include "pipeline/graph.h" #include "image_loader_sharded.h" -#include "graph.h" -#include "parameter_factory.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" -class FusedJpegCropNode: public Node -{ -public: +class FusedJpegCropNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. - FusedJpegCropNode(Image *output, void *device_resources_hip); + FusedJpegCropNode(Tensor *output, void *device_resources_hip); ~FusedJpegCropNode() override; FusedJpegCropNode() = delete; /// @@ -43,13 +42,15 @@ class FusedJpegCropNode: public Node /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
void init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, - unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio); + unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio, std::pair last_batch_info = {RocalBatchPolicy::FILL, true}); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts; diff --git a/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h b/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h index cd4f23e75..87e3a1d80 100644 --- a/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h +++ b/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h @@ -21,15 +21,14 @@ THE SOFTWARE. */ #pragma once -#include "node.h" +#include "pipeline/graph.h" #include "image_loader_sharded.h" -#include "graph.h" -#include "parameter_factory.h" +#include "pipeline/node.h" +#include "parameters/parameter_factory.h" -class FusedJpegCropSingleShardNode: public Node -{ -public: - FusedJpegCropSingleShardNode(Image *output, void *device_resources); +class FusedJpegCropSingleShardNode : public Node { + public: + FusedJpegCropSingleShardNode(Tensor *output, void *device_resources); ~FusedJpegCropSingleShardNode() override; /// \param user_shard_count shard count from user @@ -40,13 +39,15 @@ class FusedJpegCropSingleShardNode: public Node /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
void init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, - unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio); + unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio, std::pair last_batch_info = {RocalBatchPolicy::FILL, true}); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts; diff --git a/rocAL/include/loaders/image/node_image_loader.h b/rocAL/include/loaders/image/node_image_loader.h index 84afd1673..43b847650 100644 --- a/rocAL/include/loaders/image/node_image_loader.h +++ b/rocAL/include/loaders/image/node_image_loader.h @@ -21,17 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" +#include "pipeline/graph.h" #include "image_loader_sharded.h" -#include "graph.h" +#include "pipeline/node.h" -class ImageLoaderNode : public Node -{ -public: +class ImageLoaderNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. 
- ImageLoaderNode(Image *output, void *device_resources); + ImageLoaderNode(Tensor *output, void *device_resources); ~ImageLoaderNode() override; ImageLoaderNode() = delete; /// @@ -41,12 +40,14 @@ class ImageLoaderNode : public Node /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. void init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, const std::map feature_key_map, StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, - size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, bool decoder_keep_orig = false, const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); + size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, bool decoder_keep_orig = false, std::pair last_batch_info = {RocalBatchPolicy::FILL, true}, const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0, ExternalSourceFileMode external_file_mode = ExternalSourceFileMode::NONE); std::shared_ptr get_loader_module(); -protected: + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/node_image_loader_single_shard.h b/rocAL/include/loaders/image/node_image_loader_single_shard.h index 61cb941ae..30cffe1ad 100644 --- a/rocAL/include/loaders/image/node_image_loader_single_shard.h +++ b/rocAL/include/loaders/image/node_image_loader_single_shard.h @@ -21,14 +21,13 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" +#include "pipeline/graph.h" #include "image_loader_sharded.h" -#include "graph.h" +#include "pipeline/node.h" -class ImageLoaderSingleShardNode : public Node -{ -public: - ImageLoaderSingleShardNode(Image *output, void *device_resources); +class ImageLoaderSingleShardNode : public Node { + public: + ImageLoaderSingleShardNode(Tensor *output, void *device_resources); ~ImageLoaderSingleShardNode() override; /// \param user_shard_count shard count from user @@ -38,13 +37,15 @@ class ImageLoaderSingleShardNode : public Node /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. void init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &json_path, StorageType storage_type, DecoderType decoder_type, - bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, bool decoder_keep_orig = false, - const std::map feature_key_map = std::map(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); + bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, bool decoder_keep_orig = false, std::pair last_batch_info = {RocalBatchPolicy::FILL, true}, + const std::map feature_key_map = std::map(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0, ExternalSourceFileMode external_file_mode = ExternalSourceFileMode::NONE); std::shared_ptr get_loader_module(); -protected: + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image_source_evaluator.h b/rocAL/include/loaders/image_source_evaluator.h index 
48a7a42e8..03d172d32 100644 --- a/rocAL/include/loaders/image_source_evaluator.h +++ b/rocAL/include/loaders/image_source_evaluator.h @@ -21,53 +21,50 @@ THE SOFTWARE. */ #pragma once -#include #include -#include "turbo_jpeg_decoder.h" -#include "reader_factory.h" -#include "timing_debug.h" +#include + #include "loader_module.h" -enum class ImageSourceEvaluatorStatus -{ +#include "readers/image/reader_factory.h" +#include "pipeline/timing_debug.h" +#include "decoders/image/turbo_jpeg_decoder.h" +enum class ImageSourceEvaluatorStatus { OK = 0, - UNSUPPORTED_DECODER_TYPE, + UNSUPPORTED_DECODER_TYPE, UNSUPPORTED_STORAGE_TYPE, }; -enum class MaxSizeEvaluationPolicy -{ +enum class MaxSizeEvaluationPolicy { MAXIMUM_FOUND_SIZE, MOST_FREQUENT_SIZE }; -class ImageSourceEvaluator -{ -public: +class ImageSourceEvaluator { + public: ImageSourceEvaluatorStatus create(ReaderConfig reader_cfg, DecoderConfig decoder_cfg); void find_max_dimension(); void set_size_evaluation_policy(MaxSizeEvaluationPolicy arg); size_t max_width(); size_t max_height(); -private: - class FindMaxSize - { - public: + private: + class FindMaxSize { + public: void set_policy(MaxSizeEvaluationPolicy arg) { _policy = arg; } void process_sample(unsigned val); unsigned get_max() { return _max; }; - private: + + private: MaxSizeEvaluationPolicy _policy = MaxSizeEvaluationPolicy::MOST_FREQUENT_SIZE; - std::map _hist; + std::map _hist; unsigned _max = 0; unsigned _max_count = 0; - }; - FindMaxSize _width_max; + }; + FindMaxSize _width_max; FindMaxSize _height_max; DecoderConfig _decoder_cfg_cv; std::shared_ptr _decoder; std::shared_ptr _reader; std::shared_ptr _meta_data_reader; std::vector _header_buff; - static const size_t COMPRESSED_SIZE = 1024 * 1024; // 1 MB + static const size_t COMPRESSED_SIZE = 1024 * 1024; // 1 MB }; - diff --git a/rocAL/include/loaders/loader_module.h b/rocAL/include/loaders/loader_module.h new file mode 100644 index 000000000..de9d5aba0 --- /dev/null +++ 
b/rocAL/include/loaders/loader_module.h @@ -0,0 +1,73 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include + +#include "readers/image/image_reader.h" +#include "circular_buffer.h" +#include "pipeline/commons.h" +#include "decoders/image/decoder.h" +#include "meta_data/meta_data_graph.h" +#include "meta_data/meta_data_reader.h" +#include "pipeline/tensor.h" + +enum class LoaderModuleStatus { + OK = 0, + DEVICE_BUFFER_SWAP_FAILED, + HOST_BUFFER_SWAP_FAILED, + NO_FILES_TO_READ, + DECODE_FAILED, + NO_MORE_DATA_TO_READ, + NOT_INITIALIZED +}; + +/*! 
\class LoaderModule The interface defining the API and requirements of loader modules*/ +class LoaderModule { + public: + virtual void initialize(ReaderConfig reader_config, DecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; + virtual void set_output(Tensor* output_tensor) = 0; + virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output + virtual void reset() = 0; // Resets the loader to load from the beginning of the media + virtual size_t remaining_count() = 0; // Returns the number of available images to be loaded + virtual ~LoaderModule() = default; + virtual Timing timing() = 0; // Returns timing info + virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded + virtual void start_loading() = 0; // starts internal loading thread + virtual DecodedDataInfo get_decode_data_info() = 0; + virtual CropImageInfo get_crop_image_info() { return {}; } + virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; + // introduce meta data reader + virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) { THROW("set_random_bbox_data_reader is not compatible with this implementation") } + virtual void shut_down() = 0; + virtual std::vector get_sequence_start_frame_number() { return {}; } + virtual std::vector> get_sequence_frame_timestamps() { return {}; } + // External Source reader + virtual void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, + unsigned int channels, ExternalSourceFileMode mode, bool eos) = 0; + virtual size_t last_batch_padded_size() { return 0; } + protected: + DecodedDataInfo _decoded_data_info, _output_decoded_data_info; // Stores the decoded data info +}; + +using pLoaderModule = std::shared_ptr; \ No newline at end of file 
diff --git a/rocAL/include/loaders/video/node_video_loader.h b/rocAL/include/loaders/video/node_video_loader.h index 41625cb2d..9b4c832f4 100644 --- a/rocAL/include/loaders/video/node_video_loader.h +++ b/rocAL/include/loaders/video/node_video_loader.h @@ -21,16 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "video_loader_sharded.h" -#include "graph.h" #include +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "video_loader_sharded.h" + #ifdef ROCAL_VIDEO -class VideoLoaderNode : public Node -{ -public: - VideoLoaderNode(Image *output, void * device_resources); +class VideoLoaderNode : public Node { + public: + VideoLoaderNode(Tensor *output, void *device_resources); ~VideoLoaderNode() override; VideoLoaderNode() = delete; /// @@ -39,13 +39,15 @@ class VideoLoaderNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. /// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. 
- void init(unsigned internal_shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned internal_shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); -protected: + std::shared_ptr get_loader_module(); + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: DecodeMode _decode_mode = DecodeMode::CPU; std::shared_ptr _loader_module = nullptr; }; diff --git a/rocAL/include/loaders/video/node_video_loader_single_shard.h b/rocAL/include/loaders/video/node_video_loader_single_shard.h index a463ed31d..19969908c 100644 --- a/rocAL/include/loaders/video/node_video_loader_single_shard.h +++ b/rocAL/include/loaders/video/node_video_loader_single_shard.h @@ -21,16 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "video_loader_sharded.h" -#include "graph.h" #include +#include "pipeline/graph.h" +#include "pipeline/node.h" +#include "video_loader_sharded.h" + #ifdef ROCAL_VIDEO -class VideoLoaderSingleShardNode : public Node -{ -public: - VideoLoaderSingleShardNode(Image *output, void *device_resources); +class VideoLoaderSingleShardNode : public Node { + public: + VideoLoaderSingleShardNode(Tensor *output, void *device_resources); ~VideoLoaderSingleShardNode() override; /// \param user_shard_count shard count from user @@ -39,15 +39,17 @@ class VideoLoaderSingleShardNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. 
/// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. - void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: - DecodeMode _decode_mode = DecodeMode::CPU; + std::shared_ptr get_loader_module(); + + protected: + void create_node() override{}; + void update_node() override{}; + + private: + DecodeMode _decode_mode = DecodeMode::CPU; std::shared_ptr _loader_module = nullptr; }; #endif diff --git a/rocAL/include/loaders/video/video_loader.h b/rocAL/include/loaders/video/video_loader.h index caa1092ee..893e91526 100644 --- a/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/include/loaders/video/video_loader.h @@ -25,8 +25,9 @@ THE SOFTWARE. #include #include #include -#include "commons.h" -#include "circular_buffer.h" + +#include "loaders/circular_buffer.h" +#include "pipeline/commons.h" #include "video_read_and_decode.h" #ifdef ROCAL_VIDEO @@ -34,55 +35,57 @@ THE SOFTWARE. 
// // VideoLoader runs an internal thread for loading an decoding of sequences asynchronously // it uses a circular buffer to store decoded sequence of frames for the user -class VideoLoader : public VideoLoaderModule -{ -public: - explicit VideoLoader(void * dev_resources); +class VideoLoader : public LoaderModule { + public: + explicit VideoLoader(void* dev_resources); ~VideoLoader() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; - void set_output_image(Image *output_image) override; - size_t remaining_count() override; // returns number of remaining items to be loaded - void reset() override; // Resets the loader to load from the beginning + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_image) override; + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning Timing timing() override; void start_loading() override; - VideoLoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); - VideoLoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); + LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); + LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + DecodedDataInfo get_decode_data_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + CropImageInfo get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; std::vector get_sequence_start_frame_number() override; 
std::vector> get_sequence_frame_timestamps() override; void shut_down() override; + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} -private: + private: bool is_out_of_data(); void de_init(); void stop_internal_thread(); std::shared_ptr _video_loader; - VideoLoaderModuleStatus update_output_image(); - VideoLoaderModuleStatus load_routine(); - Image *_output_image; - std::vector _output_names; //!< frame name/ids that are stored in the _output_image + LoaderModuleStatus update_output_image(); + LoaderModuleStatus load_routine(); + Tensor* _output_tensor; + std::vector _output_names; //!< frame name/ids that are stored in the _output_image size_t _output_mem_size; bool _internal_thread_running; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; - decoded_image_info _output_decoded_img_info; CircularBuffer _circ_buff; - TimingDBG _swap_handle_time; + TimingDbg _swap_handle_time; bool _is_initialized; bool _stopped = false; - bool _loop; //> _sequence_start_framenum_vec; std::vector>> _sequence_frame_timestamps_vec; + CropImageInfo _crop_img_info; + size_t _max_tensor_width, _max_tensor_height; }; #endif diff --git a/rocAL/include/loaders/video/video_loader_module.h b/rocAL/include/loaders/video/video_loader_module.h deleted file mode 100644 index 73e6cdf30..000000000 --- a/rocAL/include/loaders/video/video_loader_module.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once -#include -#include "video_reader.h" -#include "video_decoder.h" -#include "commons.h" -#include "image.h" -#include "circular_buffer.h" -#include "meta_data_reader.h" -#include "meta_data_graph.h" - -#ifdef ROCAL_VIDEO -enum class VideoLoaderModuleStatus -{ - OK = 0, - DEVICE_BUFFER_SWAP_FAILED, - HOST_BUFFER_SWAP_FAILED, - NO_FILES_TO_READ, - DECODE_FAILED, - NO_MORE_DATA_TO_READ, - NOT_INITIALIZED -}; - -/*! 
\class VideoLoaderModule The interface defining the API and requirements of loader modules*/ -class VideoLoaderModule -{ -public: - virtual void initialize(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image *output_image) = 0; - virtual VideoLoaderModuleStatus load_next() = 0; // Loads the next sequence of frames into the buffer set by calling into the set_output_image - virtual void reset() = 0; // Resets the loader to load from the beginning of the video files - virtual size_t remaining_count() = 0; // Returns the number of available frames to be loaded - virtual ~VideoLoaderModule() = default; - virtual Timing timing() = 0; // Returns timing info - virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded - virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; - virtual std::vector get_sequence_start_frame_number() = 0; - virtual std::vector> get_sequence_frame_timestamps() = 0; - virtual void shut_down() = 0; -}; - -using pVideoLoaderModule = std::shared_ptr; -#endif diff --git a/rocAL/include/loaders/video/video_loader_sharded.h b/rocAL/include/loaders/video/video_loader_sharded.h index 44b79ebcb..37a570b75 100644 --- a/rocAL/include/loaders/video/video_loader_sharded.h +++ b/rocAL/include/loaders/video/video_loader_sharded.h @@ -22,39 +22,45 @@ THE SOFTWARE. 
#pragma once #include + #include "video_loader.h" // // VideoLoaderSharded Can be used to run load and decode in multiple shards, each shard by a single loader instance, // It improves load and decode performance since each loader loads the sequences in parallel using an internal thread // #ifdef ROCAL_VIDEO -class VideoLoaderSharded : public VideoLoaderModule -{ -public: - explicit VideoLoaderSharded(void *dev_resources); +class VideoLoaderSharded : public LoaderModule { + public: + explicit VideoLoaderSharded(void* dev_resources); ~VideoLoaderSharded() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; void shut_down() override; - void set_output_image(Image *output_image) override; + void set_output(Tensor* output_image) override; size_t remaining_count() override; void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + DecodedDataInfo get_decode_data_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + CropImageInfo get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; std::vector get_sequence_start_frame_number() override; std::vector> get_sequence_frame_timestamps() override; Timing timing() override; -private: + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} + + private: void 
increment_loader_idx(); - void *_dev_resources; + void* _dev_resources; bool _initialized = false; std::vector> _loaders; size_t _loader_idx; size_t _shard_count = 1; void fast_forward_through_empty_loaders(); - size_t _prefetch_queue_depth; // Used for circular buffer's internal buffer - Image *_output_image; + size_t _prefetch_queue_depth; // Used for circular buffer's internal buffer + Tensor* _output_tensor; + CropImageInfo _crop_img_info; }; #endif diff --git a/rocAL/include/loaders/video/video_read_and_decode.h b/rocAL/include/loaders/video/video_read_and_decode.h index 0e5bb6d55..4695289d8 100644 --- a/rocAL/include/loaders/video/video_read_and_decode.h +++ b/rocAL/include/loaders/video/video_read_and_decode.h @@ -28,30 +28,28 @@ THE SOFTWARE. #include #include #include -#include "commons.h" -#include "ffmpeg_video_decoder.h" -#include "video_reader_factory.h" -#include "timing_debug.h" -#include "filesystem.h" +#include "pipeline/commons.h" +#include "decoders/video/ffmpeg_video_decoder.h" +#include "readers/video/video_reader_factory.h" +#include "pipeline/timing_debug.h" +#include "loaders/loader_module.h" +#include "readers/video/video_properties.h" +#include "readers/video/video_reader.h" +#include "pipeline/filesystem.h" -#include "video_loader_module.h" -#include "video_properties.h" #ifdef ROCAL_VIDEO -extern "C" -{ +extern "C" { #include } -class VideoReadAndDecode -{ -public: +class VideoReadAndDecode { + public: VideoReadAndDecode(); ~VideoReadAndDecode(); size_t count(); void reset(); - void create(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, int batch_size); - void set_video_process_count(size_t video_count) - { + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size); + void set_video_process_count(size_t video_count) { _video_process_count = (video_count <= _max_video_count) ? 
video_count : _max_video_count; } float convert_framenum_to_timestamp(size_t frame_number); @@ -67,7 +65,7 @@ class VideoReadAndDecode /// \param sequence_start_framenum_vec is set by the load() function. The starting frame number of the sequences will be updated. /// \param sequence_frame_timestamps_vec is set by the load() function. The timestamps of each of the frames in the sequences will be updated. /// \param output_color_format defines what color format user expects decoder to decode frames into if capable of doing so supported is - VideoLoaderModuleStatus load( + LoaderModuleStatus load( unsigned char *buff, std::vector &names, const size_t max_decoded_width, @@ -82,9 +80,9 @@ class VideoReadAndDecode //! returns timing info or other status information Timing timing(); -private: - struct video_map - { + + private: + struct video_map { int _video_map_idx; bool _is_decoder_instance; }; @@ -101,9 +99,8 @@ class VideoReadAndDecode std::vector _sequence_start_frame_num; std::vector _sequence_video_path; std::vector _sequence_video_idx; - TimingDBG _file_load_time, _decode_time; + TimingDbg _file_load_time, _decode_time; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; size_t _stride; size_t _video_count; @@ -112,6 +109,6 @@ class VideoReadAndDecode size_t _max_decoded_height; size_t _max_decoded_stride; AVPixelFormat _out_pix_fmt; - VideoDecoderConfig _video_decoder_config; + DecoderConfig _video_decoder_config; }; -#endif +#endif \ No newline at end of file diff --git a/rocAL/include/meta_data/augmentations_meta_nodes.h b/rocAL/include/meta_data/augmentations_meta_nodes.h index 7e6f5a240..6f268a803 100644 --- a/rocAL/include/meta_data/augmentations_meta_nodes.h +++ b/rocAL/include/meta_data/augmentations_meta_nodes.h @@ -22,11 +22,12 @@ THE SOFTWARE. 
#pragma once -#include "meta_node_crop_mirror_normalize.h" -#include "meta_node_resize.h" -#include "meta_node_crop_resize.h" -#include "meta_node_crop.h" -#include "meta_node_resize_crop_mirror.h" -#include "meta_node_rotate.h" -#include "meta_node_ssd_random_crop.h" -#include "meta_node_flip.h" +#include "meta_data/meta_node_crop.h" +#include "meta_data/meta_node_crop_mirror_normalize.h" +#include "meta_data/meta_node_crop_resize.h" +#include "meta_data/meta_node_flip.h" +#include "meta_data/meta_node_resize.h" +#include "meta_data/meta_node_resize_crop_mirror.h" +#include "meta_data/meta_node_resize_mirror_normalize.h" +#include "meta_data/meta_node_rotate.h" +#include "meta_data/meta_node_ssd_random_crop.h" diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index c2a46eb4d..d4bbf5e31 100644 --- a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -22,13 +22,17 @@ THE SOFTWARE. #pragma once #include -#include "meta_data_graph.h" -#include "meta_node.h" -class BoundingBoxGraph : public MetaDataGraph -{ -public: - void process(MetaDataBatch* meta_data) override; - void update_random_bbox_meta_data(MetaDataBatch* meta_data, decoded_image_info decoded_image_info,crop_image_info crop_image_info) override; - void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data ,float criteria, bool offset , float scale, std::vector& means, std::vector& stds) override; -}; +#include "meta_data/meta_data_graph.h" +#include "meta_data/meta_node.h" + +typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh; + +class BoundingBoxGraph : public MetaDataGraph { + public: + void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) override; + void update_meta_data(pMetaDataBatch meta_data, DecodedDataInfo decode_image_info) override; + void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, 
pMetaDataBatch output_meta_data, DecodedDataInfo decoded_image_info, CropImageInfo crop_image_info) override; + void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; + void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) override; +}; diff --git a/rocAL/include/meta_data/caffe2_meta_data_reader.h b/rocAL/include/meta_data/caffe2_meta_data_reader.h index f1d521dd6..9ca4b33cd 100644 --- a/rocAL/include/meta_data/caffe2_meta_data_reader.h +++ b/rocAL/include/meta_data/caffe2_meta_data_reader.h @@ -21,42 +21,42 @@ THE SOFTWARE. */ #pragma once -#include #include -#include + #include +#include +#include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "image_reader.h" - -class Caffe2MetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; + +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "readers/image/image_reader.h" + +class Caffe2MetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - std::map> &get_map_content() override { return (_map_content);} + std::map>& get_map_content() override { return (_map_content); } Caffe2MetaDataReader(); - ~Caffe2MetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const 
std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; - DIR *_src_dir; - struct dirent *_entity; + pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* _entity; std::vector _file_names; std::vector _image_name; }; diff --git a/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h b/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h index 589b1eba8..67f8e473a 100644 --- a/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h +++ b/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h @@ -21,42 +21,42 @@ THE SOFTWARE. */ #pragma once -#include #include -#include + #include +#include +#include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "image_reader.h" - -class Caffe2MetaDataReaderDetection: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; + +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "readers/image/image_reader.h" + +class Caffe2MetaDataReaderDetection : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); - std::map> &get_map_content() override{ return _map_content;} + std::map>& get_map_content() override { return _map_content; } bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } Caffe2MetaDataReaderDetection(); - ~Caffe2MetaDataReaderDetection() override { delete _output; } -private: + + private: void read_files(const 
std::string& _path); - bool exists(const std::string &image_name) override; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); + bool exists(const std::string& image_name) override; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - BoundingBoxBatch* _output; - DIR *_src_dir; - struct dirent *_entity; + pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* _entity; std::vector _file_names; std::vector _image_name; }; diff --git a/rocAL/include/meta_data/caffe_meta_data_reader.h b/rocAL/include/meta_data/caffe_meta_data_reader.h index bbc2355d6..bc825fe35 100644 --- a/rocAL/include/meta_data/caffe_meta_data_reader.h +++ b/rocAL/include/meta_data/caffe_meta_data_reader.h @@ -21,40 +21,40 @@ THE SOFTWARE. */ #pragma once -#include #include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "image_reader.h" + +#include + #include "caffe_protos.pb.h" +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "readers/image/image_reader.h" -class CaffeMetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class CaffeMetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; bool set_timestamp_mode() override { return false; } void print_map_contents(); - std::map> &get_map_content() override { return _map_content;} - MetaDataBatch * get_output() override { return _output; } + std::map>& get_map_content() override { return _map_content; } 
CaffeMetaDataReader(); - ~CaffeMetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); void read_lmdb_record(std::string _path, uint file_size); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _subfolder_file_names; MDB_env* _mdb_env; @@ -62,4 +62,4 @@ public : MDB_val _mdb_key, _mdb_value; MDB_txn* _mdb_txn; MDB_cursor* _mdb_cursor; - }; +}; diff --git a/rocAL/include/meta_data/caffe_meta_data_reader_detection.h b/rocAL/include/meta_data/caffe_meta_data_reader_detection.h index 457ddea54..888857af9 100644 --- a/rocAL/include/meta_data/caffe_meta_data_reader_detection.h +++ b/rocAL/include/meta_data/caffe_meta_data_reader_detection.h @@ -21,44 +21,44 @@ THE SOFTWARE. 
*/ #pragma once -#include #include -#include + #include +#include +#include #include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "image_reader.h" +#include "pipeline/commons.h" +#include "lmdb.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "readers/image/image_reader.h" #include "caffe_protos.pb.h" -class CaffeMetaDataReaderDetection: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class CaffeMetaDataReaderDetection : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; bool set_timestamp_mode() override { return false; } void print_map_contents(); - std::map> &get_map_content() override{ return _map_content;} - MetaDataBatch * get_output() override { return _output; } + std::map>& get_map_content() override { return _map_content; } CaffeMetaDataReaderDetection(); - ~CaffeMetaDataReaderDetection() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); + bool exists(const std::string& image_name) override; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - BoundingBoxBatch* _output; - DIR *_src_dir; - struct dirent *_entity; + pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* _entity; std::vector _file_names; MDB_env* _mdb_env; MDB_dbi _mdb_dbi; diff --git a/rocAL/include/meta_data/cifar10_meta_data_reader.h 
b/rocAL/include/meta_data/cifar10_meta_data_reader.h index 8801b9235..5b8dc18b6 100644 --- a/rocAL/include/meta_data/cifar10_meta_data_reader.h +++ b/rocAL/include/meta_data/cifar10_meta_data_reader.h @@ -21,38 +21,38 @@ THE SOFTWARE. */ #pragma once -#include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" - -class Cifar10MetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; + +#include + +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" + +class Cifar10MetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - std::map> &get_map_content() override { return _map_content;} + std::map>& get_map_content() override { return _map_content; } Cifar10MetaDataReader(); - ~Cifar10MetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; std::string _file_prefix; - size_t _raw_file_size; - LabelBatch* _output; + size_t _raw_file_size; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _file_offsets; std::vector _file_idx; diff --git a/rocAL/include/meta_data/coco_meta_data_reader.h b/rocAL/include/meta_data/coco_meta_data_reader.h index 2aacbe6bd..7936b37ed 100644 --- 
a/rocAL/include/meta_data/coco_meta_data_reader.h +++ b/rocAL/include/meta_data/coco_meta_data_reader.h @@ -22,37 +22,41 @@ THE SOFTWARE. #pragma once #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "timing_debug.h" -class COCOMetaDataReader: public MetaDataReader -{ -public: - void init(const MetaDataConfig& cfg) override; +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "pipeline/timing_debug.h" + +class COCOMetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; + ImgSize lookup_image_size(const std::string& image_name) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - const std::map> & get_map_content() override { return _map_content;} + const std::map>& get_map_content() override { return _map_content; } + void set_aspect_ratio_grouping(bool aspect_ratio_grouping) override { _aspect_ratio_grouping = aspect_ratio_grouping; } + bool get_aspect_ratio_grouping() const override { return _aspect_ratio_grouping; } COCOMetaDataReader(); - ~COCOMetaDataReader() override { delete _output; } -private: - BoundingBoxBatch* _output; + + private: + pMetaDataBatch _output; std::string _path; int meta_data_reader_type; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); - bool exists(const std::string &image_name) override; + bool _avoid_class_remapping; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, int image_id = 0); + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords 
mask_cords, std::vector polygon_count, std::vector> vertices_count, int image_id = 0); // To add Mask coordinates to Metadata struct + bool exists(const std::string& image_name) override; std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; - std::map ::iterator itr; + std::map _map_image_names_to_id; // Maps image names to their image IDs + std::map::iterator itr; std::map _label_info; - std::map ::iterator _it_label; - TimingDBG _coco_metadata_read_time; + std::map::iterator _it_label; + TimingDbg _coco_metadata_read_time; }; - diff --git a/rocAL/include/meta_data/coco_meta_data_reader_key_points.h b/rocAL/include/meta_data/coco_meta_data_reader_key_points.h index 096780b90..de31fc1db 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader_key_points.h +++ b/rocAL/include/meta_data/coco_meta_data_reader_key_points.h @@ -22,39 +22,38 @@ THE SOFTWARE. #pragma once #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" -#include "timing_debug.h" - -class COCOMetaDataReaderKeyPoints: public MetaDataReader -{ -public: - void init(const MetaDataConfig& cfg) override; + +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" +#include "pipeline/timing_debug.h" + +class COCOMetaDataReaderKeyPoints : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - const std::map> & get_map_content() override { return _map_content; } + + const std::map>& get_map_content() override { return _map_content; } COCOMetaDataReaderKeyPoints(); - ~COCOMetaDataReaderKeyPoints() override { delete _output; } -private: - 
KeyPointBatch* _output; + + private: + pMetaDataBatch _output; std::string _path; unsigned _out_img_width; unsigned _out_img_height; int meta_data_reader_type; - void add(std::string image_name, ImgSize image_size, JointsData *joints_data); - bool exists(const std::string &image_name) override; + void add(std::string image_name, ImgSize image_size, JointsData* joints_data); + bool exists(const std::string& image_name) override; std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; - std::map> ::iterator itr; + std::map>::iterator itr; std::map _label_info; - std::map ::iterator _it_label; - TimingDBG _coco_metadata_read_time; + std::map::iterator _it_label; + TimingDbg _coco_metadata_read_time; }; - diff --git a/rocAL/include/meta_data/label_reader_folders.h b/rocAL/include/meta_data/label_reader_folders.h index 5c999edb8..4911d85bb 100644 --- a/rocAL/include/meta_data/label_reader_folders.h +++ b/rocAL/include/meta_data/label_reader_folders.h @@ -21,36 +21,37 @@ THE SOFTWARE. 
*/ #pragma once -#include #include -#include "commons.h" -#include "meta_data.h" -#include "meta_data_reader.h" - -class LabelReaderFolders: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; + +#include + +#include "pipeline/commons.h" +#include "meta_data/meta_data.h" +#include "meta_data/meta_data_reader.h" + +class LabelReaderFolders : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - const std::map> & get_map_content() override { return _map_content;} - MetaDataBatch * get_output() override { return _output; } + const std::map>& get_map_content() override { return _map_content; } + LabelReaderFolders(); - ~LabelReaderFolders() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _subfolder_file_names; }; \ No newline at end of file diff --git a/rocAL/include/meta_data/lookahead_parser.h b/rocAL/include/meta_data/lookahead_parser.h index d62c05bfa..89d4ad2cd 100644 --- a/rocAL/include/meta_data/lookahead_parser.h +++ b/rocAL/include/meta_data/lookahead_parser.h @@ -20,13 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once -#include #include -#include -#include +#include + #include -#include #include +#include +#include +#include RAPIDJSON_DIAG_PUSH #ifdef __GNUC__ @@ -65,29 +66,76 @@ RAPIDJSON_DIAG_OFF(effc++) using namespace rapidjson; - class LookaheadParserHandler { -public: - bool Null() { st_ = kHasNull; v_.SetNull(); return true; } - bool Bool(bool b) { st_ = kHasBool; v_.SetBool(b); return true; } - bool Int(int i) { st_ = kHasNumber; v_.SetInt(i); return true; } - bool Uint(unsigned u) { st_ = kHasNumber; v_.SetUint(u); return true; } - bool Int64(int64_t i) { st_ = kHasNumber; v_.SetInt64(i); return true; } - bool Uint64(uint64_t u) { st_ = kHasNumber; v_.SetUint64(u); return true; } - bool Double(double d) { st_ = kHasNumber; v_.SetDouble(d); return true; } + public: + bool Null() { + st_ = kHasNull; + v_.SetNull(); + return true; + } + bool Bool(bool b) { + st_ = kHasBool; + v_.SetBool(b); + return true; + } + bool Int(int i) { + st_ = kHasNumber; + v_.SetInt(i); + return true; + } + bool Uint(unsigned u) { + st_ = kHasNumber; + v_.SetUint(u); + return true; + } + bool Int64(int64_t i) { + st_ = kHasNumber; + v_.SetInt64(i); + return true; + } + bool Uint64(uint64_t u) { + st_ = kHasNumber; + v_.SetUint64(u); + return true; + } + bool Double(double d) { + st_ = kHasNumber; + v_.SetDouble(d); + return true; + } bool RawNumber(const char*, SizeType, bool) { return false; } - bool String(const char* str, SizeType length, bool) { st_ = kHasString; v_.SetString(str, length); return true; } - bool StartObject() { st_ = kEnteringObject; return true; } - bool Key(const char* str, SizeType length, bool) { st_ = kHasKey; v_.SetString(str, length); return true; } - bool EndObject(SizeType) { st_ = kExitingObject; return true; } - bool StartArray() { st_ = kEnteringArray; return true; } - bool EndArray(SizeType) { st_ = kExitingArray; return true; } - -protected: + bool String(const char* str, SizeType length, bool) { + st_ = kHasString; + v_.SetString(str, length); + 
return true; + } + bool StartObject() { + st_ = kEnteringObject; + return true; + } + bool Key(const char* str, SizeType length, bool) { + st_ = kHasKey; + v_.SetString(str, length); + return true; + } + bool EndObject(SizeType) { + st_ = kExitingObject; + return true; + } + bool StartArray() { + st_ = kEnteringArray; + return true; + } + bool EndArray(SizeType) { + st_ = kExitingArray; + return true; + } + + protected: LookaheadParserHandler(char* str); void ParseNext(); -protected: + protected: enum LookaheadParsingState { kInit, kError, @@ -125,7 +173,7 @@ inline void LookaheadParserHandler::ParseNext() { } class LookaheadParser : protected LookaheadParserHandler { -public: + public: LookaheadParser(char* str) : LookaheadParserHandler(str) {} bool EnterObject(); @@ -142,17 +190,17 @@ class LookaheadParser : protected LookaheadParserHandler { void SkipArray(); void SkipValue(); Value* PeekValue(); - int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array) + int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array) bool IsValid() { return st_ != kError; } -protected: + protected: void SkipOut(int depth); }; inline bool LookaheadParser::EnterObject() { if (st_ != kEnteringObject) { - st_ = kError; + st_ = kError; return false; } @@ -162,7 +210,7 @@ inline bool LookaheadParser::EnterObject() { inline bool LookaheadParser::EnterArray() { if (st_ != kEnteringArray) { - st_ = kError; + st_ = kError; return false; } @@ -213,7 +261,7 @@ inline int LookaheadParser::GetInt() { inline double LookaheadParser::GetDouble() { if (st_ != kHasNumber) { - st_ = kError; + st_ = kError; return 0.; } @@ -224,7 +272,7 @@ inline double LookaheadParser::GetDouble() { inline bool LookaheadParser::GetBool() { if (st_ != kHasBool) { - st_ = kError; + st_ = kError; return false; } @@ -235,7 +283,7 @@ inline bool LookaheadParser::GetBool() { inline void LookaheadParser::GetNull() { if (st_ != kHasNull) { - st_ = kError; + st_ = 
kError; return; } @@ -244,7 +292,7 @@ inline void LookaheadParser::GetNull() { inline const char* LookaheadParser::GetString() { if (st_ != kHasString) { - st_ = kError; + st_ = kError; return 0; } @@ -257,17 +305,14 @@ inline void LookaheadParser::SkipOut(int depth) { do { if (st_ == kEnteringArray || st_ == kEnteringObject) { ++depth; - } - else if (st_ == kExitingArray || st_ == kExitingObject) { + } else if (st_ == kExitingArray || st_ == kExitingObject) { --depth; - } - else if (st_ == kError) { + } else if (st_ == kError) { return; } ParseNext(); - } - while (depth > 0); + } while (depth > 0); } inline void LookaheadParser::SkipValue() { diff --git a/rocAL/include/meta_data/meta_data.h b/rocAL/include/meta_data/meta_data.h index 59907aaee..4d41529cd 100644 --- a/rocAL/include/meta_data/meta_data.h +++ b/rocAL/include/meta_data/meta_data.h @@ -21,40 +21,58 @@ THE SOFTWARE. */ #pragma once +#include +#include +#include #include #include #include -#include -#include "commons.h" +#include "pipeline/commons.h" -//Defined constants since needed in reader and meta nodes for Pose Estimation +// Defined constants since needed in reader and meta nodes for Pose Estimation #define NUMBER_OF_JOINTS 17 #define NUMBER_OF_JOINTS_HALFBODY 8 -#define PIXEL_STD 200 +#define PIXEL_STD 200 #define SCALE_CONSTANT_CS 1.25 #define SCALE_CONSTANT_HALF_BODY 1.5 -typedef struct BoundingBoxCord_ -{ - float l; float t; float r; float b; - BoundingBoxCord_() {} - BoundingBoxCord_(float l_, float t_, float r_, float b_): l(l_), t(t_), r(r_), b(b_) {} // constructor - BoundingBoxCord_(const BoundingBoxCord_& cord) : l(cord.l), t(cord.t), r(cord.r), b(cord.b) {} //copy constructor +typedef struct BoundingBoxCord_ { + float l; + float t; + float r; + float b; + BoundingBoxCord_() {} + BoundingBoxCord_(float l_, float t_, float r_, float b_) : l(l_), t(t_), r(r_), b(b_) {} // constructor + BoundingBoxCord_(const BoundingBoxCord_& cord) : l(cord.l), t(cord.t), r(cord.r), b(cord.b) {} // copy 
constructor } BoundingBoxCord; -typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh; -typedef std::vector BoundingBoxCords; -typedef std::vector BoundingBoxCords_xcycwh; -typedef std::vector BoundingBoxLabels; -typedef struct { int w; int h; } ImgSize; -typedef std::vector ImgSizes; +typedef std::vector BoundingBoxCords; +typedef std::vector Labels; +typedef struct { + int w; + int h; +} ImgSize; +typedef std::vector ImgSizes; -typedef std::vector ImageIDBatch,AnnotationIDBatch; +typedef std::vector MaskCords; +typedef std::vector ImageIDBatch, AnnotationIDBatch; typedef std::vector ImagePathBatch; -typedef std::vector Joint,JointVisibility,ScoreBatch,RotationBatch; -typedef std::vector> Joints,JointsVisibility, CenterBatch, ScaleBatch; +typedef std::vector Joint, JointVisibility, ScoreBatch, RotationBatch; +typedef std::vector> Joints, JointsVisibility, CenterBatch, ScaleBatch; typedef std::vector>> JointsBatch, JointsVisibilityBatch; +enum class MetaDataType { + Label, + BoundingBox, + PolygonMask, + KeyPoints +}; + +enum class BoundingBoxType { + XYWH = 0, + LTRB +}; + typedef struct { int image_id; @@ -66,7 +84,7 @@ typedef struct JointsVisibility joints_visibility; float score; float rotation; -}JointsData; +} JointsData; typedef struct { @@ -79,187 +97,407 @@ typedef struct JointsVisibilityBatch joints_visibility_batch; ScoreBatch score_batch; RotationBatch rotation_batch; -}JointsDataBatch; +} JointsDataBatch; -struct MetaData -{ - int& get_label() { return _label_id; } - BoundingBoxCords& get_bb_cords() { return _bb_cords; } - BoundingBoxCords_xcycwh& get_bb_cords_xcycwh() { return _bb_cords_xcycwh; } - BoundingBoxLabels& get_bb_labels() { return _bb_label_ids; } - void set_bb_labels(BoundingBoxLabels bb_label_ids) {_bb_label_ids = std::move(bb_label_ids); } - ImgSize& get_img_size() { return _img_size; } - const JointsData& get_joints_data(){ return _joints_data; } -protected: - BoundingBoxCords _bb_cords = {}; // For bb use - 
BoundingBoxCords_xcycwh _bb_cords_xcycwh = {}; // For bb use - BoundingBoxLabels _bb_label_ids = {};// For bb use - ImgSize _img_size = {}; - JointsData _joints_data = {}; - int _label_id = -1; // For label use only +typedef class MetaDataInfo { + public: + int img_id = -1; + std::string img_name = ""; + ImgSize img_size = {}; + ImgSize img_roi_size = {}; +} MetaDataInfo; + +class MetaData { + public: + virtual std::vector& get_labels() = 0; + virtual void set_labels(Labels label_ids) = 0; + virtual BoundingBoxCords& get_bb_cords() = 0; + virtual void set_bb_cords(BoundingBoxCords bb_cords) = 0; + virtual std::vector& get_polygon_count() = 0; + virtual std::vector>& get_vertices_count() = 0; + virtual MaskCords& get_mask_cords() = 0; + virtual void set_mask_cords(MaskCords mask_cords) = 0; + virtual void set_polygon_counts(std::vector polygon_count) = 0; + virtual void set_vertices_counts(std::vector> vertices_count) = 0; + virtual JointsData& get_joints_data() = 0; + virtual void set_joints_data(JointsData* joints_data) = 0; + ImgSize& get_img_size() { return _info.img_size; } + ImgSize& get_img_roi_size() { return _info.img_roi_size; } + std::string& get_image_name() { return _info.img_name; } + int& get_image_id() { return _info.img_id; } + void set_img_size(ImgSize img_size) { _info.img_size = std::move(img_size); } + void set_img_roi_size(ImgSize img_roi_size) { _info.img_roi_size = std::move(img_roi_size); } + void set_img_id(int img_id) { _info.img_id = img_id; } + void set_img_name(std::string img_name) { _info.img_name = img_name; } + void set_metadata_info(MetaDataInfo info) { _info = std::move(info); } + + protected: + MetaDataInfo _info; }; -struct Label : public MetaData -{ - Label(int label) { _label_id = label; } - Label(){ _label_id = -1; } +class Label : public MetaData { + public: + Label(int label) { _label_ids = {label}; } + Label() { _label_ids = {-1}; } + std::vector& get_labels() override { return _label_ids; } + void set_labels(Labels 
label_ids) override { _label_ids = std::move(label_ids); } + BoundingBoxCords& get_bb_cords() override { THROW("Not Implemented") } + void set_bb_cords(BoundingBoxCords bb_cords) override{THROW("Not Implemented")} std::vector& get_polygon_count() override{THROW("Not Implemented")} std::vector>& get_vertices_count() override{THROW("Not Implemented")} MaskCords& get_mask_cords() override { THROW("Not Implemented") } + void set_mask_cords(MaskCords mask_cords) override { THROW("Not Implemented") } + void set_polygon_counts(std::vector polygon_count) override { THROW("Not Implemented") } + void set_vertices_counts(std::vector> vertices_count) override{THROW("Not Implemented")} JointsData& get_joints_data() override { THROW("Not Implemented") } + void set_joints_data(JointsData* joints_data) override { THROW("Not Implemented") } + + protected: + Labels _label_ids = {}; // For label use only }; -struct BoundingBox : public MetaData -{ - BoundingBox()= default; - BoundingBox(BoundingBoxCords bb_cords, BoundingBoxLabels bb_label_ids) - { - _bb_cords =std::move(bb_cords); - _bb_label_ids = std::move(bb_label_ids); - } - BoundingBox(BoundingBoxCords bb_cords, BoundingBoxLabels bb_label_ids, ImgSize img_size) - { - _bb_cords =std::move(bb_cords); - _bb_label_ids = std::move(bb_label_ids); - _img_size = std::move(img_size); - } - void set_bb_cords(BoundingBoxCords bb_cords) { _bb_cords =std::move(bb_cords); } - BoundingBox(BoundingBoxCords_xcycwh bb_cords_xcycwh, BoundingBoxLabels bb_label_ids) - { - _bb_cords_xcycwh =std::move(bb_cords_xcycwh); - _bb_label_ids = std::move(bb_label_ids); - } - BoundingBox(BoundingBoxCords_xcycwh bb_cords_xcycwh, BoundingBoxLabels bb_label_ids, ImgSize img_size) - { - _bb_cords_xcycwh =std::move(bb_cords_xcycwh); - _bb_label_ids = std::move(bb_label_ids); - _img_size = std::move(img_size); - } - void set_bb_cords_xcycwh(BoundingBoxCords_xcycwh bb_cords_xcycwh) { _bb_cords_xcycwh =std::move(bb_cords_xcycwh); } - void 
set_bb_labels(BoundingBoxLabels bb_label_ids) { _bb_label_ids = std::move(bb_label_ids); } - void set_img_size(ImgSize img_size) { _img_size = std::move(img_size); } +class BoundingBox : public Label { + public: + BoundingBox() = default; + BoundingBox(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size = ImgSize{0, 0}, int img_id = 0) { + _bb_cords = std::move(bb_cords); + _label_ids = std::move(bb_label_ids); + _info.img_size = std::move(img_size); + _info.img_id = img_id; + } + BoundingBoxCords& get_bb_cords() override { return _bb_cords; } + void set_bb_cords(BoundingBoxCords bb_cords) override { _bb_cords = std::move(bb_cords); } + + protected: + BoundingBoxCords _bb_cords = {}; // For bb use }; -struct KeyPoint : public MetaData -{ - KeyPoint()= default; - KeyPoint(ImgSize img_size, JointsData *joints_data) - { - _img_size = std::move(img_size); +struct PolygonMask : public BoundingBox { + public: + PolygonMask(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count, int img_id = 0) { + _bb_cords = std::move(bb_cords); + _label_ids = std::move(bb_label_ids); + _info.img_size = std::move(img_size); + _mask_cords = std::move(mask_cords); + _polygon_count = std::move(polygon_count); + _vertices_count = std::move(vertices_count); + _info.img_id = img_id; + } + std::vector& get_polygon_count() override { return _polygon_count; } + std::vector>& get_vertices_count() override { return _vertices_count; } + MaskCords& get_mask_cords() override { return _mask_cords; } + void set_mask_cords(MaskCords mask_cords) override { _mask_cords = std::move(mask_cords); } + void set_polygon_counts(std::vector polygon_count) override { _polygon_count = std::move(polygon_count); } + void set_vertices_counts(std::vector> vertices_count) override { _vertices_count = std::move(vertices_count); } + + protected: + MaskCords _mask_cords = {}; + std::vector _polygon_count = {}; + 
std::vector> _vertices_count = {}; +}; + +class KeyPoint : public BoundingBox { + public: + KeyPoint() = default; + KeyPoint(ImgSize img_size, JointsData* joints_data) { + _info.img_size = std::move(img_size); _joints_data = std::move(*joints_data); } - void set_joints_data(JointsData *joints_data) { _joints_data = std::move(*joints_data); } + void set_joints_data(JointsData* joints_data) override { _joints_data = std::move(*joints_data); } + JointsData& get_joints_data() override { return _joints_data; } + + protected: + JointsData _joints_data = {}; }; -struct MetaDataBatch -{ +class MetaDataInfoBatch { + public: + std::vector img_ids = {}; + std::vector img_names = {}; + std::vector img_sizes = {}; + std::vector img_roi_sizes = {}; + void clear() { + img_ids.clear(); + img_names.clear(); + img_sizes.clear(); + img_roi_sizes.clear(); + } + void resize(int batch_size) { + img_ids.resize(batch_size); + img_names.resize(batch_size); + img_sizes.resize(batch_size); + img_roi_sizes.resize(batch_size); + } + void insert(MetaDataInfoBatch& other) { + img_sizes.insert(img_sizes.end(), other.img_sizes.begin(), other.img_sizes.end()); + img_ids.insert(img_ids.end(), other.img_ids.begin(), other.img_ids.end()); + img_names.insert(img_names.end(), other.img_names.begin(), other.img_names.end()); + img_roi_sizes.insert(img_roi_sizes.end(), other.img_roi_sizes.begin(), other.img_roi_sizes.end()); + } +}; + +class MetaDataBatch { + public: virtual ~MetaDataBatch() = default; virtual void clear() = 0; virtual void resize(int batch_size) = 0; virtual int size() = 0; - virtual MetaDataBatch& operator += (MetaDataBatch& other) = 0; - MetaDataBatch* concatenate(MetaDataBatch* other) - { + virtual void copy_data(std::vector buffer) = 0; + virtual std::vector& get_buffer_size() = 0; + virtual MetaDataBatch& operator+=(MetaDataBatch& other) = 0; + MetaDataBatch* concatenate(MetaDataBatch* other) { *this += *other; return this; } - virtual std::shared_ptr clone() = 0; - std::vector& 
get_label_batch() { return _label_id; } - std::vector& get_bb_cords_batch() { return _bb_cords; } - std::vector& get_bb_cords_batch_xcycxwh() { return _bb_cords_xcycwh; } - std::vector& get_bb_labels_batch() { return _bb_label_ids; } - ImgSizes & get_img_sizes_batch() { return _img_sizes; } - JointsDataBatch & get_joints_data_batch() { return _joints_data; } -protected: - std::vector _label_id = {}; // For label use only - std::vector _bb_cords = {}; - std::vector _bb_cords_xcycwh = {}; - std::vector _bb_label_ids = {}; - std::vector _img_sizes = {}; - JointsDataBatch _joints_data = {}; + virtual std::shared_ptr clone(bool copy_contents = true) = 0; + virtual int mask_size() = 0; + virtual std::vector& get_labels_batch() = 0; + virtual std::vector& get_bb_cords_batch() = 0; + virtual void set_xywh_bbox() = 0; + virtual std::vector& get_mask_cords_batch() = 0; + virtual std::vector>& get_mask_polygons_count_batch() = 0; + virtual std::vector>>& get_mask_vertices_count_batch() = 0; + virtual JointsDataBatch& get_joints_data_batch() = 0; + std::vector& get_image_id_batch() { return _info_batch.img_ids; } + std::vector& get_image_names_batch() { return _info_batch.img_names; } + ImgSizes& get_img_sizes_batch() { return _info_batch.img_sizes; } + ImgSizes& get_img_roi_sizes_batch() { return _info_batch.img_roi_sizes; } + MetaDataInfoBatch& get_info_batch() { return _info_batch; } + void set_metadata_type(MetaDataType metadata_type) { _type = metadata_type; } + MetaDataType get_metadata_type() { return _type; } + + protected: + MetaDataInfoBatch _info_batch; + MetaDataType _type; }; -struct LabelBatch : public MetaDataBatch -{ - void clear() override - { - _label_id.clear(); +class LabelBatch : public MetaDataBatch { + public: + void clear() override { + for (auto label : _label_ids) { + label.clear(); + } + _info_batch.clear(); + _label_ids.clear(); + _buffer_size.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { - 
_label_id.insert(_label_id.end(), other.get_label_batch().begin(), other.get_label_batch().end()); + MetaDataBatch& operator+=(MetaDataBatch& other) override { + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { - _label_id.resize(batch_size); + void resize(int batch_size) override { + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); } - int size() override - { - return _label_id.size(); + int size() override { + return _label_ids.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr label_batch_instance = std::make_shared(); + label_batch_instance->resize(this->size()); + label_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return label_batch_instance; + } } - explicit LabelBatch(std::vector& labels) - { - _label_id = std::move(labels); + explicit LabelBatch(std::vector& labels) { + _label_ids = std::move(labels); } LabelBatch() = default; + void copy_data(std::vector buffer) override { + if (buffer.size() < 1) + THROW("The buffers are insufficient") // TODO -change + auto labels_buffer = (int*)buffer[0]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + memcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + labels_buffer += _label_ids[i].size(); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + _buffer_size.emplace_back(size * sizeof(int)); + return _buffer_size; + } + std::vector& get_labels_batch() override { return _label_ids; } + 
int mask_size() override{THROW("Not Implemented")} std::vector& get_bb_cords_batch() override { THROW("Not Implemented") } + void set_xywh_bbox() override{THROW("Not Implemented")} std::vector& get_mask_cords_batch() override{THROW("Not Implemented")} std::vector>& get_mask_polygons_count_batch() override{THROW("Not Implemented")} std::vector>>& get_mask_vertices_count_batch() override{THROW("Not Implemented")} JointsDataBatch& get_joints_data_batch() override { THROW("Not Implemented") } + + protected: + std::vector _label_ids = {}; + std::vector _buffer_size; }; -struct BoundingBoxBatch: public MetaDataBatch -{ - void clear() override - { +class BoundingBoxBatch : public LabelBatch { + public: + void clear() override { _bb_cords.clear(); - _bb_label_ids.clear(); - _img_sizes.clear(); + _label_ids.clear(); + _info_batch.clear(); + _buffer_size.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { + MetaDataBatch& operator+=(MetaDataBatch& other) override { _bb_cords.insert(_bb_cords.end(), other.get_bb_cords_batch().begin(), other.get_bb_cords_batch().end()); - _bb_label_ids.insert(_bb_label_ids.end(), other.get_bb_labels_batch().begin(), other.get_bb_labels_batch().end()); - _img_sizes.insert(_img_sizes.end(), other.get_img_sizes_batch().begin(), other.get_img_sizes_batch().end()); + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { + void resize(int batch_size) override { _bb_cords.resize(batch_size); - _bb_label_ids.resize(batch_size); - _img_sizes.resize(batch_size); + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); } - int size() override - { + int size() override { return _bb_cords.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return 
std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr bbox_batch_instance = std::make_shared(); + bbox_batch_instance->resize(this->size()); + bbox_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return bbox_batch_instance; + } + } + void convert_ltrb_to_xywh(BoundingBoxCords& ltrb_bbox_list) { + for (unsigned i = 0; i < ltrb_bbox_list.size(); i++) { + auto& bbox = ltrb_bbox_list[i]; + // Change the values in place + bbox.r = bbox.r - bbox.l; + bbox.b = bbox.b - bbox.t; + } } + void copy_data(std::vector buffer) override { + if (buffer.size() < 2) + THROW("The buffers are insufficient") // TODO -change + int* labels_buffer = (int*)buffer[0]; + float* bbox_buffer = (float*)buffer[1]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + memcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + if (_bbox_output_type == BoundingBoxType::XYWH) convert_ltrb_to_xywh(_bb_cords[i]); + memcpy(bbox_buffer, _bb_cords[i].data(), _label_ids[i].size() * 4 * sizeof(float)); + labels_buffer += _label_ids[i].size(); + bbox_buffer += (_label_ids[i].size() * 4); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + _buffer_size.emplace_back(size * sizeof(int)); + _buffer_size.emplace_back(size * 4 * sizeof(float)); + return _buffer_size; + } + std::vector& get_bb_cords_batch() override { return _bb_cords; } + void set_xywh_bbox() override { _bbox_output_type = BoundingBoxType::XYWH; } + + protected: + std::vector _bb_cords = {}; + BoundingBoxType _bbox_output_type = BoundingBoxType::LTRB; }; -struct KeyPointBatch : public MetaDataBatch -{ - void clear() override - { - _img_sizes.clear(); +struct PolygonMaskBatch : public BoundingBoxBatch { + public: + void clear() override { + _bb_cords.clear(); + 
_label_ids.clear(); + _info_batch.clear(); + _mask_cords.clear(); + _polygon_counts.clear(); + _vertices_counts.clear(); + _buffer_size.clear(); + } + MetaDataBatch& operator+=(MetaDataBatch& other) override { + _bb_cords.insert(_bb_cords.end(), other.get_bb_cords_batch().begin(), other.get_bb_cords_batch().end()); + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); + _mask_cords.insert(_mask_cords.end(), other.get_mask_cords_batch().begin(), other.get_mask_cords_batch().end()); + _polygon_counts.insert(_polygon_counts.end(), other.get_mask_polygons_count_batch().begin(), other.get_mask_polygons_count_batch().end()); + _vertices_counts.insert(_vertices_counts.end(), other.get_mask_vertices_count_batch().begin(), other.get_mask_vertices_count_batch().end()); + return *this; + } + void resize(int batch_size) override { + _bb_cords.resize(batch_size); + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); + _mask_cords.resize(batch_size); + _polygon_counts.resize(batch_size); + _vertices_counts.resize(batch_size); + } + std::vector& get_mask_cords_batch() override { return _mask_cords; } + std::vector>& get_mask_polygons_count_batch() override { return _polygon_counts; } + std::vector>>& get_mask_vertices_count_batch() override { return _vertices_counts; } + int mask_size() override { return _mask_cords.size(); } + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr mask_batch_instance = std::make_shared(); + mask_batch_instance->resize(this->size()); + mask_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return mask_batch_instance; + } + } + void copy_data(std::vector buffer) override { + if (buffer.size() < 2) + 
THROW("The buffers are insufficient") // TODO -change + int* labels_buffer = (int*)buffer[0]; + float* bbox_buffer = (float*)buffer[1]; + float* mask_buffer = (float*)buffer[2]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + mempcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + if (_bbox_output_type == BoundingBoxType::XYWH) convert_ltrb_to_xywh(_bb_cords[i]); + memcpy(bbox_buffer, _bb_cords[i].data(), _label_ids[i].size() * 4 * sizeof(float)); + memcpy(mask_buffer, _mask_cords[i].data(), _mask_cords[i].size() * sizeof(float)); + labels_buffer += _label_ids[i].size(); + bbox_buffer += (_label_ids[i].size() * 4); + mask_buffer += _mask_cords[i].size(); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + _buffer_size.emplace_back(size * sizeof(int)); + _buffer_size.emplace_back(size * 4 * sizeof(float)); + size = 0; + for (auto mask : _mask_cords) + size += mask.size(); + _buffer_size.emplace_back(size * sizeof(float)); + return _buffer_size; + } + + protected: + std::vector _mask_cords = {}; + std::vector> _polygon_counts = {}; + std::vector>> _vertices_counts = {}; +}; + +class KeyPointBatch : public BoundingBoxBatch { + public: + void clear() override { + _info_batch.clear(); _joints_data = {}; _bb_cords.clear(); - _bb_label_ids.clear(); + _label_ids.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { - _img_sizes.insert(_img_sizes.end(), other.get_img_sizes_batch().begin(), other.get_img_sizes_batch().end()); + MetaDataBatch& operator+=(MetaDataBatch& other) override { _joints_data.image_id_batch.insert(_joints_data.image_id_batch.end(), other.get_joints_data_batch().image_id_batch.begin(), other.get_joints_data_batch().image_id_batch.end()); _joints_data.annotation_id_batch.insert(_joints_data.annotation_id_batch.end(), other.get_joints_data_batch().annotation_id_batch.begin(), 
other.get_joints_data_batch().annotation_id_batch.end()); _joints_data.center_batch.insert(_joints_data.center_batch.end(), other.get_joints_data_batch().center_batch.begin(), other.get_joints_data_batch().center_batch.end()); _joints_data.scale_batch.insert(_joints_data.scale_batch.end(), other.get_joints_data_batch().scale_batch.begin(), other.get_joints_data_batch().scale_batch.end()); - _joints_data.joints_batch.insert(_joints_data.joints_batch.end(), other.get_joints_data_batch().joints_batch.begin() ,other.get_joints_data_batch().joints_batch.end()); + _joints_data.joints_batch.insert(_joints_data.joints_batch.end(), other.get_joints_data_batch().joints_batch.begin(), other.get_joints_data_batch().joints_batch.end()); _joints_data.joints_visibility_batch.insert(_joints_data.joints_visibility_batch.end(), other.get_joints_data_batch().joints_visibility_batch.begin(), other.get_joints_data_batch().joints_visibility_batch.end()); _joints_data.score_batch.insert(_joints_data.score_batch.end(), other.get_joints_data_batch().score_batch.begin(), other.get_joints_data_batch().score_batch.end()); _joints_data.rotation_batch.insert(_joints_data.rotation_batch.end(), other.get_joints_data_batch().rotation_batch.begin(), other.get_joints_data_batch().rotation_batch.end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { + void resize(int batch_size) override { _joints_data.image_id_batch.resize(batch_size); _joints_data.annotation_id_batch.resize(batch_size); _joints_data.center_batch.resize(batch_size); @@ -268,22 +506,34 @@ struct KeyPointBatch : public MetaDataBatch _joints_data.joints_visibility_batch.resize(batch_size); _joints_data.score_batch.resize(batch_size); _joints_data.rotation_batch.resize(batch_size); + _info_batch.resize(batch_size); _bb_cords.resize(batch_size); - _bb_label_ids.resize(batch_size); + _label_ids.resize(batch_size); } - int size() override - { + int size() override { return 
_joints_data.image_id_batch.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr joints_batch_instance = std::make_shared(); + joints_batch_instance->resize(this->size()); + joints_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return joints_batch_instance; + } } + JointsDataBatch& get_joints_data_batch() override { return _joints_data; } + void copy_data(std::vector buffer) override {} + std::vector& get_buffer_size() override { return _buffer_size; } + + protected: + JointsDataBatch _joints_data = {}; }; using ImageNameBatch = std::vector; using pMetaData = std::shared_ptr
BlendBlur (Gaussian 3x3)Blur (Gaussian 3x3) Brightness Color Temperature
CropResizeExposure ModificationExposure Modification Fisheye Lens Flip (Horizontal, Vertical and Both)
FogGammaGamma Hue Jitter
Lens CorrectionPixelizationPixelization Raindrops Random Crop
ResizeResize Crop MirrorResize Crop Mirror Rotation Salt And Pepper Noise
SaturationSnowflakesSnowflakes Vignette Warp Affine