diff --git a/.azuredevops/rocm-ci.yml b/.azuredevops/rocm-ci.yml
new file mode 100644
index 000000000..2f388cb76
--- /dev/null
+++ b/.azuredevops/rocm-ci.yml
@@ -0,0 +1,44 @@
+resources:
+ repositories:
+ - repository: pipelines_repo
+ type: github
+ endpoint: ROCm
+ name: ROCm/ROCm
+
+variables:
+- group: common
+- template: /.azuredevops/variables-global.yml@pipelines_repo
+
+trigger:
+ batch: true
+ branches:
+ include:
+ - develop
+ paths:
+ exclude:
+ - .github
+ - .jenkins
+ - docs
+ - '.*.y*ml'
+ - '*.md'
+ - copyright.txt
+ - LICENSE.txt
+
+pr:
+ autoCancel: true
+ branches:
+ include:
+ - develop
+ paths:
+ exclude:
+ - .github
+ - .jenkins
+ - docs
+ - '.*.y*ml'
+ - '*.md'
+ - copyright.txt
+ - LICENSE.txt
+ drafts: false
+
+jobs:
+ - template: ${{ variables.CI_COMPONENT_PATH }}/rocAL.yml@pipelines_repo
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index d0056aeaf..55c82bc44 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -2,9 +2,9 @@ name: Linting
on:
push:
- branches: [develop, main]
+ branches: [master, develop]
pull_request:
- branches: [develop, main]
+ branches: [master, develop]
jobs:
call-workflow-passing-data:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4323a034f..b5dba5a08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,39 +1,107 @@
+
+
# Changelog
-rocAL documentation is available at
-[https://rocm.docs.amd.com/projects/rocAL/en/latest/](https://rocm.docs.amd.com/projects/rocAL/en/latest/)
+## Online Documentation
+
+[rocAL Documentation](https://github.com/ROCm/rocAL)
-## rocAL 1.0.0 (unreleased)
+## rocAL 2.0.0 (unreleased)
-### Additions
+### Added
-*
+* Packages - dev & tests
+* Support for audio loader and decoder, which uses libsndfile library to decode wav files
+* C++ rocAL audio unit test and python script to run and compare the outputs
+* Python support for audio decoders
+* Pytorch iterator for Audio
+* Python audio unit test, and support to verify outputs
+* rocDecode for HW decode
+* Support for Audio augmentation - PreEmphasis filter
+* Support for reading from file lists in file reader
+* Support for Audio augmentation - Spectrogram
+* Support for Audio augmentation - ToDecibels
+* Support for downmixing audio channels during decoding
+* Support for Audio augmentation - Resample
+* Support for TensorTensorAdd and TensorScalarMultiply operations
+* Support for Uniform and Normal distribution nodes
+* Support for Audio augmentation - NonSilentRegionDetection
+* Support for generic augmentation - Slice
+* Support for generic augmentation - Normalize
+* Support for Audio augmentation - MelFilterBank
### Optimizations
-*
+* Tests
+* Setup Script
+* CentOS 7 support
+* SLES 15 SP5 support
-### Changes
+### Changed
-* Removed CuPy from `setup.py`
+* Image to tensor updates
+* ROCm install - use case graphics removed
-### Fixes
+### Fixed
-*
+* Tests & readme
-### Tested configurations
+### Tested Configurations
* Linux distribution
- + Ubuntu - `20.04` / `22.04`
-* ROCm: rocm-core - `5.4.0.50400-72`
+ * Ubuntu - `20.04` / `22.04`
+ * CentOS - `7`
+ * RedHat - `8` / `9`
+ * SLES - `15-SP5`
+* ROCm: rocm-core - `6.1.0.60100-64`
+* RPP - `rpp` & `rpp-dev`/`rpp-devel`
+* MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel`
+* rocDecode - `rocdecode` & `rocdecode-dev`/`rocdecode-devel`
+* Protobuf - `libprotobuf-dev`/`protobuf-devel`
+* RapidJSON - `https://github.com/Tencent/rapidjson`
+* Turbo JPEG - [Version 3.0.2](https://libjpeg-turbo.org/)
+* PyBind11 - [v2.11.1](https://github.com/pybind/pybind11)
+* FFMPEG - `ffmpeg 4` dev package
+* OpenCV - `libopencv` / [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0)
+* libsndfile - [1.0.31](https://github.com/libsndfile/libsndfile/releases/tag/1.0.31)
+* rocAL Setup Script - `V2.5.0`
+* Dependencies for all the above packages
+
+### Known issues
+
+* Requires custom deps install
+
+## rocAL 1.0.0
+
+### Added
+
+* rocAL Tests
+
+### Optimizations
+
+* Image augmentations
+
+### Changed
+
+* Deps
+
+### Fixed
+
+* minor issues
+
+### Tested Configurations
+
+* Linux distribution
+ * Ubuntu - `20.04` / `22.04`
+* ROCm: rocm-core - `6.0.60002-1`
* Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4)
* OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0)
+* RPP - [1.4.0](https://github.com/ROCm/rpp/releases/tag/1.4.0)
* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2)
-* RPP - `rpp` & `rpp-dev`/`rpp-devel`
-* MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel`
+* MIVisionX - [master](https://github.com/ROCm/MIVisionX)
* Dependencies for all the above packages
-* rocAL Setup Script - `V1.1.0`
+* rocAL Setup Script - `V1.0.2`
### Known issues
-*
+* Requires custom version of libturbo-JPEG
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4ddb7f99..051ffa34e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,7 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
cmake_minimum_required(VERSION 3.5)
-set(VERSION "1.0.0")
+set(VERSION "2.0.0")
# Set Project Version and Language
project(rocal VERSION ${VERSION} LANGUAGES CXX)
@@ -51,7 +51,8 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
option(ENHANCED_MESSAGE "rocAL Enhanced Message Option" ON)
option(GPU_SUPPORT "Build rocAL with GPU Support" ON)
option(BUILD_PYPACKAGE "Build rocAL Python Package" ON)
-option(BUILD_WITH_AMD_ADVANCE "Build rocAL with Advanced GPU support" OFF)
+set(PYTHON_VERSION_SUGGESTED "" CACHE STRING "Python version to build rocal") # string cache entry (option() is BOOL-only); empty by default
+option(BUILD_WITH_AMD_ADVANCE "Build rocAL for advanced AMD GPU Architecture" OFF)
set(DEFAULT_BUILD_TYPE "Release")
@@ -117,13 +118,8 @@ else()
endif()
message("-- ${BoldBlue}rocAL Build Type -- ${CMAKE_BUILD_TYPE}${ColourReset}")
-message("-- ${Cyan}rocAL Developer Options${ColourReset}")
-message("-- ${Cyan} -D GPU_SUPPORT=${GPU_SUPPORT} [Turn ON/OFF GPU support (default:ON)]${ColourReset}")
-message("-- ${Cyan} -D BACKEND=${BACKEND} [Select rocAL Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
-message("-- ${Cyan} -D BUILD_PYPACKAGE=${BUILD_PYPACKAGE} [rocAL Python Package(default:ON)]${ColourReset}")
-message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [rocAL support for advanced GPU(default:OFF)]${ColourReset}")
-
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
+list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
find_package(HALF QUIET)
if(HALF_FOUND)
@@ -134,8 +130,19 @@ else()
message("-- ${Blue}rocAL Built with float16 Support OFF${ColourReset}")
endif()
+message("-- ${Cyan}rocAL Developer Options${ColourReset}")
+message("-- ${Cyan} -D GPU_SUPPORT=${GPU_SUPPORT} [Turn ON/OFF GPU support (default:ON)]${ColourReset}")
+message("-- ${Cyan} -D BACKEND=${BACKEND} [Select rocAL Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
+message("-- ${Cyan} -D BUILD_PYPACKAGE=${BUILD_PYPACKAGE} [rocAL Python Package(default:ON)]${ColourReset}")
+message("-- ${Cyan} -D PYTHON_VERSION_SUGGESTED=${PYTHON_VERSION_SUGGESTED} [User provided python version to use for rocAL Python Bindings(default:System Version)]${ColourReset}")
+message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
+
add_subdirectory(rocAL)
-add_subdirectory(rocAL_pybind)
+if(BUILD_PYPACKAGE)
+ add_subdirectory(rocAL_pybind)
+else()
+ message("-- ${Cyan}rocAL Python Module turned OFF by user option -D BUILD_PYPACKAGE=OFF ${ColourReset}")
+endif()
# install rocAL docs -- {ROCM_PATH}/${CMAKE_INSTALL_DATADIR}/doc/rocal/
install(FILES docs/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/doc/rocal COMPONENT runtime)
@@ -143,11 +150,11 @@ install(FILES docs/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/doc/rocal COMP
# test package
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/data DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
-install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp_api_tests/ DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
-# CTest - Needs RPP Installed
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/cpp_api/ DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/test COMPONENT test)
+# CTest - Needs rocAL Installed
enable_testing()
include(CTest)
-add_subdirectory(tests/cpp_api_tests)
+add_subdirectory(tests/cpp_api)
# set package information
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
@@ -180,8 +187,6 @@ set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan)
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime)
-install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-dev COMPONENT dev)
-install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-test COMPONENT test)
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
@@ -194,8 +199,20 @@ endif()
set(ROCAL_DEBIAN_PACKAGE_LIST "rpp, mivisionx")
set(ROCAL_RPM_PACKAGE_LIST "rpp, mivisionx")
# Set the dev dependent packages
-set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "rpp-dev, mivisionx-dev, liblmdb-dev")
-set(ROCAL_RPM_DEV_PACKAGE_LIST "rpp-devel, mivisionx-devel, lmdb-devel")
+set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "rpp-dev, mivisionx-dev, liblmdb-dev, libprotobuf-dev")
+set(ROCAL_RPM_DEV_PACKAGE_LIST "rpp-devel, mivisionx-devel, lmdb-devel, protobuf-devel")
+
+# Add rocDecode deps for HW decode to the runtime/dev package lists, unless /etc/os-release contains "CentOS-7"
+if (EXISTS "/etc/os-release")
+ file(READ "/etc/os-release" OS_RELEASE)
+ string(REGEX MATCH "CentOS-7" CENTOS_7_FOUND "${OS_RELEASE}") # quoted: stays one argument even if the file is empty or contains ';'
+ if(NOT CENTOS_7_FOUND)
+ set(ROCAL_DEBIAN_PACKAGE_LIST "${ROCAL_DEBIAN_PACKAGE_LIST}, rocdecode")
+ set(ROCAL_RPM_PACKAGE_LIST "${ROCAL_RPM_PACKAGE_LIST}, rocdecode")
+ set(ROCAL_DEBIAN_DEV_PACKAGE_LIST "${ROCAL_DEBIAN_DEV_PACKAGE_LIST}, rocdecode-dev")
+ set(ROCAL_RPM_DEV_PACKAGE_LIST "${ROCAL_RPM_DEV_PACKAGE_LIST}, rocdecode-devel")
+ endif()
+endif()
# '%{?dist}' breaks manual builds on debian systems due to empty Provides
execute_process(
@@ -231,21 +248,17 @@ set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${ROCAL_RPM_PACKAGE_LIST}")
set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}")
-set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}")
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_RUNTIME_PACKAGE_NAME}, ${ROCAL_RPM_DEV_PACKAGE_LIST}")
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel")
-set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel")
# RPM package - specific variable for ASAN
set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${ROCAL_RPM_PACKAGE_LIST}" )
set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan")
-set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan")
# RPM package specific variable for ASAN
set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" )
set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" )
set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test")
-set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test")
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core," "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
@@ -271,10 +284,10 @@ set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
set(CPACK_RPM_PACKAGE_RELEASE "local")
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
- set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
+ set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
endif()
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
- set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
+ set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
endif()
if(APPLE)
diff --git a/README.md b/README.md
index 37d2edc5e..4bf9eda92 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ rocAL can be currently used to perform the following operations either with rand
Blend |
- Blur (Gaussian 3x3) |
+ Blur (Gaussian 3x3) |
Brightness |
Color Temperature |
@@ -24,31 +24,31 @@ rocAL can be currently used to perform the following operations either with rand
CropResize |
- Exposure Modification |
+ Exposure Modification |
Fisheye Lens |
Flip (Horizontal, Vertical and Both) |
Fog |
- Gamma |
+ Gamma |
Hue |
Jitter |
Lens Correction |
- Pixelization |
+ Pixelization |
Raindrops |
Random Crop |
Resize |
- Resize Crop Mirror |
+ Resize Crop Mirror |
Rotation |
Salt And Pepper Noise |
Saturation |
- Snowflakes |
+ Snowflakes |
Vignette |
Warp Affine |
@@ -60,91 +60,157 @@ rocAL can be currently used to perform the following operations either with rand
+ Ubuntu - `20.04` / `22.04`
+ CentOS - `7`
+ RedHat - `8` / `9`
- + SLES - `15-SP4`
-* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
-* Install ROCm with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32`
-* [RPP](https://github.com/ROCm/rpp)
-* [AMD OpenVX™](https://github.com/ROCm/MIVisionX/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media` - MIVisionX Components
-* [Turbo JPEG](https://libjpeg-turbo.org/) - Version 2.0.6.2 from `https://github.com/rrawther/libjpeg-turbo.git`
-* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher
-* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher
-* [LMBD Library](http://www.lmdb.tech/doc/)
-* [RapidJSON](https://github.com/Tencent/rapidjson)
-* [PyBind11](https://github.com/pybind/pybind11)
-* [HIP](https://github.com/ROCm/HIP)
-* OpenMP
-* C++17
-
-## Build and install instructions
-
-* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
-* Install ROCm with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32`
+ + SLES - `15-SP5`
-### Package install
+* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
+> [!IMPORTANT]
+> `gfx908` or higher GPU required
-Install rocAL runtime, development, and test packages.
-* Runtime package - `rocal` only provides the dynamic libraries
-* Development package - `rocal-dev`/`rocal-devel` provides the libraries, executables, header files, and samples
-* Test package - `rocal-test` provides ctest to verify installation
+* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm
+> [!IMPORTANT]
+> `sudo amdgpu-install --usecase=rocm`
-##### On `Ubuntu`
+* [HIP](https://github.com/ROCm/HIP)
```shell
- sudo apt-get install rocal rocal-dev rocal-test
+ sudo apt install rocm-hip-runtime-dev
```
-##### On `CentOS`/`RedHat`
+
+* [RPP](https://github.com/ROCm/rpp)
```shell
- sudo yum install rocal rocal-devel rocal-test
+ sudo apt install rpp-dev
```
-##### On `SLES`
+
+* MIVisionX Components: [AMD OpenVX™](https://github.com/ROCm/MIVisionX/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media`
```shell
- sudo zypper install rocal rocal-devel rocal-test
+ sudo apt install mivisionx-dev
+ ```
+
+* [rocDecode](https://github.com/ROCm/rocDecode)
+ ```shell
+ sudo apt install rocdecode-dev
+ ```
+
+* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher
+ ```shell
+ sudo apt install half
+ ```
+
+* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher
+ ```shell
+ sudo apt install libprotobuf-dev
+ ```
+
+* [LMDB Library](http://www.lmdb.tech/doc/)
+ ```shell
+ sudo apt install liblmdb-dev
```
- **Note:**
- * Package install requires `Turbo JPEG`, `PyBind 11 v2.10.4` and `Protobuf V3.12.4` manual install
- * `CentOS`/`RedHat`/`SLES` requires `FFMPEG Dev` package manual install
+* Python3 and Python3 PIP
+ ```shell
+ sudo apt install python3-dev python3-pip
+ ```
-#### Source build and install
+* Python Wheel
+ ```shell
+ pip3 install wheel
+ ```
-### Prerequisites setup script for Linux - rocAL-setup.py
+* [PyBind11](https://github.com/pybind/pybind11)
+ * Source: `https://github.com/pybind/pybind11`
+ * Tag: [v2.11.1](https://github.com/pybind/pybind11/releases/tag/v2.11.1)
-For the convenience of the developer, we here provide the setup script which will install all the dependencies required by this project.
+* [Turbo JPEG](https://libjpeg-turbo.org/)
+ * Source: `https://github.com/libjpeg-turbo/libjpeg-turbo.git`
+ * Tag: [3.0.2](https://github.com/libjpeg-turbo/libjpeg-turbo/releases/tag/3.0.2)
-**NOTE:** This script only needs to be executed once.
+* [RapidJSON](https://github.com/Tencent/rapidjson)
+ * Source: `https://github.com/Tencent/rapidjson.git`
+ * Tag: `master`
-### Prerequisites for running the script
+* **Optional**: FFMPEG
+ ```shell
+ sudo apt install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev
+ ```
-* Linux distribution
- + Ubuntu - `20.04` / `22.04`
- + CentOS - `7`
- + RedHat - `8` / `9`
- + SLES - `15-SP4`
-* [ROCm supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
-* Install ROCm with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=graphics,rocm --no-32`
+* **Optional**: OpenCV
+ ```shell
+ sudo apt install libopencv-dev
+ ```
+
+> [!IMPORTANT]
+> * Compiler features required
+> * OpenMP
+> * C++17
-**usage:**
+>[!NOTE]
+> * All package installs are shown with the `apt` package manager. Use the appropriate package manager for your operating system.
+
+### Prerequisites setup script
+
+For your convenience, we provide the setup script, [rocAL-setup.py](https://github.com/ROCm/rocAL/blob/develop/rocAL-setup.py), which installs all required dependencies. Run this script only once.
```shell
-python rocAL-setup.py --directory [setup directory - optional (default:~/)]
- --opencv [OpenCV Version - optional (default:4.6.0)]
- --protobuf [ProtoBuf Version - optional (default:3.12.4)]
- --pybind11 [PyBind11 Version - optional (default:v2.10.4)]
- --reinstall [Remove previous setup and reinstall (default:OFF)[options:ON/OFF]]
- --backend [rocAL Dependency Backend - optional (default:HIP) [options:OCL/HIP]]
- --rocm_path [ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required]
+python rocAL-setup.py --directory [setup directory - optional (default:~/)]
+ --rocm_path [ROCm Installation Path - optional (default:/opt/rocm)]
+ --backend [rocAL Dependency Backend - optional (default:HIP) [options:OCL/HIP]]
+ --ffmpeg [FFMPEG Installation - optional (default:OFF)[options:ON/OFF]]
+ --reinstall [Reinstall - optional (default:OFF)[options:ON/OFF]]
```
-**Note:**
- * **ROCm upgrade** requires the setup script **rerun**.
-### Using rocAL-setup.py
-
+## Installation instructions
+
+The installation process uses the following steps:
+
+* [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification
+
+* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm`
+
+* Use **either** [package install](#package-install) **or** [source install](#source-install) as described below.
+
+### Package install
+
+Install rocAL runtime, development, and test packages.
+
+* Runtime package - `rocal` only provides the dynamic libraries
+* Development package - `rocal-dev`/`rocal-devel` provides the libraries, executables, header files, and samples
+* Test package - `rocal-test` provides ctest to verify installation
+
+#### `Ubuntu`
+
+ ```shell
+ sudo apt-get install rocal rocal-dev rocal-test
+ ```
+
+#### `CentOS`/`RedHat`
+
+ ```shell
+ sudo yum install rocal rocal-devel rocal-test
+ ```
+
+#### `SLES`
+
+ ```shell
+ sudo zypper install rocal rocal-devel rocal-test
+ ```
+
+>[!NOTE]
+> * Package install requires `TurboJPEG` and `RapidJSON` manual install
+> * `CentOS`/`RedHat`/`SLES` requires additional `FFMPEG Dev` package manual install
+
+### Source install
+
+To build rocAL from source and install, follow the steps below:
+
* Clone rocAL source code
```shell
git clone https://github.com/ROCm/rocAL.git
```
+
**Note:** rocAL has support for two GPU backends: **OPENCL** and **HIP**:
+#### HIP Backend
+
* Instructions for building rocAL with the **HIP** GPU backend (default GPU backend):
+ run the setup script to install all the dependencies required by the **HIP** GPU backend:
```shell
@@ -161,32 +227,38 @@ git clone https://github.com/ROCm/rocAL.git
sudo cmake --build . --target PyPackageInstall
sudo make install
```
+>[!NOTE]
+> * `PyPackageInstall` used for rocal_pybind installation
+> * `sudo` required for pybind installation
+
+>[!IMPORTANT]
+> * Use `-D PYTHON_VERSION_SUGGESTED=3.x` with `cmake` for using a specific Python3 version if required.
+> * Use `-D AUDIO_SUPPORT=ON` to enable Audio features. Audio support will be enabled by default with ROCm versions > 6.2
+ run tests - [test option instructions](https://github.com/ROCm/MIVisionX/wiki/CTest)
```shell
make test
```
- **Note:**
- + `PyPackageInstall` used for rocal_pybind installation
- + `sudo` required for pybind installation
-
+>[!NOTE]
+> To run tests with verbose option, use `make test ARGS="-VV"`.
+
+#### OpenCL Backend
* Instructions for building rocAL with [**OPENCL** GPU backend](https://github.com/ROCm/rocAL/wiki/OpenCL-Backend)
- **Note:**
- + rocAL_pybind is not supported on OPENCL backend
- + rocAL cannot be installed for both GPU backends in the same default folder (i.e., /opt/rocm/)
- + if an app interested in installing rocAL with both GPU backends, then add **-DCMAKE_INSTALL_PREFIX** in the cmake
- commands to install rocAL with OPENCL and HIP backends into two separate custom folders.
+>[!NOTE]
+> + rocAL_pybind is not supported on OPENCL backend
+> + rocAL cannot be installed for both GPU backends in the same default folder (i.e., /opt/rocm/)
+> + if an app is interested in installing rocAL with both GPU backends, add **-DCMAKE_INSTALL_PREFIX** to the cmake commands to install rocAL with OPENCL and HIP backends into two separate custom folders.
## Verify installation
* The installer will copy
- + Executables into `/opt/rocm/bin`
- + Libraries into `/opt/rocm/lib`
- + Header files into `/opt/rocm/include/rocal`
- + Apps, & Samples folder into `/opt/rocm/share/rocal`
- + Documents folder into `/opt/rocm/share/doc/rocal`
+ * Executables into `/opt/rocm/bin`
+ * Libraries into `/opt/rocm/lib`
+ * Header files into `/opt/rocm/include/rocal`
+ * Apps, & Samples folder into `/opt/rocm/share/rocal`
+ * Documents folder into `/opt/rocm/share/doc/rocal`
### Verify with rocal-test package
@@ -197,7 +269,14 @@ mkdir rocAL-test && cd rocAL-test
cmake /opt/rocm/share/rocal/test/
ctest -VV
```
-
+>[!NOTE]
+> * Make sure all rocAL required libraries are in your PATH
+> * `RHEL`/`SLES` - Add the FFMPEG libraries to your `LD_LIBRARY_PATH`
+> + `export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64/:/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH`
+> ```shell
+> export PATH=$PATH:/opt/rocm/bin
+> export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib
+> ```
## Documentation
Run the steps below to build documentation locally.
@@ -209,7 +288,7 @@ pip3 install -r sphinx/requirements.txt
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
```
* Doxygen
-```
+```bash
doxygen .Doxyfile
```
@@ -232,17 +311,20 @@ Review all notable [changes](CHANGELOG.md#changelog) with the latest release
### Tested Configurations
* Linux distribution
- + Ubuntu - `20.04` / `22.04`
- + CentOS - `7`
- + RedHat - `8` / `9`
- + SLES - `15-SP4`
-* ROCm: rocm-core - `5.7.0.50700-6`
+ * Ubuntu - `20.04` / `22.04`
+ * CentOS - `7`
+ * RedHat - `8` / `9`
+ * SLES - `15-SP5`
+* ROCm: rocm-core - `6.1.0.60100-64`
* RPP - `rpp` & `rpp-dev`/`rpp-devel`
* MIVisionX - `mivisionx` & `mivisionx-dev`/`mivisionx-devel`
-* Protobuf - [V3.12.4](https://github.com/protocolbuffers/protobuf/releases/tag/v3.12.4)
-* OpenCV - [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0)
-* FFMPEG - [n4.4.2](https://github.com/FFmpeg/FFmpeg/releases/tag/n4.4.2)
-* RapidJSON- [master](https://github.com/Tencent/rapidjson)
-* PyBind11 - [v2.10.4](https://github.com/pybind/pybind11)
-* rocAL Setup Script - `V1.1.0`
+* rocDecode - `rocdecode` & `rocdecode-dev`/`rocdecode-devel`
+* Protobuf - `libprotobuf-dev`/`protobuf-devel`
+* RapidJSON - `https://github.com/Tencent/rapidjson`
+* Turbo JPEG - [Version 3.0.2](https://libjpeg-turbo.org/)
+* PyBind11 - [v2.11.1](https://github.com/pybind/pybind11)
+* FFMPEG - `ffmpeg` dev package
+* OpenCV - `libopencv-dev` / [4.6.0](https://github.com/opencv/opencv/releases/tag/4.6.0)
+* libsndfile - [1.0.31](https://github.com/libsndfile/libsndfile/releases/tag/1.0.31)
+* rocAL Setup Script - `V2.5.0`
* Dependencies for all the above packages
diff --git a/apps/README.md b/apps/README.md
deleted file mode 100644
index 4205215fe..000000000
--- a/apps/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Applications
-
-rocAL has several applications built on top of AMD optimized libraries that can be used as prototypes or used as models to develop products.
-
-## Prerequisites
-* [rocAL](https://github.com/ROCm/rocAL)
-
-## Image Augmentation
-
-This sample [application](./image_augmentation#image-augmentation-application) demonstrates the basic usage of rocAL's C API to load JPEG images from the disk and modify them in different possible ways and displays the output images.
-
-
diff --git a/apps/image_augmentation/image_augmentation.cpp b/apps/image_augmentation/image_augmentation.cpp
deleted file mode 100644
index 91184f805..000000000
--- a/apps/image_augmentation/image_augmentation.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
-MIT License
-
-Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#include
-#include
-#include
-#include
-
-#include
-using namespace cv;
-
-#if USE_OPENCV_4
-#define CV_FONT_HERSHEY_DUPLEX FONT_HERSHEY_DUPLEX
-#define CV_WINDOW_AUTOSIZE WINDOW_AUTOSIZE
-#define CV_RGB2BGR cv::COLOR_BGR2RGB
-#else
-#include
-#endif
-
-#include "rocal_api.h"
-
-#define DISPLAY
-using namespace std::chrono;
-
-
-int main(int argc, const char ** argv)
-{
- // check command-line usage
- const int MIN_ARG_COUNT = 2;
- if(argc < MIN_ARG_COUNT) {
- printf( "Usage: image_augmentation \
- decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count \n" );
- return -1;
- }
- int argIdx = 0;
- const char * folderPath1 = argv[++argIdx];
- int video_mode = 0;// 0 means no video decode, 1 means hardware, 2 means software decoding
- bool display = 1;// Display the images
- int aug_depth = 1;// how deep is the augmentation tree
- int rgb = 1;// process color images
- int decode_width = 0;
- int decode_height = 0;
- bool processing_device = 1;
- size_t shard_count = 2;
- int shuffle = 0;
- int dec_mode = 0;
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- processing_device = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- decode_width = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- decode_height = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- video_mode = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- rgb = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- display = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- shard_count = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- shuffle = atoi(argv[++argIdx]);
-
- if(argc >= argIdx+MIN_ARG_COUNT)
- dec_mode = atoi(argv[++argIdx]);
-
- int inputBatchSize = 2;
-
- std::cout << ">>> Running on " << (processing_device?"GPU":"CPU") << std::endl;
-
- RocalImageColor color_format = (rgb != 0) ? RocalImageColor::ROCAL_COLOR_RGB24 : RocalImageColor::ROCAL_COLOR_U8;
-
- auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1);
-
- if(rocalGetStatus(handle) != ROCAL_OK)
- {
- std::cout << "Could not create the rocAL contex\n";
- return -1;
- }
-
- RocalDecoderType dec_type = (RocalDecoderType) dec_mode;
-
- /*>>>>>>>>>>>>>>>> Creating rocAL parameters <<<<<<<<<<<<<<<<*/
-
- // Creating uniformly distributed random objects to override some of the default augmentation parameters
- RocalFloatParam rand_crop_area = rocalCreateFloatUniformRand( 0.3, 0.5 );
- RocalIntParam color_temp_adj = rocalCreateIntParameter(0);
-
- // Creating a custom random object to set a limited number of values to randomize the rotation angle
- const size_t num_values = 3;
- float values[num_values] = {0,10,135};
- double frequencies[num_values] = {1, 5, 5};
-
- RocalFloatParam rand_angle = rocalCreateFloatRand( values , frequencies, num_values);
-
-
- /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/
- RocalImage input1;
-
-
- if(video_mode != 0)
- {
- unsigned sequence_length = 3;
- unsigned frame_step = 3;
- unsigned frame_stride = 1;
- if (decode_height <= 0 || decode_width <= 0)
- {
- std::cout << "Output width and height is needed for video decode\n";
- return -1;
- }
- input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? RocalDecodeDevice::ROCAL_HW_DECODE:RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false);
- }
- else
- {
- // The jpeg file loader can automatically select the best size to decode all images to that size
- // User can alternatively set the size or change the policy that is used to automatically find the size
- if (dec_type == RocalDecoderType::ROCAL_DECODER_OPENCV) std::cout << "Using OpenCV decoder for Jpeg Source\n";
- if(decode_height <= 0 || decode_width <= 0)
- input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false);
- else
- input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type);
-
- }
-
- if(rocalGetStatus(handle) != ROCAL_OK)
- {
- std::cout << "JPEG source could not initialize : "<>>>>>>>>>>>>>>>>>> Diplay using OpenCV <<<<<<<<<<<<<<<<<*/
- //initializations for logos and heading
- cv::Mat AMD_Epyc_Black_resize, AMD_ROCm_Black_resize;
- AMD_Epyc_Black_resize = cv::imread("../../../samples/images/amd-epyc-black-resize.png");
- AMD_ROCm_Black_resize = cv::imread("../../../samples/images/rocm-black-resize.png");
- int fontFace = CV_FONT_HERSHEY_DUPLEX;
- int thickness = 1.3;
- std::string bufferName = "rocAL Image Augmentation";
-
- int h = rocalGetAugmentationBranchCount(handle) * rocalGetOutputHeight(handle);
- int w = rocalGetOutputWidth(handle);
- int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24 ) ? 3 : 1);
- std::cout << "output width "<< w << " output height "<< h << " color planes "<< p << std::endl;
- const unsigned number_of_cols = video_mode ? 1 : 10;
- auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24 ) ? CV_8UC3 : CV_8UC1);
- cv::Mat mat_output(h+AMD_ROCm_Black_resize.rows, w*number_of_cols, cv_color_format);
- cv::Mat mat_input(h, w, cv_color_format);
- cv::Mat mat_color;
- int col_counter = 0;
- if (display)
- cv::namedWindow( "output", CV_WINDOW_AUTOSIZE );
-
- //adding heading to output display
- cv::Rect roi = Rect(0,0,w*number_of_cols,AMD_Epyc_Black_resize.rows);
- mat_output(roi).setTo(cv::Scalar(128,128,128));
- putText(mat_output, bufferName, Point(250, 70), fontFace, 1.2, cv::Scalar(66,13,9), thickness,5);
-
- //adding logos to output display
- cv::Mat mat_output_ROI = mat_output(cv::Rect(w*number_of_cols - AMD_Epyc_Black_resize.cols,0, AMD_Epyc_Black_resize.cols, AMD_Epyc_Black_resize.rows));
- cv::Mat mat_output_ROI_1 = mat_output(cv::Rect(0,0, AMD_ROCm_Black_resize.cols, AMD_ROCm_Black_resize.rows));
- AMD_Epyc_Black_resize.copyTo(mat_output_ROI);
- AMD_ROCm_Black_resize.copyTo(mat_output_ROI_1);
-
- high_resolution_clock::time_point t1 = high_resolution_clock::now();
- int counter = 0;
- int color_temp_increment = 1;
- while (!rocalIsEmpty(handle))
- {
- if(rocalRun(handle) != 0)
- break;
-
- if(rocalGetIntValue(color_temp_adj) <= -99 || rocalGetIntValue(color_temp_adj)>=99)
- color_temp_increment *= -1;
-
- rocalUpdateIntParameter(rocalGetIntValue(color_temp_adj)+color_temp_increment, color_temp_adj);
-
- rocalCopyToOutput(handle, mat_input.data, h*w*p);
- counter += inputBatchSize;
- if(!display)
- continue;
-
- mat_input.copyTo(mat_output(cv::Rect( col_counter*w, AMD_ROCm_Black_resize.rows, w, h)));
- if(color_format == RocalImageColor::ROCAL_COLOR_RGB24 )
- {
- cv::cvtColor(mat_output, mat_color, CV_RGB2BGR);
- cv::imshow("output",mat_color);
- }
- else
- {
- cv::imshow("output",mat_output);
- }
- cv::waitKey(1);
- col_counter = (col_counter+1)%number_of_cols;
- }
- high_resolution_clock::time_point t2 = high_resolution_clock::now();
- auto dur = duration_cast( t2 - t1 ).count();
- auto rocal_timing = rocalGetTimingInfo(handle);
- std::cout << "Load time "<< rocal_timing.load_time << std::endl;
- std::cout << "Decode time "<< rocal_timing.decode_time << std::endl;
- std::cout << "Process time "<< rocal_timing.process_time << std::endl;
- std::cout << "Transfer time "<< rocal_timing.transfer_time << std::endl;
- std::cout << ">>>>> "<< counter << " images/frames Processed. Total Elapsed Time " << dur/1000000 << " sec " << dur%1000000 << " us " << std::endl;
- rocalRelease(handle);
- mat_input.release();
- mat_output.release();
- return 0;
-}
diff --git a/cmake/FindMIVisionX.cmake b/cmake/FindMIVisionX.cmake
index 52360537d..dcf89e67b 100644
--- a/cmake/FindMIVisionX.cmake
+++ b/cmake/FindMIVisionX.cmake
@@ -33,11 +33,9 @@ find_path(MIVisionX_INCLUDE_DIRS
NAMES vx_ext_amd.h
HINTS
$ENV{MIVisionX_PATH}/include/mivisionx
- $ENV{ROCM_PATH}/include/mivisionx
PATHS
${MIVisionX_PATH}/include/mivisionx
/usr/include
- $ENV{ROCM_PATH}/include/mivisionx
${ROCM_PATH}/include/mivisionx
)
mark_as_advanced(MIVisionX_INCLUDE_DIRS)
@@ -47,12 +45,9 @@ find_library(OPENVX_LIBRARIES
NAMES libopenvx${SHARED_LIB_TYPE}
HINTS
$ENV{MIVisionX_PATH}/lib
- $ENV{MIVisionX_PATH}/lib64
PATHS
${MIVisionX_PATH}/lib
- ${MIVisionX_PATH}/lib64
/usr/lib
- $ENV{ROCM_PATH}/lib
${ROCM_PATH}/lib
)
mark_as_advanced(OPENVX_LIBRARIES)
@@ -62,30 +57,13 @@ find_library(VXRPP_LIBRARIES
NAMES libvx_rpp${SHARED_LIB_TYPE}
HINTS
$ENV{MIVisionX_PATH}/lib
- $ENV{MIVisionX_PATH}/lib64
PATHS
${MIVisionX_PATH}/lib
- ${MIVisionX_PATH}/lib64
/usr/lib
- $ENV{ROCM_PATH}/lib
${ROCM_PATH}/lib
)
mark_as_advanced(VXRPP_LIBRARIES)
-find_path(MIVisionX_LIBRARIES_DIRS
- NAMES libopenvx${SHARED_LIB_TYPE}
- HINTS
- $ENV{MIVisionX_PATH}/lib
- $ENV{MIVisionX_PATH}/lib64
- PATHS
- ${MIVisionX_PATH}/lib
- ${MIVisionX_PATH}/lib64
- /usr/lib
- $ENV{ROCM_PATH}/lib
- ${ROCM_PATH}/lib
-)
-mark_as_advanced(MIVisionX_LIBRARIES_DIRS)
-
if(OPENVX_LIBRARIES AND MIVisionX_INCLUDE_DIRS)
set(MIVisionX_FOUND TRUE)
endif( )
@@ -95,19 +73,36 @@ find_package_handle_standard_args( MIVisionX
FOUND_VAR MIVisionX_FOUND
REQUIRED_VARS
OPENVX_LIBRARIES
- VXRPP_LIBRARIES
+ VXRPP_LIBRARIES
MIVisionX_INCLUDE_DIRS
- MIVisionX_LIBRARIES_DIRS
)
set(MIVisionX_FOUND ${MIVisionX_FOUND} CACHE INTERNAL "")
set(OPENVX_LIBRARIES ${OPENVX_LIBRARIES} CACHE INTERNAL "")
set(VXRPP_LIBRARIES ${VXRPP_LIBRARIES} CACHE INTERNAL "")
set(MIVisionX_INCLUDE_DIRS ${MIVisionX_INCLUDE_DIRS} CACHE INTERNAL "")
-set(MIVisionX_LIBRARIES_DIRS ${MIVisionX_LIBRARIES_DIRS} CACHE INTERNAL "")
if(MIVisionX_FOUND)
- message("-- ${White}Using MIVisionX -- \n\tLibraries:${OPENVX_LIBRARIES} \n\tIncludes:${MIVisionX_INCLUDE_DIRS}${ColourReset}")
+    if(VXRPP_LIBRARIES)
+        if(EXISTS "${MIVisionX_INCLUDE_DIRS}/vx_ext_rpp_version.h")
+            # Parse VX_EXT_RPP_VERSION_{MAJOR,MINOR,PATCH} from the header; quote the contents so they are not list-split
+            file(READ "${MIVisionX_INCLUDE_DIRS}/vx_ext_rpp_version.h" VX_EXT_RPP_VERSION_FILE)
+            string(REGEX MATCH "VX_EXT_RPP_VERSION_MAJOR ([0-9]*)" _ "${VX_EXT_RPP_VERSION_FILE}")
+            set(VX_EXT_RPP_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE INTERNAL "")
+            string(REGEX MATCH "VX_EXT_RPP_VERSION_MINOR ([0-9]*)" _ "${VX_EXT_RPP_VERSION_FILE}")
+            set(VX_EXT_RPP_VERSION_MINOR ${CMAKE_MATCH_1} CACHE INTERNAL "")
+            string(REGEX MATCH "VX_EXT_RPP_VERSION_PATCH ([0-9]*)" _ "${VX_EXT_RPP_VERSION_FILE}")
+            set(VX_EXT_RPP_VERSION_PATCH ${CMAKE_MATCH_1} CACHE INTERNAL "")
+            message("-- ${White}Found VX RPP Version: ${VX_EXT_RPP_VERSION_MAJOR}.${VX_EXT_RPP_VERSION_MINOR}.${VX_EXT_RPP_VERSION_PATCH}${ColourReset}")
+        else()
+            set(VX_EXT_RPP_VERSION_MAJOR 0)
+            set(VX_EXT_RPP_VERSION_MINOR 0)
+            set(VX_EXT_RPP_VERSION_PATCH 0)
+        endif()
+    else()
+        message("-- ${Yellow}VX RPP - Not Found${ColourReset}")
+    endif()
+    message("-- ${White}Using MIVisionX -- \n\tLibraries:${OPENVX_LIBRARIES} \n\tIncludes:${MIVisionX_INCLUDE_DIRS}${ColourReset}")
else()
if(MIVisionX_FIND_REQUIRED)
message(FATAL_ERROR "{Red}FindMIVisionX -- NOT FOUND${ColourReset}")
diff --git a/cmake/FindSndFile.cmake b/cmake/FindSndFile.cmake
new file mode 100644
index 000000000..e8e412d39
--- /dev/null
+++ b/cmake/FindSndFile.cmake
@@ -0,0 +1,69 @@
+################################################################################
+#
+# MIT License
+#
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+################################################################################
+# Locate sndfile.h; SNDFILE_PATH (environment or CMake variable) is searched before the system paths.
+find_path(SNDFILE_INCLUDE_DIRS
+    NAMES sndfile.h
+    HINTS
+    $ENV{SNDFILE_PATH}/include
+    PATHS
+    ${SNDFILE_PATH}/include
+    /usr/local/include
+    /usr/include
+)
+mark_as_advanced(SNDFILE_INCLUDE_DIRS)
+
+# Locate the libsndfile library under the same roots, trying the lib and lib64 sub-directories.
+find_library(SNDFILE_LIBRARIES
+    NAMES sndfile libsndfile
+    HINTS
+    $ENV{SNDFILE_PATH}/lib
+    $ENV{SNDFILE_PATH}/lib64
+    PATHS
+    ${CMAKE_SYSTEM_PREFIX_PATH}
+    ${SNDFILE_PATH}
+    /usr/local/
+    PATH_SUFFIXES lib lib64
+)
+mark_as_advanced(SNDFILE_LIBRARIES)
+
+# find_package_handle_standard_args sets SNDFILE_FOUND from the required variables below.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(SndFile
+    FOUND_VAR SNDFILE_FOUND
+    REQUIRED_VARS
+        SNDFILE_LIBRARIES
+        SNDFILE_INCLUDE_DIRS
+)
+
+set(SNDFILE_FOUND ${SNDFILE_FOUND} CACHE INTERNAL "")
+set(SNDFILE_LIBRARIES ${SNDFILE_LIBRARIES} CACHE INTERNAL "")
+set(SNDFILE_INCLUDE_DIRS ${SNDFILE_INCLUDE_DIRS} CACHE INTERNAL "")
+
+if(SNDFILE_FOUND)
+    message("-- ${White}Using SndFile -- \n\tLibraries:${SNDFILE_LIBRARIES} \n\tIncludes:${SNDFILE_INCLUDE_DIRS}${ColourReset}")
+else()
+    message( "-- ${Yellow}NOTE: FindSndFile failed to find -- SndFile${ColourReset}" )
+endif()
diff --git a/cmake/FindTurboJpeg.cmake b/cmake/FindTurboJpeg.cmake
index 60cd9cf6b..83493049b 100644
--- a/cmake/FindTurboJpeg.cmake
+++ b/cmake/FindTurboJpeg.cmake
@@ -34,8 +34,8 @@ find_path(TurboJpeg_INCLUDE_DIRS
HINTS
$ENV{TURBO_JPEG_PATH}/include
PATHS
- ${TURBO_JPEG_PATH}/include
/usr/include
+ ${TURBO_JPEG_PATH}/include
/opt/libjpeg-turbo/include
)
mark_as_advanced(TurboJpeg_INCLUDE_DIRS)
@@ -46,27 +46,39 @@ find_library(TurboJpeg_LIBRARIES
$ENV{TURBO_JPEG_PATH}/lib
$ENV{TURBO_JPEG_PATH}/lib64
PATHS
+ /usr/lib
${TURBO_JPEG_PATH}/lib
${TURBO_JPEG_PATH}/lib64
- /usr/lib
/opt/libjpeg-turbo/lib
)
mark_as_advanced(TurboJpeg_LIBRARIES)
-find_path(TurboJpeg_LIBRARIES_DIRS
- NAMES libturbojpeg${SHARED_LIB_TYPE}
+# Libjpeg
+find_path(LIBJPEG_INCLUDE_DIR
+ NAMES jpeglib.h
+ HINTS
+ $ENV{TURBO_JPEG_PATH}/include
+ PATHS
+ /usr/include
+ ${TURBO_JPEG_PATH}/include
+ /opt/libjpeg-turbo/include
+)
+mark_as_advanced(LIBJPEG_INCLUDE_DIR)
+
+find_library(LIBJPEG_LIBRARIES
+ NAMES libjpeg${SHARED_LIB_TYPE}
HINTS
$ENV{TURBO_JPEG_PATH}/lib
$ENV{TURBO_JPEG_PATH}/lib64
PATHS
+ /usr/lib
${TURBO_JPEG_PATH}/lib
${TURBO_JPEG_PATH}/lib64
- /usr/lib
/opt/libjpeg-turbo/lib
)
-mark_as_advanced(TurboJpeg_LIBRARIES_DIRS)
+mark_as_advanced(LIBJPEG_LIBRARIES)
-if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS)
+if(TurboJpeg_LIBRARIES AND TurboJpeg_INCLUDE_DIRS AND LIBJPEG_INCLUDE_DIR AND LIBJPEG_LIBRARIES)
set(TurboJpeg_FOUND TRUE)
endif( )
@@ -76,13 +88,15 @@ find_package_handle_standard_args( TurboJpeg
REQUIRED_VARS
TurboJpeg_LIBRARIES
TurboJpeg_INCLUDE_DIRS
- TurboJpeg_LIBRARIES_DIRS
+ LIBJPEG_INCLUDE_DIR
+ LIBJPEG_LIBRARIES
)
set(TurboJpeg_FOUND ${TurboJpeg_FOUND} CACHE INTERNAL "")
set(TurboJpeg_LIBRARIES ${TurboJpeg_LIBRARIES} CACHE INTERNAL "")
set(TurboJpeg_INCLUDE_DIRS ${TurboJpeg_INCLUDE_DIRS} CACHE INTERNAL "")
-set(TurboJpeg_LIBRARIES_DIRS ${TurboJpeg_LIBRARIES_DIRS} CACHE INTERNAL "")
+set(LIBJPEG_LIBRARIES ${LIBJPEG_LIBRARIES} CACHE INTERNAL "")
+set(LIBJPEG_INCLUDE_DIR ${LIBJPEG_INCLUDE_DIR} CACHE INTERNAL "")
if(TurboJpeg_FOUND)
message("-- ${White}Using Turbo JPEG -- \n\tLibraries:${TurboJpeg_LIBRARIES} \n\tIncludes:${TurboJpeg_INCLUDE_DIRS}${ColourReset}")
diff --git a/docker/README.md b/docker/README.md
index 6be6057bb..7275cba3c 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -4,6 +4,23 @@ Docker is a set of platform as a service (PaaS) products that use OS-level virtu
## Build - dockerfiles
+```shell
+sudo docker build --build-arg {ARG_1_NAME}={ARG_1_VALUE} [--build-arg {ARG_2_NAME}={ARG_2_VALUE}] -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} .
```
-sudo docker build --build-arg {ARG_NAME}={ARG_VALUE} -f {DOCKER_FILE_NAME}.dockerfile -t {DOCKER_IMAGE_NAME} .
+
+## ARG options
+
+* PyTorch docker:
+
```
+PYTORCH_VERSION: rocm/pytorch docker tag
+ROCAL_PYTHON_VERSION_SUGGESTED: Python version if required for rocal_pybind
+```
+
+* TensorFlow docker:
+
+```
+TENSORFLOW_VERSION: rocm/tensorflow docker tag
+ROCAL_PYTHON_VERSION_SUGGESTED: Python version if required for rocal_pybind
+```
+
diff --git a/docker/rocal-on-rhel-09.dockerfile b/docker/rocal-on-rhel-09.dockerfile
deleted file mode 100644
index a23e0775a..000000000
--- a/docker/rocal-on-rhel-09.dockerfile
+++ /dev/null
@@ -1,37 +0,0 @@
-FROM compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-rel-5.4:67-rhel-9.x-stg1
-
-ENV ROCAL_DEPS_ROOT=/rocAL-deps
-WORKDIR $ROCAL_DEPS_ROOT
-
-RUN sudo yum update -y
-
-# install rocAL base dependencies
-RUN sudo yum -y install gcc g++ cmake pkg-config git kernel-devel
-
-# install OpenCV
-RUN sudo yum install opencv opencv-devel
-
-# install rocAL neural net dependencies
-RUN sudo yum -y install rocblas rocblas-devel miopen-hip miopen-hip-devel migraphx migraphx-devel
-
-
-# install rocAL dependencies
-RUN apt-get -y install curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang
-RUN mkdir rocAL_deps && cd rocAL_deps && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \
- unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd
-RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev && \
- git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \
- -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd
-RUN apt-get -y install sqlite3 libsqlite3-dev libtool build-essential
-RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
- ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
-RUN git clone -b 0.99 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \
- cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd
-
-ENV ROCAL_WORKSPACE=/workspace
-WORKDIR $ROCAL_WORKSPACE
-
-# Install MIVisionX
-RUN git clone https://github.com/ROCm/MIVisionX.git && \
- mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install
\ No newline at end of file
diff --git a/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile b/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile
deleted file mode 100644
index 79f9c1894..000000000
--- a/docker/rocal-on-ubuntu-20-with-pytorch-with-mesa.dockerfile
+++ /dev/null
@@ -1,69 +0,0 @@
-ARG PYTORCH_VERSION=latest
-FROM rocm/pytorch:${PYTORCH_VERSION}
-
-ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4/ubuntu/focal/amdgpu-install_5.4.50400-1_all.deb
-ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50400-1_all.deb
-
-ENV ROCAL_DEPS_ROOT=/rocAL-deps
-WORKDIR $ROCAL_DEPS_ROOT
-
-RUN apt-get update -y
-
-# install rocAL base dependencies
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git
-
-# install ROCm for rocAL OpenCL/HIP dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration && \
- sudo apt-get -y clean && dpkg --add-architecture i386 && \
- sudo rm -rf /etc/apt/sources.list.d/amdgpu.list && \
- sudo rm -rf /etc/apt/sources.list.d/rocm.list && \
- wget ${ROCM_INSTALLER_REPO} && \
- sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \
- sudo apt-get update -y && \
- sudo amdgpu-install -y --usecase=graphics,rocm
-
-# install OpenCV
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \
- libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip && \
- mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \
- mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd
-
-# install FFMPEG
-ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \
- libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \
- nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \
- wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \
- ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \
- make -j8 && sudo make install && cd
-
-# install rocAL neural net dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \
- mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \
- unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd
-
-# install rocAL dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
- git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \
- -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \
- cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
- ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
-ENV CUPY_INSTALL_USE_HIP=1
-ENV ROCM_HOME=/opt/rocm
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
- git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
- cmake ../ && make -j4 && sudo make install && cd ../../ && \
- pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
- cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
- git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \
- pip install -e . --no-cache-dir -vvvv && pip install numpy==1.21
-
-ENV ROCAL_WORKSPACE=/workspace
-WORKDIR $ROCAL_WORKSPACE
-
-# install MIVisionX
-RUN git clone https://github.com/ROCm/MIVisionX.git && \
- mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install
diff --git a/docker/rocal-on-ubuntu-20.dockerfile b/docker/rocal-on-ubuntu-20.dockerfile
index 7e0f57fd6..f6d2bd1e4 100644
--- a/docker/rocal-on-ubuntu-20.dockerfile
+++ b/docker/rocal-on-ubuntu-20.dockerfile
@@ -1,7 +1,7 @@
FROM ubuntu:20.04
-ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4.1/ubuntu/focal/amdgpu-install_5.4.50401-1_all.deb
-ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50401-1_all.deb
+ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/6.1.1/ubuntu/focal/amdgpu-install_6.1.60101-1_all.deb
+ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_6.1.60101-1_all.deb
ENV ROCAL_DEPS_ROOT=/rocAL-deps
WORKDIR $ROCAL_DEPS_ROOT
@@ -13,11 +13,11 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g
# install ROCm for rocAL OpenCL/HIP dependency
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration && \
- sudo apt-get -y clean && dpkg --add-architecture i386 && \
+ sudo apt-get -y clean && \
wget ${ROCM_INSTALLER_REPO} && \
sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \
sudo apt-get update -y && \
- sudo amdgpu-install -y --usecase=graphics,rocm
+ sudo amdgpu-install -y --usecase=rocm
# install OpenCV
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \
@@ -26,41 +26,41 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-
mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd
# install FFMPEG
-ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \
- libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \
- nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \
- wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \
- ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \
- make -j8 && sudo make install && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev
# install rocAL neural net dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \
- mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \
- unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev
# install rocAL dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
- git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \
- -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \
- cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
- ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
+        git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
+        cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && make -j4 && sudo make install && cd ../../ && \
+        git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
+        ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../
+
ENV CUPY_INSTALL_USE_HIP=1
ENV ROCM_HOME=/opt/rocm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
cmake ../ && make -j4 && sudo make install && cd ../../ && \
- pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
+ pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
- git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \
- pip install -e . --no-cache-dir -vvvv && pip install numpy==1.21
+ pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
+ env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
+ git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
+ pip install -e . --no-cache-dir -vvvv
+
+# install MIVisionX
+RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \
+ mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install
+
+# install rocDecode
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev
ENV ROCAL_WORKSPACE=/workspace
WORKDIR $ROCAL_WORKSPACE
-# install MIVisionX
-RUN git clone https://github.com/ROCm/MIVisionX.git && \
- mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install
+# Install rocAL
+RUN pip install --upgrade pip
+RUN git clone -b develop https://github.com/ROCm/rocAL && \
+ mkdir build && cd build && cmake ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install
\ No newline at end of file
diff --git a/docker/rocal-on-ubuntu-22.dockerfile b/docker/rocal-on-ubuntu-22.dockerfile
index e68d3d4b9..c98d55278 100644
--- a/docker/rocal-on-ubuntu-22.dockerfile
+++ b/docker/rocal-on-ubuntu-22.dockerfile
@@ -1,7 +1,7 @@
FROM ubuntu:22.04
-ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/5.4.1/ubuntu/jammy/amdgpu-install_5.4.50401-1_all.deb
-ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_5.4.50401-1_all.deb
+ARG ROCM_INSTALLER_REPO=https://repo.radeon.com/amdgpu-install/6.1.1/ubuntu/jammy/amdgpu-install_6.1.60101-1_all.deb
+ARG ROCM_INSTALLER_PACKAGE=amdgpu-install_6.1.60101-1_all.deb
ENV ROCAL_DEPS_ROOT=/rocAL-deps
WORKDIR $ROCAL_DEPS_ROOT
@@ -13,11 +13,11 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g
# install ROCm for rocAL OpenCL/HIP dependencies
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget sudo keyboard-configuration libstdc++-12-dev && \
- sudo apt-get -y clean && dpkg --add-architecture i386 && \
+ sudo apt-get -y clean && \
wget ${ROCM_INSTALLER_REPO} && \
sudo apt-get install -y ./${ROCM_INSTALLER_PACKAGE} && \
sudo apt-get update -y && \
- sudo amdgpu-install -y --usecase=graphics,rocm
+ sudo amdgpu-install -y --usecase=rocm
# install OpenCV
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python3-dev python3-numpy \
@@ -26,43 +26,43 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-
mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd
# install FFMPEG
-ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \
- libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \
- nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \
- wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \
- ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \
- make -j8 && sudo make install && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev
# install rocAL neural net dependencies
-RUN apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx
+RUN apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev
# install rocAL dependencies
-RUN apt-get -y install curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang
-RUN mkdir rocAL_deps && cd rocAL_deps && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \
- unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd
+RUN apt-get -y install rpp-dev curl make g++ unzip libomp-dev libpthread-stubs0-dev wget clang
RUN apt-get update -y && apt-get -y install autoconf automake libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev && \
- git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \
- -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd
+ git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
+ cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \
+ make -j4 && sudo make install && cd ../../
RUN apt-get -y install sqlite3 libsqlite3-dev libtool build-essential
RUN git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
-RUN git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \
- cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd
ENV CUPY_INSTALL_USE_HIP=1
ENV ROCM_HOME=/opt/rocm
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
cmake ../ && make -j4 && sudo make install && cd ../../ && \
- pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
+ pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
- git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \
- pip install -e . --no-cache-dir -vvvv && pip install numpy==1.21
+ pip install numpy==1.24.2 scipy==1.9.3 cython==0.29.* git+https://github.com/ROCm/hipify_torch.git && \
+ env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py && \
+ git clone -b rocm6.1_internal_testing https://github.com/ROCm/cupy.git && cd cupy && git submodule update --init && \
+ pip install -e . --no-cache-dir -vvvv
+
+# Install MIVisionX
+RUN git clone https://github.com/ROCm/MIVisionX && cd MIVisionX && \
+ mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install
+
+# install rocDecode
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev
ENV ROCAL_WORKSPACE=/workspace
WORKDIR $ROCAL_WORKSPACE
-# Install MIVisionX
-RUN git clone https://github.com/ROCm/MIVisionX.git && \
- mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install
\ No newline at end of file
+# Install rocAL
+RUN pip install --upgrade pip
+RUN git clone -b develop https://github.com/ROCm/rocAL && \
+ mkdir build && cd build && cmake ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install
\ No newline at end of file
diff --git a/docker/rocal-with-pytorch.dockerfile b/docker/rocal-with-pytorch.dockerfile
index 8569747ad..a497ac5fa 100644
--- a/docker/rocal-with-pytorch.dockerfile
+++ b/docker/rocal-with-pytorch.dockerfile
@@ -1,4 +1,5 @@
ARG PYTORCH_VERSION=latest
+ARG ROCAL_PYTHON_VERSION_SUGGESTED=3.9
FROM rocm/pytorch:${PYTORCH_VERSION}
ENV ROCAL_DEPS_ROOT=/rocAL-deps
@@ -10,47 +11,42 @@ RUN apt-get update -y
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git
# install OpenCV
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy \
- libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip && \
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev \
+ libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-dev unzip && \
mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \
mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd
# install FFMPEG
-ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install autoconf automake build-essential cmake git-core libass-dev libfreetype6-dev libsdl2-dev libtool libva-dev \
- libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo wget zlib1g-dev \
- nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev && \
- wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip && cd FFmpeg-n4.4.2/ && sudo ldconfig && \
- ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree && \
- make -j8 && sudo make install && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev
# install rocAL neural net dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocblas rocblas-dev miopen-hip miopen-hip-dev migraphx && \
- mkdir neuralNet && cd neuralNet && wget https://sourceforge.net/projects/half/files/half/1.12.0/half-1.12.0.zip && \
- unzip half-1.12.0.zip -d half-files && sudo mkdir -p /usr/local/include/half && sudo cp half-files/include/half.hpp /usr/local/include/half && cd
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev
# install rocAL dependency
-RUN DEBIAN_FRONTEND=noninteractive apt-get -y install wget libbz2-dev libssl-dev python-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
- git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \
- -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b 1.1.0 https://github.com/ROCm/rpp.git && cd rpp && mkdir build && cd build && \
- cmake -DBACKEND=HIP ../ && make -j4 && sudo make install && cd ../../ && \
- git clone -b v3.12.4 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
- ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd
-ENV CUPY_INSTALL_USE_HIP=1
-ENV ROCM_HOME=/opt/rocm
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
+ git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
+ cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \
+ make -j4 && sudo make install && cd ../../ && \
+ git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
+ ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../
+
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
cmake ../ && make -j4 && sudo make install && cd ../../ && \
- pip install pytest==3.1 && git clone -b v2.10.4 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
- cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../ && \
- git clone https://github.com/ROCmSoftwarePlatform/cupy.git && cd cupy && git submodule update --init && \
- pip install -e . --no-cache-dir -vvvv && pip install numpy==1.21
+ pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
+ cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../
+
+# install MIVisionX
+RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \
+ mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install && cd
+
+# install rocDecode
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev
ENV ROCAL_WORKSPACE=/workspace
WORKDIR $ROCAL_WORKSPACE
-# install MIVisionX
-RUN git clone https://github.com/ROCm/MIVisionX.git && \
- mkdir build && cd build && cmake -DBACKEND=HIP -DROCAL=OFF ../MIVisionX && make -j8 && make install
+# Install rocAL
+RUN pip install --upgrade pip
+RUN git clone -b develop https://github.com/ROCm/rocAL && \
+ mkdir build && cd build && cmake -D PYTHON_VERSION_SUGGESTED=${ROCAL_PYTHON_VERSION_SUGGESTED} ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install
\ No newline at end of file
diff --git a/docker/rocal-with-tensorflow.dockerfile b/docker/rocal-with-tensorflow.dockerfile
new file mode 100644
index 000000000..09668aede
--- /dev/null
+++ b/docker/rocal-with-tensorflow.dockerfile
@@ -0,0 +1,54 @@
+ARG TENSORFLOW_VERSION=latest
+FROM rocm/tensorflow:${TENSORFLOW_VERSION}
+ARG ROCAL_PYTHON_VERSION_SUGGESTED=3.9
+
+ENV ROCAL_DEPS_ROOT=/rocAL-deps
+WORKDIR $ROCAL_DEPS_ROOT
+
+RUN apt-get update -y
+
+# install rocAL base dependencies
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config git
+
+# install OpenCV
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev \
+ libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-dev unzip && \
+ mkdir OpenCV && cd OpenCV && wget https://github.com/opencv/opencv/archive/4.6.0.zip && unzip 4.6.0.zip && \
+ mkdir build && cd build && cmake -DWITH_GTK=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_OPENCL=OFF ../opencv-4.6.0 && make -j8 && sudo make install && sudo ldconfig && cd
+
+# install FFMPEG
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev
+
+# install rocAL neural net dependency
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install half rocblas-dev miopen-hip-dev migraphx-dev
+
+# install rocAL dependency
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rpp-dev wget libbz2-dev libssl-dev python3-dev libgflags-dev libgoogle-glog-dev liblmdb-dev nasm yasm libjsoncpp-dev clang && \
+ git clone -b 3.0.2 https://github.com/libjpeg-turbo/libjpeg-turbo.git && cd libjpeg-turbo && mkdir build && cd build && \
+ cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ../ && \
+ make -j4 && sudo make install && cd ../../ && \
+ git clone -b v3.21.9 https://github.com/protocolbuffers/protobuf.git && cd protobuf && git submodule update --init --recursive && \
+ ./autogen.sh && ./configure && make -j8 && make check -j8 && sudo make install && sudo ldconfig && cd ../
+
+ENV CUPY_INSTALL_USE_HIP=1
+ENV ROCM_HOME=/opt/rocm
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip git g++ hipblas hipsparse rocrand hipfft rocfft rocthrust-dev hipcub-dev python3-dev && \
+ git clone https://github.com/Tencent/rapidjson.git && cd rapidjson && mkdir build && cd build && \
+ cmake ../ && make -j4 && sudo make install && cd ../../ && \
+ pip install pytest==7.3.1 && git clone -b v2.11.1 https://github.com/pybind/pybind11 && cd pybind11 && mkdir build && cd build && \
+ cmake -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../ && make -j4 && sudo make install && cd ../../
+
+# install MIVisionX
+RUN git clone https://github.com/ROCm/MIVisionX.git && cd MIVisionX && \
+ mkdir build && cd build && cmake -DBACKEND=HIP ../ && make -j8 && make install && cd
+
+# install rocDecode
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y install rocdecode-dev
+
+ENV ROCAL_WORKSPACE=/workspace
+WORKDIR $ROCAL_WORKSPACE
+
+# Install rocAL
+RUN pip install --upgrade pip
+RUN git clone -b develop https://github.com/ROCm/rocAL && \
+ mkdir build && cd build && cmake -D PYTHON_VERSION_SUGGESTED=${ROCAL_PYTHON_VERSION_SUGGESTED} ../rocAL && make -j8 && cmake --build . --target PyPackageInstall && make install
diff --git a/docs/README.md b/docs/README.md
index 1b9abccf2..e7988e3f0 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,6 +7,7 @@ AMD ROCm Augmentation Library (rocAL) is designed to efficiently do such process
These pipelines are programmable by the user using both C++ and Python APIs.
## User Guide Chapters
+
* [Chapter 1 - Overview](user_guide/ch1.md)
* [Chapter 2 - Architecture Components](user_guide/ch2.md)
* [Chapter 3 - Installation](user_guide/ch3.md)
@@ -16,11 +17,11 @@ These pipelines are programmable by the user using both C++ and Python APIs.
## Key Components of rocAL
-* Full processing pipeline support for data_loading, meta-data loading, augmentations, and data-format conversions for training and inference.
-* Being able to do processing on CPU or Radeon GPU (with OpenCL or HIP backend)
-* Ease of integration with framework plugins in Python
-* Support variety of augmentation operations through AMD’s Radeon Performance Primitives (RPP).
-* All available public and open-sourced under ROCm.
+* Full processing pipeline support for data_loading, meta-data loading, augmentations, and data-format conversions for training and inference.
+* Being able to do processing on CPU or Radeon GPU (with OpenCL or HIP backend)
+* Ease of integration with framework plugins in Python
+* Support variety of augmentation operations through AMD’s Radeon Performance Primitives (RPP).
+* All available public and open-sourced under ROCm.
## Prerequisites
@@ -32,37 +33,37 @@ Refer [rocAL build instructions](https://github.com/ROCm/rocAL#build-instruction
## rocAL Python
-* rocAL Python package has been created using Pybind11 which enables data transfer between rocAL C++ API and Python API.
-* Module imports are made similar to other data loaders like NVidia's DALI.
-* rocal_pybind package has both PyTorch and TensorFlow framework support.
-* Various reader format support including FileReader, COCOReader, and TFRecordReader.
-* example folder contains sample implementations for each reader variation as well as sample training script for PyTorch
-* rocAL is integrated into MLPerf Resnet-50 Pytorch classification example on the ImageNet dataset.
+* rocAL Python package has been created using Pybind11 which enables data transfer between rocAL C++ API and Python API.
+* rocAL Python bindings have both PyTorch and TensorFlow framework support.
+* Various reader format support including FileReader, COCOReader, and TFRecordReader.
+* [examples folder](https://github.com/ROCm/rocAL/tree/master/docs/examples) has sample implementations for PyTorch and TensorFlow training and inference pipelines.
## rocAL Python API
### amd.rocal.fn
-* Contains the image augmentations & file read and decode operations which are linked to rocAL C++ API
-* All ops (listed below) are supported for the single input image and batched inputs.
-
-| Image Augmentation | Reader and Decoder | Geometric Ops |
-| :----------------: | :--------------------: | :-----------------: |
-| ColorTwist | File Reader | CropMirrorNormalize |
-| Brightness | ImageDecoder | Resize |
-| Gamma Correction | ImageDecoderRandomCrop | ResizeCrop |
-| Snow | COCOReader | WarpAffine |
-| Rain | TFRecordReader | FishEye |
-| Blur | | LensCorrection |
-| Jitter | | Rotate |
-| Hue | | |
-| Saturation | | |
-| Fog | | |
-| Contrast | | |
-| Vignette | | |
-| SNPNoise | | |
-| Pixelate | | |
-| Blend | | |
+* Contains the image augmentations & file read and decode operations which are linked to rocAL C++ API
+* All ops (listed below) are supported for the single input image and batched inputs.
+
+| Image Augmentation | Reader and Decoder | Geometric Ops |
+| :----------------: | :-----------------------: | :--------------------------: |
+| Color Twist | Image File Reader | Crop Mirror Normalize |
+| Color Temperature | Caffe Reader | Crop Resize |
+| Brightness | Caffe2 Reader | Resize |
+| Gamma Correction | CIFAR10 Reader | Random Crop |
+| Snow | COCO Reader | Warp Affine |
+| Rain | TF Record Reader | Fish Eye |
+| Blur | MXNet Record Reader | Lens Correction |
+| Jitter | Video File Reader | Rotate |
+| Hue | Image Decoder | Crop |
+| Saturation | Image Decoder Random Crop | Flip |
+| Fog | Video Decoder | Resize Crop Mirror |
+| Contrast | | Resize Crop Mirror Normalize |
+| Vignette | | |
+| SNP Noise | | |
+| Pixelate | | |
+| Blend | | |
+| Exposure | | |
### amd.rocal.pipeline
@@ -73,51 +74,24 @@ Refer [rocAL build instructions](https://github.com/ROCm/rocAL#build-instruction
### amd.rocal.types
-amd.rocal.types are enums exported from C++ API to python. Some examples include CPU, GPU, FLOAT, FLOAT16, RGB, GRAY, etc..
+amd.rocal.types are enums exported from C++ API to python. Some examples include CPU, GPU, FLOAT, FLOAT16, RGB, GRAY, etc.
### amd.rocal.plugin.pytorch
-* Contains ROCALGenericIterator for Pytorch.
-* ROCALClassificationIterator class implements iterator for image classification and return images with corresponding labels.
-* From the above classes, any hybrid iterator pipeline can be created by adding augmentations.
-* see example [PyTorch Simple Example](./examples). Requires PyTorch.
-
-### installing rocAL python plugin (Python 3.6)
-
-* Build and install RPP
-* Build and install MIVisionX which installs rocAL c++ lib
-* Go to the [rocal_pybind](https://github.com/ROCm/rocAL/tree/develop/rocAL_pybind) folder
-* sudo ./run.sh
-
-### Steps to run MLPerf Resnet50 classification training with rocAL on a system with MI50+ and ROCm
-
-* Step 1: Ensure you have downloaded ILSVRC2012_img_val.tar (6.3GB) and ILSVRC2012_img_train.tar (138 GB) files and unzip into train and val folders
-* Step 2: Build [MIVisionX Pytorch docker](https://github.com/ROCm/rocAL/blob/master/docker/README.md)
-* Step 3: Install rocAL python_pybind plugin as described above
-* Step 4: Clone [MLPerf](https://github.com/rrawther/MLPerf-mGPU) branch and checkout mlperf-v1.1-rocal branch
-
-``` bash
-git clone -b mlperf-v1.1-rocal https://github.com/rrawther/MLPerf-mGPU
-```
+* Contains ROCALGenericIterator for Pytorch.
+* ROCALClassificationIterator class implements an iterator for image classification and returns images with corresponding labels.
+* From the above classes, any hybrid iterator pipeline can be created by adding augmentations.
+* See example [PyTorch Simple Example](./examples/pytorch/). Requires PyTorch.
-* Step 5: Modify RN50_AMP_LARS_8GPUS_NCHW.sh or RN50_AMP_LARS_8GPUS_NHWC.sh to reflect correct path for imagenet directory
-* Step 8: Run RN50_AMP_LARS_8GPUS_NCHC.sh or RN50_AMP_LARS_8GPUS_NHWC.sh
+### amd.rocal.plugin.tf
-``` bash
-./RN50_AMP_LARS_8GPUS_NCHW.sh
-(or)
-./RN50_AMP_LARS_8GPUS_NHWC.sh
-```
+* Contains ROCALIterator for TensorFlow.
+* Any hybrid iterator pipeline can be created by adding augmentations.
+* See example [TensorFlow Simple Example](./examples/tf/). Requires TensorFlow.
-### MIVisionX Pytorch Docker
+### installing rocAL python plugin (Python 3.9+)
-* Refer to the [docker](https://github.com/ROCm/MIVisionX#docker) page for prerequisites and information on building the docker
-* Step 1: Run the docker image*
-
-``` bash
-sudo docker run -it -v :/data -v /:/dockerx -w /dockerx --privileged --device=/dev/kfd --device=/dev/dri --group-add video --shm-size=4g --ipc="host" --network=host
-```
+* Build and install RPP
+* Build and install MIVisionX
+* Build and install [rocAL](https://github.com/ROCm/rocAL/)
- * Optional: Map localhost directory on the docker image
- * option to map the localhost directory with imagenet dataset folder to be accessed on the docker image.
- * usage: -v {LOCAL_HOST_DIRECTORY_PATH}:{DOCKER_DIRECTORY_PATH}
diff --git a/docs/data/amd-epyc-black-resize.png b/docs/data/amd-epyc-black-resize.png
new file mode 100644
index 000000000..a79b38ca9
Binary files /dev/null and b/docs/data/amd-epyc-black-resize.png differ
diff --git a/docs/data/rocm-black-resize.png b/docs/data/rocm-black-resize.png
new file mode 100644
index 000000000..34829a6ad
Binary files /dev/null and b/docs/data/rocm-black-resize.png differ
diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile
index 150b8472c..d8f9f5570 100644
--- a/docs/doxygen/Doxyfile
+++ b/docs/doxygen/Doxyfile
@@ -951,7 +951,16 @@ INPUT = ../../README.md \
../../rocAL/include/api/rocal_api_meta_data.h \
../../rocAL/include/api/rocal_api_parameters.h \
../../rocAL/include/api/rocal_api_types.h \
- ../../rocAL/include/api/rocal_api.h
+ ../../rocAL/include/api/rocal_api.h \
+ ../../rocAL_pybind/amd/rocal/decoders.py \
+ ../../rocAL_pybind/amd/rocal/fn.py \
+ ../../rocAL_pybind/amd/rocal/pipeline.py \
+ ../../rocAL_pybind/amd/rocal/random.py \
+ ../../rocAL_pybind/amd/rocal/readers.py \
+ ../../rocAL_pybind/amd/rocal/types.py \
+ ../../rocAL_pybind/amd/rocal/plugin/generic.py \
+ ../../rocAL_pybind/amd/rocal/plugin/pytorch.py \
+ ../../rocAL_pybind/amd/rocal/plugin/tf.py
# This tag can be used to specify the character encoding of the source files
diff --git a/docs/examples.md b/docs/examples.md
new file mode 100644
index 000000000..7e9089c38
--- /dev/null
+++ b/docs/examples.md
@@ -0,0 +1,7 @@
+# Examples
+
+Use the links below to see more examples:
+
+- [Image Processing](https://github.com/ROCm/rocAL/tree/master/docs/examples/image_processing)
+
+- [Pytorch](https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch)
diff --git a/docs/examples.rst b/docs/examples.rst
index 1baa5e5bf..10607f2a3 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -12,4 +12,6 @@ Use the links below to see more examples:
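* `Image Processing <https://github.com/ROCm/rocAL/tree/master/docs/examples/image_processing>`_
* `Pytorch <https://github.com/ROCm/rocAL/tree/master/docs/examples/pytorch>`_
+* `Tensorflow <https://github.com/ROCm/rocAL/tree/master/docs/examples/tf>`_
+* `Jupyter Notebooks <https://github.com/ROCm/rocAL/tree/master/docs/examples/notebooks>`_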
diff --git a/docs/examples/image_processing/inference_pipeline.py b/docs/examples/image_processing/inference_pipeline.py
index a7db74e16..f97da7b37 100644
--- a/docs/examples/image_processing/inference_pipeline.py
+++ b/docs/examples/image_processing/inference_pipeline.py
@@ -31,7 +31,7 @@
seed = 1549361629
-image_dir = "../../../../data/images/AMD-tinyDataSet/"
+image_dir = "../../../data/images/AMD-tinyDataSet/"
batch_size = 4
gpu_id = 0
diff --git a/docs/examples/notebooks/augmentation_examples.ipynb b/docs/examples/notebooks/augmentation_examples.ipynb
new file mode 100644
index 000000000..997f93ca2
--- /dev/null
+++ b/docs/examples/notebooks/augmentation_examples.ipynb
@@ -0,0 +1,189 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "4f777cf2",
+ "metadata": {},
+ "source": [
+ "Image Augmentation App"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "185d3b2d",
+ "metadata": {},
+ "source": [
+ "This application demonstrates a simple rocAL pipeline with different augmentations supported by rocAL."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ace8c53",
+ "metadata": {},
+ "source": [
+ " Common Code "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15c865e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from amd.rocal.plugin.generic import ROCALClassificationIterator\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.fn as fn\n",
+ "import amd.rocal.types as types\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0569dcc",
+ "metadata": {},
+ "source": [
+ "Configuring rocAL pipeline "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00c2815c",
+ "metadata": {},
+ "source": [
+ "Configure the pipeline parameters as required by the user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f118bb1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_path = \"/media/MIVisionX-data/rocal_data/coco/coco_10_img/train_10images_2017/\"\n",
+ "rocal_cpu = True\n",
+ "device = \"cpu\" \n",
+ "batch_size = 1\n",
+ "num_threads = 1\n",
+ "random_seed = 1\n",
+ "local_rank = 0\n",
+ "world_size = 1\n",
+ "display = True "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e069c4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu, tensor_layout=types.NHWC , tensor_dtype=types.FLOAT)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df5e6005",
+ "metadata": {},
+ "source": [
+ " Image augmentation pipeline "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1ab279e",
+ "metadata": {},
+ "source": [
+ "Here the file reader is used followed by the turbo jpeg decoder. In this pipeline, cascaded augmentations are added on the decoded images. Multiple augmentation outputs are returned using set_outputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f9e1dd12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with pipe:\n",
+ " jpegs, _ = fn.readers.file(file_root=data_path)\n",
+ " images = fn.decoders.image(jpegs, file_root=data_path, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=True)\n",
+ " resize_outputs = fn.resize(images, resize_width=300, resize_height=300)\n",
+ " brightness_outputs = fn.brightness(resize_outputs)\n",
+ " contrast_outputs = fn.contrast(resize_outputs)\n",
+ " fisheye_outputs = fn.fish_eye(resize_outputs)\n",
+ " fog_outputs = fn.fog(resize_outputs)\n",
+ " exposure_outputs = fn.exposure(resize_outputs)\n",
+ " color_twist_outputs = fn.color_twist(resize_outputs)\n",
+ " saturation_outputs = fn.saturation(resize_outputs)\n",
+ " pipe.set_outputs(resize_outputs, brightness_outputs, contrast_outputs, fisheye_outputs, fog_outputs, exposure_outputs, color_twist_outputs, saturation_outputs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b6be22ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe.build()\n",
+ "# Dataloader\n",
+ "data_loader = ROCALClassificationIterator(pipe)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "401897a3",
+ "metadata": {},
+ "source": [
+ "Visualizing outputs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "349bf77c",
+ "metadata": {},
+ "source": [
+ "The augmented output images are displayed using imshow()."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67db109e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cnt = 0\n",
+ "aug_list = [\"resize\", \"brightness\", \"contrast\", \"fisheye\", \"fog\", \"exposure\", \"colortwist\", \"saturation\"] \n",
+ "fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15,15))\n",
+ "row = 0\n",
+ "col = 0\n",
+ "for i, it in enumerate(data_loader, 0):\n",
+ " for img in it[0]:\n",
+ " if(cnt < len(aug_list)):\n",
+ " axes[row, col].set_title(aug_list[cnt])\n",
+ " img = (img[0]).astype('uint8')\n",
+ " axes[row, col].imshow(img)\n",
+ " cnt += 1\n",
+ " row += 1\n",
+ " if(row == 2):\n",
+ " row = 0\n",
+ " col += 1\n",
+ " if(col == 4):\n",
+ " col = 0\n",
+ "data_loader.reset()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/notebooks/classification_training_flowerdataset.ipynb b/docs/examples/notebooks/classification_training_flowerdataset.ipynb
new file mode 100644
index 000000000..b6b785b99
--- /dev/null
+++ b/docs/examples/notebooks/classification_training_flowerdataset.ipynb
@@ -0,0 +1,667 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "56c9244e",
+ "metadata": {},
+ "source": [
+ "## rocAL Classification training \n",
+ "This example showcases a use case for rocAL classification training with a small dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "73bdd89e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from torch import nn\n",
+ "from torch.autograd import Variable\n",
+ "import torchvision.models as models\n",
+ "import time\n",
+ "import math\n",
+ "import tqdm as tqdm\n",
+ "import matplotlib.pyplot as plt\n",
+ "from amd.rocal.plugin.pytorch import ROCALClassificationIterator\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.fn as fn\n",
+ "import amd.rocal.types as types\n",
+ "from torch.optim import Optimizer\n",
+ "from collections import defaultdict"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3ce165e2",
+ "metadata": {},
+ "source": [
+ "## Setting Dataset path \n",
+ "Here we are setting the dataset path which will be used in the training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a06c781",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "start_time = time.time()\n",
+ "device = torch.device('cpu')\n",
+ "data_dir = './Flower102/split_data/' # Run create_classification_flower_dataset.py before running the notebook for dataset creation\n",
+ "train_dir = data_dir + '/train'\n",
+ "val_dir = data_dir + '/val'\n",
+ "test_dir = data_dir + '/test'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f3e8acc2",
+ "metadata": {},
+ "source": [
+ "## Defining the Pipeline\n",
+ "This defines a pipeline for a classification task. The pipeline reads images from a directory, decodes them, applies augmentations and returns (image, label) pairs. It uses image_random_crop, then resizes the output to (224, 224), followed by normalization."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "90041830",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def train_pipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):\n",
+ " pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, rocal_cpu=rocal_cpu,\n",
+ " tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, prefetch_queue_depth=6)\n",
+ " with pipe:\n",
+ " jpegs, labels = fn.readers.file(file_root=data_path)\n",
+ " rocal_device = 'cpu' if rocal_cpu else 'gpu'\n",
+ " decode = fn.decoders.image_random_crop(jpegs, output_type=types.RGB,\n",
+ " file_root=data_path, num_shards=world_size, random_shuffle=True)\n",
+ " res = fn.resize(decode, resize_width=224, resize_height=224, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n",
+ " coin_flip = fn.random.coin_flip(probability=0.5)\n",
+ " cmnp = fn.crop_mirror_normalize(res,\n",
+ " output_dtype=types.FLOAT16 if fp16 else types.FLOAT,\n",
+ " output_layout=types.NCHW,\n",
+ " crop=(224, 224),\n",
+ " mirror=coin_flip,\n",
+ " mean=[0, 0, 0], std=[1, 1, 1])\n",
+ " if (one_hot):\n",
+ " _ = fn.one_hot(labels, num_classes)\n",
+ " pipe.set_outputs(cmnp)\n",
+ " print('rocal \"{0}\" variant'.format(rocal_device))\n",
+ " return pipe\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60094a08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def val_pipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):\n",
+ " pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank + 10, rocal_cpu=rocal_cpu,\n",
+ " tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW, prefetch_queue_depth=2)\n",
+ " with pipe:\n",
+ " jpegs, labels = fn.readers.file(file_root=data_path)\n",
+ " rocal_device = 'cpu' if rocal_cpu else 'gpu'\n",
+ " decode = fn.decoders.image_random_crop(\n",
+ " jpegs, output_type=types.RGB, file_root=data_path, num_shards=world_size, random_shuffle=False)\n",
+ " res = fn.resize(decode, resize_width=224, resize_height=224, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n",
+ " cmnp = fn.crop_mirror_normalize(res,\n",
+ " output_dtype=types.FLOAT16 if fp16 else types.FLOAT,\n",
+ " output_layout=types.NCHW,\n",
+ " crop=(224, 224),\n",
+ " mirror=0,\n",
+ " mean=[0, 0, 0],\n",
+ " std=[1, 1, 1])\n",
+ " if (one_hot):\n",
+ " _ = fn.one_hot(labels, num_classes)\n",
+ " pipe.set_outputs(cmnp)\n",
+ " print('rocal \"{0}\" variant'.format(rocal_device))\n",
+ " return pipe"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "538094db",
+ "metadata": {},
+ "source": [
+ "## Building the Pipeline\n",
+ "Here the pipeline is created. In order to use this Pipeline, the pipeline has to be built. This is achieved by calling the build function.\n",
+ "Then the iterator object is created with ROCALClassificationIterator(pipe)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "838fea17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = train_pipeline(data_path=train_dir, batch_size=64, num_classes=1, one_hot=0,\n",
+ " local_rank=1, world_size=1, num_thread=3, crop=10, rocal_cpu=True, fp16=False)\n",
+ "pipe.build()\n",
+ "trainloader = ROCALClassificationIterator(pipe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c50d4b16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = val_pipeline(data_path=val_dir, batch_size=64, num_classes=1, one_hot=0, local_rank=1 , world_size=1 , num_thread=3, crop=10, rocal_cpu=True, fp16=False)\n",
+ "pipe.build()\n",
+ "valloader = ROCALClassificationIterator(pipe)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "37e1447e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def accuracy(output, target, is_test=False):\n",
+ " global total\n",
+ " global correct\n",
+ " batch_size = target.size(0)\n",
+ " total += batch_size \n",
+ " _, pred = output.max(dim=1)\n",
+ " if is_test:\n",
+ " preds.extend(pred)\n",
+ " correct += torch.sum(pred == target.data)\n",
+ " return (correct.float()/total) * 100\n",
+ "\n",
+ "def reset():\n",
+ " global total, correct\n",
+ " global train_loss, test_loss, best_acc\n",
+ " global trn_losses, trn_accs, val_losses, val_accs\n",
+ " total, correct = 0, 0\n",
+ " train_loss, test_loss, best_acc = 0.0, 0.0, 0.0\n",
+ " trn_losses, trn_accs, val_losses, val_accs = [], [], [], []"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "527e3311",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class AvgStats(object):\n",
+ " def __init__(self):\n",
+ " self.reset()\n",
+ " \n",
+ " def reset(self):\n",
+ " self.losses =[]\n",
+ " self.precs =[]\n",
+ " self.its = []\n",
+ " \n",
+ " def append(self, loss, prec, it):\n",
+ " self.losses.append(loss)\n",
+ " self.precs.append(prec)\n",
+ " self.its.append(it)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f9795e4",
+ "metadata": {},
+ "source": [
+ "## Saving checkpoints\n",
+ "The checkpoints are saved and updated whenever a new best val_acc is achieved. The saved checkpoint is then used by the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6fff85f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def save_checkpoint(model, is_best, filename='./checkpoint.pth.tar'):\n",
+ " if is_best:\n",
+ " torch.save(model.state_dict(), filename) # save checkpoint\n",
+ " else:\n",
+ " print (\"=> Validation Accuracy did not improve\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df0c62d9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_checkpoint(model, filename = './checkpoint.pth.tar'):\n",
+ " sd = torch.load(filename, map_location=lambda storage, loc: storage)\n",
+ " names = set(model.state_dict().keys())\n",
+ " for n in list(sd.keys()): \n",
+ " if n not in names and n+'_raw' in names:\n",
+ " if n+'_raw' not in sd: sd[n+'_raw'] = sd[n]\n",
+ " del sd[n]\n",
+ " model.load_state_dict(sd)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8448f914",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class CLR(object):\n",
+ " def __init__(self, optim, bn, base_lr=1e-7, max_lr=100):\n",
+ " self.base_lr = base_lr\n",
+ " self.max_lr = max_lr\n",
+ " self.optim = optim\n",
+ " self.bn = bn - 1\n",
+ " ratio = self.max_lr/self.base_lr\n",
+ " self.mult = ratio ** (1/self.bn)\n",
+ " self.best_loss = 1e9\n",
+ " self.iteration = 0\n",
+ " self.lrs = []\n",
+ " self.losses = []\n",
+ " \n",
+ " def calc_lr(self, loss):\n",
+ " self.iteration +=1\n",
+ " if math.isnan(loss) or loss > 4 * self.best_loss:\n",
+ " return -1\n",
+ " if loss < self.best_loss and self.iteration > 1:\n",
+ " self.best_loss = loss\n",
+ " \n",
+ " mult = self.mult ** self.iteration\n",
+ " lr = self.base_lr * mult\n",
+ " \n",
+ " self.lrs.append(lr)\n",
+ " self.losses.append(loss)\n",
+ " \n",
+ " return lr\n",
+ " \n",
+ " def plot(self, start=10, end=-5):\n",
+ " plt.xlabel(\"Learning Rate\")\n",
+ " plt.ylabel(\"Losses\")\n",
+ " plt.plot(self.lrs[start:end], self.losses[start:end])\n",
+ " plt.xscale('log')\n",
+ " \n",
+ " \n",
+ " def plot_lr(self):\n",
+ " plt.xlabel(\"Iterations\")\n",
+ " plt.ylabel(\"Learning Rate\")\n",
+ " plt.plot(self.lrs)\n",
+ " plt.yscale('log')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd494db3",
+ "metadata": {},
+ "source": [
+ "## Defining Optimizer\n",
+ "The optimizer object is used in the inner loop for fast weight updates. In this example, the Lookahead optimizer is implemented."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3cdb0ca6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Lookahead(Optimizer):  # wraps an inner optimizer and keeps a second set of slow weights\n",
+ "    def __init__(self, optimizer, alpha=0.5, k=5):\n",
+ "        assert(0.0 <= alpha <= 1.0)\n",
+ "        assert(k >= 1)\n",
+ "        self.optimizer = optimizer\n",
+ "        self.alpha = alpha\n",
+ "        self.k = k\n",
+ "        self.param_groups = self.optimizer.param_groups\n",
+ "        self.state = defaultdict(dict)\n",
+ "        for group in self.param_groups:\n",
+ "            group['k_counter'] = 0\n",
+ "        self.slow_weights = [[param.clone().detach() for param in group['params']] for group in self.param_groups]\n",
+ "\n",
+ "    def step(self, closure=None):\n",
+ "        loss = self.optimizer.step(closure)\n",
+ "        for group, slow_Weight in zip(self.param_groups, self.slow_weights):\n",
+ "            group['k_counter'] += 1\n",
+ "            if group['k_counter'] == self.k:  # every k steps of the inner optimizer\n",
+ "                for param, weight in zip(group['params'], slow_Weight):\n",
+ "                    # slow += alpha * (fast - slow); keyword alpha replaces the deprecated add_(scalar, tensor) form\n",
+ "                    weight.data.add_(param.data - weight.data, alpha=self.alpha)\n",
+ "                    param.data.copy_(weight.data)  # overwrite the fast weights with the updated slow weights\n",
+ "                group['k_counter'] = 0\n",
+ "        return loss\n",
+ "\n",
+ "    def state_dict(self):\n",
+ "        fast_dict = self.optimizer.state_dict()\n",
+ "        fast_state = fast_dict['state']\n",
+ "        param_groups = fast_dict['param_groups']\n",
+ "        slow_state = {(id(k) if isinstance(k, torch.Tensor) else k): v\n",
+ "                    for k, v in self.state.items()}\n",
+ "        return {\n",
+ "            'fast_state': fast_state,\n",
+ "            'param_groups': param_groups,\n",
+ "            'slow_state': slow_state\n",
+ "        }\n",
+ "\n",
+ "    def load_state_dict(self, state_dict):\n",
+ "        fast_dict = {\n",
+ "            'state': state_dict['fast_state'],\n",
+ "            'param_groups': state_dict['param_groups']\n",
+ "        }\n",
+ "        slow_dict = {\n",
+ "            'state': state_dict['slow_state'],\n",
+ "            'param_groups': state_dict['param_groups']\n",
+ "        }\n",
+ "        super(Lookahead, self).load_state_dict(slow_dict)\n",
+ "        self.optimizer.load_state_dict(fast_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5358b12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_loss = 0.0\n",
+ "test_loss = 0.0\n",
+ "best_acc = 0.0\n",
+ "trn_losses = []\n",
+ "trn_accs = []\n",
+ "val_losses = []\n",
+ "val_accs = []\n",
+ "total = 0\n",
+ "correct = 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8002987",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def update_lr(optimizer, lr):\n",
+ " for g in optimizer.param_groups:\n",
+ " g['lr'] = lr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fbd95fdd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def lr_find(clr, model, optimizer=None):\n",
+ "\n",
+ " t = tqdm.tqdm(trainloader, leave=False, total=len(trainloader))\n",
+ " running_loss = 0.\n",
+ " avg_beta = 0.98\n",
+ " model.train()\n",
+ " \n",
+ " for i,data in enumerate(t):\n",
+ " input = data[0]\n",
+ " target = data[1]\n",
+ " input, target = input.to(device), target.to(device)\n",
+ " var_ip, var_tg = Variable(input), Variable(target)\n",
+ " output = model(var_ip)\n",
+ " loss = criterion(output, var_tg)\n",
+ " \n",
+ " running_loss = avg_beta * running_loss + (1-avg_beta) *loss.item()\n",
+ " smoothed_loss = running_loss / (1 - avg_beta**(i+1))\n",
+ " t.set_postfix(loss=smoothed_loss)\n",
+ " \n",
+ " lr = clr.calc_lr(smoothed_loss)\n",
+ " if lr == -1 :\n",
+ " break\n",
+ " update_lr(optimizer, lr) \n",
+ " \n",
+ " # compute gradient and do SGD step\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " trainloader.reset()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "55a56d6e",
+ "metadata": {},
+ "source": [
+ "## Defining train and test function \n",
+ "To train the model, the data iterator is looped over, and the inputs are fed to the network and optimized. The model is then tested with batches of images from the test set."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "126dc268",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def train(epoch=0, model=None, optimizer=None):\n",
+ "    # One pass over trainloader: forward, record smoothed loss and accuracy, checkpoint on a new best, then step.\n",
+ "    global best_acc\n",
+ "    global trn_accs, trn_losses\n",
+ "    model.train()\n",
+ "    running_loss = 0.\n",
+ "    avg_beta = 0.98  # decay of the exponential moving average of the loss\n",
+ "\n",
+ "    for i, ([input],target) in enumerate(trainloader):\n",
+ "        bt_start = time.time()\n",
+ "        input, target = input.to(device), target.to(device)  # same device handling as test() and lr_find()\n",
+ "        var_ip, var_tg = Variable(input), Variable(target)\n",
+ "\n",
+ "        output = model(var_ip)\n",
+ "        loss = criterion(output, var_tg)\n",
+ "        running_loss = avg_beta * running_loss + (1-avg_beta) *loss.item()\n",
+ "        smoothed_loss = running_loss / (1 - avg_beta**(i+1))  # bias-corrected moving average\n",
+ "        trn_losses.append(smoothed_loss)\n",
+ "\n",
+ "        # measure accuracy and record loss\n",
+ "        prec = accuracy(output.data, target)\n",
+ "        trn_accs.append(prec)\n",
+ "        train_stats.append(smoothed_loss, prec, time.time()-bt_start)\n",
+ "        if prec > best_acc :\n",
+ "            best_acc = prec\n",
+ "            save_checkpoint(model, True)\n",
+ "\n",
+ "        # compute gradient and do SGD step\n",
+ "        optimizer.zero_grad()\n",
+ "        loss.backward()\n",
+ "        optimizer.step()\n",
+ "    trainloader.reset()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4bb16a6f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def test(model=None):\n",
+ " with torch.no_grad():\n",
+ " model.eval()\n",
+ " global val_accs, val_losses\n",
+ " running_loss = 0.\n",
+ " avg_beta = 0.98\n",
+ " for i, ([input],target) in enumerate(valloader):\n",
+ " bt_start = time.time()\n",
+ " input, target = input.to(device), target.to(device)\n",
+ " var_ip, var_tg = Variable(input), Variable(target)\n",
+ " output = model(var_ip)\n",
+ " loss = criterion(output, var_tg)\n",
+ " running_loss = avg_beta * running_loss + (1-avg_beta) *loss.item()\n",
+ " smoothed_loss = running_loss / (1 - avg_beta**(i+1))\n",
+ "\n",
+ " # measure accuracy and record loss\n",
+ " prec = accuracy(output.data, target, is_test=True)\n",
+ " test_stats.append(loss.item(), prec, time.time()-bt_start)\n",
+ " val_losses.append(smoothed_loss)\n",
+ " val_accs.append(prec)\n",
+ " valloader.reset()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0c3334cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fit(model=None, sched=None, optimizer=None):\n",
+ " print(\"Epoch\\tTrn_loss\\tVal_loss\\tTrn_acc\\t\\tVal_acc\")\n",
+ " for j in range(epoch):\n",
+ " train(epoch=j, model=model, optimizer=optimizer)\n",
+ " \n",
+ " test(model)\n",
+ " if sched:\n",
+ " sched.step(j)\n",
+ " print(\"{}\\t{:06.8f}\\t{:06.8f}\\t{:06.8f}\\t{:06.8f}\"\n",
+ " .format(j+1, trn_losses[-1], val_losses[-1], trn_accs[-1], val_accs[-1]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b676d359",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = models.resnet18(pretrained=True)\n",
+ "model.fc = nn.Linear(in_features=model.fc.in_features, out_features=102)\n",
+ "\n",
+ "# Freeze every parameter, then re-enable the new fc head. The attribute is requires_grad; a misspelt name is silently ignored.\n",
+ "for param in model.parameters():\n",
+ "    param.requires_grad = False\n",
+ "\n",
+ "for param in model.fc.parameters():\n",
+ "    param.requires_grad = True\n",
+ "\n",
+ "model = model.to(device)\n",
+ "\n",
+ "save_checkpoint(model, True, 'before_start_resnet18.pth.tar')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bca9db3d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "criterion = nn.CrossEntropyLoss()\n",
+ "optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)\n",
+ "optimizer = Lookahead(optim)\n",
+ "\n",
+ "clr = CLR(optim, len(trainloader))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "886ae8bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "load_checkpoint(model, 'before_start_resnet18.pth.tar')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d93bf209",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "preds = []\n",
+ "epoch = 10\n",
+ "train_stats = AvgStats()\n",
+ "test_stats = AvgStats()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92e32e6d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "reset()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "63c43680",
+ "metadata": {},
+ "source": [
+ "## Define a Loss function and optimizer\n",
+ "Here, Classification Cross-Entropy loss and SGD with momentum are used as the loss function and optimizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "06585899",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "criterion = nn.CrossEntropyLoss()\n",
+ "optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)\n",
+ "optimizer = Lookahead(optim)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "63b70407",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fit(model=model, optimizer=optim)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b91a2bb0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "end_time = time.time()\n",
+ "print(\"Total_time \",end_time - start_time)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/notebooks/create_classification_flower_dataset.py b/docs/examples/notebooks/create_classification_flower_dataset.py
new file mode 100644
index 000000000..6113286e2
--- /dev/null
+++ b/docs/examples/notebooks/create_classification_flower_dataset.py
@@ -0,0 +1,83 @@
+import csv
+import os
+import shutil
+import sys
+import warnings
+from scipy.io import loadmat
+import pandas as pd
+
+path = os.getcwd()
+os.system('pip install split_folders')  # has to run before `import splitfolders` below
+import splitfolders  # noqa: E402 - deliberately after the install so a fresh environment does not hit ImportError
+if not os.path.exists("dataset_flower"):
+ os.mkdir("dataset_flower")
+os.chdir("dataset_flower")
+os.system("wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz")
+os.system("tar -xvf 102flowers.tgz")
+
+os.chdir(path)
+if not os.path.exists("Flower102"):
+ os.mkdir("Flower102")
+
+os.system("wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat")
+# get the path/directory
+imgs = []
+folder_dir = "./dataset_flower/jpg/"
+for image in os.listdir(folder_dir):
+ imgs.append(image)
+imgs.sort()
+
+warnings.filterwarnings("ignore")
+mat_labels = loadmat("./imagelabels.mat")
+label = mat_labels["labels"]
+label = label[0]
+for i in range(len(label)):
+ label[i] = label[i] - 1
+
+# open(..., "w") below creates (or truncates) map.csv itself, so no separate `touch` call is needed.
+# newline="" leaves line-ending handling to the csv module, as the csv documentation recommends.
+
+fields = ["images", "label"]
+with open("./Flower102/map.csv", "w", newline="") as csvfile:
+    csvwriter = csv.writer(csvfile)
+    # writing the fields
+    csvwriter.writerow(fields)
+    for i in range(len(imgs)):
+        row = [imgs[i], label[i]]
+        csvwriter.writerow(row)
+
+labels_map = pd.read_csv(r"./Flower102/map.csv")
+train_dir = r"./dataset_flower/jpg" # source folder
+dest_folder = r"./Flower102/flower/" # destination folder
+if not os.path.exists(dest_folder):
+ os.mkdir(dest_folder)
+
+for filename, class_name in labels_map.values:
+    # Only classes 0-49 are kept; each gets its own subdirectory named after `class_name`
+    if int(class_name) >= 50:
+        continue
+    else:
+        if not os.path.exists(dest_folder + str(class_name)):
+            os.mkdir(dest_folder + str(class_name))
+        src_path = train_dir + "/" + filename
+        dst_path = dest_folder + str(class_name) + "/" + filename
+        try:
+            shutil.copy(src_path, dst_path)
+            print("sucessfull")
+        except IOError:
+            print("Unable to copy file {} to {}".format(src_path, dst_path))
+        except Exception:  # not a bare except: Ctrl-C / SystemExit must still stop the loop
+            print(
+                "When try copy file {} to {}, unexpected error: {}".format(
+                    src_path, dst_path, sys.exc_info()
+                )
+            )
+
+input_folder = "./Flower102/flower"
+splitfolders.ratio(
+ input_folder,
+ output="./Flower102/split_data",
+ seed=42,
+ ratio=(0.7, 0.2, 0.1),
+ group_prefix=None,
+)
diff --git a/docs/examples/image_processing/decoder_examples.ipynb b/docs/examples/notebooks/decoder_examples.ipynb
similarity index 91%
rename from docs/examples/image_processing/decoder_examples.ipynb
rename to docs/examples/notebooks/decoder_examples.ipynb
index 27098f079..1545f0314 100644
--- a/docs/examples/image_processing/decoder_examples.ipynb
+++ b/docs/examples/notebooks/decoder_examples.ipynb
@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -38,7 +38,7 @@
"%matplotlib inline\n",
"\n",
"seed = 1549361629\n",
- "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n",
+ "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n",
"batch_size = 4\n",
"gpu_id = 0\n",
"\n",
@@ -61,7 +61,7 @@
" pipe.build()\n",
" data_loader = ROCALClassificationIterator(pipe, device, device_id)\n",
" images = next(iter(data_loader))\n",
- " show_images(images[0], device)\n"
+ " show_images(images[0][0], device)\n"
]
},
{
@@ -82,9 +82,9 @@
"source": [
"@pipeline_def(seed=seed)\n",
"def image_decoder_pipeline(device=\"cpu\"):\n",
- " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+ " jpegs, labels = fn.readers.file(file_root=image_dir)\n",
" images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n",
- " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+ " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
"\n",
"pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
" reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n",
@@ -109,12 +109,13 @@
"source": [
"@pipeline_def(seed=seed)\n",
"def image_decoder_random_crop_pipeline(device=\"cpu\"):\n",
- " jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+ " jpegs, labels = fn.readers.file(file_root=image_dir)\n",
" images = fn.decoders.image_slice(jpegs, file_root=image_dir, \n",
- " device=device,\n",
" output_type=types.RGB,\n",
+ " shard_id = 0,\n",
+ " num_shards = 1,\n",
" random_shuffle=True)\n",
- " return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+ " return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
" \n",
"pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
" reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n",
@@ -184,7 +185,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.10.12"
},
"vscode": {
"interpreter": {
diff --git a/docs/examples/notebooks/resize_implementation.ipynb b/docs/examples/notebooks/resize_implementation.ipynb
new file mode 100644
index 000000000..0c267762a
--- /dev/null
+++ b/docs/examples/notebooks/resize_implementation.ipynb
@@ -0,0 +1,201 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "4f777cf2",
+ "metadata": {},
+ "source": [
+ "Image Augmentation App"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "185d3b2d",
+ "metadata": {},
+ "source": [
+ "This application demonstrates a simple rocAL pipeline with different interpolation types for resize augmentation supported by rocAL."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ace8c53",
+ "metadata": {},
+ "source": [
+ " Common Code "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15c865e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from amd.rocal.plugin.generic import ROCALClassificationIterator\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.fn as fn\n",
+ "import amd.rocal.types as types\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0569dcc",
+ "metadata": {},
+ "source": [
+ "Configuring rocAL pipeline "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00c2815c",
+ "metadata": {},
+ "source": [
+ "Configure the pipeline parameters as required by the user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f118bb1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_path = \"/media/MIVisionX-data/rocal_data/coco/coco_10_img/train_10images_2017/\"\n",
+ "rocal_cpu = True\n",
+ "device = \"cpu\"\n",
+ "batch_size = 1\n",
+ "num_threads = 1\n",
+ "random_seed = 1\n",
+ "local_rank = 0\n",
+ "world_size = 1\n",
+ "display = True"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e069c4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu, tensor_layout=types.NHWC, tensor_dtype=types.FLOAT)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df5e6005",
+ "metadata": {},
+ "source": [
+ " Image augmentation pipeline "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1ab279e",
+ "metadata": {},
+ "source": [
+ "Here the file reader is used followed by the turbo jpeg decoder. In this pipeline, cascaded augmentations are added on the decoded images. Multiple augmentation outputs are returned using set_outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "804d8895",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "resize_w = 200\n",
+ "resize_h = 200\n",
+ "with pipe:\n",
+ " jpegs, _ = fn.readers.file(file_root=data_path)\n",
+ " images = fn.decoders.image(jpegs, file_root=data_path, device=0, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n",
+ " output_linear = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.LINEAR_INTERPOLATION)\n",
+ " output_nearest_neighbor = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.NEAREST_NEIGHBOR_INTERPOLATION)\n",
+ " output_cubic = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.CUBIC_INTERPOLATION)\n",
+ " output_lanczos = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.LANCZOS_INTERPOLATION)\n",
+ " output_gaussian = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.GAUSSIAN_INTERPOLATION)\n",
+ " output_triangular = fn.resize(images, resize_width=resize_w, resize_height=resize_h, interpolation_type=types.TRIANGULAR_INTERPOLATION)\n",
+ " pipe.set_outputs(output_linear, output_nearest_neighbor, output_cubic, output_lanczos, output_gaussian, output_triangular)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b6be22ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe.build()\n",
+ "# Dataloader\n",
+ "data_loader = ROCALClassificationIterator(pipe)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "401897a3",
+ "metadata": {},
+ "source": [
+ "Visualizing outputs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "349bf77c",
+ "metadata": {},
+ "source": [
+ "The output of augmented images are displayed using imshow()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7cf8686",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cnt = 0\n",
+ "aug_list = [\"LINEAR_INTERPOLATION\", \"NEAREST_NEIGHBOR_INTERPOLATION\", \"CUBIC_INTERPOLATION\", \"LANCZOS_INTERPOLATION\", \"GAUSSIAN_INTERPOLATION\", \"TRIANGULAR_INTERPOLATION\"] \n",
+ "row = 0\n",
+ "col = 0\n",
+ "fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))\n",
+ "for i, it in enumerate(data_loader, 0):\n",
+ " for img in it[0]:\n",
+ " if cnt < len(aug_list):\n",
+ " axes[row, col].set_title(aug_list[cnt])\n",
+ " img = (img[0]).astype(\"uint8\")\n",
+ " axes[row, col].imshow(img)\n",
+ " cnt += 1\n",
+ " row += 1\n",
+ " if row == 3:\n",
+ " row = 0\n",
+ " col += 1\n",
+ " if col == 2:\n",
+ " col = 0\n",
+ "data_loader.reset()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/notebooks/tf_dataloader.ipynb b/docs/examples/notebooks/tf_dataloader.ipynb
new file mode 100644
index 000000000..5305cec9a
--- /dev/null
+++ b/docs/examples/notebooks/tf_dataloader.ipynb
@@ -0,0 +1,179 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "95284add",
+ "metadata": {},
+ "source": [
+ "## Data Loading: TensorFlow TFRecord\n",
+ " This example demonstrates how to utilise rocAL to access data that has been saved in the TensorFlow TFRecord format."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81f6b975",
+ "metadata": {},
+ "source": [
+ "## Common Code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dae4d589",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from amd.rocal.plugin.tf import ROCALIterator\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.types as types\n",
+ "import amd.rocal.fn as fn\n",
+ "import tensorflow as tf\n",
+ "import os\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4c70a58",
+ "metadata": {},
+ "source": [
+ "## Configuring rocAL pipeline\n",
+ "\n",
+ "Configure the pipeline parameters as required by the user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2baa1c4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "image_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"tf\", \"classification\")\n",
+ "rocal_cpu = True\n",
+ "batch_size = 10\n",
+ "one_hot_label = 0\n",
+ "num_threads = 1\n",
+ "local_rank = 1\n",
+ "tf_record_reader_type = 0\n",
+ "featureKeyMap = {\n",
+ " \"image/encoded\": \"image/encoded\",\n",
+ " \"image/class/label\": \"image/class/label\",\n",
+ " \"image/filename\": \"image/filename\",\n",
+ "}\n",
+ "path = \"output_folder/tf_reader/classification/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f5fa309",
+ "metadata": {},
+ "source": [
+ "## Defining and Running the Pipeline\n",
+ " Creating the pipeline using tf.readers for reading the images stored in tf_record format. In this pipeline, resize augmentation is used on the decoded images. Resize augmentation outputs are returned using set_outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d6ef55f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=2, rocal_cpu=rocal_cpu, tensor_layout = types.NCHW)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f80ad1a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with pipe:\n",
+ " inputs = fn.readers.tfrecord(path=image_path, reader_type=tf_record_reader_type, user_feature_key_map=featureKeyMap,\n",
+ " features={\n",
+ " \"image/encoded\": tf.io.FixedLenFeature((), tf.string, \"\"),\n",
+ " \"image/class/label\": tf.io.FixedLenFeature([1], tf.int64, -1),\n",
+ " \"image/filename\": tf.io.FixedLenFeature((), tf.string, \"\")\n",
+ " }\n",
+ " )\n",
+ " jpegs = inputs[\"image/encoded\"]\n",
+ " images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=image_path)\n",
+ " resized = fn.resize(images, resize_width=300, resize_height=300)\n",
+ " if(one_hot_label == 1):\n",
+ " labels = inputs[\"image/class/label\"]\n",
+ " _ = fn.one_hot(labels, num_classes=1000)\n",
+ " pipe.set_outputs(resized)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6dfa7b3e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe.build()\n",
+ "# Dataloader\n",
+ "imageIterator = ROCALIterator(pipe)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "630cda47",
+ "metadata": {},
+ "source": [
+ "## Visualizing outputs\n",
+ "We have displayed the output of augmented images using imshow()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19493e1e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cnt = 0\n",
+ "fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(20, 20))\n",
+ "row = 0\n",
+ "col = 0\n",
+ "for i, ([images_array], labels_array) in enumerate(imageIterator, 0):\n",
+ " for k in images_array:\n",
+ " cnt += 1\n",
+ " image = k.astype(\"int\")\n",
+ " axes[row, col].imshow(image)\n",
+ " row += 1\n",
+ " if row == 2:\n",
+ " row = 0\n",
+ " if col != 4:\n",
+ " col += 1\n",
+ "imageIterator.reset()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/notebooks/video_decoder.ipynb b/docs/examples/notebooks/video_decoder.ipynb
new file mode 100644
index 000000000..56922adb7
--- /dev/null
+++ b/docs/examples/notebooks/video_decoder.ipynb
@@ -0,0 +1,285 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d7ff9965",
+ "metadata": {},
+ "source": [
+ " ## Video Pipeline Reading From Multiple Files in rocAL\n",
+ "\n",
+ "This example presents a simple rocAL video pipeline that loads and decodes video data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8b1a3e9",
+ "metadata": {},
+ "source": [
+ " ## Common Code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ac44489",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.fn as fn\n",
+ "import amd.rocal.types as types\n",
+ "import numpy as np\n",
+ "import os\n",
+ "%matplotlib inline\n",
+ "from matplotlib import pyplot as plt\n",
+ "import matplotlib.gridspec as gridspec"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "442c364e",
+ "metadata": {},
+ "source": [
+ "## Configuring rocAL pipeline\n",
+ "Configure the pipeline parameters as required by the user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20307afc",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "video_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"video_and_sequence_samples\", \"labelled_videos\")\n",
+ "rocal_cpu = True\n",
+ "batch_size = 2\n",
+ "display = False\n",
+ "num_threads = 4\n",
+ "random_seed = 1\n",
+ "tensor_format = types.NCHW\n",
+ "tensor_dtype = types.FLOAT\n",
+ "local_rank = 1\n",
+ "sequence_length = 3\n",
+ "n_iter = 6"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bf11975c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_sequence(sequence):\n",
+ " columns = 3\n",
+ " rows = (sequence_length + 1) // (columns)\n",
+ " gs = gridspec.GridSpec(rows, columns)\n",
+ " for j in range(rows * columns):\n",
+ " plt.subplot(gs[j])\n",
+ " plt.axis(\"off\")\n",
+ " plt.imshow(sequence)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9076f0f5",
+ "metadata": {},
+ "source": [
+ "## Defining and Running the Pipeline\n",
+ "A custom iterator is created for iterating through the video pipeline outputs\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78257053-674e-4bcd-a047-bd23bf775ee5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class ROCALVideoIterator(object):\n",
+ " \"\"\"\n",
+ " ROCALVideoIterator for pyTorch.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " pipelines : list of amd.rocal.pipeline.Pipeline\n",
+ " List of pipelines to use\n",
+ " size : int\n",
+ " Epoch size.\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, pipelines, tensor_layout=types.NCHW, reverse_channels=False, multiplier=None, offset=None, tensor_dtype=types.FLOAT, display=False, sequence_length=3):\n",
+ "\n",
+ " try:\n",
+ " assert pipelines is not None, \"Number of provided pipelines has to be at least 1\"\n",
+ " except Exception as ex:\n",
+ " print(ex)\n",
+ "\n",
+ " self.loader = pipelines\n",
+ " self.tensor_format = tensor_layout\n",
+ " self.multiplier = multiplier if multiplier else [1.0, 1.0, 1.0]\n",
+ " self.offset = offset if offset else [0.0, 0.0, 0.0]\n",
+ " self.reverse_channels = reverse_channels\n",
+ " self.tensor_dtype = tensor_dtype\n",
+ " self.batch_size = self.loader._batch_size\n",
+ " self.rim = self.loader.get_remaining_images()\n",
+ " self.display = display\n",
+ " self.iter_num = 0\n",
+ " self.sequence_length = sequence_length\n",
+ " self.output = self.dimensions = self.dtype = None\n",
+ "\n",
+ " def next(self):\n",
+ " return self.__next__()\n",
+ "\n",
+ " def __next__(self):\n",
+ " if self.loader.is_empty():\n",
+ " raise StopIteration\n",
+ "\n",
+ " if self.loader.rocal_run() != 0:\n",
+ " raise StopIteration\n",
+ " else:\n",
+ " self.output_tensor_list = self.loader.get_output_tensors()\n",
+ " self.iter_num += 1\n",
+ " # Copy output from buffer to numpy array\n",
+ " if self.output is None:\n",
+ " self.dimensions = self.output_tensor_list[0].dimensions()\n",
+ " self.dtype = self.output_tensor_list[0].dtype()\n",
+ " self.layout = self.output_tensor_list[0].layout()\n",
+ " self.output = np.empty((self.dimensions[0] * self.dimensions[1], self.dimensions[2], self.dimensions[3], self.dimensions[4]), dtype=self.dtype)\n",
+ " self.output_tensor_list[0].copy_data(self.output)\n",
+ " img = torch.from_numpy(self.output)\n",
+ " return img\n",
+ "\n",
+ " def reset(self):\n",
+ " self.loader.rocal_reset_loaders()\n",
+ "\n",
+ " def __iter__(self):\n",
+ " return self\n",
+ "\n",
+ " def __del__(self):\n",
+ " self.loader.rocal_release()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6a8bc652",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu,\n",
+ " tensor_layout=tensor_format, tensor_dtype=tensor_dtype)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "913c943d",
+ "metadata": {},
+ "source": [
+ "## Video Pipeline\n",
+ "Here the video reader is used to read the video data. Then the decoded sequences are passed to CMN. The CMN outputs are returned using set_outputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09691217",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with pipe:\n",
+ " images = fn.readers.video(file_root=video_path, sequence_length=sequence_length,\n",
+ " random_shuffle=False, image_type=types.RGB)\n",
+ " crop_size = [512, 960]\n",
+ " output_images = fn.crop_mirror_normalize(images,\n",
+ " crop=crop_size,\n",
+ " mean=[0, 0, 0],\n",
+ " std=[1, 1, 1],\n",
+ " mirror=0,\n",
+ " output_dtype=types.UINT8,\n",
+ " output_layout=types.NFHWC)\n",
+ " pipe.set_outputs(output_images)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "599c42aa",
+ "metadata": {},
+ "source": [
+ "## Building the Pipeline\n",
+ "Here the pipeline is created. In order to use this Pipeline, the pipeline has to be built. This is achieved by calling the build function. Then iterator object is created with ROCALVideoIterator(video_pipeline)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "974e212e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the pipeline\n",
+ "pipe.build()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20fbd7f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dataloader\n",
+ "data_loader = ROCALVideoIterator(pipe, multiplier=pipe._multiplier, offset=pipe._offset, display=display, sequence_length=sequence_length)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b8309c7",
+ "metadata": {},
+ "source": [
+ "## Visualizing outputs\n",
+ "The outputs of the video sequence are plotted using matplotlib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c1c7f18",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i, it in enumerate(data_loader):\n",
+ " if i == n_iter:\n",
+ " break\n",
+ " for sequence in it:\n",
+ " display_sequence(sequence)\n",
+ "data_loader.reset()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/notebooks/video_reader_with_label.ipynb b/docs/examples/notebooks/video_reader_with_label.ipynb
new file mode 100644
index 000000000..9eb2ba032
--- /dev/null
+++ b/docs/examples/notebooks/video_reader_with_label.ipynb
@@ -0,0 +1,293 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d7ff9965",
+ "metadata": {},
+ "source": [
+ " ## Video Pipeline Reading Labelled Videos in rocAL\n",
+ "\n",
+ "This example presents a simple rocAL video pipeline that loads and decodes video data along with their labels."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8b1a3e9",
+ "metadata": {},
+ "source": [
+ " ## Common Code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ac44489",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from amd.rocal.pipeline import Pipeline\n",
+ "import amd.rocal.fn as fn\n",
+ "import amd.rocal.types as types\n",
+ "import numpy as np\n",
+ "import os\n",
+ "%matplotlib inline\n",
+ "from matplotlib import pyplot as plt\n",
+ "import matplotlib.gridspec as gridspec"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "442c364e",
+ "metadata": {},
+ "source": [
+ "## Configuring rocAL pipeline\n",
+ "Configure the pipeline parameters as required by the user."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20307afc",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "video_path = os.path.join(os.environ['ROCAL_DATA_PATH'], \"video_and_sequence_samples\", \"labelled_videos\")\n",
+ "rocal_cpu = True\n",
+ "batch_size = 2\n",
+ "display = False\n",
+ "num_threads = 4\n",
+ "random_seed = 1\n",
+ "tensor_format = types.NCHW\n",
+ "tensor_dtype = types.FLOAT\n",
+ "local_rank = 1\n",
+ "sequence_length = 3\n",
+ "n_iter = 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9076f0f5",
+ "metadata": {},
+ "source": [
+ "## Defining and Running the Pipeline\n",
+ "A custom iterator is created for iterating through the video pipeline outputs\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3cc73c8-8d31-49c7-92d0-818ef7ef8b1d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class ROCALVideoIterator(object):\n",
+ " \"\"\"\n",
+ " ROCALVideoIterator for pyTorch.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " pipelines : list of amd.rocal.pipeline.Pipeline\n",
+ " List of pipelines to use\n",
+ " size : int\n",
+ " Epoch size.\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, pipelines, tensor_layout=types.NCHW, reverse_channels=False, multiplier=None, offset=None, tensor_dtype=types.FLOAT, display=False, sequence_length=3):\n",
+ "\n",
+ " try:\n",
+ " assert pipelines is not None, \"Number of provided pipelines has to be at least 1\"\n",
+ " except Exception as ex:\n",
+ " print(ex)\n",
+ "\n",
+ " self.loader = pipelines\n",
+ " self.tensor_format = tensor_layout\n",
+ " self.multiplier = multiplier if multiplier else [1.0, 1.0, 1.0]\n",
+ " self.offset = offset if offset else [0.0, 0.0, 0.0]\n",
+ " self.reverse_channels = reverse_channels\n",
+ " self.tensor_dtype = tensor_dtype\n",
+ " self.batch_size = self.loader._batch_size\n",
+ " self.rim = self.loader.get_remaining_images()\n",
+ " self.display = display\n",
+ " self.iter_num = 0\n",
+ " self.sequence_length = sequence_length\n",
+ " print(\"____________REMAINING IMAGES____________:\", self.rim)\n",
+ " self.output = self.dimensions = self.dtype = None\n",
+ "\n",
+ " def next(self):\n",
+ " return self.__next__()\n",
+ "\n",
+ " def __next__(self):\n",
+ " if self.loader.is_empty():\n",
+ " raise StopIteration\n",
+ "\n",
+ " if self.loader.rocal_run() != 0:\n",
+ " raise StopIteration\n",
+ " else:\n",
+ " self.output_tensor_list = self.loader.get_output_tensors()\n",
+ " self.iter_num += 1\n",
+ " # Copy output from buffer to numpy array\n",
+ " if self.output is None:\n",
+ " self.dimensions = self.output_tensor_list[0].dimensions()\n",
+ " self.dtype = self.output_tensor_list[0].dtype()\n",
+ " self.layout = self.output_tensor_list[0].layout()\n",
+ " self.output = np.empty((self.dimensions[0] * self.dimensions[1], self.dimensions[2], self.dimensions[3], self.dimensions[4]), dtype=self.dtype)\n",
+ " self.output_tensor_list[0].copy_data(self.output)\n",
+ " img = torch.from_numpy(self.output)\n",
+ " self.labels = self.loader.get_image_labels()\n",
+ " return img, self.labels\n",
+ "\n",
+ " def reset(self):\n",
+ " self.loader.rocal_reset_loaders()\n",
+ "\n",
+ " def __iter__(self):\n",
+ " return self\n",
+ "\n",
+ " def __del__(self):\n",
+ " self.loader.rocal_release()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6a8bc652",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = Pipeline(batch_size=batch_size, num_threads=num_threads,device_id=local_rank, seed=random_seed, rocal_cpu=rocal_cpu,\n",
+ " tensor_layout=tensor_format, tensor_dtype=tensor_dtype)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "913c943d",
+ "metadata": {},
+ "source": [
+ "## Video Pipeline\n",
+ "Here the video reader is used to read and decode the video data. The decoded sequences are then passed to crop_mirror_normalize (CMN), and the CMN outputs are returned from the pipeline using set_outputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "09691217",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with pipe:\n",
+ " images = fn.readers.video(file_root=video_path, sequence_length=sequence_length,\n",
+ " random_shuffle=False, image_type=types.RGB)\n",
+ " crop_size = (512,960)\n",
+ " output_images = fn.crop_mirror_normalize(images,\n",
+ " crop=crop_size,\n",
+ " mean=[0, 0, 0],\n",
+ " std=[1, 1, 1],\n",
+ " mirror=0,\n",
+ " output_dtype=types.UINT8,\n",
+ " output_layout=types.NFHWC)\n",
+ " pipe.set_outputs(output_images)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "599c42aa",
+ "metadata": {},
+ "source": [
+ "## Building the Pipeline\n",
+ "Here the pipeline is built. A pipeline must be built before it can be used; this is done by calling its build function. The iterator object is then created with ROCALVideoIterator(pipe)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "974e212e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the pipeline\n",
+ "pipe.build()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20fbd7f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dataloader\n",
+ "data_loader = ROCALVideoIterator(\n",
+ " pipe, multiplier=pipe._multiplier, offset=pipe._offset, display=display, sequence_length=sequence_length)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b8309c7",
+ "metadata": {},
+ "source": [
+ "## Visualizing outputs\n",
+ "The outputs of the video sequence are plotted using matplotlib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa664089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def display_sequence(sequence, labels, count):\n",
+ " columns = 3\n",
+ " rows = (sequence_length + 1) // (columns)\n",
+ " gs = gridspec.GridSpec(rows, columns)\n",
+ " if(count % 2 == 0):\n",
+ " plt.suptitle(\"label \" + str(labels[0]), fontsize=30)\n",
+ " for j in range(rows * columns):\n",
+ " plt.subplot(gs[j])\n",
+ " plt.axis(\"off\")\n",
+ " plt.imshow(sequence)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c1c7f18",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "count = 0\n",
+ "for i, (it, labels) in enumerate(data_loader):\n",
+ " if i == 0 or i == 112 or i == 244:\n",
+ " for sequence in it:\n",
+ " display_sequence(sequence, labels, count)\n",
+ " count += 1\n",
+ "data_loader.reset()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/examples/pytorch/README.md b/docs/examples/pytorch/README.md
deleted file mode 100644
index 9ae0ded87..000000000
--- a/docs/examples/pytorch/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-* This example shows how to run training using pytorch and ToyNet with 2 classes
-* Use a dataset with 2 classes
-
-To run the sample:
-* Install rocal_pybind
-
-```
-python3 test_training.py
-```
-* rocal device can be cpu/gpu.
diff --git a/docs/examples/pytorch/imagenet_training/README.md b/docs/examples/pytorch/imagenet_training/README.md
new file mode 100644
index 000000000..251d51355
--- /dev/null
+++ b/docs/examples/pytorch/imagenet_training/README.md
@@ -0,0 +1,114 @@
+# ImageNet training in PyTorch
+
+This example implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset.
+This version has been modified to use rocAL. It assumes that the dataset is raw JPEGs from the ImageNet dataset. It offers CPU- and GPU-based rocAL pipelines: use the `--rocal-cpu` argument to enable the CPU pipeline and the `--rocal-gpu` argument to enable the GPU pipeline.
+
+## Requirements
+
+- Install PyTorch for [ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html)
+- Install rocAL for running rocAL trainings
+- Download the ImageNet dataset from http://www.image-net.org/ and use [the following shell script](https://github.com/pytorch/examples/blob/main/imagenet/extract_ILSVRC.sh) to move and extract the training and validation images to labeled subfolders
+
+## Training
+
+To train a model, run `imagenet_training.py` with the desired model architecture and the path to the ImageNet dataset:
+
+```shell
+python imagenet_training.py -a resnet18 [imagenet-folder with train and val folders]
+```
+
+The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG:
+
+```shell
+python imagenet_training.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders]
+```
+
+To run rocAL-integrated training, pass `--rocal-cpu` or `--rocal-gpu`:
+
+```shell
+python3 imagenet_training.py -a resnet50 -j$(nproc) --batch-size 1024 --rocal-cpu [imagenet-folder with train and val folders]
+```
+
+Make sure to remove older checkpoints (`rm *.pth.tar`) saved in the folder if the example has been run before
+
+## Use Dummy Data
+
+The ImageNet dataset is large and time-consuming to download. To get started quickly, run `imagenet_training.py` with dummy data by passing `--dummy`. This is also useful for benchmarking training speed. Note that the loss and accuracy values are meaningless in this case.
+
+```shell
+python imagenet_training.py -a resnet18 --dummy
+```
+
+## Multi-processing Distributed Data Parallel Training
+
+You should always use the NCCL backend for multi-processing distributed training since it currently provides the best distributed training performance.
+
+### Single node, multiple GPUs
+
+```shell
+python imagenet_training.py -a resnet50 --dist-url 'tcp://127.0.0.1:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 [imagenet-folder with train and val folders]
+```
+
+### Multiple nodes
+
+Node 0:
+
+```shell
+python imagenet_training.py -a resnet50 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 0 [imagenet-folder with train and val folders]
+```
+
+Node 1:
+
+```shell
+python imagenet_training.py -a resnet50 --dist-url 'tcp://IP_OF_NODE0:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 2 --rank 1 [imagenet-folder with train and val folders]
+```
+
+## Usage
+
+```bash
+usage: imagenet_training.py [-h] [-a ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N] [--lr LR] [--momentum M] [--wd W] [-p N] [--resume PATH] [-e] [--pretrained] [--world-size WORLD_SIZE] [--rank RANK]
+ [--dist-url DIST_URL] [--dist-backend DIST_BACKEND] [--seed SEED] [--gpu GPU] [--multiprocessing-distributed] [--dummy]
+ [DIR]
+
+PyTorch ImageNet Training
+
+positional arguments:
+ DIR path to dataset (default: imagenet)
+
+optional arguments:
+ -h, --help show this help message and exit
+ -a ARCH, --arch ARCH model architecture: alexnet | convnext_base | convnext_large | convnext_small | convnext_tiny | densenet121 | densenet161 | densenet169 | densenet201 | efficientnet_b0 |
+ efficientnet_b1 | efficientnet_b2 | efficientnet_b3 | efficientnet_b4 | efficientnet_b5 | efficientnet_b6 | efficientnet_b7 | googlenet | inception_v3 | mnasnet0_5 | mnasnet0_75 |
+ mnasnet1_0 | mnasnet1_3 | mobilenet_v2 | mobilenet_v3_large | mobilenet_v3_small | regnet_x_16gf | regnet_x_1_6gf | regnet_x_32gf | regnet_x_3_2gf | regnet_x_400mf | regnet_x_800mf |
+ regnet_x_8gf | regnet_y_128gf | regnet_y_16gf | regnet_y_1_6gf | regnet_y_32gf | regnet_y_3_2gf | regnet_y_400mf | regnet_y_800mf | regnet_y_8gf | resnet101 | resnet152 | resnet18 |
+ resnet34 | resnet50 | resnext101_32x8d | resnext50_32x4d | shufflenet_v2_x0_5 | shufflenet_v2_x1_0 | shufflenet_v2_x1_5 | shufflenet_v2_x2_0 | squeezenet1_0 | squeezenet1_1 | vgg11 |
+ vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19 | vgg19_bn | vit_b_16 | vit_b_32 | vit_l_16 | vit_l_32 | wide_resnet101_2 | wide_resnet50_2 (default: resnet18)
+ -j N, --workers N number of data loading workers (default: 4)
+ --rocal-cpu use rocAL CPU dataloader
+ --rocal-gpu use rocAL GPU dataloader
+ --epochs N number of total epochs to run
+ --start-epoch N manual epoch number (useful on restarts)
+ -b N, --batch-size N mini-batch size (default: 256), this is the total batch size of all GPUs on the current node when using Data Parallel or Distributed Data Parallel
+ --lr LR, --learning-rate LR
+ initial learning rate
+ --momentum M momentum
+ --wd W, --weight-decay W
+ weight decay (default: 1e-4)
+ -p N, --print-freq N print frequency (default: 10)
+ --resume PATH path to latest checkpoint (default: none)
+ -e, --evaluate evaluate model on validation set
+ --pretrained use pre-trained model
+ --world-size WORLD_SIZE
+ number of nodes for distributed training
+ --rank RANK node rank for distributed training
+ --dist-url DIST_URL url used to set up distributed training
+ --dist-backend DIST_BACKEND
+ distributed backend
+ --seed SEED seed for initializing training.
+ --gpu GPU GPU id to use.
+ --multiprocessing-distributed
+ Use multi-processing distributed training to launch N processes per node, which has N GPUs. This is the fastest way to use PyTorch for either single node or multi node data parallel
+ training
+ --dummy use fake data to benchmark
+
+```
diff --git a/docs/examples/pytorch/imagenet_training/imagenet_training.py b/docs/examples/pytorch/imagenet_training/imagenet_training.py
new file mode 100644
index 000000000..4fe15a03e
--- /dev/null
+++ b/docs/examples/pytorch/imagenet_training/imagenet_training.py
@@ -0,0 +1,687 @@
+import argparse
+import os
+import random
+import shutil
+import time
+import warnings
+from enum import Enum
+
+import torch
+import torch.backends.cudnn as cudnn
+import torch.distributed as dist
+import torch.multiprocessing as mp
+import torch.nn as nn
+import torch.nn.parallel
+import torch.optim
+import torch.utils.data
+import torch.utils.data.distributed
+import torchvision.datasets as datasets
+import torchvision.models as models
+import torchvision.transforms as transforms
+from torch.optim.lr_scheduler import StepLR
+from torch.utils.data import Subset
+
+try:
+ from amd.rocal.plugin.pytorch import ROCALClassificationIterator
+ from amd.rocal.pipeline import Pipeline
+ import amd.rocal.fn as fn
+ import amd.rocal.types as types
+except ImportError:
+ print('Install rocAL for running rocAL trainings')
+
+# Names of every lowercase, non-dunder callable found in torchvision.models;
+# these are the values accepted by --arch.
+model_names = sorted(name for name in models.__dict__
+ if name.islower() and not name.startswith("__")
+ and callable(models.__dict__[name]))
+
+# Command-line interface of the training script. The single positional
+# argument is the dataset root; everything else is optional.
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet',
+ help='path to dataset (default: imagenet)')
+parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
+ choices=model_names,
+ help='model architecture: ' +
+ ' | '.join(model_names) +
+ ' (default: resnet18)')
+parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+ help='number of data loading workers (default: 4)')
+parser.add_argument('--epochs', default=90, type=int, metavar='N',
+ help='number of total epochs to run')
+parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
+ help='manual epoch number (useful on restarts)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+ metavar='N',
+ help='mini-batch size (default: 256), this is the total '
+ 'batch size of all GPUs on the current node when '
+ 'using Data Parallel or Distributed Data Parallel')
+parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
+ metavar='LR', help='initial learning rate', dest='lr')
+parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
+ help='momentum')
+parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
+ metavar='W', help='weight decay (default: 1e-4)',
+ dest='weight_decay')
+parser.add_argument('-p', '--print-freq', default=10, type=int,
+ metavar='N', help='print frequency (default: 10)')
+parser.add_argument('--resume', default='', type=str, metavar='PATH',
+ help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+ help='evaluate model on validation set')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+ help='use pre-trained model')
+# Distributed-training options; -1 means "not specified" for world size and rank.
+parser.add_argument('--world-size', default=-1, type=int,
+ help='number of nodes for distributed training')
+parser.add_argument('--rank', default=-1, type=int,
+ help='node rank for distributed training')
+parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
+ help='url used to set up distributed training')
+parser.add_argument('--dist-backend', default='nccl', type=str,
+ help='distributed backend')
+parser.add_argument('--seed', default=None, type=int,
+ help='seed for initializing training. ')
+parser.add_argument('--gpu', default=None, type=int,
+ help='GPU id to use.')
+# rocAL data-loader selection. Both flags are store_true switches, so they are
+# False unless given on the command line.
+parser.add_argument('--rocal-gpu', action='store_true',
+ help='enable rocal-gpu based training')
+parser.add_argument('--rocal-cpu', action='store_true',
+ help='enable rocal-cpu based training')
+parser.add_argument('--multiprocessing-distributed', action='store_true',
+ help='Use multi-processing distributed training to launch '
+ 'N processes per node, which has N GPUs. This is the '
+ 'fastest way to use PyTorch for either single node or '
+ 'multi node data parallel training')
+parser.add_argument('--dummy', action='store_true',
+ help="use fake data to benchmark")
+
+# Best top-1 validation accuracy seen so far; main_worker() rebinds it through
+# a `global` declaration.
+best_acc1 = 0
+
+
+def train_pipeline(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):
+ """Define the rocAL pipeline that produces training batches.
+
+ The graph reads files from `data_path`, decodes them with
+ `fn.decoders.image_slice` (shuffled, sharded as shard `local_rank` of
+ `world_size`), resizes to 224x224, and finishes with
+ `fn.crop_mirror_normalize`, whose `mirror` input is a coin flip with
+ probability 0.5. The pipeline is returned without `build()` being called.
+
+ Args:
+ data_path: Root directory passed as `file_root` to the reader and decoder.
+ batch_size: Batch size given to the `Pipeline` constructor.
+ local_rank: Used as `device_id`, as `shard_id`, and (plus 10) as the seed.
+ world_size: Passed as `num_shards` to the decoder.
+ num_thread: Passed as `num_threads` to the `Pipeline` constructor.
+ crop: Side length; the crop passed to crop_mirror_normalize is `(crop, crop)`.
+ rocal_cpu: Selects the CPU backend and `HOST_MEMORY` output when true,
+ otherwise `DEVICE_MEMORY`.
+ fp16: Selects `types.FLOAT16` instead of `types.FLOAT` as the pipeline
+ `tensor_dtype`.
+
+ Returns:
+ The configured `Pipeline` object.
+ """
+ pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10, rocal_cpu=rocal_cpu, tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW,
+ prefetch_queue_depth=6, mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], std=[0.229 * 255, 0.224 * 255, 0.225 * 255], output_memory_type=types.HOST_MEMORY if rocal_cpu else types.DEVICE_MEMORY)
+ with pipe:
+ # `labels` is not used again in this function.
+ jpegs, labels = fn.readers.file(file_root=data_path)
+ # Only used for the informational print at the end.
+ rocal_device = 'cpu' if rocal_cpu else 'gpu'
+ decode = fn.decoders.image_slice(jpegs, output_type=types.RGB,
+ file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
+ res = fn.resize(decode, resize_width=224, resize_height=224, output_layout=types.NHWC,
+ output_dtype=types.UINT8, interpolation_type=types.TRIANGULAR_INTERPOLATION)
+ flip_coin = fn.random.coin_flip(probability=0.5)
+ # NOTE(review): output_dtype is fixed to types.FLOAT here even when fp16
+ # selected FLOAT16 for the pipeline - confirm this is intended.
+ cmnp = fn.crop_mirror_normalize(res,
+ output_layout=types.NCHW,
+ output_dtype=types.FLOAT,
+ crop=(crop, crop),
+ mirror=flip_coin,
+ mean=[0.485 * 255, 0.456 *
+ 255, 0.406 * 255],
+ std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
+ pipe.set_outputs(cmnp)
+ print('rocal "{0}" variant'.format(rocal_device))
+ return pipe
+
+
+def get_rocal_train_loader(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16=False):
+    """Create the rocAL training loader for the `train` sub-folder of `data_path`.
+
+    Builds the pipeline returned by `train_pipeline`, wraps it in a
+    `ROCALClassificationIterator` (on "cpu" when `rocal_cpu` is true, otherwise
+    on "cuda" with `device_id=local_rank`), and returns that iterator wrapped
+    in a `Prefetcher`.
+    """
+    pipeline = train_pipeline(os.path.join(data_path, 'train'), batch_size, local_rank,
+                              world_size, num_thread, crop, rocal_cpu, fp16)
+    pipeline.build()
+    torch_device = "cpu" if rocal_cpu else "cuda"
+    iterator = ROCALClassificationIterator(
+        pipeline, device=torch_device, device_id=local_rank)
+    return Prefetcher(iterator, rocal_cpu, batch_size)
+
+
+def val_pipeline(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):
+ """Define the rocAL pipeline that produces validation batches.
+
+ The graph reads files from `data_path`, decodes them with
+ `fn.decoders.image` (no shuffling, sharded as shard `local_rank` of
+ `world_size`, decode size capped at 1000x1000), resizes with
+ `resize_shorter=256`, and finishes with `fn.crop_mirror_normalize` using
+ `mirror=0`. The pipeline is returned without `build()` being called.
+
+ Args:
+ data_path: Root directory passed as `file_root` to the reader and decoder.
+ batch_size: Batch size given to the `Pipeline` constructor.
+ local_rank: Used as `device_id`, as `shard_id`, and (plus 10) as the seed.
+ world_size: Passed as `num_shards` to the decoder.
+ num_thread: Passed as `num_threads` to the `Pipeline` constructor.
+ crop: Side length; the crop passed to crop_mirror_normalize is `(crop, crop)`.
+ rocal_cpu: Selects the CPU backend and `HOST_MEMORY` output when true,
+ otherwise `DEVICE_MEMORY`.
+ fp16: Selects `types.FLOAT16` instead of `types.FLOAT` as the pipeline
+ `tensor_dtype`.
+
+ Returns:
+ The configured `Pipeline` object.
+ """
+ pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank + 10, rocal_cpu=rocal_cpu, tensor_dtype=types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW,
+ prefetch_queue_depth=6, mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], std=[0.229 * 255, 0.224 * 255, 0.225 * 255], output_memory_type=types.HOST_MEMORY if rocal_cpu else types.DEVICE_MEMORY)
+ with pipe:
+ # `labels` is not used again in this function.
+ jpegs, labels = fn.readers.file(file_root=data_path)
+ # Only used for the informational print at the end.
+ rocal_device = 'cpu' if rocal_cpu else 'gpu'
+ decode = fn.decoders.image(jpegs, file_root=data_path, max_decoded_width=1000, max_decoded_height=1000,
+ output_type=types.RGB, shard_id=local_rank, num_shards=world_size, random_shuffle=False)
+ res = fn.resize(decode, resize_shorter=256, scaling_mode=types.SCALING_MODE_NOT_SMALLER,
+ interpolation_type=types.TRIANGULAR_INTERPOLATION, output_layout=types.NHWC, output_dtype=types.UINT8)
+ # NOTE(review): output_dtype is fixed to types.FLOAT here even when fp16
+ # selected FLOAT16 for the pipeline - confirm this is intended.
+ cmnp = fn.crop_mirror_normalize(res,
+ output_layout=types.NCHW,
+ output_dtype=types.FLOAT,
+ crop=(crop, crop),
+ mirror=0,
+ mean=[0.485 * 255, 0.456 *
+ 255, 0.406 * 255],
+ std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
+ pipe.set_outputs(cmnp)
+ print('rocal "{0}" variant'.format(rocal_device))
+ return pipe
+
+
+def get_rocal_val_loader(data_path, batch_size, local_rank, world_size, num_thread, crop, rocal_cpu, fp16=False):
+    """Run the rocAL validation pipeline once and return its batches as a list.
+
+    The pipeline from `val_pipeline` is built over `data_path + "/val/"`,
+    iterated through a `Prefetcher`, and every `(images, target)` pair is
+    cloned into the returned list, so the result can be iterated repeatedly
+    without touching rocAL again.
+    """
+    pipeline = val_pipeline(data_path + "/val/", batch_size, local_rank,
+                            world_size, num_thread, crop, rocal_cpu, fp16)
+    pipeline.build()
+    iterator = ROCALClassificationIterator(
+        pipeline, device="cpu" if rocal_cpu else "cuda", device_id=local_rank)
+    # Clone each tensor so the cached batch does not alias the iterator's output.
+    cached_batches = [(images.clone(), labels.clone())
+                      for (images, labels) in Prefetcher(iterator, rocal_cpu, batch_size)]
+    del iterator
+    return cached_batches
+
+
+class Prefetcher:
+    """Iterator adapter that unpacks loader batches and stages them on the GPU.
+
+    The wrapped loader must yield ``([images], targets)`` pairs, i.e. a
+    one-element list holding the image tensor plus a target tensor; that is
+    the shape unpacked in ``prefetch``.  When ``rocal_cpu`` is true and CUDA
+    is available, every batch is copied to the GPU on a dedicated side stream.
+    On hosts without CUDA the batch is handed over unchanged, so the CPU
+    fallback of the training script keeps working.
+
+    Args:
+        data_loader: Iterable of batches; ``iter()`` is called on it once.
+        rocal_cpu: True when the loader yields host tensors that still have to
+            be moved to the GPU.
+        batch_size: Divisor applied to ``len(data_loader)`` by ``__len__``.
+    """
+
+    def __init__(self, data_loader, rocal_cpu, batch_size):
+        self.data_loader = iter(data_loader)
+        self.rocal_cpu = rocal_cpu
+        self.bs = batch_size
+        self.images = None
+        self.targets = None
+        self.done = False
+        # Host-to-device staging is only possible (and needed) with CUDA.
+        self._copy_to_gpu = bool(rocal_cpu) and torch.cuda.is_available()
+        if self._copy_to_gpu:
+            self.loader_stream = torch.cuda.Stream()
+
+    def __iter__(self):
+        return self
+
+    def __len__(self):
+        """Return ``len(data_loader) // batch_size``."""
+        return len(self.data_loader) // self.bs
+
+    def prefetch(self):
+        """Pull the next batch from the loader into ``self.images/targets``.
+
+        Sets ``self.done`` and clears the staged batch once the loader is
+        exhausted.
+        """
+        try:
+            if self._copy_to_gpu:
+                # Issue the copies on the side stream; __next__ orders the
+                # consumer stream after them.
+                with torch.cuda.stream(self.loader_stream):
+                    [self.images], self.targets = next(self.data_loader)
+                    self.images = self.images.cuda(non_blocking=True)
+                    self.targets = self.targets.cuda(non_blocking=True)
+            else:
+                [self.images], self.targets = next(self.data_loader)
+        except StopIteration:
+            self.images, self.targets = None, None
+            self.done = True
+
+    def reset(self):
+        """Rewind for another epoch.
+
+        Calls the wrapped loader's ``reset()`` when it has one (plain Python
+        iterators do not) and always clears the staged batch and ``done`` flag.
+        """
+        reset_fn = getattr(self.data_loader, "reset", None)
+        if callable(reset_fn):
+            reset_fn()
+        self.images, self.targets = None, None
+        self.done = False
+
+    def __next__(self):
+        """Return the next ``(images, targets)`` pair or raise StopIteration."""
+        if self.images is None and not self.done:
+            self.prefetch()
+        if self._copy_to_gpu:
+            # Must come after prefetch(): the copies for this batch were just
+            # enqueued on loader_stream, and the consumer stream has to wait
+            # for them before the tensors are used.
+            torch.cuda.current_stream().wait_stream(self.loader_stream)
+        if self.done:
+            raise StopIteration()
+        images, targets = self.images, self.targets
+        self.images, self.targets = None, None
+        return images, targets
+
+
+def main():
+    """Parse the command line, apply global settings and start the worker(s).
+
+    With ``--multiprocessing-distributed`` one ``main_worker`` process per GPU
+    is spawned; otherwise ``main_worker`` is called directly in this process.
+    """
+    args = parser.parse_args()
+
+    seed = args.seed
+    if seed is not None:
+        # Seed the Python and PyTorch RNGs and switch cuDNN to deterministic mode.
+        random.seed(seed)
+        torch.manual_seed(seed)
+        cudnn.deterministic = True
+        cudnn.benchmark = False
+        warnings.warn('You have chosen to seed training. '
+                      'This will turn on the CUDNN deterministic setting, '
+                      'which can slow down your training considerably! '
+                      'You may see unexpected behavior when restarting '
+                      'from checkpoints.')
+
+    if args.gpu is not None:
+        warnings.warn('You have chosen a specific GPU. This will completely '
+                      'disable data parallelism.')
+
+    # With env:// initialisation the world size may come from the environment.
+    if args.world_size == -1 and args.dist_url == "env://":
+        args.world_size = int(os.environ["WORLD_SIZE"])
+
+    args.distributed = args.multiprocessing_distributed or args.world_size > 1
+
+    gpu_count = 1
+    if torch.cuda.is_available():
+        gpu_count = torch.cuda.device_count()
+        if gpu_count == 1 and args.dist_backend == "nccl":
+            warnings.warn(
+                "nccl backend >= 2.5 requires GPU count > 1, perhaps use 'gloo'")
+
+    if not args.multiprocessing_distributed:
+        # Single process: run the worker in place.
+        main_worker(args.gpu, gpu_count, args)
+        return
+
+    # One process per GPU on this node, so the total world size is scaled by
+    # the GPU count before the workers are spawned.
+    args.world_size = gpu_count * args.world_size
+    mp.spawn(main_worker, nprocs=gpu_count, args=(gpu_count, args))
+
+
+def main_worker(gpu, ngpus_per_node, args):
+    """Set up model, optimizer and data loaders for one process, then train.
+
+    Args:
+        gpu: GPU index for this process (the process index when started via
+            ``mp.spawn``), or ``None`` when no specific GPU was requested.
+        ngpus_per_node: Number of GPUs on this node, as passed in by ``main``.
+        args: Parsed command-line namespace.  ``gpu``, ``rank``, ``batch_size``,
+            ``workers``, ``start_epoch`` and the rocAL flags may be rewritten.
+
+    Side effects:
+        Rebinds the module-level ``best_acc1`` and calls ``save_checkpoint``
+        after every epoch on the process selected by the rank check below.
+    """
+    global best_acc1
+    args.gpu = gpu
+
+    if args.gpu is not None:
+        print("Use GPU: {} for training".format(args.gpu))
+
+    if args.distributed:
+        if args.dist_url == "env://" and args.rank == -1:
+            args.rank = int(os.environ["RANK"])
+        if args.multiprocessing_distributed:
+            # For multiprocessing distributed training, rank needs to be the
+            # global rank among all the processes
+            args.rank = args.rank * ngpus_per_node + gpu
+        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
+                                world_size=args.world_size, rank=args.rank)
+    # create model
+    if args.pretrained:
+        print("=> using pre-trained model '{}'".format(args.arch))
+        model = models.__dict__[args.arch](pretrained=True)
+    else:
+        print("=> creating model '{}'".format(args.arch))
+        model = models.__dict__[args.arch]()
+
+    if not torch.cuda.is_available() and not torch.backends.mps.is_available():
+        # No accelerator: the rocAL GPU loader cannot be used, fall back to CPU.
+        if args.rocal_gpu:
+            args.rocal_gpu = False
+            args.rocal_cpu = True
+        print('using CPU, this will be slow')
+    elif args.distributed:
+        # For multiprocessing distributed, DistributedDataParallel constructor
+        # should always set the single device scope, otherwise,
+        # DistributedDataParallel will use all available devices.
+        if torch.cuda.is_available():
+            if args.gpu is not None:
+                torch.cuda.set_device(args.gpu)
+                model.cuda(args.gpu)
+                # When using a single GPU per process and per
+                # DistributedDataParallel, we need to divide the batch size
+                # ourselves based on the total number of GPUs of the current node.
+                args.batch_size = int(args.batch_size / ngpus_per_node)
+                args.workers = int(
+                    (args.workers + ngpus_per_node - 1) / ngpus_per_node)
+                model = torch.nn.parallel.DistributedDataParallel(
+                    model, device_ids=[args.gpu])
+            else:
+                model.cuda()
+                # DistributedDataParallel will divide and allocate batch_size to all
+                # available GPUs if device_ids are not set
+                model = torch.nn.parallel.DistributedDataParallel(model)
+    elif args.gpu is not None and torch.cuda.is_available():
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+        model = model.to(device)
+    else:
+        # DataParallel will divide and allocate batch_size to all available GPUs
+        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
+            model.features = torch.nn.DataParallel(model.features)
+            model.cuda()
+        else:
+            model = torch.nn.DataParallel(model).cuda()
+
+    if torch.cuda.is_available():
+        # Compare against None: GPU index 0 is a valid, explicitly chosen device.
+        if args.gpu is not None:
+            device = torch.device('cuda:{}'.format(args.gpu))
+        else:
+            device = torch.device("cuda")
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
+    # define loss function (criterion), optimizer, and learning rate scheduler
+    criterion = nn.CrossEntropyLoss().to(device)
+
+    optimizer = torch.optim.SGD(model.parameters(), args.lr,
+                                momentum=args.momentum,
+                                weight_decay=args.weight_decay)
+
+    # Sets the learning rate to the initial LR decayed by 10 every 30 epochs
+    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
+
+    # optionally resume from a checkpoint
+    if args.resume:
+        if os.path.isfile(args.resume):
+            print("=> loading checkpoint '{}'".format(args.resume))
+            # NOTE(review): when a GPU id is given but CUDA is unavailable,
+            # neither branch runs and `checkpoint` stays unbound.
+            if args.gpu is None:
+                checkpoint = torch.load(args.resume)
+            elif torch.cuda.is_available():
+                # Map model to be loaded to specified single gpu.
+                loc = 'cuda:{}'.format(args.gpu)
+                checkpoint = torch.load(args.resume, map_location=loc)
+            args.start_epoch = checkpoint['epoch']
+            best_acc1 = checkpoint['best_acc1']
+            if args.gpu is not None:
+                # best_acc1 may be from a checkpoint from a different GPU
+                best_acc1 = best_acc1.to(args.gpu)
+            model.load_state_dict(checkpoint['state_dict'])
+            optimizer.load_state_dict(checkpoint['optimizer'])
+            scheduler.load_state_dict(checkpoint['scheduler'])
+            print("=> loaded checkpoint '{}' (epoch {})"
+                  .format(args.resume, checkpoint['epoch']))
+        else:
+            print("=> no checkpoint found at '{}'".format(args.resume))
+
+    # Data loading code
+    if args.dummy:
+        print("=> Dummy data is used!")
+        train_dataset = datasets.FakeData(
+            1281167, (3, 224, 224), 1000, transforms.ToTensor())
+        val_dataset = datasets.FakeData(
+            50000, (3, 224, 224), 1000, transforms.ToTensor())
+    if args.rocal_gpu or args.rocal_cpu:
+        # rocAL loaders read from args.data directly and take precedence over
+        # the torchvision datasets.
+        get_train_loader = get_rocal_train_loader
+        get_val_loader = get_rocal_val_loader
+        local_rank = 0
+        world_size = 1
+
+        crop_size = 224
+        if args.distributed or args.gpu is not None:
+            local_rank = args.rank if args.distributed else args.gpu
+            if args.world_size != -1:
+                world_size = args.world_size
+        if local_rank is None:
+            local_rank = 0
+        train_loader = get_train_loader(data_path=args.data, batch_size=args.batch_size, local_rank=local_rank, world_size=world_size,
+                                        num_thread=args.workers, crop=crop_size, rocal_cpu=False if args.rocal_gpu else True, fp16=False)
+
+        val_loader = get_val_loader(data_path=args.data, batch_size=args.batch_size, local_rank=local_rank, world_size=world_size,
+                                    num_thread=args.workers, crop=crop_size, rocal_cpu=False if args.rocal_gpu else True, fp16=False)
+    else:
+        if not args.dummy:
+            # Real data: only now touch the dataset folders, so that --dummy
+            # keeps its FakeData datasets and needs no ImageNet on disk.
+            traindir = os.path.join(args.data, 'train')
+            valdir = os.path.join(args.data, 'val')
+            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                             std=[0.229, 0.224, 0.225])
+
+            train_dataset = datasets.ImageFolder(
+                traindir,
+                transforms.Compose([
+                    transforms.RandomResizedCrop(224),
+                    transforms.RandomHorizontalFlip(),
+                    transforms.ToTensor(),
+                    normalize,
+                ]))
+
+            val_dataset = datasets.ImageFolder(
+                valdir,
+                transforms.Compose([
+                    transforms.Resize(256),
+                    transforms.CenterCrop(224),
+                    transforms.ToTensor(),
+                    normalize,
+                ]))
+
+        if args.distributed:
+            train_sampler = torch.utils.data.distributed.DistributedSampler(
+                train_dataset)
+            val_sampler = torch.utils.data.distributed.DistributedSampler(
+                val_dataset, shuffle=False, drop_last=True)
+        else:
+            train_sampler = None
+            val_sampler = None
+
+        train_loader = torch.utils.data.DataLoader(
+            train_dataset, batch_size=args.batch_size, shuffle=(
+                train_sampler is None),
+            num_workers=args.workers, pin_memory=True, sampler=train_sampler)
+
+        val_loader = torch.utils.data.DataLoader(
+            val_dataset, batch_size=args.batch_size, shuffle=False,
+            num_workers=args.workers, pin_memory=True, sampler=val_sampler)
+
+    if args.evaluate:
+        validate(val_loader, model, criterion, args)
+        return
+
+    for epoch in range(args.start_epoch, args.epochs):
+        # train_sampler only exists on the torchvision (non-rocAL) path.
+        if args.distributed and not (args.rocal_gpu or args.rocal_cpu):
+            train_sampler.set_epoch(epoch)
+
+        # train for one epoch
+        train(train_loader, model, criterion, optimizer, epoch, device, args)
+
+        # evaluate on validation set
+        acc1 = validate(val_loader, model, criterion, args)
+
+        scheduler.step()
+
+        # remember best acc@1 and save checkpoint
+        is_best = acc1 > best_acc1
+        best_acc1 = max(acc1, best_acc1)
+
+        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
+                                                    and args.rank % ngpus_per_node == 0):
+            save_checkpoint({
+                'epoch': epoch + 1,
+                'arch': args.arch,
+                'state_dict': model.state_dict(),
+                'best_acc1': best_acc1,
+                'optimizer': optimizer.state_dict(),
+                'scheduler': scheduler.state_dict()
+            }, is_best)
+
+
+def train(train_loader, model, criterion, optimizer, epoch, device, args):
+ """Runs one training pass over train_loader.
+
+ Each batch is moved to `device`, run through `model`, scored with
+ `criterion`, and used for one optimizer step. Data-loading time, batch
+ time, loss and top-1/top-5 accuracy are tracked in AverageMeter objects
+ and printed every `args.print_freq` batches.
+
+ Args:
+ train_loader: iterable yielding (images, target) batches; must support len().
+ model: network to train (switched to train mode here).
+ criterion: loss callable taking (output, target).
+ optimizer: optimizer exposing zero_grad() and step().
+ epoch: epoch index, used only in the progress prefix.
+ device: device the batches are moved to.
+ args: parsed options; reads print_freq, rocal_cpu and rocal_gpu.
+ """
+ batch_time = AverageMeter('Time', ':6.3f')
+ data_time = AverageMeter('Data', ':6.3f')
+ losses = AverageMeter('Loss', ':.4e')
+ top1 = AverageMeter('Acc@1', ':6.2f')
+ top5 = AverageMeter('Acc@5', ':6.2f')
+ progress = ProgressMeter(
+ len(train_loader),
+ [batch_time, data_time, losses, top1, top5],
+ prefix="Epoch: [{}]".format(epoch))
+
+ # switch to train mode
+ model.train()
+
+ end = time.time()
+ for i, (images, target) in enumerate(train_loader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+
+ # move data to the same device as model
+ images = images.to(device, non_blocking=True)
+ target = target.to(device, non_blocking=True)
+
+ # compute output
+ output = model(images)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ # (meters are weighted by the batch size, images.size(0))
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), images.size(0))
+ top1.update(acc1[0], images.size(0))
+ top5.update(acc5[0], images.size(0))
+
+ # compute gradient and do SGD step
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % args.print_freq == 0:
+ progress.display(i + 1)
+
+ # rocAL loaders are reset explicitly so they can be iterated again
+ # NOTE(review): presumably this runs once after the loop, not per batch - confirm indentation
+ if args.rocal_cpu or args.rocal_gpu:
+ train_loader.reset()
+
+
+def validate(val_loader, model, criterion, args):
+ """Evaluates `model` on `val_loader` and returns the average top-1 accuracy.
+
+ Loss and top-1/top-5 accuracy are accumulated in AverageMeter objects. When
+ `args.distributed` is set the accuracy meters are all-reduced. For
+ non-rocAL loaders in distributed mode, samples not covered by the sampler
+ (len(sampler) * world_size < len(dataset)) are evaluated through an extra
+ DataLoader built over the remaining indices.
+
+ Args:
+ val_loader: iterable yielding (images, target) batches; must support len().
+ model: network to evaluate (switched to eval mode here).
+ criterion: loss callable taking (output, target).
+ args: parsed options; reads gpu, print_freq, distributed, world_size,
+ batch_size, workers, rocal_cpu and rocal_gpu.
+
+ Returns:
+ top1.avg, the running average of the top-1 accuracy meter.
+ """
+
+ def run_validate(loader, base_progress=0):
+ # Evaluates every batch of `loader` without gradients, updating the
+ # meters defined in the enclosing function; `base_progress` offsets the
+ # batch index used for progress display.
+ with torch.no_grad():
+ end = time.time()
+ for i, (images, target) in enumerate(loader):
+ i = base_progress + i
+ # move the batch to CUDA (device args.gpu) and/or MPS when available
+ if args.gpu is not None and torch.cuda.is_available():
+ images = images.cuda(args.gpu, non_blocking=True)
+ if torch.backends.mps.is_available():
+ images = images.to('mps')
+ target = target.to('mps')
+ if torch.cuda.is_available():
+ target = target.cuda(args.gpu, non_blocking=True)
+
+ # compute output
+ output = model(images)
+ loss = criterion(output, target)
+
+ # measure accuracy and record loss
+ acc1, acc5 = accuracy(output, target, topk=(1, 5))
+ losses.update(loss.item(), images.size(0))
+ top1.update(acc1[0], images.size(0))
+ top5.update(acc5[0], images.size(0))
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ if i % args.print_freq == 0:
+ progress.display(i + 1)
+
+ batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
+ losses = AverageMeter('Loss', ':.4e', Summary.NONE)
+ top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
+ top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
+ # Batch total for the progress display. The conditional expression binds
+ # last: for non-rocAL loaders it is len(val_loader) plus one (a bool added
+ # as 0/1) when a distributed sampler leaves samples uncovered; for rocAL
+ # loaders it is just len(val_loader).
+ progress = ProgressMeter(
+ len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size <
+ len(val_loader.dataset))) if not (args.rocal_cpu or args.rocal_gpu) else len(val_loader),
+ [batch_time, losses, top1, top5],
+ prefix='Test: ')
+
+ # switch to evaluate mode
+ model.eval()
+
+ run_validate(val_loader)
+ # combine the accuracy meters across processes
+ if args.distributed:
+ top1.all_reduce()
+ top5.all_reduce()
+
+ # evaluate the dataset tail that the distributed sampler did not cover
+ if not (args.rocal_cpu or args.rocal_gpu):
+ if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)):
+ aux_val_dataset = Subset(val_loader.dataset,
+ range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset)))
+ aux_val_loader = torch.utils.data.DataLoader(
+ aux_val_dataset, batch_size=args.batch_size, shuffle=False,
+ num_workers=args.workers, pin_memory=True)
+ run_validate(aux_val_loader, len(val_loader))
+
+ progress.display_summary()
+
+ return top1.avg
+
+
+def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
+ """Saves `state` to `filename` with torch.save.
+
+ When `is_best` is true the saved file is also copied to
+ 'model_best.pth.tar' in the current working directory.
+ """
+ torch.save(state, filename)
+ if is_best:
+ shutil.copyfile(filename, 'model_best.pth.tar')
+
+
+class Summary(Enum):
+ """Selects which statistic AverageMeter.summary() reports."""
+ NONE = 0  # summary() returns an empty string
+ AVERAGE = 1  # summary() reports avg
+ SUM = 2  # summary() reports sum
+ COUNT = 3  # summary() reports count
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+
+ def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE):
+ """Stores the display name, the format spec used by __str__, and the
+ Summary member used by summary(), then resets the statistics."""
+ self.name = name
+ self.fmt = fmt
+ self.summary_type = summary_type
+ self.reset()
+
+ def reset(self):
+ """Zeroes val, avg, sum and count."""
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ """Records `val` as the latest value with weight `n` and refreshes avg."""
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def all_reduce(self):
+ """Sums `sum` and `count` across processes and recomputes avg.
+
+ The reduction tensor is placed on CUDA when available, otherwise on
+ MPS, otherwise on the CPU.
+ """
+ if torch.cuda.is_available():
+ device = torch.device("cuda")
+ elif torch.backends.mps.is_available():
+ device = torch.device("mps")
+ else:
+ device = torch.device("cpu")
+ total = torch.tensor([self.sum, self.count],
+ dtype=torch.float32, device=device)
+ dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
+ # tolist() on the float32 tensor yields Python floats for both fields
+ self.sum, self.count = total.tolist()
+ self.avg = self.sum / self.count
+
+ def __str__(self):
+ """Formats as '<name> <val> (<avg>)' using self.fmt for both numbers."""
+ fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
+ return fmtstr.format(**self.__dict__)
+
+ def summary(self):
+ """Returns the end-of-run summary string selected by summary_type.
+
+ Raises:
+ ValueError: if summary_type is not a known Summary member.
+ """
+ fmtstr = ''
+ if self.summary_type is Summary.NONE:
+ fmtstr = ''
+ elif self.summary_type is Summary.AVERAGE:
+ fmtstr = '{name} {avg:.3f}'
+ elif self.summary_type is Summary.SUM:
+ fmtstr = '{name} {sum:.3f}'
+ elif self.summary_type is Summary.COUNT:
+ fmtstr = '{name} {count:.3f}'
+ else:
+ raise ValueError('invalid summary type %r' % self.summary_type)
+
+ return fmtstr.format(**self.__dict__)
+
+
+class ProgressMeter(object):
+ """Prints a group of meters with a '[batch/total]' counter and a prefix."""
+
+ def __init__(self, num_batches, meters, prefix=""):
+ """Stores the meters and prefix and precomputes the batch counter format."""
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
+ self.meters = meters
+ self.prefix = prefix
+
+ def display(self, batch):
+ """Prints the prefix, the batch counter and str() of every meter, tab-separated."""
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
+ entries += [str(meter) for meter in self.meters]
+ print('\t'.join(entries))
+
+ def display_summary(self):
+ """Prints ' *' followed by each meter's summary(), space-separated."""
+ entries = [" *"]
+ entries += [meter.summary() for meter in self.meters]
+ print(' '.join(entries))
+
+ def _get_batch_fmtstr(self, num_batches):
+ """Builds '[{:Nd}/<num_batches>]', where N is the digit count of num_batches."""
+ num_digits = len(str(num_batches // 1))
+ fmt = '{:' + str(num_digits) + 'd}'
+ return '[' + fmt + '/' + fmt.format(num_batches) + ']'
+
+
+def accuracy(output, target, topk=(1,)):
+ """Computes the accuracy over the k top predictions for the specified values of k
+
+ Returns a list with one single-element tensor per entry of `topk`, each
+ holding the percentage of samples whose target is among the top-k
+ predictions. Assumes `output` is (batch, num_classes) and `target` holds
+ one class index per sample - TODO confirm against callers.
+ """
+ with torch.no_grad():
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ # indices of the maxk highest scores along dim 1, transposed so that
+ # row j holds every sample's (j+1)-th best prediction
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ # count hits within the first k rows and convert to a percentage
+ correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+# Run main() only when the file is executed as a script.
+if __name__ == '__main__':
+ main()
diff --git a/docs/examples/pytorch/toynet_training/README.md b/docs/examples/pytorch/toynet_training/README.md
new file mode 100644
index 000000000..1f0b0b8e2
--- /dev/null
+++ b/docs/examples/pytorch/toynet_training/README.md
@@ -0,0 +1,18 @@
+* This example shows how to run training using PyTorch and ToyNet with 2 classes.
+* Use a dataset with 2 classes.
+* The rocAL device can be CPU or GPU.
+
+### Building the required PyTorch ROCm Docker image
+
+* Use the instructions in the [docker section](https://github.com/ROCm/rocAL/tree/develop/docker) to build the required [PyTorch docker](https://github.com/ROCm/rocAL/blob/develop/docker/rocal-with-pytorch.dockerfile).
+* Upgrade pip to the latest version.
+* Run requirements.sh to install the required packages.
+
+### To run the sample
+
+* Install rocal_pybind
+
+```shell
+python3 train.py
+```
+
diff --git a/docs/examples/pytorch/test_training.py b/docs/examples/pytorch/toynet_training/train.py
similarity index 100%
rename from docs/examples/pytorch/test_training.py
rename to docs/examples/pytorch/toynet_training/train.py
diff --git a/docs/examples/tf/pets_training/README.md b/docs/examples/tf/pets_training/README.md
new file mode 100644
index 000000000..87bef65f1
--- /dev/null
+++ b/docs/examples/tf/pets_training/README.md
@@ -0,0 +1,20 @@
+## Running Pets Training Example
+
+### Building the required TensorFlow ROCm Docker image
+
+* Use the instructions in the [docker section](https://github.com/ROCm/rocAL/tree/develop/docker) to build the required [TensorFlow docker](https://github.com/ROCm/rocAL/blob/develop/docker/rocal-with-tensorflow.dockerfile).
+* Upgrade pip to the latest version.
+
+### Running the training
+
+* To set up the dataset, run:
+
+```shell
+bash download_and_preprocess_dataset.sh
+```
+
+* To run this example, just execute:
+
+```shell
+python3 train.py
+```
diff --git a/docs/examples/tf/pets_training/create_pet_tf_record.py b/docs/examples/tf/pets_training/create_pet_tf_record.py
new file mode 100644
index 000000000..79476a013
--- /dev/null
+++ b/docs/examples/tf/pets_training/create_pet_tf_record.py
@@ -0,0 +1,314 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert the Oxford pet dataset to TFRecord for object_detection.
+
+See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
+ Cats and Dogs
+ IEEE Conference on Computer Vision and Pattern Recognition, 2012
+ http://www.robots.ox.ac.uk/~vgg/data/pets/
+
+Example usage:
+ python object_detection/dataset_tools/create_pet_tf_record.py \
+ --data_dir=/home/user/pet \
+ --output_dir=/home/user/pet/output
+"""
+
+import hashlib
+import io
+import logging
+import os
+import random
+import re
+
+import contextlib2
+from lxml import etree
+import PIL.Image
+from six.moves import range
+import tensorflow.compat.v1 as tf
+
+# Command-line flags for this script; values are read through FLAGS in main().
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
+flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
+# NOTE(review): label_map_path is defined here but main() in this file does
+# not read it; the label map is hard-coded there.
+flags.DEFINE_string('label_map_path', 'pet_label_map.pbtxt',
+ 'Path to label map proto')
+flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
+
+FLAGS = flags.FLAGS
+
+
+def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
+ """Opens all TFRecord shards for writing and adds them to an exit stack.
+
+ Args:
+ exit_stack: A contextlib2.ExitStack used to automatically close the
+ TFRecord writers opened in this function.
+ base_path: The base path for all shards
+ num_shards: The number of shards
+
+ Returns:
+ The list of opened TFRecords. Position k in the list corresponds to shard k.
+ """
+ # shard files are named '<base_path>-<idx>-of-<num_shards>', zero-padded to 5 digits
+ tf_record_output_filenames = [
+ '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
+ for idx in range(num_shards)
+ ]
+
+ tfrecords = [
+ exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
+ for file_name in tf_record_output_filenames
+ ]
+
+ return tfrecords
+
+
+def int64_feature(value):
+ """Wraps a single value in a tf.train.Feature holding a one-element Int64List."""
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+
+def int64_list_feature(value):
+ """Wraps an iterable of values in a tf.train.Feature holding an Int64List."""
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+
+def bytes_feature(value):
+ """Wraps a single value in a tf.train.Feature holding a one-element BytesList."""
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+def bytes_list_feature(value):
+ """Wraps an iterable of values in a tf.train.Feature holding a BytesList."""
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
+
+
+def float_feature(value):
+ """Wraps a single value in a tf.train.Feature holding a one-element FloatList."""
+ return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
+
+
+def float_list_feature(value):
+ """Wraps an iterable of values in a tf.train.Feature holding a FloatList."""
+ return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+
+def get_class_name_from_filename(file_name):
+    """Gets the class name from a file.
+
+    Args:
+      file_name: The file name to get the class name from.
+                 ie. "american_pit_bull_terrier_105.jpg"
+
+    Returns:
+      A string of the class name.
+
+    Raises:
+      ValueError: if file_name does not start with
+        "<class_name>_<digits>.jpg" (matched case-insensitively).
+    """
+    match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
+    if match is None:
+        # Raise a descriptive ValueError instead of failing with an
+        # AttributeError on None; create_tf_record() catches ValueError and
+        # skips the offending example.
+        raise ValueError(
+            'Could not extract class name from file name: %r' % file_name)
+    return match.group(1)
+
+
+def read_examples_list(path):
+ """Read list of training or validation examples.
+
+ The file is assumed to contain a single example per line where the first
+ token in the line is an identifier that allows us to find the image and
+ annotation xml for that example.
+
+ For example, the line:
+ xyz 3
+ would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
+
+ Args:
+ path: absolute path to examples list file.
+
+ Returns:
+ list of example identifiers (strings).
+ """
+ with tf.gfile.GFile(path) as fid:
+ lines = fid.readlines()
+ # keep only the text before the first single space on each stripped line
+ return [line.strip().split(' ')[0] for line in lines]
+
+
+def recursive_parse_xml_to_dict(xml):
+    """Recursively parses XML contents to python dict.
+
+    We assume that `object` tags are the only ones that can appear
+    multiple times at the same level of a tree.
+
+    Args:
+      xml: xml tree obtained by parsing XML file contents using lxml.etree
+
+    Returns:
+      Python dictionary holding XML contents. A leaf element maps its tag to
+      its text; any other element maps its tag to a dict of its children,
+      with all `object` children collected into a list.
+    """
+    # Test for "no children" with len() instead of truth-testing the element:
+    # element truthiness is deprecated and emits a warning, while len() states
+    # the intent explicitly and gives the same result.
+    if len(xml) == 0:
+        return {xml.tag: xml.text}
+    result = {}
+    for child in xml:
+        child_result = recursive_parse_xml_to_dict(child)
+        if child.tag != 'object':
+            result[child.tag] = child_result[child.tag]
+        else:
+            # `object` may repeat, so its parsed children accumulate in a list
+            result.setdefault(child.tag, []).append(child_result[child.tag])
+    return {xml.tag: result}
+
+
+def dict_to_tf_example(data,
+ mask_path,
+ label_map_dict,
+ image_subdirectory):
+ """Convert XML derived dict to tf.Example proto.
+
+ Only image-level fields (size, filename, sha256 key, encoded JPEG bytes)
+ and at most one class label, derived from the file name, are written. No
+ bounding-box or mask features are emitted by this function.
+
+ Args:
+ data: dict holding PASCAL XML fields for a single image (obtained by
+ running recursive_parse_xml_to_dict)
+ mask_path: String path to PNG encoded mask. Accepted for interface
+ compatibility; not read by this function.
+ label_map_dict: A map from string label names to integers ids.
+ image_subdirectory: String specifying the directory holding the actual
+ image data; joined with data['filename'].
+
+ Returns:
+ example: The converted tf.Example.
+
+ Raises:
+ ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+ """
+ img_path = os.path.join(image_subdirectory, data['filename'])
+ with tf.gfile.GFile(img_path, 'rb') as fid:
+ encoded_jpg = fid.read()
+ # decode only to check the container format; the raw bytes are what is stored
+ encoded_jpg_io = io.BytesIO(encoded_jpg)
+ image = PIL.Image.open(encoded_jpg_io)
+ if image.format != 'JPEG':
+ raise ValueError('Image format not JPEG')
+ key = hashlib.sha256(encoded_jpg).hexdigest()
+
+ width = int(data['size']['width'])
+ height = int(data['size']['height'])
+
+ classes = []
+ classes_text = []
+
+ # one label per image: the class comes from the file name, not from the
+ # individual `object` entries
+ if 'object' in data:
+ class_name = get_class_name_from_filename(data['filename'])
+ classes_text.append(class_name.encode('utf8'))
+ classes.append(label_map_dict[class_name])
+
+ feature_dict = {
+ 'image/height': int64_feature(height),
+ 'image/width': int64_feature(width),
+ 'image/filename': bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/source_id': bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/key/sha256': bytes_feature(key.encode('utf8')),
+ 'image/encoded': bytes_feature(encoded_jpg),
+ 'image/format': bytes_feature('jpeg'.encode('utf8')),
+ 'image/object/class/text': bytes_list_feature(classes_text),
+ 'image/object/class/label': int64_list_feature(classes)
+ }
+ example = tf.train.Example(
+ features=tf.train.Features(feature=feature_dict))
+ return example
+
+
+def create_tf_record(output_filename,
+ num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ examples):
+ """Creates a TFRecord file from examples.
+
+ Examples whose annotation XML is missing, or for which
+ dict_to_tf_example raises ValueError, are logged and skipped.
+
+ Args:
+ output_filename: Path to where output file is saved.
+ num_shards: Number of shards for output file.
+ label_map_dict: The label map dictionary.
+ annotations_dir: Directory where annotation files are stored.
+ image_dir: Directory where image files are stored.
+ examples: Examples to parse and save to tf record.
+ """
+ with contextlib2.ExitStack() as tf_record_close_stack:
+ output_tfrecords = open_sharded_output_tfrecords(
+ tf_record_close_stack, output_filename, num_shards)
+ for idx, example in enumerate(examples):
+ if idx % 100 == 0:
+ logging.info('On image %d of %d', idx, len(examples))
+ xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
+ mask_path = os.path.join(
+ annotations_dir, 'trimaps', example + '.png')
+
+ if not os.path.exists(xml_path):
+ logging.warning(
+ 'Could not find %s, ignoring example.', xml_path)
+ continue
+ with tf.gfile.GFile(xml_path, 'r') as fid:
+ xml_str = fid.read()
+ xml = etree.fromstring(xml_str)
+ data = recursive_parse_xml_to_dict(xml)['annotation']
+
+ try:
+ tf_example = dict_to_tf_example(
+ data,
+ mask_path,
+ label_map_dict,
+ image_dir)
+ if tf_example:
+ # round-robin over the shards by example index
+ shard_idx = idx % num_shards
+ output_tfrecords[shard_idx].write(
+ tf_example.SerializeToString())
+ except ValueError:
+ logging.warning('Invalid example: %s, ignoring.', xml_path)
+
+
+def main(_):
+ """Splits the pet dataset 70/30 and writes train and val TFRecord shards.
+
+ Reads FLAGS.data_dir, FLAGS.output_dir and FLAGS.num_shards. The single
+ positional argument is ignored.
+ """
+ data_dir = FLAGS.data_dir
+ # NOTE(review): the label map is hard-coded here; FLAGS.label_map_path is not read.
+ label_map_dict = {'Abyssinian': 1, 'american_bulldog': 2, 'american_pit_bull_terrier': 3, 'basset_hound': 4, 'beagle': 5, 'Bengal': 6, 'Birman': 7, 'Bombay': 8, 'boxer': 9, 'British_Shorthair': 10, 'chihuahua': 11, 'Egyptian_Mau': 12, 'english_cocker_spaniel': 13, 'english_setter': 14, 'german_shorthaired': 15, 'great_pyrenees': 16, 'havanese': 17, 'japanese_chin': 18,
+ 'keeshond': 19, 'leonberger': 20, 'Maine_Coon': 21, 'miniature_pinscher': 22, 'newfoundland': 23, 'Persian': 24, 'pomeranian': 25, 'pug': 26, 'Ragdoll': 27, 'Russian_Blue': 28, 'saint_bernard': 29, 'samoyed': 30, 'scottish_terrier': 31, 'shiba_inu': 32, 'Siamese': 33, 'Sphynx': 34, 'staffordshire_bull_terrier': 35, 'wheaten_terrier': 36, 'yorkshire_terrier': 37}
+ logging.info('Reading from Pet dataset.')
+ image_dir = os.path.join(data_dir, 'images')
+ annotations_dir = os.path.join(data_dir, 'annotations')
+ examples_path = os.path.join(annotations_dir, 'trainval.txt')
+ examples_list = read_examples_list(examples_path)
+
+ # Test images are not included in the downloaded data set, so we shall perform
+ # our own split.
+ # (fixed seed so the shuffle, and therefore the split, is reproducible)
+ random.seed(42)
+ random.shuffle(examples_list)
+ num_examples = len(examples_list)
+ num_train = int(0.7 * num_examples)
+ train_examples = examples_list[:num_train]
+ val_examples = examples_list[num_train:]
+ logging.info('%d training and %d validation examples.',
+ len(train_examples), len(val_examples))
+
+ train_output_path = os.path.join(
+ FLAGS.output_dir, 'pet_faces_train.record')
+ val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
+ create_tf_record(
+ train_output_path,
+ FLAGS.num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ train_examples)
+ create_tf_record(
+ val_output_path,
+ FLAGS.num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ val_examples)
+
+
+# Script entry point; tf.app.run() presumably parses the flags and then calls main().
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/rocAL_pybind/examples/tf_petsTrainingExample/download_and_preprocess_dataset.sh b/docs/examples/tf/pets_training/download_and_preprocess_dataset.sh
similarity index 67%
rename from rocAL_pybind/examples/tf_petsTrainingExample/download_and_preprocess_dataset.sh
rename to docs/examples/tf/pets_training/download_and_preprocess_dataset.sh
index 8b7f5db52..64bb47624 100755
--- a/rocAL_pybind/examples/tf_petsTrainingExample/download_and_preprocess_dataset.sh
+++ b/docs/examples/tf/pets_training/download_and_preprocess_dataset.sh
@@ -4,6 +4,7 @@ DATASET_URL="http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz"
GROUNDTRUTH_URL="http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz"
apt-get install wget
+pip install contextlib2 pillow lxml
printf "\nDownloading Oxford-IIIT-Pet dataset from $DATASET_URL..."
wget $DATASET_URL
printf "\nDownloading Oxford-IIIT-Pet ground truth from $GROUNDTRUTH_URL..."
@@ -12,11 +13,7 @@ printf "\nExtracting..."
tar xzvf images.tar.gz
tar xzvf annotations.tar.gz
mkdir tf_pets_records
-git clone https://github.com/swetha097/rocALmodels.git
-cd rocALmodels
-git checkout TF_V2
-cd ../
-python3 rocALmodels/models/research/object_detection/dataset_tools/create_pet_tf_record.py --data_dir=./ --output_dir=tf_pets_records/ --label_map_path=rocALmodels/models/research/object_detection/data/pet_label_map.pbtxt
+python3 create_pet_tf_record.py --data_dir=./ --output_dir=tf_pets_records/ --label_map_path=pet_label_map.pbtxt
cd tf_pets_records
mkdir train
mv pet_faces_train.record-0000* train
diff --git a/docs/examples/tf/pets_training/pet_label_map.pbtxt b/docs/examples/tf/pets_training/pet_label_map.pbtxt
new file mode 100644
index 000000000..7a91455ef
--- /dev/null
+++ b/docs/examples/tf/pets_training/pet_label_map.pbtxt
@@ -0,0 +1,184 @@
+item {
+ id: 1
+ name: 'Abyssinian'
+}
+
+item {
+ id: 2
+ name: 'american_bulldog'
+}
+
+item {
+ id: 3
+ name: 'american_pit_bull_terrier'
+}
+
+item {
+ id: 4
+ name: 'basset_hound'
+}
+
+item {
+ id: 5
+ name: 'beagle'
+}
+
+item {
+ id: 6
+ name: 'Bengal'
+}
+
+item {
+ id: 7
+ name: 'Birman'
+}
+
+item {
+ id: 8
+ name: 'Bombay'
+}
+
+item {
+ id: 9
+ name: 'boxer'
+}
+
+item {
+ id: 10
+ name: 'British_Shorthair'
+}
+
+item {
+ id: 11
+ name: 'chihuahua'
+}
+
+item {
+ id: 12
+ name: 'Egyptian_Mau'
+}
+
+item {
+ id: 13
+ name: 'english_cocker_spaniel'
+}
+
+item {
+ id: 14
+ name: 'english_setter'
+}
+
+item {
+ id: 15
+ name: 'german_shorthaired'
+}
+
+item {
+ id: 16
+ name: 'great_pyrenees'
+}
+
+item {
+ id: 17
+ name: 'havanese'
+}
+
+item {
+ id: 18
+ name: 'japanese_chin'
+}
+
+item {
+ id: 19
+ name: 'keeshond'
+}
+
+item {
+ id: 20
+ name: 'leonberger'
+}
+
+item {
+ id: 21
+ name: 'Maine_Coon'
+}
+
+item {
+ id: 22
+ name: 'miniature_pinscher'
+}
+
+item {
+ id: 23
+ name: 'newfoundland'
+}
+
+item {
+ id: 24
+ name: 'Persian'
+}
+
+item {
+ id: 25
+ name: 'pomeranian'
+}
+
+item {
+ id: 26
+ name: 'pug'
+}
+
+item {
+ id: 27
+ name: 'Ragdoll'
+}
+
+item {
+ id: 28
+ name: 'Russian_Blue'
+}
+
+item {
+ id: 29
+ name: 'saint_bernard'
+}
+
+item {
+ id: 30
+ name: 'samoyed'
+}
+
+item {
+ id: 31
+ name: 'scottish_terrier'
+}
+
+item {
+ id: 32
+ name: 'shiba_inu'
+}
+
+item {
+ id: 33
+ name: 'Siamese'
+}
+
+item {
+ id: 34
+ name: 'Sphynx'
+}
+
+item {
+ id: 35
+ name: 'staffordshire_bull_terrier'
+}
+
+item {
+ id: 36
+ name: 'wheaten_terrier'
+}
+
+item {
+ id: 37
+ name: 'yorkshire_terrier'
+}
\ No newline at end of file
diff --git a/docs/examples/tf/pets_training/train.py b/docs/examples/tf/pets_training/train.py
new file mode 100755
index 000000000..c804ecc64
--- /dev/null
+++ b/docs/examples/tf/pets_training/train.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import os, math
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+import tensorflow as tf
+
+from amd.rocal.plugin.tf import ROCALIterator
+from amd.rocal.pipeline import Pipeline
+import amd.rocal.fn as fn
+import amd.rocal.types as types
+
+
+############################### CHANGE THESE GLOBAL VARIABLES APPROPRIATELY ###############################
+
+# Root directory of the TFRecords; 'train/' and 'val/' are appended to it below.
+RECORDS_DIR = 'tf_pets_records/'
+# Number of output units of the classification layer built in main().
+NUM_CLASSES = 37
+# Learning rate passed to the SGD optimizer in main().
+LEARNING_RATE = 0.005
+# Batch size used for both the training and the validation pipeline.
+TRAIN_BATCH_SIZE = 8
+# Passed as rocal_cpu to both rocAL pipelines in main().
+RUN_ON_HOST = True
+
+############################### CHANGE THESE GLOBAL VARIABLES APPROPRIATELY ###############################
+
+
+######################################## NO CHANGES IN CODE NEEDED ########################################
+
+TRAIN_RECORDS_DIR = RECORDS_DIR + 'train/'
+VAL_RECORDS_DIR = RECORDS_DIR + 'val/'
+
+def main():
+ """Trains a MobileNetV2-based classifier on TFRecords read through rocAL.
+
+ Builds a frozen MobileNetV2 base with a pooling layer and a
+ NUM_CLASSES-unit dense head, creates one rocAL pipeline for the training
+ records and one for the validation records, then runs 10 epochs of a
+ manual training loop followed by a validation pass, reporting progress
+ with Keras progress bars.
+ """
+
+ global NUM_CLASSES
+ global LEARNING_RATE
+ global TRAIN_BATCH_SIZE
+ global TRAIN_RECORDS_DIR
+ global VAL_RECORDS_DIR
+
+ print("\n-----------------------------------------------------------------------------------------")
+ print('TF records (train) are located in %s' % TRAIN_RECORDS_DIR)
+ print('TF records (val) are located in %s' % VAL_RECORDS_DIR)
+ print("-----------------------------------------------------------------------------------------\n")
+
+ image_size = [128, 128, 3]
+ base_model = tf.keras.applications.MobileNetV2(input_shape=image_size,
+ include_top=False,
+ weights='imagenet')
+ # the pretrained base is frozen; only the layers added below are trained
+ base_model.trainable = False
+ model = tf.keras.Sequential([
+ base_model,
+ tf.keras.layers.GlobalAveragePooling2D(),
+ tf.keras.layers.Dense(NUM_CLASSES)
+ ])
+
+ model.summary()
+ optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)
+ # the dense head has no activation, so the loss consumes logits
+ loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+ model.compile(
+ optimizer=optimizer,
+ loss=loss_fn,
+ metrics=['acc'])
+
+ TFRecordReaderType = 0
+ # maps the feature names used by the reader to the keys stored in the records
+ featureKeyMap = {
+ 'image/encoded': 'image/encoded',
+ 'image/class/label': 'image/object/class/label',
+ 'image/filename': 'image/filename'
+ }
+
+ trainPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=8, rocal_cpu=RUN_ON_HOST,
+ tensor_layout=types.NHWC, mean=[0, 0, 0], std=[255, 255, 255], tensor_dtype=types.FLOAT)
+ with trainPipe:
+ inputs = fn.readers.tfrecord(path=TRAIN_RECORDS_DIR, reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap,
+ features={
+ 'image/encoded': tf.io.FixedLenFeature((), tf.string, ""),
+ 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
+ 'image/filename': tf.io.FixedLenFeature((), tf.string, "")
+ }
+ )
+ jpegs = inputs["image/encoded"]
+ # NOTE(review): `labels` is assigned but not passed to set_outputs() below
+ labels = inputs["image/class/label"]
+ images = fn.decoders.image(
+ jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=TRAIN_RECORDS_DIR)
+ resized = fn.resize(
+ images, resize_width=image_size[0], resize_height=image_size[1])
+ flip_coin = fn.random.coin_flip(probability=0.5)
+ cmn_images = fn.crop_mirror_normalize(resized, crop=(image_size[1], image_size[0]),
+ mean=[127.5, 127.5, 127.5],
+ std=[127.5, 127.5, 127.5],
+ mirror=flip_coin,
+ output_dtype=types.FLOAT,
+ output_layout=types.NHWC)
+ trainPipe.set_outputs(cmn_images)
+ trainPipe.build()
+
+ valPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=8,
+ rocal_cpu=RUN_ON_HOST, tensor_layout=types.NHWC, tensor_dtype=types.FLOAT)
+ with valPipe:
+ inputs = fn.readers.tfrecord(path=VAL_RECORDS_DIR, reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap,
+ features={
+ 'image/encoded': tf.io.FixedLenFeature((), tf.string, ""),
+ 'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
+ 'image/filename': tf.io.FixedLenFeature((), tf.string, "")
+ }
+ )
+ jpegs = inputs["image/encoded"]
+ labels = inputs["image/class/label"]
+ images = fn.decoders.image(
+ jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=VAL_RECORDS_DIR)
+ resized = fn.resize(
+ images, resize_width=image_size[0], resize_height=image_size[1])
+ # NOTE(review): the validation pipeline also applies the random mirror - confirm this is intended
+ flip_coin = fn.random.coin_flip(probability=0.5)
+ cmn_images = fn.crop_mirror_normalize(resized, crop=(image_size[1], image_size[0]),
+ mean=[127.5, 127.5, 127.5],
+ std=[127.5, 127.5, 127.5],
+ mirror=flip_coin,
+ output_dtype=types.FLOAT,
+ output_layout=types.NHWC)
+ valPipe.set_outputs(cmn_images)
+ valPipe.build()
+
+ trainIterator = ROCALIterator(trainPipe)
+ valIterator = ROCALIterator(valPipe)
+
+ # Create the metrics
+ accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy(
+ name='train_acc')
+ epoch = 0
+ # NOTE(review): assumes len(iterator) is a sample count, not a batch count - confirm
+ train_batches = math.ceil(len(trainIterator) / TRAIN_BATCH_SIZE)
+ val_batches = math.ceil(len(valIterator) / TRAIN_BATCH_SIZE)
+ while epoch < 10:
+ print('Epoch :', epoch + 1)
+ accuracy_metric.reset_states()
+ pbar = tf.keras.utils.Progbar(target=train_batches, stateful_metrics=[])
+ step = 0
+ for ([train_image_ndArray], train_label_ndArray) in trainIterator:
+ # shift labels down by one, presumably because the record labels are 1-based
+ train_label_ndArray = train_label_ndArray - 1
+ with tf.GradientTape() as tape:
+ prediction = model(train_image_ndArray, training=True)
+ loss = loss_fn(train_label_ndArray, prediction)
+ gradients = tape.gradient(loss, model.trainable_variables)
+ optimizer.apply_gradients(
+ zip(gradients, model.trainable_variables))
+ accuracy_metric.update_state(train_label_ndArray, prediction)
+ results = {'loss': loss, 'train_acc': accuracy_metric.result()}
+ step += 1
+ pbar.update(step, values=results.items(), finalize=False)
+ pbar.update(step, values=results.items(), finalize=True)
+ trainIterator.reset()
+ # the same metric object is reused for validation after a reset
+ accuracy_metric.reset_states()
+ pbar = tf.keras.utils.Progbar(target=val_batches, stateful_metrics=[])
+ step = 0
+ for ([val_image_ndArray], val_label_ndArray) in valIterator:
+ val_label_ndArray = val_label_ndArray - 1
+ prediction = model(val_image_ndArray, training=False)
+ accuracy_metric.update_state(val_label_ndArray, prediction)
+ results = {'val_acc': accuracy_metric.result()}
+ step += 1
+ pbar.update(step, values=results.items(), finalize=False)
+ pbar.update(step, values=results.items(), finalize=True)
+ valIterator.reset()
+ epoch += 1
+
+
+# Run main() only when the file is executed as a script.
+if __name__ == '__main__':
+ main()
+
+######################################## NO CHANGES IN CODE NEEDED ########################################
diff --git a/docs/how-to/framework.rst b/docs/how-to/framework.rst
index b99dfcf5d..5d09ff636 100644
--- a/docs/how-to/framework.rst
+++ b/docs/how-to/framework.rst
@@ -27,7 +27,7 @@ Create Data-loading Pipeline
Follow these steps:
-1. Import libraries for `rocAL `_.
+1. Import libraries for `rocAL `_.
.. code-block:: python
:caption: Import libraries
@@ -38,7 +38,7 @@ Follow these steps:
import amd.rocal.types as types
-2. See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the `pipeline `_.
+2. See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the `pipeline `_.
.. code-block:: python
:caption: Pipeline for PyTorch
@@ -80,7 +80,7 @@ Follow these steps:
import torch.optim as optim
-4. Call the training pipeline with rocAL classification data `loader `_.
+4. Call the training pipeline with rocAL classification data `loader `_.
.. code-block:: python
:caption: Call the training pipeline
@@ -93,7 +93,7 @@ Follow these steps:
train_loader = ROCALClassificationIterator(pipe_train, device=”cpu” if self.rocal_cpu else “cuda”, device_id = self.local_rank)
-5. Run the `training script `_.
+5. Run the `training script `_.
.. code-block:: python
:caption: Run the training pipeline
@@ -191,15 +191,15 @@ Follow these steps:
train_label_one_hot_list = get_label_one_hot(train_label_ndArray)
-4. To see and run a sample training script, refer to `rocAL TensorFlow example `_.
+4. To see and run a sample training script, refer to `rocAL TensorFlow example `_.
.. __resnet50:
-Run MLPerf Resnet50 classification training with rocAL
+Run Resnet50 classification training with rocAL
=======================================================
#. Ensure you have downloaded ``ILSVRC2012_img_val.tar`` (6.3GB) and ``ILSVRC2012_img_train.tar`` (138 GB) files and unzip into ``train`` and ``val`` folders
-#. Build `MIVisionX Pytorch docker `_
+#. Build `rocAL Pytorch docker `_
* Run the docker image
@@ -208,7 +208,7 @@ Run MLPerf Resnet50 classification training with rocAL
sudo docker run -it -v :/data -v /:/dockerx -w /dockerx --privileged --device=/dev/kfd --device=/dev/dri --group-add video --shm-size=4g --ipc="host" --network=host
.. note::
- Refer to the `docker `_ page for prerequisites and information on building the docker image.
+ Refer to the `docker `_ page for prerequisites and information on building the docker image.
Optional: Map localhost directory on the docker image
@@ -217,16 +217,4 @@ Run MLPerf Resnet50 classification training with rocAL
#. To see and run a sample training script, refer to `rocAL Imagenet example `_.
-.. code-block:: shell
-
- git clone -b mlperf-v1.1-rocal https://github.com/rrawther/MLPerf-mGPU
-
-#. Modify ``RN50_AMP_LARS_8GPUS_NCHW.sh`` or ``RN50_AMP_LARS_8GPUS_NHWC.sh`` to reflect correct path for imagenet directory
-#. Run appropriate script as needed:
-
-.. code-block:: shell
-
- ./RN50_AMP_LARS_8GPUS_NCHW.sh
- (or)
- ./RN50_AMP_LARS_8GPUS_NHWC.sh
diff --git a/docs/how-to/overview.rst b/docs/how-to/overview.rst
index fc3d674ad..11e108189 100644
--- a/docs/how-to/overview.rst
+++ b/docs/how-to/overview.rst
@@ -109,4 +109,4 @@ Decoders Description
====================== ========================================
To see examples demonstrating the usage of decoders and readers, see
-`MIVisionX rocAL Python Binding Examples `_.
+`rocAL Python Examples `_.
diff --git a/docs/how-to/using-with-cpp.rst b/docs/how-to/using-with-cpp.rst
index 50704c60b..cb3a02188 100644
--- a/docs/how-to/using-with-cpp.rst
+++ b/docs/how-to/using-with-cpp.rst
@@ -92,7 +92,7 @@ See `rocalRelease example `_.
+The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing `C++ API `_.
.. code-block:: cpp
:caption: Example Image Augmentation
@@ -131,4 +131,4 @@ The example below shows how to create a pipeline, read JPEG images, perform cert
}
-To see a sample image augmentation application in C++, see `Image Augmentation `_.
+To see a sample image augmentation application in C++, see `Image Augmentation `_.
diff --git a/docs/how-to/using-with-python.rst b/docs/how-to/using-with-python.rst
index 9252f1316..46ec1209f 100644
--- a/docs/how-to/using-with-python.rst
+++ b/docs/how-to/using-with-python.rst
@@ -176,7 +176,7 @@ Building the Pipeline
Building the pipeline ensures that all operators are validated with the corresponding inputs and outputs.
-To build the pipeline, see `https://github.com/ROCm/rocAL/blob/master/rocAL_pybind/examples/rocAL_api_python_unittest.py#L166`
+To build the pipeline, see `https://github.com/ROCm/rocAL/blob/master/tests/python_api/unit_test.py#L166`
.. code-block:: python
:caption: Build the Pipeline
@@ -245,6 +245,7 @@ To run the pipeline, see:
images, labels = pipe_out
show_images(images)
+
rocAL Data Types
=========================
diff --git a/docs/install/install.rst b/docs/install/install.rst
index 920a0640f..c51546b24 100644
--- a/docs/install/install.rst
+++ b/docs/install/install.rst
@@ -227,3 +227,4 @@ Test package will install ctest module to test rocAL. Follow below steps to test
mkdir rocAL-test && cd rocAL-test
cmake /opt/rocm/share/rocal/test/
ctest -VV
+
diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in
index b1eb21341..c316de276 100644
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core[api_reference]==1.4.1
+rocm-docs-core[api_reference]==1.5.1
diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt
index b0d156078..cae8bc379 100644
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -112,7 +112,7 @@ requests==2.32.2
# via
# pygithub
# sphinx
-rocm-docs-core[api-reference]==1.4.1
+rocm-docs-core[api-reference]==1.5.1
# via -r requirements.in
smmap==5.0.1
# via gitdb
diff --git a/docs/user_guide/ch1.md b/docs/user_guide/ch1.md
new file mode 100644
index 000000000..c552757b1
--- /dev/null
+++ b/docs/user_guide/ch1.md
@@ -0,0 +1,83 @@
+# Chapter 1: Overview
+
+## 1.1 Overview
+
+The performance of Deep Learning applications depends upon the efficiency of the data pipelines that load and preprocess data to provide a high throughput. The pipelines are typically used to perform tasks such as loading and decoding data, performing a variety of augmentations, performing color-format conversions, etc., before passing the data for training or inference. The Deep Learning frameworks also require the pipelines to support multiple data formats and augmentations to adapt to a variety of datasets and models.
+
+This can be achieved by creating processing pipelines that fully utilize the underlying hardware capabilities.
+
+ROCm™ Augmentation Library (rocAL™) allows the user to create hybrid pipelines to maximize the throughput for Machine Learning applications. It helps to create pipelines that can efficiently process images, videos, and a variety of storage formats. The user can program these pipelines using C or Python API. rocAL significantly accelerates data processing on AMD processors.
+
+To optimize the preprocessing pipeline, rocAL utilizes the following features:
+
+- Prefetching: Loads the data for the next batch while the existing batch is under process. This parallelization allows more batches to be processed in less time.
+- Hybrid execution: Utilizes both the CPU and GPU simultaneously. For example, decoding the data on the CPU while running the training on the GPU.
+- Hardware decoding: Uses the AMD VCN and VA-API to efficiently decode data on the hardware.
+- Batch processing: Groups and processes the data together as a batch.
+
+![The Role of Pipelines in Deep Learning Applications](../data/ch1_pipelines.png)
+
+Figure 1. The Role of Pipelines in Deep Learning Applications
+
+## 1.2 Key Components
+
+- CPU- or GPU-based implementation for each augmentation and data_loader nodes
+- Python and C APIs for easy integration and testing
+- Multiple framework support and portable on PyTorch, TensorFlow, and MXNet
+- Flexible graphs to help the user create custom pipelines
+- Multicore host and multi-gpu execution for the graph
+- Support for various augmentations such as fish-eye, water, jitter, non-linear blend, etc., using the AMD ROCm Performance Primitive (RPP) library
+- Support for classification, object detection, segmentation, and keypoint data pipelines
+
+## 1.3 Third-party Integration
+
+rocAL provides support for many operators. The module imports are designed like other available data loaders for a smooth integration with training frameworks. The rocal_pybind package provides support for integrating with PyTorch, TensorFlow, and Caffe2. rocAL also supports many data formats such as FileReader, COCO Reader, TFRecordReader, and Lightning Memory-Mapped Database (LMDB), thus offering a unified approach to framework integration.
+
+## 1.4 rocAL Operators
+
+rocAL operators offer the flexibility to run on CPU or GPU for building hybrid pipelines. They also support classification and object detection on the workload. Some of the useful operators supported by rocAL are listed below:
+
+- Augmentations: These are used to enhance the data set by adding effects to the original images. To use the augmentations, import the instance of amd.rocal.fn into the Python script. These augmentation APIs further call the RPP kernels underneath (HIP/HOST) depending on the backend used to build RPP and rocAL.
+
+### Table 1. Augmentations Available through rocAL
+
+| Color Augmentations | Effects Augmentations | Geometry Augmentations |
+| ------------------- | --------------------- | ------------------------------------- |
+| Blend | Fog | Crop |
+| Blur | Jitter | Crop Mirror Normalization |
+| Brightness | Pixelization | Crop Resize |
+| Color Temperature | Raindrops | Fisheye Lens |
+| Color Twist | Snowflakes | Flip (Horizontal, Vertical, and Both) |
+| Contrast | Salt and Pepper Noise | Lens Correction |
+| Exposure | | Random Crop |
+| Gamma | | Resize |
+| Hue | | Resize Crop Mirror |
+| Saturation | | Rotation |
+| Vignette | | Warp Affine |
+
+- Readers: These are used to read and understand the different types of datasets and their metadata. Some examples of readers are list of files with folders, LMDB, TFRecord, and JSON file for metadata. To use the readers, import the instance of amd.rocal.readers into the Python script.
+
+### Table 2. Readers Available through rocAL
+
+| Readers | Description |
+| --------------------------------------- | --------------------------------------------------- |
+| File Reader | Reads images from a list of files in a folder(s) |
+| Video Reader | Reads videos from a list of files in a folder(s) |
+| Caffe LMDB Reader | Reads (key, value) pairs from Caffe LMDB |
+| Caffe2 LMDB Reader | Reads (key, value) pairs from Caffe2 LMDB |
+| COCO Reader – file source and keypoints | Reads images and JSON annotations from COCO dataset |
+| TFRecord Reader | Reads from a TFRecord dataset |
+| MXNet Reader | Reads from a RecordIO dataset |
+
+- Decoders: These are used to support different input formats of images and videos. Decoders extract data from the datasets that are in compressed formats such as JPEG, MP4, etc. To use the decoders, import the instance of amd.rocal.decoders into the Python script.
+
+### Table 3. Decoders Available through rocAL
+
+| Decoders | Description |
+| ----------------- | -------------------------------------- |
+| Image | Decodes JPEG images |
+| Image_raw | Decodes images in raw format |
+| Image_random_crop | Decodes and randomly crops JPEG images |
+| Image_slice | Decodes and slices JPEG images |
+
+To see examples demonstrating the usage of decoders and readers, [click here](https://github.com/ROCm/rocAL/tree/master/docs/examples)
diff --git a/docs/user_guide/ch2.md b/docs/user_guide/ch2.md
new file mode 100644
index 000000000..b54eecbde
--- /dev/null
+++ b/docs/user_guide/ch2.md
@@ -0,0 +1,21 @@
+# Chapter 2: Architecture Components
+
+The rocAL architecture comprises rocAL Master-Graph and RPP as major components.
+
+## 2.1 rocAL Master-Graph
+
+The rocAL pipeline is built on top of rocAL Master-Graph. The architectural components of rocAL Master-Graph are described below:
+
+- Loader and Processing Modules: The rocAL Master-Graph consists of two main architectural components, a loader module to load data and a processing module to process data. The loader module is clearly separated from the processing module for a seamless execution without any blockages. The Prefetch queue helps to load data ahead of time and can be configured with user-defined parameters. The Output routine runs in parallel with the load routine, as both have separate queues for storing the result.
+
+![rocAL Master-Graph Architecture](../data/ch2_arch.png)
+
+Figure 2. rocAL Master-Graph Architecture
+
+- rocAL Pipeline: The rocAL pipeline holds great significance, as it contains all the information required to create a rocAL graph with data loader, augmentation nodes, and the output format. Once a rocAL pipeline is created, the user can build, run, and call an iterator to get the next batch of data into the pipeline. The user can install the rocAL pipeline using the rocAL Python package. It supports many operators for data loading and data augmentation.
+
+## 2.2 ROCm Performance Primitive Library
+
+RPP is a comprehensive high-performance computer vision library optimized for the AMD CPU and GPU with HIP and OpenCL backends. It is available under the AMD ROCm software platform. It provides low-level functionality for all rocAL operators for single, image, and tensor datatypes. RPP provides an extensive library for vision augmentations that includes vision functions, color augmentations, filter augmentations, geometric distortions, and a few more features.
+
+For more information on RPP along with the list of supported kernels, see https://github.com/ROCm/rpp.
diff --git a/docs/user_guide/ch3.md b/docs/user_guide/ch3.md
new file mode 100644
index 000000000..e74ad45e4
--- /dev/null
+++ b/docs/user_guide/ch3.md
@@ -0,0 +1,42 @@
+# Chapter 3: Installation
+
+This chapter provides information about the installation of rocAL and related packages.
+
+## 3.1 Prerequisites
+
+* Linux distribution
+* [AMD RPP](https://github.com/ROCm/rpp)
+* [AMD OpenVX™](https://github.com/ROCm/rocAL/tree/master/amd_openvx) and AMD OpenVX™ Extensions: `VX_RPP` and `AMD Media`
+* [Turbo JPEG](https://libjpeg-turbo.org/) - Version `3.0` or higher
+* [Half-precision floating-point](https://half.sourceforge.net) library - Version `1.12.0` or higher
+* [Google Protobuf](https://developers.google.com/protocol-buffers) - Version `3.12.4` or higher
+* [LMDB Library](http://www.lmdb.tech/doc/)
+* [RapidJSON](https://github.com/Tencent/rapidjson)
+* [PyBind11](https://github.com/pybind/pybind11)
+
+## 3.2 Platform Support
+
+To see the list of supported platforms for rocAL, see the [ROCm Installation Guide](https://rocm.docs.amd.com).
+
+## 3.3 Installing rocAL
+
+To build and install the rocAL library, follow the instructions given [here](https://github.com/ROCm/rocAL#build-instructions)
+
+## 3.4 Installing rocAL Python Package
+
+The rocAL Python package (rocal_pybind) is a separate redistributable wheel. rocal_pybind, which is created using Pybind11, enables data transfer between rocAL C++ API and Python API. With the help of rocal_pybind.so wrapper library, the rocAL functionality, which is primarily in C/C++, can be effectively used in Python.
+The Python package supports PyTorch, TensorFlow, Caffe2, and data readers available for various formats such as FileReader, COCO Reader, TFRecord Reader, and CaffeReader.
+
+To build and install the Python package, follow the PyPackageInstall instructions [here](https://github.com/ROCm/rocAL#build-instructions)
+
+## 3.5 Installing rocAL Using Framework Dockers
+
+To test the rocAL Python APIs using PyTorch or TensorFlow, we recommend building a docker with rocAL and ROCm using any of the links below:
+
+* [rocAL PyTorch docker](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-pytorch.dockerfile)
+* [rocAL TensorFlow docker](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-tensorflow.dockerfile)
+
+To use rocAL on Ubuntu, use the following dockers:
+
+* [rocAL on Ubuntu20](https://github.com/ROCm/rocAL/blob/master/docker/rocAL-on-ubuntu20.dockerfile)
+* [rocAL on Ubuntu22](https://github.com/ROCm/rocAL/blob/master/docker/rocAL-on-ubuntu22.dockerfile)
diff --git a/docs/user_guide/ch5.md b/docs/user_guide/ch5.md
new file mode 100644
index 000000000..2c0f325ff
--- /dev/null
+++ b/docs/user_guide/ch5.md
@@ -0,0 +1,164 @@
+# Chapter 5: Framework Integration
+
+rocAL improves the pipeline efficiency by preprocessing the data and parallelizing the data loading on the CPU and running trainings on the GPU. To separate the data loading from the training, rocAL provides TensorFlow and PyTorch iterators and readers as a plugin. The integration process with PyTorch and TensorFlow is described in the sections below.
+
+## 5.1 PyTorch Integration
+
+This section demonstrates how to use rocAL with PyTorch for training. Follow the steps below to get started.
+
+## 5.1.1 Build PyTorch Docker
+
+Build a rocAL PyTorch docker by following the steps [here](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-pytorch.dockerfile).
+
+## 5.1.2 Create Data-loading Pipeline
+
+Follow these steps:
+
+1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L28).
+
+```python
+from amd.rocal.plugin.pytorch import ROCALClassificationIterator
+from amd.rocal.pipeline import Pipeline
+import amd.rocal.fn as fn
+import amd.rocal.types as types
+```
+
+2. See a rocAL pipeline for PyTorch below. It reads data from the dataset using a fileReader and uses image_slice to decode the raw images. The other required augmentation operations are also defined in the [pipeline](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L38).
+
+```python
+def trainPipeline(data_path, batch_size, num_classes, one_hot, local_rank, world_size, num_thread, crop, rocal_cpu, fp16):
+ pipe = Pipeline(batch_size=batch_size, num_threads=num_thread, device_id=local_rank, seed=local_rank+10,
+ rocal_cpu=rocal_cpu, tensor_dtype = types.FLOAT16 if fp16 else types.FLOAT, tensor_layout=types.NCHW,
+ prefetch_queue_depth = 7)
+ with pipe:
+ jpegs, labels = fn.readers.file(file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
+ rocal_device = 'cpu' if rocal_cpu else 'gpu'
+ # decode = fn.decoders.image(jpegs, output_type=types.RGB,file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
+ decode = fn.decoders.image_slice(jpegs, output_type=types.RGB,
+ file_root=data_path, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
+ res = fn.resize(decode, resize_x=224, resize_y=224)
+ flip_coin = fn.random.coin_flip(probability=0.5)
+ cmnp = fn.crop_mirror_normalize(res, device="gpu",
+ output_dtype=types.FLOAT,
+ output_layout=types.NCHW,
+ crop=(crop, crop),
+ mirror=flip_coin,
+ image_type=types.RGB,
+ mean=[0.485,0.456,0.406],
+ std=[0.229,0.224,0.225])
+ if(one_hot):
+ _ = fn.one_hot(labels, num_classes)
+ pipe.set_outputs(cmnp)
+ print('rocal "{0}" variant'.format(rocal_device))
+ return pipe
+```
+
+3. Import libraries for PyTorch.
+
+```python
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+```
+
+4. Call the training pipeline with rocAL classification data [loader](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L78).
+
+```python
+def get_pytorch_train_loader(self):
+ print("in get_pytorch_train_loader function")
+ pipe_train = trainPipeline(self.data_path, self.batch_size, self.num_classes, self.one_hot, self.local_rank,
+ self.world_size, self.num_thread, self.crop, self.rocal_cpu, self.fp16)
+ pipe_train.build()
+ train_loader = ROCALClassificationIterator(pipe_train, device="cpu" if self.rocal_cpu else "cuda", device_id = self.local_rank)
+```
+
+5. Run the [training](https://github.com/ROCm/rocAL/blob/master/docs/examples/pytorch/toynet_training/train.py#L179).
+
+```python
+# Training loop
+ for epoch in range(10): # loop over the dataset multiple times
+ print("\n epoch:: ",epoch)
+ running_loss = 0.0
+
+ for i, (inputs,labels) in enumerate(train_loader, 0):
+
+ sys.stdout.write("\r Mini-batch " + str(i))
+ # print("Images",inputs)
+ # print("Labels",labels)
+ inputs, labels = inputs.to(device), labels.to(device)
+```
+
+6. Run the training as shown [here](https://github.com/ROCm/rocAL/tree/develop/docs/examples/pytorch/toynet_training).
+
+To see a sample training script, click [here](https://github.com/ROCm/rocAL/tree/develop/docs/examples/pytorch/toynet_training).
+
+## 5.2 TensorFlow Integration
+
+This section demonstrates how to use rocAL with TensorFlow for training. Follow the steps below to get started.
+
+## 5.2.1 Build TensorFlow Docker
+
+Build a rocAL TensorFlow docker by following the steps [here](https://github.com/ROCm/rocAL/tree/master/docker/rocal-with-tensorflow.dockerfile).
+
+## 5.2.2 Create Data-loading Pipeline
+
+Follow these steps:
+
+1. Import libraries for [rocAL](https://github.com/ROCm/rocAL/blob/master/docs/examples/tf/pets_training/train.py#L22).
+
+```python
+from amd.rocal.plugin.tf import ROCALIterator
+from amd.rocal.pipeline import Pipeline
+import amd.rocal.fn as fn
+import amd.rocal.types as types
+```
+
+2. See a rocAL pipeline for TensorFlow below. It reads data from the TFRecords using TFRecord Reader and uses fn.decoders.image to decode the raw [images](https://github.com/ROCm/rocAL/blob/master/docs/examples/tf/pets_training/train.py#L128).
+
+```python
+trainPipe = Pipeline(batch_size=TRAIN_BATCH_SIZE, num_threads=1, rocal_cpu=RUN_ON_HOST, tensor_layout = types.NHWC)
+ with trainPipe:
+ inputs = fn.readers.tfrecord(path=TRAIN_RECORDS_DIR, index_path = "", reader_type=TFRecordReaderType, user_feature_key_map=featureKeyMap,
+ features={
+ 'image/encoded':tf.io.FixedLenFeature((), tf.string, ""),
+ 'image/class/label':tf.io.FixedLenFeature([1], tf.int64, -1),
+ 'image/filename':tf.io.FixedLenFeature((), tf.string, "")
+ }
+ )
+ jpegs = inputs["image/encoded"]
+ images = fn.decoders.image(jpegs, user_feature_key_map=featureKeyMap, output_type=types.RGB, path=TRAIN_RECORDS_DIR)
+ resized = fn.resize(images, resize_x=crop_size[0], resize_y=crop_size[1])
+ flip_coin = fn.random.coin_flip(probability=0.5)
+ cmn_images = fn.crop_mirror_normalize(resized, crop=(crop_size[1], crop_size[0]),
+ mean=[0,0,0],
+ std=[255,255,255],
+ mirror=flip_coin,
+ output_dtype=types.FLOAT,
+ output_layout=types.NHWC,
+ pad_output=False)
+ trainPipe.set_outputs(cmn_images)
+trainPipe.build()
+```
+
+3. Import libraries for [TensorFlow](https://github.com/ROCm/rocAL/blob/master/docs/examples/tf/pets_training/train.py#L174).
+
+```python
+import tensorflow.compat.v1 as tf
+tf.compat.v1.disable_v2_behavior()
+import tensorflow_hub as hub
+# Call the train pipeline
+ trainIterator = ROCALIterator(trainPipe)
+# Run the training Session
+ i = 0
+ with tf.compat.v1.Session(graph = train_graph) as sess:
+ sess.run(tf.compat.v1.global_variables_initializer())
+ while i < NUM_TRAIN_STEPS:
+
+
+ for t, (train_image_ndArray, train_label_ndArray) in enumerate(trainIterator, 0):
+ train_label_one_hot_list = get_label_one_hot(train_label_ndArray)
+```
+
+4. Run the training as shown [here](https://github.com/ROCm/rocAL/tree/master/docs/examples/tf/pets_training/).
+
+To see a sample training script, click [here](https://github.com/ROCm/rocAL/tree/master/docs/examples/tf/pets_training/).
diff --git a/docs/user_guide/ch6.md b/docs/user_guide/ch6.md
new file mode 100644
index 000000000..d85d9586e
--- /dev/null
+++ b/docs/user_guide/ch6.md
@@ -0,0 +1,109 @@
+# Chapter 6: Using with C++ API
+
+This chapter explains how to create a pipeline and add augmentations using C++ APIs directly. The Python APIs also call these C++ APIs internally using the Python pybind utility as explained in the section Installing rocAL Python Package.
+
+## 6.1 C++ Common APIs
+
+The following sections list the commonly used C++ APIs.
+
+## 6.1.1 rocalCreate
+
+Use: To create the pipeline
+
+Returns: The context for the pipeline
+
+Arguments:
+
+* RocalProcessMode: Defines whether rocal data loading should be on the CPU or [GPU](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L91)
+
+```c++
+RocalProcessMode::ROCAL_PROCESS_GPU
+RocalProcessMode::ROCAL_PROCESS_CPU
+```
+
+* RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or [FP16](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api_types.h#L124)
+
+```c++
+RocalTensorOutputType::ROCAL_FP32
+RocalTensorOutputType::ROCAL_FP16
+```
+
+[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L41):
+
+```c++
+extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32);
+```
+
+## 6.1.2 rocalVerify
+
+Use: To verify the graph for all the inputs and outputs
+
+Returns: A status code indicating the success or failure
+
+[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L47):
+
+```c++
+extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context);
+```
+
+## 6.1.3 rocalRun
+
+Use: To process and run the built and verified graph
+
+Returns: A status code indicating the success or failure
+
+[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L52):
+
+```c++
+extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context);
+```
+
+## 6.1.4 rocalRelease
+
+Use: To free all the resources allocated during the graph creation process
+
+Returns: A status code indicating the success or failure
+
+[Example](https://github.com/ROCm/rocAL/blob/master/rocAL/include/api/rocal_api.h#L57):
+
+```c++
+extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context);
+```
+
+## 6.1.5 Image Augmentation Using C++ API
+
+The example below shows how to create a pipeline, read JPEG images, perform certain augmentations on them, and show the output using OpenCV by utilizing C++ [APIs](https://github.com/ROCm/rocAL/blob/develop/tests/cpp_api/image_augmentation/image_augmentation.cpp#L103).
+
+```c++
+auto handle = rocalCreate(inputBatchSize, processing_device?RocalProcessMode::ROCAL_PROCESS_GPU:RocalProcessMode::ROCAL_PROCESS_CPU, 0,1);
+input1 = rocalJpegFileSource(handle, folderPath1, color_format, shard_count, false, shuffle, false, ROCAL_USE_USER_GIVEN_SIZE, decode_width, decode_height, dec_type);
+
+image0 = rocalResize(handle, input1, resize_w, resize_h, true);
+
+RocalImage image1 = rocalRain(handle, image0, false);
+
+RocalImage image11 = rocalFishEye(handle, image1, false);
+
+
+rocalRotate(handle, image11, true, rand_angle);
+
+
+// Creating successive blur nodes to simulate a deep branch of augmentations
+RocalImage image2 = rocalCropResize(handle, image0, resize_w, resize_h, false, rand_crop_area);
+for(int i = 0 ; i < aug_depth; i++) {
+ image2 = rocalBlurFixed(handle, image2, 17.25, (i == (aug_depth -1)) ? true:false );
+}
+// Calling the API to verify and build the augmentation graph
+if(rocalVerify(handle) != ROCAL_OK)
+{
+ std::cout << "Could not verify the augmentation graph" << std::endl;
+ return -1;
+}
+
+while (!rocalIsEmpty(handle)) {
+ if(rocalRun(handle) != 0)
+ break;
+}
+```
+
+To see a sample image augmentation application in C++, click [here](https://github.com/ROCm/rocAL/tree/develop/tests/cpp_api/image_augmentation).
diff --git a/rocAL-setup.py b/rocAL-setup.py
index da7eac015..7b412d27c 100644
--- a/rocAL-setup.py
+++ b/rocAL-setup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2022 - 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -22,66 +22,81 @@
import sys
import argparse
import platform
+import traceback
if sys.version_info[0] < 3:
import commands
else:
import subprocess
-__author__ = "Kiriti Nagesh Gowda"
-__copyright__ = "Copyright 2022 - 2023, AMD ROCm Augmentation Library"
+__copyright__ = "Copyright 2022 - 2024, AMD ROCm Augmentation Library"
__license__ = "MIT"
-__version__ = "1.1.0"
-__maintainer__ = "Kiriti Nagesh Gowda"
+__version__ = "2.5.0"
__email__ = "mivisionx.support@amd.com"
__status__ = "Shipping"
+# error check calls: on a non-zero status, print it with a stack trace and exit with that status
+def ERROR_CHECK(call):
+ status = call
+ if(status != 0):
+ print('ERROR_CHECK failed with status:'+str(status))
+ traceback.print_stack()
+ exit(status)
+
# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--directory', type=str, default='~/rocal-deps',
help='Setup home directory - optional (default:~/)')
-parser.add_argument('--opencv', type=str, default='4.6.0',
- help='OpenCV Version - optional (default:4.6.0)')
-parser.add_argument('--protobuf', type=str, default='3.12.4',
- help='ProtoBuf Version - optional (default:3.12.4)')
-parser.add_argument('--pybind11', type=str, default='v2.10.4',
- help='PyBind11 Version - optional (default:v2.10.4)')
-parser.add_argument('--reinstall', type=str, default='ON',
- help='Remove previous setup and reinstall - optional (default:ON) [options:OFF/ON]')
-parser.add_argument('--backend', type=str, default='HIP',
- help='rocAL Dependency Backend - optional (default:HIP) [options:CPU/OCL/HIP]')
parser.add_argument('--rocm_path', type=str, default='/opt/rocm',
help='ROCm Installation Path - optional (default:/opt/rocm) - ROCm Installation Required')
+parser.add_argument('--backend', type=str, default='HIP',
+ help='rocAL Dependency Backend - optional (default:HIP) [options:CPU/OCL/HIP]')
+parser.add_argument('--ffmpeg', type=str, default='OFF',
+ help='FFMPEG Installation - optional (default:OFF) [options:ON/OFF]')
+parser.add_argument('--reinstall', type=str, default='OFF',
+ help='Remove previous setup and reinstall - optional (default:OFF) [options:ON/OFF]')
args = parser.parse_args()
setupDir = args.directory
-opencvVersion = args.opencv
-ProtoBufVersion = args.protobuf
-pybind11Version = args.pybind11
-reinstall = args.reinstall.upper()
-backend = args.backend.upper()
ROCM_PATH = args.rocm_path
+backend = args.backend.upper()
+ffmpegInstall = args.ffmpeg.upper()
+reinstall = args.reinstall.upper()
+# override default path if env path set
if "ROCM_PATH" in os.environ:
ROCM_PATH = os.environ.get('ROCM_PATH')
print("\nROCm PATH set to -- "+ROCM_PATH+"\n")
-if reinstall not in ('ON', 'OFF'):
+# check developer inputs
+if backend not in ('OCL', 'HIP', 'CPU'):
print(
- "ERROR: Re-Install Option Not Supported - [Supported Options: ON or OFF]")
+ "ERROR: Backend Option Not Supported - [Supported Options: CPU or OCL or HIP]\n")
+ parser.print_help()
exit()
-if backend not in ('OCL', 'HIP', 'CPU'):
+if ffmpegInstall not in ('OFF', 'ON'):
print(
- "ERROR: Backend Option Not Supported - [Supported Options: CPU or OCL or HIP]")
+ "ERROR: FFMPEG Install Option Not Supported - [Supported Options: OFF or ON]\n")
+ parser.print_help()
+ exit()
+if reinstall not in ('OFF', 'ON'):
+ print(
+ "ERROR: Re-Install Option Not Supported - [Supported Options: OFF or ON]\n")
+ parser.print_help()
exit()
# check ROCm installation
-if os.path.exists(ROCM_PATH):
+if os.path.exists(ROCM_PATH) and backend != 'CPU':
print("\nROCm Installation Found -- "+ROCM_PATH+"\n")
os.system('echo ROCm Info -- && '+ROCM_PATH+'/bin/rocminfo')
else:
- print("\nWARNING: ROCm Not Found at -- "+ROCM_PATH+"\n")
- print(
- "WARNING: Set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n")
+ if backend != 'CPU':
+ print("\nWARNING: ROCm Not Found at -- "+ROCM_PATH+"\n")
+ print(
+ "WARNING: If ROCm installed, set ROCm Path with \"--rocm_path\" option for full installation [Default:/opt/rocm]\n")
+ print("WARNING: Limited dependencies will be installed\n")
+ backend = 'CPU'
+ else:
+ print("\nSTATUS: CPU Backend Install\n")
# get platfrom info
platfromInfo = platform.platform()
@@ -108,303 +123,316 @@
deps_dir = os.path.expanduser(setupDir_deps)
deps_dir = os.path.abspath(deps_dir)
+# check os version
+os_info_data = 'NOT Supported'
+if os.path.exists('/etc/os-release'):
+ with open('/etc/os-release', 'r') as os_file:
+ os_info_data = os_file.read().replace('\n', ' ')
+ os_info_data = os_info_data.replace('"', '')
+
# setup for Linux
linuxSystemInstall = ''
linuxCMake = 'cmake'
linuxSystemInstall_check = ''
linuxFlag = ''
-if "centos" in platfromInfo or "redhat" in platfromInfo or os.path.exists('/usr/bin/yum'):
+sudoValidate = 'sudo -v'
+if "centos" in os_info_data or "redhat" in os_info_data or "Oracle" in os_info_data:
linuxSystemInstall = 'yum -y'
linuxSystemInstall_check = '--nogpgcheck'
- if "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
+ if "VERSION_ID=7" in os_info_data:
linuxCMake = 'cmake3'
- os.system(linuxSystemInstall+' install cmake3')
- if not "centos" in platfromInfo or not "redhat" in platfromInfo:
- platfromInfo = platfromInfo+'-redhat'
-elif "Ubuntu" in platfromInfo or os.path.exists('/usr/bin/apt-get'):
+ sudoValidate = 'sudo -k'
+ platfromInfo = platfromInfo+'-redhat-7'
+ elif "VERSION_ID=8" in os_info_data:
+ platfromInfo = platfromInfo+'-redhat-8'
+ elif "VERSION_ID=9" in os_info_data:
+ platfromInfo = platfromInfo+'-redhat-9'
+ else:
+ platfromInfo = platfromInfo+'-redhat-centos-undefined-version'
+elif "Ubuntu" in os_info_data:
linuxSystemInstall = 'apt-get -y'
linuxSystemInstall_check = '--allow-unauthenticated'
linuxFlag = '-S'
- if not "Ubuntu" in platfromInfo:
- platfromInfo = platfromInfo+'-Ubuntu'
-elif os.path.exists('/usr/bin/zypper'):
+ if "VERSION_ID=20" in os_info_data:
+ platfromInfo = platfromInfo+'-Ubuntu-20'
+ elif "VERSION_ID=22" in os_info_data:
+ platfromInfo = platfromInfo+'-Ubuntu-22'
+ elif "VERSION_ID=24" in os_info_data:
+ platfromInfo = platfromInfo+'-Ubuntu-24'
+ else:
+ platfromInfo = platfromInfo+'-Ubuntu-undefined-version'
+elif "SLES" in os_info_data:
linuxSystemInstall = 'zypper -n'
linuxSystemInstall_check = '--no-gpg-checks'
platfromInfo = platfromInfo+'-SLES'
+elif "Mariner" in os_info_data:
+ linuxSystemInstall = 'tdnf -y'
+ linuxSystemInstall_check = '--nogpgcheck'
+ platfromInfo = platfromInfo+'-Mariner'
else:
print("\nrocAL Setup on "+platfromInfo+" is unsupported\n")
- print("\nrocAL Setup Supported on: Ubuntu 20/22; CentOS 7/8; RedHat 7/8; & SLES 15-SP4\n")
+ print("\nrocAL Setup Supported on: Ubuntu 20/22, RedHat 8/9, & SLES 15\n")
exit()
# rocAL Setup
print("\nrocAL Setup on: "+platfromInfo+"\n")
if userName == 'root':
- os.system(linuxSystemInstall+' update')
- os.system(linuxSystemInstall+' install sudo')
+ ERROR_CHECK(os.system(linuxSystemInstall+' update'))
+ ERROR_CHECK(os.system(linuxSystemInstall+' install sudo'))
# Delete previous install
if os.path.exists(deps_dir) and reinstall == 'ON':
- os.system('sudo -v')
- os.system('sudo rm -rf '+deps_dir)
+ ERROR_CHECK(os.system(sudoValidate))
+ ERROR_CHECK(os.system('sudo rm -rf '+deps_dir))
print("\nrocAL Setup: Removing Previous Install -- "+deps_dir+"\n")
-# Re-Install
-if os.path.exists(deps_dir):
- print("\nrocAL Setup: Re-Installing Libraries from -- "+deps_dir+"\n")
- # opencv
- if os.path.exists(deps_dir+'/build/OpenCV'):
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/build/OpenCV; sudo ' +
- linuxFlag+' make install -j8)')
-
- # ProtoBuf
- if os.path.exists(deps_dir+'/protobuf-'+ProtoBufVersion):
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion +
- '; sudo '+linuxFlag+' make install -j8)')
-
- # FFMPEG
- if os.path.exists(deps_dir+'/FFmpeg-n4.4.2'):
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo ' +
- linuxFlag+' make install -j8)')
-
- # MIVisionX
- if os.path.exists(deps_dir+'/MIVisionX/build-'+backend):
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/MIVisionX/build-'+backend+'; sudo ' +
- linuxFlag+' make install -j8)')
-
- print("\nrocAL Dependencies Re-Installed with rocAL-setup.py V-"+__version__+"\n")
+# Core package dependencies
+libpkgConfig = "pkg-config"
+if "centos" in os_info_data and "VERSION_ID=7" in os_info_data:
+ libpkgConfig = "pkgconfig"
+commonPackages = [
+ 'gcc',
+ 'cmake',
+ 'git',
+ 'wget',
+ 'unzip',
+ str(libpkgConfig)
+]
+
+rocmDebianPackages = [
+ 'half',
+ 'rpp',
+ 'rpp-dev',
+ 'mivisionx',
+ 'mivisionx-dev'
+]
+rocmRPMPackages = [
+ 'half',
+ 'rpp',
+ 'rpp-devel',
+ 'mivisionx',
+ 'mivisionx-devel'
+]
+
+rocdecodeDebianPackages = [
+ 'rocdecode',
+ 'rocdecode-dev'
+]
+
+rocdecodeRPMPackages = [
+ 'rocdecode',
+ 'rocdecode-devel'
+]
+
+opencvDebianPackages = [
+ 'build-essential',
+ 'pkg-config',
+ 'libgtk2.0-dev',
+ 'libavcodec-dev',
+ 'libavformat-dev',
+ 'libswscale-dev',
+ 'libtbb2',
+ 'libtbb-dev',
+ 'libjpeg-dev',
+ 'libpng-dev',
+ 'libtiff-dev',
+ 'libdc1394-dev',
+ 'unzip'
+]
+
+opencvRPMPackages = [
+ 'gtk2-devel',
+ 'libjpeg-devel',
+ 'libpng-devel',
+ 'libtiff-devel',
+ 'libavc1394',
+ 'unzip'
+]
+
+coreDebianPackages = [
+ 'nasm',
+ 'yasm',
+ 'liblmdb-dev',
+ #'rapidjson-dev',
+ 'python3-dev',
+ 'python3-pip',
+ 'python3-protobuf',
+ 'libprotobuf-dev',
+ 'libprotoc-dev',
+ 'protobuf-compiler'
+]
+
+libPythonProto = "python3-protobuf"
+libProtoCompiler = "protobuf-compiler"
+if "centos" in os_info_data and "VERSION_ID=7" in os_info_data:
+ libPythonProto = "protobuf-python"
+if "SLES" in os_info_data:
+ libProtoCompiler = "libprotobuf-c-devel"
+coreRPMPackages = [
+ 'nasm',
+ 'yasm',
+ 'lmdb-devel',
+ 'jsoncpp-devel',
+ #'rapidjson-devel',
+ 'python3-devel',
+ 'python3-pip',
+ str(libPythonProto),
+ 'protobuf-devel',
+ str(libProtoCompiler)
+]
+
+pip3Packages = [
+ 'pytest==7.0.0',
+ 'wheel==0.37.0'
+]
+
+debianOptionalPackages = [
+ 'ffmpeg',
+ 'libavcodec-dev',
+ 'libavformat-dev',
+ 'libavutil-dev',
+ 'libswscale-dev',
+ 'libopencv-dev'
+]
+
+# Install
+ERROR_CHECK(os.system(sudoValidate))
+if os.path.exists(deps_dir):
+ print("\nrocAL Setup: install found -- "+deps_dir)
+ print("\nrocAL Setup: use option --reinstall ON to reinstall all dependencies")
+ print("\nrocAL Dependencies Installed with rocAL-setup.py on "+platfromInfo+"\n")
+ exit(0)
# Clean Install
else:
print("\nrocAL Dependencies Installation with rocAL-setup.py V-"+__version__+"\n")
- os.system('mkdir '+deps_dir)
+ ERROR_CHECK(os.system('mkdir '+deps_dir))
# Create Build folder
- os.system('(cd '+deps_dir+'; mkdir build )')
- # install pre-reqs
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install gcc cmake git wget unzip pkg-config inxi mivisionx python3 python3-pip')
-
- # Get Installation Source
- os.system(
- '(cd '+deps_dir+'; wget https://github.com/opencv/opencv/archive/'+opencvVersion+'.zip )')
- os.system('(cd '+deps_dir+'; unzip '+opencvVersion+'.zip )')
- os.system(
- '(cd '+deps_dir+'; wget https://github.com/protocolbuffers/protobuf/archive/v'+ProtoBufVersion+'.zip )')
- os.system('(cd '+deps_dir+'; unzip v'+ProtoBufVersion+'.zip )')
- os.system(
- '(cd '+deps_dir+'; wget https://github.com/FFmpeg/FFmpeg/archive/refs/tags/n4.4.2.zip && unzip n4.4.2.zip )')
-
- # Install
- # package dependencies
- os.system('sudo -v')
- if "centos" in platfromInfo or "redhat" in platfromInfo:
- if "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check +
- ' install kernel-devel libsqlite3x-devel bzip2-devel openssl-devel python3-devel autoconf automake libtool curl make g++ unzip')
- elif "centos-8" in platfromInfo or "redhat-8" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' + linuxSystemInstall_check +
- ' install kernel-devel libsqlite3x-devel bzip2-devel openssl-devel python3-devel autoconf automake libtool curl make gcc-c++ unzip')
- elif "Ubuntu" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install sqlite3 libsqlite3-dev libbz2-dev libssl-dev python3-dev autoconf automake libtool')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install curl make g++ unzip libomp-dev libpthread-stubs0-dev')
- elif "SLES" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install sqlite3 sqlite3-devel libbz2-devel libopenssl-devel python3-devel autoconf automake libtool curl make gcc-c++ unzip')
- # Install half.hpp
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install -y half')
- # Install ProtoBuf
- os.system('(cd '+deps_dir+'/protobuf-' +
- ProtoBufVersion+'; ./autogen.sh )')
- os.system('(cd '+deps_dir+'/protobuf-' +
- ProtoBufVersion+'; ./configure )')
- os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion+'; make -j8 )')
- os.system('(cd '+deps_dir+'/protobuf-' +
- ProtoBufVersion+'; make check -j8 )')
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion +
- '; sudo '+linuxFlag+' make install )')
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/protobuf-'+ProtoBufVersion +
- '; sudo '+linuxFlag+' ldconfig )')
-
- # Install OpenCV
- os.system('(cd '+deps_dir+'/build; mkdir OpenCV )')
- # Install pre-reqs
- os.system('sudo -v')
- if "Ubuntu" in platfromInfo:
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libswscale-dev python-dev python-numpy ')
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev unzip')
- elif "centos" in platfromInfo or "redhat" in platfromInfo:
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' groupinstall \'Development Tools\'')
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install gtk2-devel libjpeg-devel libpng-devel libtiff-devel libavc1394 wget unzip')
- elif "SLES" in platfromInfo:
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install -t pattern devel_basis')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install gtk2-devel libjpeg-devel libpng-devel libtiff-devel libavc1394 wget unzip')
- # OpenCV 4.6.0
- os.system('(cd '+deps_dir+'/build/OpenCV; '+linuxCMake +
- ' -D WITH_GTK=ON -D WITH_JPEG=ON -D BUILD_JPEG=ON -D WITH_OPENCL=OFF -D WITH_OPENCLAMDFFT=OFF -D WITH_OPENCLAMDBLAS=OFF -D WITH_VA_INTEL=OFF -D WITH_OPENCL_SVM=OFF -D CMAKE_INSTALL_PREFIX=/usr/local ../../opencv-'+opencvVersion+' )')
- os.system('(cd '+deps_dir+'/build/OpenCV; make -j8 )')
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/build/OpenCV; sudo '+linuxFlag+' make install )')
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/build/OpenCV; sudo '+linuxFlag+' ldconfig )')
+ ERROR_CHECK(os.system('(cd '+deps_dir+'; mkdir build )'))
+ # update
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +' '+linuxSystemInstall_check+' update'))
+ # common packages
+ for i in range(len(commonPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ commonPackages[i]))
+ if "redhat-7" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install cmake3'))
+ # ROCm Packages
if "Ubuntu" in platfromInfo:
- # Install Packages for rocAL
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install libgflags-dev libgoogle-glog-dev liblmdb-dev')
- # Yasm/Nasm for TurboJPEG
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install nasm yasm')
- # clang
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install clang')
- elif "redhat" in platfromInfo or "SLES" in platfromInfo:
- # Nasm & Yasm
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install nasm yasm')
- # JSON-cpp
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install jsoncpp-devel')
- # lmbd
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
- linuxSystemInstall_check+' install lmdb-devel')
-
- # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2
- os.system(
- '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )')
- os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake +
- ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )')
- # RPP
- os.system('sudo -v')
- if "Ubuntu" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install -y rpp rpp-dev')
+ for i in range(len(rocmDebianPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ rocmDebianPackages[i]))
else:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install -y rpp rpp-devel')
- # RapidJSON
- os.system('sudo -v')
+ for i in range(len(rocmRPMPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ rocmRPMPackages[i]))
+
+ # rocDecode
if "Ubuntu" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall + ' ' +
- linuxSystemInstall_check+' install -y rapidjson-dev')
- else:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall + ' ' +
- linuxSystemInstall_check+' install -y rapidjson-devel')
- os.system('(cd '+deps_dir+'; git clone https://github.com/Tencent/rapidjson.git; cd rapidjson; mkdir build; cd build; ' +
- linuxCMake+' ../; make -j4; sudo make install)')
- # PyBind11
- os.system('sudo -v')
- os.system('pip install pytest==7.3.1')
- os.system('(cd '+deps_dir+'; git clone -b '+pybind11Version+' https://github.com/pybind/pybind11; cd pybind11; mkdir build; cd build; ' +
- linuxCMake+' -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../; make -j4; sudo make install)')
-
- # Install ffmpeg
+ for i in range(len(rocdecodeDebianPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ rocdecodeDebianPackages[i]))
+ elif "redhat-7" not in platfromInfo:
+ for i in range(len(rocdecodeRPMPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ rocdecodeRPMPackages[i]))
+
+ ERROR_CHECK(os.system(sudoValidate))
+ # rocAL Core Packages
if "Ubuntu" in platfromInfo:
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install autoconf automake build-essential git-core libass-dev libfreetype6-dev')
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install libsdl2-dev libtool libva-dev libvdpau-dev libvorbis-dev libxcb1-dev')
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texinfo zlib1g-dev')
- os.system('sudo -v')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install nasm yasm libx264-dev libx265-dev libnuma-dev libfdk-aac-dev')
+ for i in range(len(coreDebianPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ coreDebianPackages[i]))
else:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install autoconf automake bzip2 bzip2-devel freetype-devel')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install gcc-c++ libtool make pkgconfig zlib-devel')
- # Nasm
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install nasm')
- if "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
- # Yasm
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' --enablerepo=extras install epel-release')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install yasm')
- # libx264 & libx265
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install libx264-devel libx265-devel')
- # libfdk_aac
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install https://forensics.cert.org/cert-forensics-tools-release-el7.rpm')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' --enablerepo=forensics install fdk-aac')
- # libASS
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install libass-devel')
- elif "centos-8" in platfromInfo or "redhat-8" in platfromInfo:
- # el8 x86_64 packages
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-8.noarch.rpm')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install http://mirror.centos.org/centos/8/PowerTools/x86_64/os/Packages/SDL2-2.0.10-2.el8.x86_64.rpm')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install ffmpeg ffmpeg-devel')
- elif "SLES" in platfromInfo:
- # FFMPEG-4 packages
- os.system(
- 'sudo zypper ar -cfp 90 \'https://ftp.gwdg.de/pub/linux/misc/packman/suse/openSUSE_Leap_$releasever/Essentials\' packman-essentials')
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
- ' install ffmpeg-4')
-
- # FFMPEG 4 from source -- for Ubuntu, CentOS 7, & RedHat 7
- if "Ubuntu" in platfromInfo or "centos-7" in platfromInfo or "redhat-7" in platfromInfo:
- os.system('sudo -v')
- os.system(
- '(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo '+linuxFlag+' ldconfig )')
- os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig/"; ./configure --enable-shared --disable-static --enable-libx264 --enable-libx265 --enable-libfdk-aac --enable-libass --enable-gpl --enable-nonfree)')
- os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; make -j8 )')
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'/FFmpeg-n4.4.2; sudo ' +
- linuxFlag+' make install )')
-
- # MIVisionX
- os.system('sudo -v')
+ for i in range(len(coreRPMPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ coreRPMPackages[i]))
+
+ #pip3 packages
+ for i in range(len(pip3Packages)):
+ ERROR_CHECK(os.system('pip3 install '+ pip3Packages[i]))
+
+ # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.2
+ turboJpegVersion = '3.0.2'
+ ERROR_CHECK(os.system(
+ '(cd '+deps_dir+'; git clone -b '+turboJpegVersion+' https://github.com/libjpeg-turbo/libjpeg-turbo.git )'))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake +
+ ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j$(nproc); sudo make install )'))
+
+ # PyBind11 - https://github.com/pybind/pybind11 -- v2.11.1
+ pybind11Version = 'v2.11.1'
+ ERROR_CHECK(os.system('(cd '+deps_dir+'; git clone -b '+pybind11Version+' https://github.com/pybind/pybind11; cd pybind11; mkdir build; cd build; ' +
+ linuxCMake+' -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON ../; make -j$(nproc); sudo make install)'))
+
+ # RapidJSON - Source TBD: Package install of RapidJSON has compile issues - https://github.com/Tencent/rapidjson.git -- master
+ os.system('(cd '+deps_dir+'; git clone https://github.com/Tencent/rapidjson.git; cd rapidjson; mkdir build; cd build; ' +
+ linuxCMake+' ../; make -j$(nproc); sudo make install)')
+
+ # Optional Deps
if "Ubuntu" in platfromInfo:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install -y mivisionx mivisionx-dev')
+ for i in range(len(debianOptionalPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ debianOptionalPackages[i]))
else:
- os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
- ' '+linuxSystemInstall_check+' install -y mivisionx mivisionx-devel')
- # TBD: Need source install as rocm-6.0 mivisionx is missing vx_rpp
- os.system('sudo -v')
- os.system('(cd '+deps_dir+'; git clone https://github.com/ROCm/MIVisionX.git; cd MIVisionX; mkdir build-'+backend+'; cd build-'+backend+'; ' +
- linuxCMake+' -DBACKEND='+backend+' -D ROCAL=OFF ../; make -j4; sudo make install)')
-
- print("\nrocAL Dependencies Installed with rocAL-setup.py V-"+__version__+"\n")
+ # Install ffmpeg
+ if ffmpegInstall == 'ON':
+ if "redhat-7" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install epel-release'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' localinstall --nogpgcheck https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install ffmpeg ffmpeg-devel'))
+ elif "redhat-8" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-8.noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install http://mirror.centos.org/centos/8/PowerTools/x86_64/os/Packages/SDL2-2.0.10-2.el8.x86_64.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install ffmpeg ffmpeg-devel'))
+ elif "redhat-9" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install https://dl.fedoraproject.org/pub/epel/epel-next-release-latest-9.noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install --nogpgcheck https://mirrors.rpmfusion.org/free/el/rpmfusion-free-release-$(rpm -E %rhel).noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install https://mirrors.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-$(rpm -E %rhel).noarch.rpm'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install ffmpeg ffmpeg-free-devel'))
+ elif "SLES" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install ffmpeg-4 ffmpeg-4-libavcodec-devel ffmpeg-4-libavformat-devel'))
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' install ffmpeg-4-libavutil-devel ffmpeg-4-libswscale-devel'))
+
+ # Install OpenCV -- TBD cleanup
+ opencvVersion = '4.6.0'
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/build; mkdir OpenCV )'))
+ # Install pre-reqs
+ ERROR_CHECK(os.system(sudoValidate))
+ if "redhat" in platfromInfo:
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' '+linuxSystemInstall_check +
+ ' groupinstall \'Development Tools\''))
+ for i in range(len(opencvRPMPackages)):
+ ERROR_CHECK(os.system('sudo '+linuxFlag+' '+linuxSystemInstall +
+ ' '+linuxSystemInstall_check+' install -y '+ opencvRPMPackages[i]))
+ # OpenCV 4.6.0
+ # Get Installation Source
+ ERROR_CHECK(os.system(
+ '(cd '+deps_dir+'; wget https://github.com/opencv/opencv/archive/'+opencvVersion+'.zip )'))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'; unzip '+opencvVersion+'.zip )'))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; '+linuxCMake +
+ ' -D WITH_EIGEN=OFF -D WITH_GTK=ON -D WITH_JPEG=ON -D BUILD_JPEG=ON -D WITH_OPENCL=OFF -D WITH_OPENCLAMDFFT=OFF -D WITH_OPENCLAMDBLAS=OFF -D WITH_VA_INTEL=OFF -D WITH_OPENCL_SVM=OFF -D CMAKE_INSTALL_PREFIX=/usr/local ../../opencv-'+opencvVersion+' )'))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; make -j$(nproc))'))
+ ERROR_CHECK(os.system(sudoValidate))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; sudo make install)'))
+ ERROR_CHECK(os.system('(cd '+deps_dir+'/build/OpenCV; sudo ldconfig)'))
+
+print("\nrocAL Dependencies Installed with rocAL-setup.py V-"+__version__+" on "+platfromInfo+"\n")
diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt
index f7219f683..4973940e6 100644
--- a/rocAL/CMakeLists.txt
+++ b/rocAL/CMakeLists.txt
@@ -21,7 +21,7 @@
# SOFTWARE.
cmake_minimum_required(VERSION 3.5)
-set(VERSION "1.0.0")
+set(VERSION "2.0.0")
# Set Project Version and Language
project(rocal VERSION ${VERSION} LANGUAGES CXX)
@@ -44,6 +44,7 @@ find_package(LMDB QUIET)
find_package(RapidJSON QUIET)
find_package(StdFilesystem QUIET)
find_package(HALF QUIET)
+find_package(SndFile QUIET)
# HIP Backend
if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP")
@@ -55,11 +56,21 @@ if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP")
endif()
endif()
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip)
- set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1031;gfx1032;gfx1100")
- if(BUILD_WITH_AMD_ADVANCE)
- set(DEFAULT_AMDGPU_TARGETS ${DEFAULT_AMDGPU_TARGETS} "gfx941,gfx942")
+ # Set supported GPU Targets
+ set(DEFAULT_AMDGPU_TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102")
+ if (BUILD_WITH_AMD_ADVANCE)
+ set(DEFAULT_AMDGPU_TARGETS ${DEFAULT_AMDGPU_TARGETS} "gfx1200;gfx1201")
endif()
- set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
+
+ # Set AMDGPU_TARGETS
+ if(DEFINED ENV{AMDGPU_TARGETS})
+ set(AMDGPU_TARGETS $ENV{AMDGPU_TARGETS} CACHE STRING "List of specific machine types for library to target")
+ elseif(AMDGPU_TARGETS)
+ message("-- ${White}${PROJECT_NAME} -- AMDGPU_TARGETS set with -D option${ColourReset}")
+ else()
+ set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
+ endif()
+ message("-- ${White}${PROJECT_NAME} -- AMDGPU_TARGETS: ${AMDGPU_TARGETS}${ColourReset}")
find_package(HIP QUIET)
if(HIP_FOUND)
SET(BUILD_ROCAL true)
@@ -78,10 +89,10 @@ if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "OPENCL")
set(BUILD_ROCAL true)
message("-- ${White}rocAL -- Using OpenCL -- Path:" ${OpenCL_LIBRARIES} "\tVersion:" ${CL_TARGET_OpenCL_VERSION})
else()
- set(BACKEND "CPU")
- message("-- ${Yellow}NOTE: rocAL library requires OpenCL for BACKEND=OpenCL, Not Found ${ColourReset}")
- message("-- ${Yellow}NOTE: rocAL Backend set to CPU ${ColourReset}")
- endif()
+ set(BACKEND "CPU")
+ message("-- ${Yellow}NOTE: rocAL library requires OpenCL for BACKEND=OpenCL, Not Found ${ColourReset}")
+ message("-- ${Yellow}NOTE: rocAL Backend set to CPU ${ColourReset}")
+ endif()
endif()
# Dependency Check
# AMD RPP
@@ -168,20 +179,20 @@ if(NOT Threads_FOUND)
set(BUILD_ROCAL false)
message("-- ${Yellow}NOTE: rocAL library requires Threads, Not Found ${ColourReset}")
endif()
-if(NOT LMDB_FOUND)
- set(BUILD_ROCAL false)
+if(NOT LMDB_FOUND)
+ set(BUILD_ROCAL false)
message("-- ${Yellow}NOTE: rocAL library requires LMDB, Not Found ${ColourReset}")
-endif()
-if(NOT RapidJSON_FOUND)
- set(BUILD_ROCAL false)
+endif()
+if(NOT RapidJSON_FOUND)
+ set(BUILD_ROCAL false)
message("-- ${Yellow}NOTE: rocAL library requires RapidJSON, Not Found ${ColourReset}")
endif()
if(NOT FILESYSTEM_FOUND)
set(BUILD_ROCAL false)
message("-- ${Yellow}NOTE: rocAL library requires FileSystem, Not Found ${ColourReset}")
endif()
-if(NOT HALF_FOUND)
- set(BUILD_ROCAL false)
+if(NOT HALF_FOUND)
+ set(BUILD_ROCAL false)
message("-- ${Yellow}NOTE: rocAL library requires HALF, Not Found ${ColourReset}")
endif()
@@ -204,10 +215,10 @@ if(${BUILD_ROCAL})
# Protobuf
include_directories(${PROTOBUF_INCLUDE_DIRS})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${PROTOBUF_LIBRARIES})
- # LMDB
+ # LMDB
include_directories(${LMDB_INCLUDE_DIRS})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LMDB_LIBRARIES})
- # RapidJSON
+ # RapidJSON
include_directories(${RapidJSON_INCLUDE_DIRS})
# Filesystem
include_directories(${FILESYSTEM_INCLUDE_DIRS})
@@ -221,22 +232,8 @@ if(${BUILD_ROCAL})
include_directories(
${CMAKE_CURRENT_BINARY_DIR}
- include/api/
- include/augmentations/
- include/augmentations/color_augmentations/
- include/augmentations/effects_augmentations/
- include/augmentations/geometry_augmentations/
- include/decoders/image/
- include/decoders/video/
- include/device/
- include/loaders/
- include/loaders/image/
- include/loaders/video/
- include/meta_data/
- include/parameters/
- include/pipeline/
- include/readers/image/
- include/readers/video/
+ include
+ include/api
)
link_directories(${ROCM_PATH}/${CMAKE_INSTALL_LIBDIR})
@@ -287,7 +284,7 @@ if(${BUILD_ROCAL})
if(${OpenCV_VERSION_MAJOR} EQUAL 4)
target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=1)
else()
- target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=0)
+ target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=0)
endif()
else()
target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_OPENCV=0)
@@ -309,6 +306,29 @@ if(${BUILD_ROCAL})
else()
message(FATAL_ERROR "No filesystem library found.")
endif()
+
+    # Audio features for rocAL: need MIVisionX vx_rpp newer than 3.0.0 plus SndFile.
+    # Compare the whole dotted version; per-component tests wrongly reject 3.1.0 or 4.0.0.
+    # The version string is quoted so unset VX_EXT_RPP_VERSION_* cannot produce a malformed if().
+    set(AUDIO_SUPPORT FALSE)
+    if(NOT "${VX_EXT_RPP_VERSION_MAJOR}.${VX_EXT_RPP_VERSION_MINOR}.${VX_EXT_RPP_VERSION_PATCH}" VERSION_GREATER "3.0.0")
+        message("-- ${Yellow}rocAL -- Requires MIVisionX vx_rpp version > 3.0.0 for audio${ColourReset}")
+    elseif(NOT SNDFILE_FOUND)
+        message("-- ${Yellow}NOTE: rocAL built without SndFile - Audio Functionalities will not be supported${ColourReset}")
+    else()
+        set(AUDIO_SUPPORT TRUE)
+    endif()
+
+    # SndFile: add its headers/libraries and define ROCAL_AUDIO only when audio is enabled.
+    if(AUDIO_SUPPORT)
+        include_directories(${SNDFILE_INCLUDE_DIRS})
+        set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${SNDFILE_LIBRARIES})
+        message("-- ${White}rocAL built with Audio Functionality${ColourReset}")
+        # target_compile_definitions() takes bare names; it strips a leading -D itself.
+        target_compile_definitions(${PROJECT_NAME} PUBLIC ROCAL_AUDIO)
+    else()
+        message("-- ${Yellow}NOTE: rocAL built without Audio support - Audio Functionalities will not be enabled${ColourReset}")
+    endif()
# -Wall -- Enable most warning messages
# -mavx2 -- Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation
# -mfma -- Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
@@ -336,6 +356,7 @@ if(${BUILD_ROCAL})
include/api/rocal_api_data_transfer.h
include/api/rocal_api_parameters.h
include/api/rocal_api_meta_data.h
+ include/api/rocal_api_tensor.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocal COMPONENT dev)
else()
message(FATAL_ERROR "-- ${Red}rocAL dependencies not satisfied${ColourReset}")
diff --git a/rocAL/include/api/rocal_api.h b/rocAL/include/api/rocal_api.h
index 484ef8c4b..78c0acb17 100644
--- a/rocAL/include/api/rocal_api.h
+++ b/rocAL/include/api/rocal_api.h
@@ -1,4 +1,5 @@
/*
+MIT License
Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,6 +25,7 @@ THE SOFTWARE.
#define ROCAL_H
#include "rocal_api_types.h"
+#include "rocal_api_tensor.h"
#include "rocal_api_parameters.h"
#include "rocal_api_data_loaders.h"
#include "rocal_api_augmentation.h"
@@ -42,27 +44,21 @@ THE SOFTWARE.
/*!
* \brief rocalCreate creates the context for a new augmentation pipeline. Initializes all the required internals for the pipeline
* \ingroup group_rocal
- *
- * \param [in] batch_size
+ * \param [in] batch_size batch size
* \param [in] affinity RocalProcessMode: Defines whether rocal data loading should be on the CPU or GPU.
- * \param [in] gpu_id
- * \param [in] cpu_thread_count
- * \param [in] prefetch_queue_depth
+ * \param [in] gpu_id GPU id
+ * \param [in] cpu_thread_count number of cpu threads
+ * \param [in] prefetch_queue_depth The depth of the prefetch queue.
* \param [in] output_tensor_data_type RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or FP16.
* \return A \ref RocalContext - The context for the pipeline
*/
-extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size,
- RocalProcessMode affinity,
- int gpu_id = 0,
- size_t cpu_thread_count = 1,
- size_t prefetch_queue_depth = 3,
- RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32);
+extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32);
/*!
* \brief rocalVerify function to verify the graph for all the inputs and outputs
* \ingroup group_rocal
*
- * \param [in] context
+ * \param [in] context the rocal context
* \return A \ref RocalStatus - A status code indicating the success or failure
*/
extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context);
@@ -71,7 +67,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context);
* \brief rocalRun function to process and run the built and verified graph.
* \ingroup group_rocal
*
- * \param [in] context
+ * \param [in] context the rocal context
* \return A \ref RocalStatus - A status code indicating the success or failure
*/
extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context);
@@ -80,7 +76,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context);
* \brief rocalRelease function to free all the resources allocated during the graph creation process.
* \ingroup group_rocal
*
- * \param [in] context
+ * \param [in] rocal_context the rocal context
* \return A \ref RocalStatus - A status code indicating the success or failure.
*/
extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context);
diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h
index d397e8002..110e7a85d 100644
--- a/rocAL/include/api/rocal_api_augmentation.h
+++ b/rocAL/include/api/rocal_api_augmentation.h
@@ -33,725 +33,1295 @@ THE SOFTWARE.
*/
/*!
- * \brief Rearranges the order of the frames in the sequences with respect to new_order.
- * new_order can have values in the range [0, sequence_length).
- * Frames can be repeated or dropped in the new_order.
+ * \brief Rearranges the order of the frames in the sequences with respect to new_order. new_order can have values in the range [0, sequence_length). Frames can be repeated or dropped in the new_order.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] p_context context for the pipeline.
+ * \param [in] p_input Input Rocal Tensor
+ * \param [in] new_order represents the new order of the frames in the sequence; each entry is a frame index in the range [0, sequence_length)
+ * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSequenceRearrange(RocalContext p_context, RocalTensor p_input,
+ std::vector<unsigned int> &new_order,
+ bool is_output);
+
+/*! \brief Resize images.
+ * \note Accepts U8 and RGB24 input.
* \ingroup group_rocal_augmentations
* \note: Accepts U8 and RGB24 input.
- * \param context context for the pipeline.
- * \param input
- * \param new_order
- * \param new_sequence_length
- * \param sequence_length
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSequenceRearrange(RocalContext context, RocalImage input,
- unsigned int *new_order, unsigned int new_sequence_length,
- unsigned int sequence_length, bool is_output);
-
-/*! \brief Accepts U8 and RGB24 input.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param dest_width
- * \param dest_height
- * \param is_output
- * \param scaling_mode The resize scaling_mode to resize the image.
- * \param max_size Limits the size of the resized image.
- * \param resize_shorter The length of the shorter dimension of the image.
- * \param resize_longer The length of the larger dimension of the image.
- * \param interpolation_type The type of interpolation to be used for resize.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalResize(RocalContext context, RocalImage input,
- unsigned dest_width, unsigned dest_height,
+ * \param [in] context context for the pipeline.
+ * \param [in] input Input Rocal Tensor
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \param [in] scaling_mode The resize scaling_mode to resize the image.
+ * \param [in] max_size Limits the size of the resized image.
+ * \param [in] resize_shorter The length of the shorter dimension of the image.
+ * \param [in] resize_longer The length of the larger dimension of the image.
+ * \param [in] interpolation_type The type of interpolation to be used for resize.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalResize(RocalContext context, RocalTensor input,
+ unsigned dest_width, unsigned dest_height,
+ bool is_output,
+ RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH,
+ std::vector<unsigned> max_size = {},
+ unsigned resize_shorter = 0,
+ unsigned resize_longer = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs resize, normalize and flip on images.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal Tensor
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] mean The channel mean values
+ * \param [in] std_dev The channel standard deviation values
+ * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \param [in] scaling_mode The resize scaling_mode to resize the image.
+ * \param [in] max_size Limits the size of the resized image.
+ * \param [in] resize_shorter The length of the shorter dimension of the image.
+ * \param [in] resize_longer The length of the larger dimension of the image.
+ * \param [in] interpolation_type The type of interpolation to be used for resize.
+ * \param [in] mirror Parameter to enable horizontal flip for output image.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalTensor p_input, unsigned dest_width,
+ unsigned dest_height, std::vector<float> &mean, std::vector<float> &std_dev,
+ bool is_output,
+ RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH,
+ std::vector<unsigned> max_size = {}, unsigned resize_shorter = 0,
+ unsigned resize_longer = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalIntParam mirror = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs crop and resize on images.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal Tensor
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \param [in] area Target area for the crop
+ * \param [in] aspect_ratio specifies the aspect ratio of the cropped region
+ * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image
+ * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCropResize(RocalContext context, RocalTensor input,
+ unsigned dest_width, unsigned dest_height,
+ bool is_output,
+ RocalFloatParam area = NULL,
+ RocalFloatParam aspect_ratio = NULL,
+ RocalFloatParam x_center_drift = NULL,
+ RocalFloatParam y_center_drift = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs crop and resize on images with fixed crop coordinates.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \param [in] area Target area for the crop
+ * \param [in] aspect_ratio specifies the aspect ratio of the cropped region
+ * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image
+ * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalTensor input,
+ unsigned dest_width, unsigned dest_height,
+ bool is_output,
+ float area, float aspect_ratio,
+ float x_center_drift, float y_center_drift,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Rotates images.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal Tensor
+ * \param [in] is_output True: the output tensor is needed by user and will be copied to output buffers using the data transfer API calls. False: the output tensor is just an intermediate tensor, user is not interested in using it directly. This option allows certain optimizations to be achieved.
+ * \param [in] angle Rocal parameter defining the rotation angle value in degrees.
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] interpolation_type The type of interpolation to be used for rotate.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRotate(RocalContext context, RocalTensor input, bool is_output,
+ RocalFloatParam angle = NULL, unsigned dest_width = 0,
+ unsigned dest_height = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Rotates images with fixed angle value.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal Tensor
+ * \param [in] dest_width output width
+ * \param [in] dest_height output height
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] angle The rotation angle value in degrees.
+ * \param [in] interpolation_type The type of interpolation to be used for rotate.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalTensor input, float angle,
+ bool is_output, unsigned dest_width = 0, unsigned dest_height = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts brightness of the image.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] alpha controls contrast of the image
+ * \param [in] beta controls brightness of the image
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalBrightness(RocalContext context, RocalTensor input, bool is_output,
+ RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts brightness of the image with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] alpha controls contrast of the image
+ * \param [in] beta controls brightness of the image
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalTensor input,
+ float alpha, float beta,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies gamma correction on image.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] gamma gamma value for the image.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalGamma(RocalContext context, RocalTensor input,
bool is_output,
- RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH,
- std::vector max_size = {},
- unsigned resize_shorter = 0,
- unsigned resize_longer = 0,
- RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION);
-
-/*! \brief Accepts U8 and RGB24 input.
- * \ingroup group_rocal_augmentations
- * \param context Rocal context
- * \param input Input Rocal Image
- * \param dest_width The output width
- * \param dest_height The output height
- * \param mean The channel mean values
- * \param std_dev The channel standard deviation values
- * \param is_output True: the output image is needed by user and will be copied to output buffers using the data
- * transfer API calls. False: the output image is just an intermediate image, user is not interested in
- * using it directly. This option allows certain optimizations to be achieved.
- * \param p_mirror Parameter to enable horizontal flip for output image.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalImage p_input,
- unsigned dest_width, unsigned dest_height,
- std::vector &mean, std::vector &std_dev,
- bool is_output, RocalIntParam p_mirror = NULL);
-
-/*! \brief Accepts U8 and RGB24 input.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param dest_width
- * \param dest_height
- * \param is_output
- * \param area
- * \param x_center_drift
- * \param y_center_drift
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalCropResize(RocalContext context, RocalImage input, unsigned dest_width,
- unsigned dest_height, bool is_output,
- RocalFloatParam area = NULL,
- RocalFloatParam aspect_ratio = NULL,
- RocalFloatParam x_center_drift = NULL,
- RocalFloatParam y_center_drift = NULL);
-
-/*! \brief Accepts U8 and RGB24 input. Crops the input image to a new area and same aspect ratio.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param dest_width
- * \param dest_height
- * \param is_output
- * \param area
- * \param x_center_drift
- * \param y_center_drift
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalImage input, unsigned dest_width,
- unsigned dest_height, bool is_output, float area, float aspect_ratio,
- float x_center_drift, float y_center_drift);
-
-/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit,
- * otherwise; the image is cropped to fit the result.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline. Rocal context
- * \param input Input Rocal Image
- * \param is_output True: the output image is needed by user and will be copied to output buffers using the data
- * transfer API calls. False: the output image is just an intermediate image, user is not interested in
- * using it directly. This option allows certain optimizations to be achieved.
- * \param angle Rocal parameter defining the rotation angle value in degrees.
- * \param dest_width The output width
- * \param dest_height The output height
- * \return Returns a new image that keeps the result.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRotate(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam angle = NULL, unsigned dest_width = 0,
- unsigned dest_height = 0);
-
-/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit,
- * otherwise; the image is cropped to fit the result.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline. Rocal context
- * \param input Input Rocal Image
- * \param dest_width The output width
- * \param dest_height The output height
- * \param is_output Is the output image part of the graph output
- * \param angle The rotation angle value in degrees.
- * \return Returns a new image that keeps the result.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalImage input, float angle,
- bool is_output, unsigned dest_width = 0, unsigned dest_height = 0);
-
-/*! \brief Accepts U8 and RGB24 inputs
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param alpha
- * \param beta
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalBrightness(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL);
-
-/*! \brief Accepts U8 and RGB24 inputs
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param alpha
- * \param beta
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalImage input,
- float alpha, float beta,
- bool is_output);
-
-/*! \brief Accepts U8 and RGB24 inputs
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param alpha
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalGamma(RocalContext context, RocalImage input,
- bool is_output,
- RocalFloatParam alpha = NULL);
+ RocalFloatParam gamma = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs
+/*! \brief Applies gamma correction on image with fixed parameters.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param alpha
- * \param is_output
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] gamma gamma value for the image.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalImage input, float alpha, bool is_output);
+extern "C" RocalTensor ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalTensor input,
+ float gamma,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs.
+/*! \brief Adjusts contrast of the image.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param min
- * \param max
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation
+ * \param [in] contrast_center parameter representing the contrast center for the contrast operation
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalContrast(RocalContext context, RocalImage input, bool is_output,
- RocalIntParam min = NULL, RocalIntParam max = NULL);
+extern "C" RocalTensor ROCAL_API_CALL rocalContrast(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam contrast_factor = NULL, RocalFloatParam contrast_center = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs.
+/*! \brief Adjusts contrast of the image with fixed parameters.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param min
- * \param max
- * \param is_output
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation
+ * \param [in] contrast_center parameter representing the contrast center for the contrast operation
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalImage input,
- unsigned min, unsigned max,
- bool is_output);
+extern "C" RocalTensor ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalTensor input,
+ float contrast_factor, float contrast_center,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief
+/*! \brief Flip images horizontally and/or vertically based on inputs.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param axis
- * \param is_output
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally
+ * \param [in] vertical_flag determines whether the input tensor should be flipped vertically
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalFlip(RocalContext context, RocalImage input, bool is_output,
- RocalIntParam flip_axis = NULL);
+extern "C" RocalTensor ROCAL_API_CALL rocalFlip(RocalContext context, RocalTensor input, bool is_output,
+ RocalIntParam horizonal_flag = NULL, RocalIntParam vertical_flag = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief
+/*! \brief Flip images horizontally and/or vertically with fixed parameters.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param axis
- * \param is_output
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally
+ * \param [in] vertical_flag determines whether the input tensor should be flipped vertically
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalImage input, int flip_axis, bool is_output);
+extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalTensor input,
+ int horizonal_flag, int vertical_flag, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs
+/*! \brief Applies blur effect to images.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param sdev
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] kernel_size size of the kernel used for blurring
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalBlur(RocalContext context, RocalImage input, bool is_output,
- RocalIntParam sdev = NULL);
+extern "C" RocalTensor ROCAL_API_CALL rocalBlur(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalIntParam kernel_size = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief
+/*! \brief Applies blur effect to images with fixed parameters.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param sdev
- * \param is_output
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] kernel_size size of the kernel used for blurring
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalImage input, int sdev, bool is_output);
+extern "C" RocalTensor ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalTensor input,
+ int kernel_size, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
/*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio)
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input1
- * \param input2
- * \param is_output
- * \param ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0.
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input1 Input1 Rocal tensor
+ * \param [in] input2 Input2 Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalBlend(RocalContext context, RocalImage input1, RocalImage input2, bool is_output,
- RocalFloatParam ratio = NULL);
+extern "C" RocalTensor ROCAL_API_CALL rocalBlend(RocalContext context, RocalTensor input1, RocalTensor input2,
+ bool is_output,
+ RocalFloatParam ratio = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio)
+/*! \brief Blends two input images given the fixed ratio: output = input1*ratio + input2*(1-ratio)
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input1
- * \param input2
- * \param ratio Float value defining the blending ratio, should be between 0.0 and 1.0.
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalImage input1, RocalImage input2,
- float ratio,
- bool is_output);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param x0
- * \param x1
- * \param y0
- * \param y1
- * \param o0
- * \param o1
- * \param dest_height
- * \param dest_width
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalImage input, bool is_output,
- unsigned dest_height = 0, unsigned dest_width = 0,
- RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL,
- RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL,
- RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param x0
- * \param x1
- * \param y0
- * \param y1
- * \param o0
- * \param o1
- * \param is_output
- * \param dest_height
- * \param dest_width
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalImage input, float x0, float x1,
- float y0, float y1, float o0, float o1, bool is_output,
- unsigned int dest_height = 0, unsigned int dest_width = 0);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalFishEye(RocalContext context, RocalImage input, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param sdev
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVignette(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam sdev = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param sdev
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalImage input, float sdev, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param min
- * \param max
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalJitter(RocalContext context, RocalImage input, bool is_output,
- RocalIntParam kernel_size = NULL);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param min
- * \param max
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalImage input,
- int kernel_size, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param sdev
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam sdev = NULL);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param sdev
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalImage input, float sdev, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param sdev
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSnow(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam shift = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param sdev
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalImage input, float shift, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param rain_value
- * \param rain_width
- * \param rain_heigth
- * \param rain_transparency
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRain(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam rain_value = NULL,
- RocalIntParam rain_width = NULL,
- RocalIntParam rain_height = NULL,
- RocalFloatParam rain_transparency = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param rain_value
- * \param rain_width
- * \param rain_heigth
- * \param rain_transparency
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalImage input,
- float rain_value,
- int rain_width,
- int rain_height,
- float rain_transparency,
- bool is_output);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param adjustment
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalImage input, bool is_output,
- RocalIntParam adjustment = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param adjustment
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalImage input, int adjustment, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param fog_value
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalFog(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam fog_value = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param fog_value
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalImage input, float fog_value, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param strength
- * \param zoom
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam strength = NULL,
- RocalFloatParam zoom = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param strength
- * \param zoom
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalImage input,
- float strength, float zoom, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalPixelate(RocalContext context, RocalImage input, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param shift
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalExposure(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam shift = NULL);
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param shift
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalImage input, float shift, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalHue(RocalContext context, RocalImage input,
- bool is_output,
- RocalFloatParam hue = NULL);
-
-/*! \brief
+ * \param [in] context Rocal context
+ * \param [in] input1 Input1 Rocal tensor
+ * \param [in] input2 Input2 Rocal tensor
+ * \param [in] ratio Float value defining the blending ratio, should be between 0.0 and 1.0.
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalTensor input1, RocalTensor input2,
+ float ratio, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies affine transformation to images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] dest_height output height
+ * \param [in] dest_width output width
+ * \param [in] x0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] x1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] y0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] y1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] o0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] o1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] interpolation_type The type of interpolation to be used for warp affine.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalTensor input, bool is_output,
+ unsigned dest_height = 0, unsigned dest_width = 0,
+ RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL,
+ RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL,
+ RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies affine transformation to images with fixed affine matrix.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param hue
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] x0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] x1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] y0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] y1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] o0 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] o1 float parameter representing the coefficient of affine tensor matrix
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] dest_height output height
+ * \param [in] dest_width output width
+ * \param [in] interpolation_type The type of interpolation to be used for warp affine.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalImage input,
- float hue,
- bool is_output);
+extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalTensor input, float x0, float x1,
+ float y0, float y1, float o0, float o1, bool is_output,
+ unsigned int dest_height = 0, unsigned int dest_width = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs.
+/*! \brief Applies fish eye effect on images.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param min
- * \param max
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalSaturation(RocalContext context,
- RocalImage input,
+extern "C" RocalTensor ROCAL_API_CALL rocalFishEye(RocalContext context, RocalTensor input, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies vignette effect on images.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] sdev standard deviation for the vignette effect
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalVignette(RocalContext context, RocalTensor input,
+ bool is_output, RocalFloatParam sdev = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies vignette effect on images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \note Accepts U8 and RGB24 input.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] sdev standard deviation for the vignette effect
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalTensor input,
+ float sdev, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies jitter effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] kernel_size kernel size used for the jitter effect
+ * \param [in] seed seed value for the random number generator
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJitter(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalIntParam kernel_size = NULL,
+ int seed = 0,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies jitter effect on images with fixed kernel size.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] kernel_size kernel size used for the jitter effect
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] seed seed value for the random number generator
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalTensor input,
+ int kernel_size, bool is_output, int seed = 0,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies salt and pepper noise effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] noise_prob probability of applying the Salt and Pepper noise.
+ * \param [in] salt_prob probability of applying salt noise
+ * \param [in] salt_val specifies the value of the salt noise
+ * \param [in] pepper_val specifies the value of the pepper noise
+ * \param [in] seed seed value for the random number generator
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam noise_prob = NULL, RocalFloatParam salt_prob = NULL,
+ RocalFloatParam salt_val = NULL, RocalFloatParam pepper_val = NULL,
+ int seed = 0,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies salt and pepper noise on images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] noise_prob probability of applying the Salt and Pepper noise.
+ * \param [in] salt_prob probability of applying salt noise
+ * \param [in] salt_val specifies the value of the salt noise
+ * \param [in] pepper_val specifies the value of the pepper noise
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] seed seed value for the random number generator
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalTensor input,
+ float noise_prob, float salt_prob,
+ float salt_val, float pepper_val,
+ bool is_output, int seed = 0,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies snow effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] snow Float param representing the intensity of snow effect
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSnow(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam snow = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies snow effect on images with fixed parameter.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] snow Float param representing the intensity of snow effect
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalTensor input,
+ float snow, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies rain effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] rain_value parameter represents the intensity of rain effect
+ * \param [in] rain_width parameter represents the width of the rain effect
+ * \param [in] rain_height parameter represents the height of the rain effect
+ * \param [in] rain_transparency parameter represents the transparency of the rain effect
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRain(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam rain_value = NULL,
+ RocalIntParam rain_width = NULL,
+ RocalIntParam rain_height = NULL,
+ RocalFloatParam rain_transparency = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies rain effect on images with fixed parameter.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] rain_value parameter represents the intensity of rain effect
+ * \param [in] rain_width parameter represents the width of the rain effect
+ * \param [in] rain_height parameter represents the height of the rain effect
+ * \param [in] rain_transparency parameter represents the transparency of the rain effect
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalTensor input,
+ float rain_value,
+ int rain_width,
+ int rain_height,
+ float rain_transparency,
bool is_output,
- RocalFloatParam sat = NULL);
-
-/*! \brief rocalSaturationFixed
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalImage input, float sat,
- bool is_output);
-
-/*! \brief Accepts U8 and RGB24 inputs.
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param min
- * \param max
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalCopy(RocalContext context, RocalImage input, bool is_output);
-
-/*! \brief
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalNop(RocalContext context, RocalImage input, bool is_output);
-
-/*! \brief Accepts U8 and RGB24 inputs
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param alpha
- * \param beta
- * \param hue
- * \param sat
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam alpha = NULL,
- RocalFloatParam beta = NULL,
- RocalFloatParam hue = NULL,
- RocalFloatParam sat = NULL);
-
-/*! \brief Accepts U8 and RGB24 inputs
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param is_output
- * \param alpha
- * \param beta
- * \param hue
- * \param sat
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalImage input,
- float alpha,
- float beta,
- float hue,
- float sat,
- bool is_output);
-/*! \brief rocalCropMirrorNormalize
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalImage input,
- unsigned crop_depth,
- unsigned crop_height,
- unsigned crop_width,
- float start_x,
- float start_y,
- float start_z,
- std::vector &mean,
- std::vector &std_dev,
- bool is_output,
- RocalIntParam mirror = NULL);
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the color temperature in images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] adjustment color temperature adjustment value
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalIntParam adjustment = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the color temperature in images with fixed value.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] adjustment color temperature adjustment value
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalTensor input,
+ int adjustment, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief rocalCrop
+/*! \brief Applies fog effect on images.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] fog_value parameter representing the intensity of fog effect
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalCrop(RocalContext context, RocalImage input, bool is_output,
- RocalFloatParam crop_width = NULL,
- RocalFloatParam crop_height = NULL,
- RocalFloatParam crop_depth = NULL,
- RocalFloatParam crop_pox_x = NULL,
- RocalFloatParam crop_pos_y = NULL,
- RocalFloatParam crop_pos_z = NULL);
+extern "C" RocalTensor ROCAL_API_CALL rocalFog(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam fog_value = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief rocalCropFixed
+/*! \brief Applies fog effect on images with fixed parameter.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] fog_value parameter representing the intensity of fog effect
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalImage input,
- unsigned crop_width,
- unsigned crop_height,
- unsigned crop_depth,
+extern "C" RocalTensor ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalTensor input,
+ float fog_value, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies lens correction effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] strength parameter representing the strength of the lens correction.
+ * \param [in] zoom parameter representing the zoom factor of the lens correction.
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalTensor input, bool is_output,
+ RocalFloatParam strength = NULL,
+ RocalFloatParam zoom = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies lens correction effect on images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] strength parameter representing the strength of the lens correction.
+ * \param [in] zoom parameter representing the zoom factor of the lens correction.
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalTensor input,
+ float strength, float zoom, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies pixelate effect on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalPixelate(RocalContext context, RocalTensor input,
bool is_output,
- float crop_pox_x,
- float crop_pos_y,
- float crop_pos_z);
-/*! \brief rocalCropCenterFixed
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param crop_width
- */
-extern "C" RocalImage ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalImage input,
- unsigned crop_width,
- unsigned crop_height,
- unsigned crop_depth,
- bool output);
-/*! \brief rocalResizeCropMirrorFixed
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalImage input,
- unsigned dest_width, unsigned dest_height,
- bool is_output,
- unsigned crop_h,
- unsigned crop_w,
- RocalIntParam mirror);
-/*! \brief rocalResizeCropMirror
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalImage input,
- unsigned dest_width, unsigned dest_height,
- bool is_output, RocalFloatParam crop_height = NULL,
- RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL);
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the exposure in images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] exposure_factor exposure adjustment factor
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalExposure(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam exposure_factor = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the exposure in images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] exposure_factor exposure adjustment factor
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalTensor input,
+ float exposure_factor, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the hue in images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] hue hue adjustment value in degrees
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalHue(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam hue = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the hue in images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] hue hue adjustment value in degrees
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalTensor input,
+ float hue,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the saturation in images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] saturation saturation adjustment value
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSaturation(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam saturation = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the saturation in images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] saturation saturation adjustment value
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalTensor input,
+ float saturation, bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Copies input tensor to output tensor.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCopy(RocalContext context, RocalTensor input, bool is_output);
+
+/*! \brief Performs no operation.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalNop(RocalContext context, RocalTensor input, bool is_output);
+
+/*! \brief Adjusts the brightness, hue and saturation of the images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] alpha parameter that controls the brightness of an image
+ * \param [in] beta parameter that helps in tuning the color balance of an image
+ * \param [in] hue parameter that adjusts the hue of an image
+ * \param [in] sat parameter that controls the intensity of colors
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam alpha = NULL,
+ RocalFloatParam beta = NULL,
+ RocalFloatParam hue = NULL,
+ RocalFloatParam sat = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Adjusts the brightness, hue and saturation of the images with fixed parameters.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] alpha parameter that controls the brightness of an image
+ * \param [in] beta parameter that helps in tuning the color balance of an image
+ * \param [in] hue parameter that adjusts the hue of an image
+ * \param [in] sat parameter that controls the intensity of colors
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalTensor input,
+ float alpha,
+ float beta,
+ float hue,
+ float sat,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs crop, normalize and flip on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] crop_height crop height of the tensor
+ * \param [in] crop_width crop width of the tensor
+ * \param [in] start_x x-coordinate, start of the input tensor to be cropped
+ * \param [in] start_y y-coordinate, start of the input tensor to be cropped
+ * \param [in] mean mean value (specified for each channel) for tensor normalization
+ * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] mirror controls horizontal flip of the tensor
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalTensor input,
+ unsigned crop_height,
+ unsigned crop_width,
+ float start_x,
+ float start_y,
+ std::vector<float> &mean,
+ std::vector<float> &std_dev,
+ bool is_output,
+ RocalIntParam mirror = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
-/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels
+/*! \brief Crops images.
* \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param num_of_attmpts
- * \return
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] crop_height crop height of the tensor
+ * \param [in] crop_width crop width of the tensor
+ * \param [in] crop_depth crop depth of the tensor
+ * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped
+ * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped
+ * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalImage input,
+extern "C" RocalTensor ROCAL_API_CALL rocalCrop(RocalContext context, RocalTensor input, bool is_output,
+ RocalFloatParam crop_width = NULL,
+ RocalFloatParam crop_height = NULL,
+ RocalFloatParam crop_depth = NULL,
+ RocalFloatParam crop_pox_x = NULL,
+ RocalFloatParam crop_pos_y = NULL,
+ RocalFloatParam crop_pos_z = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Crops images with fixed coordinates.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] crop_height crop height of the tensor
+ * \param [in] crop_width crop width of the tensor
+ * \param [in] crop_depth crop depth of the tensor
+ * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped
+ * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped
+ * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalTensor input,
+ unsigned crop_width,
+ unsigned crop_height,
+ unsigned crop_depth,
bool is_output,
- RocalFloatParam crop_area_factor = NULL,
- RocalFloatParam crop_aspect_ratio = NULL,
- RocalFloatParam crop_pos_x = NULL,
- RocalFloatParam crop_pos_y = NULL,
- int num_of_attempts = 20);
-
-/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels
- * \ingroup group_rocal_augmentations
- * \param context context for the pipeline.
- * \param input
- * \param IOU_threshold
- * \param num_of_attmpts
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalImage input,
- bool is_output,
- RocalFloatParam threshold = NULL,
- RocalFloatParam crop_area_factor = NULL,
- RocalFloatParam crop_aspect_ratio = NULL,
- RocalFloatParam crop_pos_x = NULL,
- RocalFloatParam crop_pos_y = NULL,
- int num_of_attempts = 20);
-
-#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H
+ float crop_pox_x,
+ float crop_pos_y,
+ float crop_pos_z,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Crops images at the center with fixed coordinates.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] crop_height crop height of the tensor
+ * \param [in] crop_width crop width of the tensor
+ * \param [in] crop_depth crop depth of the tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalTensor input,
+ unsigned crop_width,
+ unsigned crop_height,
+ unsigned crop_depth,
+ bool is_output,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs resize, crop and flip on images with fixed crop.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] dest_height output height
+ * \param [in] dest_width output width
+ * \param [in] crop_h crop height of the tensor
+ * \param [in] crop_w crop width of the tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] mirror controls horizontal flip of the tensor
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalTensor input,
+ unsigned dest_width, unsigned dest_height,
+ bool is_output,
+ unsigned crop_h,
+ unsigned crop_w,
+ RocalIntParam mirror,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Fused function which performs resize, crop and flip on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] dest_height output height
+ * \param [in] dest_width output width
+ * \param [in] crop_height crop height of the tensor
+ * \param [in] crop_width crop width of the tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] mirror controls horizontal flip of the tensor
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalTensor input,
+ unsigned dest_width, unsigned dest_height,
+ bool is_output, RocalFloatParam crop_height = NULL,
+ RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Crops images randomly.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] num_of_attempts maximum number of attempts the function will make to find a valid crop
+ * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region
+ * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region
+ * \param [in] crop_pos_x specifies a specific horizontal position for the crop
+ * \param [in] crop_pos_y specifies a specific vertical position for the crop
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam crop_area_factor = NULL,
+ RocalFloatParam crop_aspect_ratio = NULL,
+ RocalFloatParam crop_pos_x = NULL,
+ RocalFloatParam crop_pos_y = NULL,
+ int num_of_attempts = 20,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Crops images randomly used for SSD training.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] threshold the threshold parameter for crop operation
+ * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region
+ * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region
+ * \param [in] crop_pos_x specifies a specific horizontal position for the crop
+ * \param [in] crop_pos_y specifies a specific vertical position for the crop
+ * \param [in] num_of_attempts the maximum number of attempts the function will make to find a valid crop
+ * \param [in] output_layout the layout of the output tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalTensor input,
+ bool is_output,
+ RocalFloatParam threshold = NULL,
+ RocalFloatParam crop_area_factor = NULL,
+ RocalFloatParam crop_aspect_ratio = NULL,
+ RocalFloatParam crop_pos_x = NULL,
+ RocalFloatParam crop_pos_y = NULL,
+ int num_of_attempts = 20,
+ RocalTensorLayout output_layout = ROCAL_NONE,
+ RocalTensorOutputType output_datatype = ROCAL_UINT8);
+
+/*! \brief Applies preemphasis filter to the input data.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output Sets to True if the output tensor is part of the graph output
+ * \param [in] preemph_coeff Preemphasis coefficient
+ * \param [in] preemph_border_type Border value policy. Possible values are "zero", "clamp", "reflect".
+ * \param [in] output_datatype The data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalPreEmphasisFilter(RocalContext context,
+ RocalTensor input,
+ bool is_output,
+ RocalFloatParam preemph_coeff = NULL,
+ RocalAudioBorderType preemph_border_type = RocalAudioBorderType::ROCAL_CLAMP,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Produces a spectrogram from a 1D audio signal.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] window_fn values of the window function
+ * \param [in] center_windows boolean value to specify whether extracted windows should be padded so that the window function is centered at multiples of window_step
+ * \param [in] reflect_padding Indicates the padding policy when sampling outside the bounds of the audio data
+ * \param [in] output_layout output spectrogram layout
+ * \param [in] power Exponent of the magnitude of the spectrum
+ * \param [in] nfft Size of the Fast Fourier transform (FFT)
+ * \param [in] window_length Window size in the number of samples
+ * \param [in] window_step Step between the Short-time Fourier transform (STFT) windows in number of samples
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSpectrogram(RocalContext context,
+ RocalTensor input,
+ bool is_output,
+ std::vector<float> &window_fn,
+ bool center_windows,
+ bool reflect_padding,
+ int power,
+ int nfft,
+ int window_length = 512,
+ int window_step = 256,
+ RocalTensorLayout output_layout = ROCAL_NFT,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Converts the values of the input tensor to the decibel (dB) scale.
+ * \ingroup group_rocal_augmentations
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] cutoff_db minimum or cut-off ratio in dB
+ * \param [in] multiplier factor by which the logarithm is multiplied
+ * \param [in] reference_magnitude reference magnitude; if not provided, the maximum value of the input is used as the reference
+ * \param [in] rocal_tensor_output_type the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalToDecibels(RocalContext p_context,
+ RocalTensor p_input,
+ bool is_output,
+ float cutoff_db,
+ float multiplier,
+ float reference_magnitude,
+ RocalTensorOutputType rocal_tensor_output_type);
+
+/*! \brief Applies resample augmentation to input tensors
+ * \ingroup group_rocal_augmentations
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] p_output_resample_rate the output resample rate for a batch of audio samples
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] sample_hint value used to allocate the maximum memory for the output tensor with respect to the resample rate and the samples
+ * \param [in] quality The resampling is achieved by applying a sinc filter with Hann window with an extent controlled by the quality argument
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalResample(RocalContext p_context,
+ RocalTensor p_input,
+ RocalTensor p_output_resample_rate,
+ bool is_output,
+ float sample_hint,
+ float quality = 50.0,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Creates and returns a rocALTensor generated from a uniform distribution
+ * \ingroup group_rocal_augmentations
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] range The range for generating uniform distribution
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalUniformDistribution(RocalContext p_context,
+ RocalTensor p_input,
+ bool is_output,
+ std::vector<float> &range);
+
+/*! \brief Creates and returns a rocALTensor generated from a normal distribution
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] mean The mean value for generating the normal distribution
+ * \param [in] stddev The stddev value for generating the normal distribution
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalNormalDistribution(RocalContext p_context,
+ RocalTensor p_input,
+ bool is_output,
+ float mean = 0.0,
+ float stddev = 0.0);
+
+/*! \brief Multiplies a tensor by a scalar and returns the output
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] scalar The scalar value to be multiplied with the input tensor
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalTensorMulScalar(RocalContext p_context,
+ RocalTensor p_input,
+ bool is_output,
+ float scalar = 0.0,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Adds two tensors and returns the output.
+ * \param [in] p_context Rocal context
+ * \param [in] p_input1 Input Rocal tensor1
+ * \param [in] p_input2 Input Rocal tensor2
+ * \param [in] is_output Is the output tensor part of the graph output
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalTensorAddTensor(RocalContext p_context,
+ RocalTensor p_input1,
+ RocalTensor p_input2,
+ bool is_output,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Performs silence detection in the input audio tensor
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] cutoff_db threshold(dB) below which the signal is considered silent
+ * \param [in] reference_power reference power that is used to convert the signal to dB
+ * \param [in] reset_interval number of samples after which the moving mean average is recalculated to avoid loss of precision
+ * \param [in] window_length size of the sliding window used to calculate the short-term power of the signal
+ * \return std::pair <RocalTensor, RocalTensor>
+ */
+extern "C" std::pair<RocalTensor, RocalTensor> ROCAL_API_CALL rocalNonSilentRegionDetection(RocalContext context,
+ RocalTensor input,
+ bool is_output,
+ float cutoff_db,
+ float reference_power,
+ int reset_interval,
+ int window_length);
+
+/*! \brief Extracts the sub-tensor from a given input tensor
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] anchor anchor values used for specifying the starting indices of slice
+ * \param [in] shape shape values used for specifying the length of slice
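+ * \param [in] fill_values fill values used according to the out-of-bounds policy
+ * \param [in] policy out-of-bounds policy applied when the slice extends beyond the bounds of the input tensor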
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalSlice(RocalContext context,
+ RocalTensor input,
+ bool is_output,
+ RocalTensor anchor,
+ RocalTensor shape,
+ std::vector<float> fill_values,
+ RocalOutOfBoundsPolicy policy = RocalOutOfBoundsPolicy::ROCAL_ERROR,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Performs mean-stddev normalization on images.
+ * \ingroup group_rocal_augmentations
+ * \param [in] context Rocal context
+ * \param [in] input Input Rocal tensor
+ * \param [in] axes axes list for tensor normalization
+ * \param [in] mean mean value (specified for each channel) for tensor normalization
+ * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] scale scale value (specified for each channel) for tensor normalization
+ * \param [in] shift shift value (specified for each channel) for tensor normalization
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input,
+ std::vector<unsigned> &axes,
+ std::vector<float> &mean,
+ std::vector<float> &std_dev,
+ bool is_output,
+ float scale = 1.0, float shift = 0.0,
+ RocalTensorOutputType output_datatype = ROCAL_FP32);
+
+/*! \brief Applies mel-filter bank augmentation on the given input tensor
+ * \ingroup group_rocal_augmentations
+ * \param [in] p_context Rocal context
+ * \param [in] p_input Input Rocal tensor
+ * \param [in] is_output is the output tensor part of the graph output
+ * \param [in] freq_high maximum frequency
+ * \param [in] freq_low minimum frequency
+ * \param [in] mel_formula formula used to convert frequencies from hertz to mel and from mel to hertz
+ * \param [in] nfilter number of mel filters
+ * \param [in] normalize boolean that determines whether or not to normalize the weights
+ * \param [in] sample_rate sampling rate of the audio data
+ * \param [in] output_datatype the data type of the output tensor
+ * \return RocalTensor
+ */
+
+extern "C" RocalTensor ROCAL_API_CALL rocalMelFilterBank(RocalContext p_context,
+ RocalTensor p_input,
+ bool is_output,
+ float freq_high,
+ float freq_low,
+ RocalMelScaleFormula mel_formula,
+ int nfilter,
+ bool normalize,
+ float sample_rate,
+ RocalTensorOutputType output_datatype);
+
+#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H
diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h
index 567dca1a9..eec0c9a64 100644
--- a/rocAL/include/api/rocal_api_data_loaders.h
+++ b/rocAL/include/api/rocal_api_data_loaders.h
@@ -23,7 +23,6 @@ THE SOFTWARE.
#ifndef MIVISIONX_ROCAL_API_DATA_LOADERS_H
#define MIVISIONX_ROCAL_API_DATA_LOADERS_H
#include "rocal_api_types.h"
-#include
/*!
* \file
@@ -33,310 +32,258 @@ THE SOFTWARE.
* \brief The AMD rocAL data loader functions.
*/
-/*!
- * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
- * \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
- */
-extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSource(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned internal_shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
-
-/*!
- * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only
+/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode JPEG images stored on the file system. It has internal sharding capability to load/decode in parallel if the user wants. Images that are not JPEG compressed will be ignored.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and loading/decoding using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded tensors to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] decode_size_policy Policy used to evaluate the decode size (see RocalImageSizeEvaluationPolicy)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder type, tjpeg or hwdec
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
-
-/*!
- * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams.
- * \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images in a sequence will be decoded to.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
- * \param sequence_length: The number of frames in a sequence.
- * \param is_output Determines if the user wants the loaded sequences to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the sequences or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \return Reference to the output image.
- */
-extern "C" RocalImage ROCAL_API_CALL rocalSequenceReader(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned internal_shard_count,
- unsigned sequence_length,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0);
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSource(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned internal_shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // last_batch_info: {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a singe shard only.
+/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode JPEG images stored on the file system. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images in a sequence will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param sequence_length: The number of frames in a sequence.
- * \param is_output Determines if the user wants the loaded sequences to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded tensor to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] decode_size_policy Policy used to evaluate the decode size (see RocalImageSizeEvaluationPolicy)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder type, tjpeg or hwdec
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ *
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- unsigned sequence_length,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0);
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param json_path Path to the COCO Json File
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images in a sequence will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and loading/decoding using multiple decoder/loader instances.
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the sequences or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor.
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context,
- const char *source_path,
- const char *json_path,
- RocalImageColor color_format,
- unsigned internal_shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReader(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned internal_shard_count,
+ unsigned sequence_length,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param json_path Path to the COCO Json File
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1.
- * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33.
- * \param num_attempts Maximum number of attempts to generate crop. Default 10
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images in a sequence will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context,
- const char *source_path,
- const char *json_path,
- RocalImageColor rocal_color_format,
- unsigned internal_shard_count,
- bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
- unsigned num_attempts,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
+extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ unsigned sequence_length,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO JPEG images stored on the file system. It has internal sharding capability to load/decode in parallel if the user wants. Images that are not JPEG compressed will be ignored.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param json_path Path to the COCO Json File
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1.
- * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33.
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] json_path Path to the COCO Json File
+ * \param [in] color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and loading/decoding using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] decode_size_policy Policy used to evaluate the decode size (see RocalImageSizeEvaluationPolicy)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder type, tjpeg or hwdec
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context,
- const char *source_path,
- const char *json_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
- unsigned num_attempts,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
-/*!
- * \brief \param rocal_context Rocal context
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context,
+ const char* source_path,
+ const char* json_path,
+ RocalImageColor color_format,
+ unsigned internal_shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
+
+/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO JPEG images stored on the file system. It has internal sharding capability to load/decode in parallel if the user wants. Images that are not JPEG compressed will be ignored.
* \ingroup group_rocal_data_loaders
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param json_path Path to the COCO Json File
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] json_path Path to the COCO Json File
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and loading/decoding using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] area_factor Determines how much area is to be cropped. Ranges from 0.08 to 1.
+ * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33.
+ * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10
+ * \param [in] decode_size_policy Policy used to evaluate the decode size (see RocalImageSizeEvaluationPolicy)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context,
- const char *source_path,
- const char *json_path,
- RocalImageColor color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context,
+ const char* source_path,
+ const char* json_path,
+ RocalImageColor rocal_color_format,
+ unsigned internal_shard_count,
+ bool is_output,
+ std::vector<float>& area_factor, // crop area range
+ std::vector<float>& aspect_ratio, // crop aspect-ratio range
+ unsigned num_attempts,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO JPEG images stored on the file system. It accepts external sharding information to load a single shard only. Images that are not JPEG compressed will be ignored.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] json_path Path to the COCO Json File
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] decode_size_policy Policy used to evaluate the decode size (see RocalImageSizeEvaluationPolicy)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] area_factor Determines how much area is to be cropped. Ranges from 0.08 to 1.
+ * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33.
+ * \param [in] last_batch_info Pair of {last-batch policy, pad flag}. The policy determines the handling of the last batch when the shard size is not divisible by the batch size (see the RocalLastBatchPolicy enum for possible values). If the flag is set to true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned internal_shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context,
+ const char* source_path,
+ const char* json_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ std::vector<float>& area_factor, // crop area range
+ std::vector<float>& aspect_ratio, // crop aspect-ratio range
+ unsigned num_attempts,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true}); // {last-batch policy, pad-last-batch flag}
-/*!
- * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants.
+/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO JPEG images stored on the file system. It accepts external sharding information to load a single shard only. Images that are not JPEG compressed will be ignored.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] rocal_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] json_path Path to the COCO Json File
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
-
-/*!
- * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context,
+ const char* source_path,
+ const char* json_path,
+ RocalImageColor color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
+
+/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context,
+ const char* source_path,
RocalImageColor rocal_color_format,
unsigned internal_shard_count,
bool is_output,
@@ -344,27 +291,28 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContex
bool loop = false,
RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
-/*!
- * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored on the Caffe2 LMDB Records. It accepts external sharding information to load a singe shard. only
+/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param p_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context,
+ const char* source_path,
RocalImageColor rocal_color_format,
unsigned shard_id,
unsigned shard_count,
@@ -373,177 +321,114 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(
bool loop = false,
RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
-/*!
- * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
- * \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
- */
-extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned internal_shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
-
-/*!
- * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored on the MXNet records. It accepts external sharding information to load a singe shard. only
+/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored.
* \ingroup group_rocal_data_loaders
- * \param p_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
-
-/*!
- * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored and Crops t
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned internal_shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
+
+/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in the Caffe2 LMDB Records. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1.
- * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33.
- * \param num_attempts Maximum number of attempts to generate crop. Default 10
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned num_threads,
- bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
- unsigned num_attempts,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
-
-/*!
- * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only
- * \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1.
- * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33.
- * \param num_attempts Maximum number of attempts to generate crop. Default 10
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context,
- const char *source_path,
- RocalImageColor color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
- unsigned num_attempts,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
-
-/*!
- * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants.
- * If images are not Jpeg compressed they will be ignored.
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
+
+/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context,
+ const char* source_path,
RocalImageColor rocal_color_format,
unsigned internal_shard_count,
bool is_output,
- const char *user_key_for_encoded,
- const char *user_key_for_filename,
bool shuffle = false,
bool loop = false,
RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
-/*!
- * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only
+/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in the MXNet records. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info A (policy, pad) pair controlling how the last batch is handled when the shard size is not divisible by the batch size. The first element is the policy (see the RocalLastBatchPolicy enum for possible values); if the second element is true, the shard's last batch is padded by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context,
+ const char* source_path,
RocalImageColor rocal_color_format,
unsigned shard_id,
unsigned shard_count,
@@ -552,300 +437,490 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalCon
bool loop = false,
RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
unsigned max_width = 0, unsigned max_height = 0,
- RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG);
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair<RocalLastBatchPolicy, bool> last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});  // {last-batch policy, pad-with-last-sample flag}
-/*!
- * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems.
+/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored and Crops t
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle: to shuffle dataset
- * \param loop: repeat data loading
- * \param out_width The output_width of raw image
- * \param out_height The output height of raw image
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context,
- const char *source_path,
- const char *user_key_for_raw,
- const char *user_key_for_filename,
- RocalImageColor rocal_color_format,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- unsigned out_width = 0, unsigned out_height = 0,
- const char *record_name_prefix = "");
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] area_factor Determines how much of the area is to be cropped. Ranges from 0.08 to 1.
+ * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33.
+ * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned num_threads,
+ bool is_output,
+ std::vector& area_factor,
+ std::vector& aspect_ratio,
+ unsigned num_attempts,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
-/*!
- * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems.
+/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param shuffle: to shuffle dataset
- * \param loop: repeat data loading
- * \param out_width The output_width of raw image
- * \param out_height The output height of raw image
- * \param record_name_prefix : if nonempty reader will only read records with certain prefix
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- bool shuffle = false,
- bool loop = false,
- unsigned out_width = 0, unsigned out_height = 0,
- const char *record_name_prefix = "");
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] area_factor Determines how much of the area is to be cropped. Ranges from 0.08 to 1.
+ * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33.
+ * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ std::vector& area_factor,
+ std::vector& aspect_ratio,
+ unsigned num_attempts,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
-/*!
- * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems.
+/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk.
- * source_path can be a video file, folder containing videos or a text file
- * \param color_format The color format the frames will be decoded to.
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
- * \param sequence_length: The number of frames in a sequence.
- * \param file_names_list List of input video filenames
- * \param shuffle: to shuffle sequences.
- * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
- * \param loop: repeat data loading.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSource(RocalContext context,
- const char *source_path,
- RocalImageColor color_format,
- RocalDecodeDevice rocal_decode_device,
- unsigned internal_shard_count,
- unsigned sequence_length,
- const std::vector& file_names_list,
- bool is_output = false,
- bool shuffle = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0,
- bool file_list_frame_num = true);
-
-/*!
- * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a singe shard only.
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned internal_shard_count,
+ bool is_output,
+ const char* user_key_for_encoded,
+ const char* user_key_for_filename,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk.
- * source_path can be a video file, folder containing videos or a text file
- * \param color_format The color format the frames will be decoded to.
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
- * \param shard_id Shard id for this loader.
- * \param shard_count Total shard count.
- * \param sequence_length: The number of frames in a sequence.
- * \param file_names_list List of input video filenames
- * \param shuffle: to shuffle sequences.
- * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
- * \param loop: repeat data loading.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context,
- const char *source_path,
- RocalImageColor color_format,
- RocalDecodeDevice rocal_decode_device,
- unsigned shard_id,
- unsigned shard_count,
- unsigned sequence_length,
- const std::vector& file_names_list,
- bool shuffle = false,
- bool is_output = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0,
- bool file_list_frame_num = true);
-
-/*!
- * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height.
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to loop indefinitely through the images or not.
+ * \param [in] decode_size_policy
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk.
- * source_path can be a video file, folder containing videos or a text file
- * \param color_format The color format the frames will be decoded to.
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
- * \param sequence_length: The number of frames in a sequence.
- * \param dest_width The output width of frames.
- * \param dest_height The output height of frames.
- * \param file_names_list List of input video filenames
- * \param shuffle: to shuffle sequences.
- * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
- * \param loop: repeat data loading.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResize(RocalContext context,
- const char *source_path,
- RocalImageColor color_format,
- RocalDecodeDevice rocal_decode_device,
- unsigned internal_shard_count,
- unsigned sequence_length,
- unsigned dest_width,
- unsigned dest_height,
- const std::vector& file_names_list,
- bool shuffle = false,
- bool is_output = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0,
- bool file_list_frame_num = true,
- RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT,
- std::vector max_size = {},
- unsigned resize_shorter = 0,
- unsigned resize_longer = 0,
- RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION);
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Whether to shuffle the dataset.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] out_width The output_width of raw image
+ * \param [in] out_height The output height of raw image
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context,
+ const char* source_path,
+ const char* user_key_for_raw,
+ const char* user_key_for_filename,
+ RocalImageColor rocal_color_format,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ unsigned out_width = 0, unsigned out_height = 0,
+ const char* record_name_prefix = "",
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
-/*!
- * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a singe shard only.
+/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk.
- * source_path can be a video file, folder containing videos or a text file
- * \param color_format The color format the frames will be decoded to.
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
- * \param shard_id Shard id for this loader.
- * \param shard_count Total shard count.
- * \param sequence_length: The number of frames in a sequence.
- * \param dest_width The output width of frames.
- * \param dest_height The output height of frames.
- * \param file_names_list List of input video filenames
- * \param shuffle: to shuffle sequences.
- * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
- * \param loop: repeat data loading.
- * \param step: Frame interval between each sequence.
- * \param stride: Frame interval between frames in a sequence.
- * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
- * \return
- */
-extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context,
- const char *source_path,
- RocalImageColor color_format,
- RocalDecodeDevice rocal_decode_device,
- unsigned shard_id,
- unsigned shard_count,
- unsigned sequence_length,
- unsigned dest_width,
- unsigned dest_height,
- const std::vector& file_names_list,
- bool shuffle = false,
- bool is_output = false,
- bool loop = false,
- unsigned step = 0,
- unsigned stride = 0,
- bool file_list_frame_num = true,
- RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT,
- std::vector max_size = {},
- unsigned resize_shorter = 0,
- unsigned resize_longer = 0,
- RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION);
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] shuffle Whether to shuffle the dataset.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] out_width The output_width of raw image
+ * \param [in] out_height The output height of raw image
+ * \param [in] record_name_prefix If non-empty, the reader will only read records with the given prefix.
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ unsigned out_width = 0, unsigned out_height = 0,
+ const char* record_name_prefix = "",
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
/*!
- * \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems.
+ * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems.
* \ingroup group_rocal_data_loaders
- * \param context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param out_width ; output width
- * \param out_height ; output_height
- * \param filename_prefix ; if set loader will only load files with the given prefix name
- * \return Reference to the output image
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file
+ * \param [in] color_format The color format the frames will be decoded to.
+ * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] shuffle Whether to shuffle the sequences.
+ * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] file_list_frame_num Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSource(RocalContext context,
+ const char* source_path,
RocalImageColor color_format,
- bool is_output,
- unsigned out_width, unsigned out_height, const char *filename_prefix = "",
- bool loop = false);
-
-/*!
- * \brief
+ RocalDecodeDevice rocal_decode_device,
+ unsigned internal_shard_count,
+ unsigned sequence_length,
+ bool is_output = false,
+ bool shuffle = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ bool file_list_frame_num = true,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a single shard only.
* \ingroup group_rocal_data_loaders
- * \param context
- * \return
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file
+ * \param [in] color_format The color format the frames will be decoded to.
+ * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
+ * \param [in] shard_id Shard id for this loader.
+ * \param [in] shard_count Total shard count.
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] shuffle Whether to shuffle the sequences.
+ * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] file_list_frame_num Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context);
+extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor color_format,
+ RocalDecodeDevice rocal_decode_device,
+ unsigned shard_id,
+ unsigned shard_count,
+ unsigned sequence_length,
+ bool shuffle = false,
+ bool is_output = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ bool file_list_frame_num = true,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height.
+ * \ingroup group_rocal_data_loaders
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file
+ * \param [in] color_format The color format the frames will be decoded to.
+ * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
+ * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] dest_width The output width of frames.
+ * \param [in] dest_height The output height of frames.
+ * \param [in] shuffle Whether to shuffle the sequences.
+ * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] file_list_frame_num Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResize(RocalContext context,
+ const char* source_path,
+ RocalImageColor color_format,
+ RocalDecodeDevice rocal_decode_device,
+ unsigned internal_shard_count,
+ unsigned sequence_length,
+ unsigned dest_width,
+ unsigned dest_height,
+ bool shuffle = false,
+ bool is_output = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ bool file_list_frame_num = true,
+ RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT,
+ std::vector max_size = {},
+ unsigned resize_shorter = 0,
+ unsigned resize_longer = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a single shard only.
+ * \ingroup group_rocal_data_loaders
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file
+ * \param [in] color_format The color format the frames will be decoded to.
+ * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported.
+ * \param [in] shard_id Shard id for this loader.
+ * \param [in] shard_count Total shard count.
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] dest_width The output width of frames.
+ * \param [in] dest_height The output height of frames.
+ * \param [in] shuffle Whether to shuffle the sequences.
+ * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not.
+ * \param [in] loop Whether to repeat the data loading.
+ * \param [in] step Frame interval between each sequence.
+ * \param [in] stride Frame interval between frames in a sequence.
+ * \param [in] file_list_frame_num Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path.
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context,
+ const char* source_path,
+ RocalImageColor color_format,
+ RocalDecodeDevice rocal_decode_device,
+ unsigned shard_id,
+ unsigned shard_count,
+ unsigned sequence_length,
+ unsigned dest_width,
+ unsigned dest_height,
+ bool shuffle = false,
+ bool is_output = false,
+ bool loop = false,
+ unsigned step = 0,
+ unsigned stride = 0,
+ bool file_list_frame_num = true,
+ RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT,
+ std::vector max_size = {},
+ unsigned resize_shorter = 0,
+ unsigned resize_longer = 0,
+ RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems.
+ * \ingroup group_rocal_data_loaders
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] color_format The color format the images will be decoded to.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] out_width output width
+ * \param [in] out_height output height
+ * \param [in] filename_prefix if set loader will only load files with the given prefix name
+ * \param [in] loop Determines if the user wants to indefinitely loop through images or not.
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context,
+ const char* source_path,
+ RocalImageColor color_format,
+ bool is_output,
+ unsigned out_width, unsigned out_height, const char* filename_prefix = "",
+ bool loop = false);
-/*!
- * \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants.
+/*! \brief reset Loaders
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1.
- * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33.
- * \param num_attempts Maximum number of attempts to generate crop. Default 10
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return Reference to the output image
+ * \param [in] context Rocal Context
+ * \return Rocal status value
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context,
- const char *source_path,
- RocalImageColor rocal_color_format,
- unsigned shard_id,
- unsigned shard_count,
- bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
- unsigned num_attempts,
- bool shuffle = false,
- bool loop = false,
- RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
+extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context);
-/*!
- * \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe22 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants.
+/*! \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants.
* \ingroup group_rocal_data_loaders
- * \param rocal_context Rocal context
- * \param source_path A NULL terminated char string pointing to the location on the disk
- * \param rocal_color_format The color format the images will be decoded to.
- * \param shard_id Shard id for this loader
- * \param shard_count Total shard count
- * \param is_output Determines if the user wants the loaded images to be part of the output or not.
- * \param shuffle Determines if the user wants to shuffle the dataset or not.
- * \param loop Determines if the user wants to indefinitely loops through images or not.
- * \param decode_size_policy
- * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest
- * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest
- * \return Reference to the output image
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] area_factor Determines how much area is to be cropped. Ranges from 0.08 to 1.
+ * \param [in] aspect_ratio Determines the aspect ratio of the crop. Ranges from 0.75 to 1.33.
+ * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loop through images or not.
+ * \param [in] decode_size_policy is the RocalImageSizeEvaluationPolicy for decoding
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
*/
-extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context,
- const char *source_path,
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context,
+ const char* source_path,
RocalImageColor rocal_color_format,
unsigned shard_id,
unsigned shard_count,
bool is_output,
- std::vector &area_factor,
- std::vector &aspect_ratio,
+ std::vector& area_factor,
+ std::vector& aspect_ratio,
unsigned num_attempts,
bool shuffle = false,
bool loop = false,
RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
- unsigned max_width = 0, unsigned max_height = 0);
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
-#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H
+/*! \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants.
+ * \ingroup group_rocal_data_loaders
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Total shard count
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loop through images or not.
+ * \param [in] decode_size_policy is the RocalImageSizeEvaluationPolicy for decoding
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context,
+ const char* source_path,
+ RocalImageColor rocal_color_format,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ std::vector& area_factor,
+ std::vector& aspect_ratio,
+ unsigned num_attempts,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+/*! \brief Creates JPEG external source image reader.
+ * \ingroup group_rocal_data_loaders
+ * \param [in] p_context Rocal context
+ * \param [in] rocal_color_format The color format the images will be decoded to.
+ * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not.
+ * \param [in] shuffle Determines if the user wants to shuffle the dataset or not.
+ * \param [in] loop Determines if the user wants to indefinitely loop through images or not.
+ * \param [in] decode_size_policy is the RocalImageSizeEvaluationPolicy for decoding
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec
+ * \param [in] external_source_mode Determines the mode of the source passed from the user - file_names / uncompressed data / compressed data
+ * \param [in] last_batch_info Determines the handling of the last batch when the shard size is not divisible by the batch size. Check RocalLastBatchPolicy() enum for possible values & If set to True, pads the shards last batch by repeating the last sample's data (dummy data).
+ * \return Reference to the output tensor
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalJpegExternalFileSource(RocalContext p_context,
+ RocalImageColor rocal_color_format,
+ bool is_output = false,
+ bool shuffle = false,
+ bool loop = false,
+ RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE,
+ unsigned max_width = 0, unsigned max_height = 0,
+ RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG,
+ RocalExternalSourceMode external_source_mode = RocalExternalSourceMode::ROCAL_EXTSOURCE_FNAME,
+ std::pair last_batch_info = {RocalLastBatchPolicy::ROCAL_LAST_BATCH_FILL, true});
+
+/*! \brief Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file systems. It has internal sharding capability to load/decode in parallel if user wants.
+ * If the files are not in standard audio compression formats they will be ignored. Currently, the wav format is supported.
+ * \param [in] context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] source_file_list_path A char string pointing to the file list location on the disk
+ * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Boolean variable to enable the audio to be part of the output.
+ * \param [in] shuffle Boolean variable to shuffle the dataset.
+ * \param [in] loop Boolean variable to indefinitely loop through audio.
+ * \param [in] downmix Boolean variable to downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio.
+ * \return Reference to the output audio
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSource(RocalContext context,
+ const char* source_path,
+ const char* source_file_list_path,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ bool downmix = false);
+
+/*! \brief Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file systems. It has internal sharding capability to load/decode in parallel if user wants.
+ * If the files are not in standard audio compression formats they will be ignored.
+ * \param [in] p_context Rocal context
+ * \param [in] source_path A NULL terminated char string pointing to the location on the disk
+ * \param [in] source_file_list_path A char string pointing to the file list location on the disk
+ * \param [in] shard_id Shard id for this loader
+ * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available.
+ * \param [in] is_output Boolean variable to enable the audio to be part of the output.
+ * \param [in] shuffle Boolean variable to shuffle the dataset.
+ * \param [in] loop Boolean variable to indefinitely loop through audio.
+ * \param [in] downmix Boolean variable to downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio.
+ * \return Reference to the output audio
+ */
+extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSourceSingleShard(RocalContext p_context,
+ const char* source_path,
+ const char* source_file_list_path,
+ unsigned shard_id,
+ unsigned shard_count,
+ bool is_output,
+ bool shuffle = false,
+ bool loop = false,
+ bool downmix = false);
+
+#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H
diff --git a/rocAL/include/api/rocal_api_data_transfer.h b/rocAL/include/api/rocal_api_data_transfer.h
index b31b8911a..966ee6bdf 100644
--- a/rocAL/include/api/rocal_api_data_transfer.h
+++ b/rocAL/include/api/rocal_api_data_transfer.h
@@ -33,59 +33,75 @@ THE SOFTWARE.
*/
/*!
- * \brief TBD
+ * \brief copies data to output buffer
* \ingroup group_rocal_data_transfer
- *
- * \param [in] context
- * \return A \ref RocalStatus - A status code indicating the success or failure
+ * \param [in] context Rocal context
+ * \param [in] out_ptr pointer to output buffer
+ * \param [in] out_size size of output buffer
+ * \return Rocal status indicating success or failure
*/
extern "C" RocalStatus ROCAL_API_CALL rocalCopyToOutput(RocalContext context, unsigned char *out_ptr, size_t out_size);
/*!
- * \brief TBD
+ * \brief converts data to a tensor
* \ingroup group_rocal_data_transfer
- *
- * \param [in] context
- * \return A \ref RocalStatus - A status code indicating the success or failure
+ * \param [in] rocal_context Rocal context
+ * \param [in] out_ptr pointer to output buffer
+ * \param [in] tensor_format the layout of the tensor data
+ * \param [in] tensor_output_type the output type of the tensor data
+ * \param [in] multiplier0 the multiplier for channel 0
+ * \param [in] multiplier1 the multiplier for channel 1
+ * \param [in] multiplier2 the multiplier for channel 2
+ * \param [in] offset0 the offset for channel 0
+ * \param [in] offset1 the offset for channel 1
+ * \param [in] offset2 the offset for channel 2
+ * \param [in] reverse_channels flag to reverse the channel orders
+ * \param [in] output_mem_type the memory type of output tensor buffer
+ * \return Rocal status indicating success or failure
*/
-extern "C" RocalStatus ROCAL_API_CALL rocalToTensor32(RocalContext rocal_context, float *out_ptr,
- RocalTensorLayout tensor_format, float multiplier0,
- float multiplier1, float multiplier2, float offset0,
- float offset1, float offset2,
- bool reverse_channels, RocalOutputMemType output_mem_type);
+extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, void *out_ptr,
+ RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type,
+ float multiplier0, float multiplier1, float multiplier2, float offset0,
+ float offset1, float offset2,
+ bool reverse_channels, RocalOutputMemType output_mem_type, int max_roi_height = 0, int max_roi_width = 0);
/*!
- * \brief TBD
+ * \brief Sets the output images in the RocalContext
* \ingroup group_rocal_data_transfer
- *
- * \param [in] context
- * \return A \ref RocalStatus - A status code indicating the success or failure
+ * \param [in] p_context Rocal context
+ * \param [in] num_of_outputs number of output images
+ * \param [in] output_images output images
*/
-extern "C" RocalStatus ROCAL_API_CALL rocalToTensor16(RocalContext rocal_context, half *out_ptr,
- RocalTensorLayout tensor_format, float multiplier0,
- float multiplier1, float multiplier2, float offset0,
- float offset1, float offset2,
- bool reverse_channels, RocalOutputMemType output_mem_type);
+extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images);
/*!
- * \brief TBD
+ * \brief gives the list of output tensors from rocal context
* \ingroup group_rocal_data_transfer
- *
- * \param [in] context
- * \return A \ref RocalStatus - A status code indicating the success or failure
+ * \param [in] p_context Rocal Context
+ * \return A RocalTensorList containing the list of output tensors
*/
-extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, void *out_ptr,
- RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type,
- float multiplier0, float multiplier1, float multiplier2, float offset0,
- float offset1, float offset2,
- bool reverse_channels, RocalOutputMemType output_mem_type);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetOutputTensors(RocalContext p_context);
+
/*!
- * \brief TBD
+ * \brief Creates ExternalSourceFeedInput for data transfer
* \ingroup group_rocal_data_transfer
- *
- * \param [in] context
- * \return A \ref RocalStatus - A status code indicating the success or failure
+ * \param [in] p_context Rocal context
+ * \param [in] input_images_names Strings pointing to the location of the images on the disk
+ * \param [in] is_labels Presumably indicates whether labels are passed by the user through the external source (confirm against the implementation)
+ * \param [in] input_buffer Compressed or uncompressed input buffer
+ * \param [in] roi_xywh The ROIs of the images (presumably x, y, width and height, going by the name)
+ * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest
+ * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest
+ * \param [in] channels The number of channels for the image
+ * \param [in] mode Determines the mode of the source passed from the user - file_names / uncompressed data / compressed data
+ * \param [in] layout Determines the layout of the images - NCHW / NHWC
+ * \param [in] eos Presumably an end-of-stream flag marking the last input fed by the user (confirm against the implementation)
+ * \return Rocal status indicating success or failure
*/
-extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images);
+extern "C" RocalStatus ROCAL_API_CALL rocalExternalSourceFeedInput(RocalContext p_context, const std::vector& input_images_names,
+ bool is_labels, const std::vector& input_buffer,
+ const std::vector& roi_xywh,
+ unsigned int max_width, unsigned int max_height, unsigned int channels,
+ RocalExternalSourceMode mode, RocalTensorLayout layout, bool eos);
-#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H
+#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H
diff --git a/rocAL/include/api/rocal_api_info.h b/rocAL/include/api/rocal_api_info.h
index e00d5e4f7..9ea36fbb8 100644
--- a/rocAL/include/api/rocal_api_info.h
+++ b/rocAL/include/api/rocal_api_info.h
@@ -33,115 +33,108 @@ THE SOFTWARE.
*/
/*!
- * \brief rocalGetOutputWidth
+ * \brief Retrieves the width of the output.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return The width of the ROCAL's output image in pixels
+ * \param [in] rocal_context The RocalContext
+ * \return The width of the output.
*/
extern "C" int ROCAL_API_CALL rocalGetOutputWidth(RocalContext rocal_context);
/*!
- * \brief rocalGetOutputHeight
+ * \brief Retrieves the height of the output.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return The height of the ROCAL's output image in pixels. It includes all images in the batch.
+ * \param [in] rocal_context The RocalContext
+ * \return The height of the output.
*/
extern "C" int ROCAL_API_CALL rocalGetOutputHeight(RocalContext rocal_context);
/*!
- * \brief rocalGetOutputColorFormat
+ * \brief Retrieves the color format of the output.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return The color format of the ROCAL's output. It's equivalent of what's passed to the loaders as input color format.
+ * \param [in] rocal_context The RocalContext.
+ * \return The color format of the output.
*/
extern "C" int ROCAL_API_CALL rocalGetOutputColorFormat(RocalContext rocal_context);
/*!
- * \brief rocalGetRemainingImages
+ * \brief Retrieves the number of remaining images.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return The number of images yet to be processed
+ * \param [in] rocal_context The RocalContext.
+ * \return The number of remaining images yet to be processed.
*/
+
extern "C" size_t ROCAL_API_CALL rocalGetRemainingImages(RocalContext rocal_context);
/*!
- * \brief rocalGetImageWidth
+ * \brief Retrieves the width of the image.
* \ingroup group_rocal_info
- *
- * \param [in] image
- * \return Width of the graph output image
- * \note Returned value valid only after rocalVerify is called
+ * \param [in] image The RocalTensor data.
+ * \return The width of the image.
*/
-extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalImage image);
+extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalTensor image);
/*!
- * \brief rocalGetImageHeight
+ * \brief Retrieves the height of the image.
* \ingroup group_rocal_info
- *
- * \param [in] image
- * \return Height of the pipeline output image, includes all images in the batch
- * \note Returned value valid only after rocalVerify is called
+ * \param [in] image The RocalTensor data.
+ * \return The height of the image.
*/
-extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalImage image);
+extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalTensor image);
/*!
- * \brief rocalGetImagePlanes
+ * \brief Retrieves the number of planes (channels) in the image.
* \ingroup group_rocal_info
- *
- * \param [in] image
- * \return Color format of the pipeline output image.
- * \note Returned value valid only after rocalVerify is called
+ * \param [in] image The RocalTensor data.
+ * \return The number of planes (channels) in the image.
*/
-extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalImage image);
+extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalTensor image);
/*!
- * \brief rocalIsEmpty
+ * \brief Checks if the RocalContext is empty.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return 1 if all images have been processed, otherwise 0
- * \note Returned value valid only after rocalVerify is called
+ * \param [in] rocal_context The RocalContext
+ * \return Whether the RocalContext is empty or not.
*/
extern "C" size_t ROCAL_API_CALL rocalIsEmpty(RocalContext rocal_context);
/*!
- * \brief rocalGetAugmentationBranchCount
+ * \brief Retrieves the number of augmentation branches.
* \ingroup group_rocal_info
- *
- * \param [in] context
+ * \param [in] rocal_context The RocalContext
* \return Number of augmentation graph branches. Defined by number of calls to the augmentation API's with the is_output flag set to true.
*/
extern "C" size_t ROCAL_API_CALL rocalGetAugmentationBranchCount(RocalContext rocal_context);
/*!
- * \brief rocalGetStatus
- * \ingroup group_rocal_info
- *
- * \param [in] context
+ * \brief Retrieves the status.
+ * \ingroup group_rocal_info
+ * \param [in] rocal_context The RocalContext from which to retrieve the status.
* \return The status of tha last API call
*/
extern "C" RocalStatus ROCAL_API_CALL rocalGetStatus(RocalContext rocal_context);
/*!
- * \brief rocalGetErrorMessage
+ * \brief Retrieves the error message.
* \ingroup group_rocal_info
- *
- * \param [in] context
- * \return The last error message generated by call to rocal API
+ * \param [in] rocal_context The RocalContext
+ * \return A pointer to the error message string.
*/
-extern "C" const char *ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context);
+extern "C" const char* ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context);
/*!
- * \brief rocalGetTimingInfo
+ * \brief Retrieves timing information.
* \ingroup group_rocal_info
- *
- * \param [in] context
+ * \param [in] rocal_context The RocalContext
* \return The timing info associated with recent execution.
*/
extern "C" TimingInfo ROCAL_API_CALL rocalGetTimingInfo(RocalContext rocal_context);
-#endif // MIVISIONX_ROCAL_API_INFO_H
+/*!
+ * \brief Retrieves the information about the size of the last batch.
+ * \ingroup group_rocal_info
+ * \param [in] rocal_context The RocalContext
+ * \return The number of samples that were padded in the last batch in adherence with last_batch_policy and last_batch_padded
+ */
+extern "C" size_t ROCAL_API_CALL rocalGetLastBatchPaddedSize(RocalContext rocal_context);
+
+#endif // MIVISIONX_ROCAL_API_INFO_H
diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h
index edee9dda4..9907427bb 100644
--- a/rocAL/include/api/rocal_api_meta_data.h
+++ b/rocAL/include/api/rocal_api_meta_data.h
@@ -32,272 +32,288 @@ THE SOFTWARE.
* \brief The AMD rocAL meta data functions.
*/
-/*!
- * \brief rocalCreateLabelReader
+/*! \brief creates label reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the folder that contains the dataset or metadata file
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the folder that contains the dataset or metadata file
+ * \param file_list_path is the path to file list that contains the file names and its corresponding labels
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char *source_path);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char* source_path, const char* file_list_path = "");
-/*!
- * \brief rocalCreateVideoLabelReader
- * \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the folder that contains the dataset or metadata file
- * \param sequence_length The number of frames in a sequence.
- * \param frame_step Frame interval between each sequence.
- * \param frame_stride Frame interval between frames in a sequence.
- * \param file_names_list List of input video filenames
- * \param labels List of labels corresponding to each video filename in filenames_list
- * \param file_list_frame_num True : when the inputs from text file is to be considered as frame numbers.
- * False : when the inputs from text file is to considered as timestamps.
+/*! \brief creates video label reader
+ * \ingroup group_rocal_meta_data
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the folder that contains the dataset or metadata file
+ * \param [in] sequence_length The number of frames in a sequence.
+ * \param [in] frame_step Frame interval between each sequence.
+ * \param [in] frame_stride Frame interval between frames in a sequence.
+ * \param [in] file_list_frame_num True : when the inputs from the text file are to be considered as frame numbers. False : when the inputs from the text file are to be considered as timestamps.
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char *source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, const std::vector& file_names_list, const std::vector& labels, bool file_list_frame_num = true);
-
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char* source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true);
-/*!
- * \brief rocalCreateTFReader
+/*! \brief create tf reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the coco json file
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the TFRecord dataset
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char *source_path, bool is_output,
- const char *user_key_for_label, const char *user_key_for_filename);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char* source_path, bool is_output,
+ const char* user_key_for_label, const char* user_key_for_filename);
-/*!
- * \brief rocalCreateTFReaderDetection
+/*! \brief create tf reader detection
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the coco json file
+ * \param [in] rocal_context
+ * \param [in] source_path path to the TFRecord dataset
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output,
- const char *user_key_for_label, const char *user_key_for_text,
- const char *user_key_for_xmin, const char *user_key_for_ymin, const char *user_key_for_xmax, const char *user_key_for_ymax,
- const char *user_key_for_filename);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output,
+ const char* user_key_for_label, const char* user_key_for_text,
+ const char* user_key_for_xmin, const char* user_key_for_ymin, const char* user_key_for_xmax, const char* user_key_for_ymax,
+ const char* user_key_for_filename);
-/*!
- * \brief rocalCreateCOCOReader
+/*! \brief create coco reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the coco json file
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the coco json file
+ * \param [in] mask enable polygon masks
+ * \param [in] ltrb If set to True, bboxes are returned as [left, top, right, bottom]. If set to False, the bboxes are returned as [x, y, width, height]
+ * \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors
+ * \param [in] avoid_class_remapping If set to True, classes are returned directly. Otherwise, classes are mapped to consecutive values
+ * \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned
+ * \param [in] is_box_iou_matcher If set to True, box iou matcher which returns matched indices is enabled in the pipeline
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char *source_path, bool is_output);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false);
-/*!
- * \brief rocalCreateCOCOReaderKeyPoints
+/*! \brief create coco reader key points
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the coco json file
- * \param sigma sigma used for gaussian distribution (needed for HRNet Pose estimation)
- * \param pose_output_width output image width (needed for HRNet Pose estimation)
- * \param pose_output_width output image height (needed for HRNet Pose estimation)
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the coco json file
+ * \param [in] sigma sigma used for gaussian distribution (needed for HRNet Pose estimation)
+ * \param [in] pose_output_width output image width (needed for HRNet Pose estimation)
+ * \param [in] pose_output_height output image height (needed for HRNet Pose estimation)
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char *source_path, bool is_output, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char* source_path, bool is_output, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0);
-/*!
- * \brief rocalCreateTextFileBasedLabelReader
+/*! \brief create text file based label reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the file that contains the metadata file
+ * \param [in] rocal_context
+ * \param [in] source_path path to the file that contains the metadata file
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char *source_path);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char* source_path);
-/*!
- * \brief rocalCreateCaffeLMDBLabelReader
+/*! \brief create caffe LMDB label reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the Caffe LMDB records for Classification
+ * \param [in] rocal_context
+ * \param [in] source_path path to the Caffe LMDB records for Classification
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char *source_path);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char* source_path);
-/*!
- * \brief rocalCreateCaffeLMDBReaderDetection
+/*! \brief create caffe LMDB label reader for object detection
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the Caffe LMDB records for Object Detection
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the Caffe LMDB records for Object Detection
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char *source_path);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char* source_path);
-/*!
- * \brief rocalCreateCaffe2LMDBLabelReader
+/*! \brief create caffe2 LMDB label reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the Caffe2LMDB records for Classification
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the Caffe2LMDB records for Classification
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char *source_path, bool is_output);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char* source_path, bool is_output);
-/*!
- * \brief rocalCreateCaffe2LMDBReaderDetection
+/*! \brief create caffe2 LMDB label reader for object detection
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the Caffe2LMDB records for Object Detection
- * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the Caffe2LMDB records for Object Detection
+ * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output);
-/*!
- * \brief rocalCreateMXNetReader
+/*! \brief create MXNet reader
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the MXNet recordio files for Classification
- * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the MXNet recordio files for Classification
+ * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char *source_path, bool is_output);
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char* source_path, bool is_output);
-/*!
- * \brief rocalGetImageName
+/*! \brief get image name
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf user buffer provided to be filled with output image names for images in the output batch.
+ * \param [in] rocal_context rocal context
+ * \param [out] buf user buffer provided to be filled with output image names for images in the output batch.
*/
-extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char *buf);
+extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char* buf);
-/*!
- * \brief rocalGetImageNameLen
+/*! \brief get image name lengths
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf userbuffer provided to be filled with the length of the image names in the output batch
+ * \param [in] rocal_context rocal context
+ * \param [out] buf userbuffer provided to be filled with the length of the image names in the output batch
* \return The size of the buffer needs to be provided by user to get the image names of the output batch
*/
-extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int *buf);
+extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int* buf);
-/*!
- * \brief rocalGetImageLabels
+/*! \brief get image labels
* \ingroup group_rocal_meta_data
- * \param meta_data RocalMetaData object that contains info about the images and labels
- * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size.
+ * \param [in] rocal_context rocal context
+ * \note The labels are returned as a RocalTensorList; this call does not take a user buffer.
+ * \return RocalTensorList of labels associated with image
*/
-extern "C" void ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context, void *buf, RocalOutputMemType output_mem_type = RocalOutputMemType::ROCAL_MEMCPY_HOST);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context);
-/*!
- * \brief rocalGetBoundingBoxCount
+/*! \brief get bounding box count
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf The user's buffer that will be filled with number of object in the images.
+ * \param [in] rocal_context rocal context
+ * \note This call takes only the rocal context; no user buffer is passed.
* \return The size of the buffer needs to be provided by user to get bounding box info for all images in the output batch.
*/
-extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context, int *buf);
+extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context);
-/*!
- * \brief rocalGetBoundingBoxLabel
+/*! \brief get mask count
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf The user's buffer that will be filled with bounding box label info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCount
+ * \param [in] p_context rocal context
+ * \param [out] buf the imageIdx in the output batch
+ * \return The size of the buffer needs to be provided by user to get mask box info associated with image_idx in the output batch.
*/
-extern "C" void ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context, int *buf);
+extern "C" unsigned ROCAL_API_CALL rocalGetMaskCount(RocalContext p_context, int* buf);
-/*!
- * \brief rocalGetBoundingBoxCords
+/*! \brief get mask coordinates
* \ingroup group_rocal_meta_data
- * \param rocal_context
+ * \param [in] p_context rocal context
+ * \param [out] bufcount The user's buffer that will be filled with polygon size for the mask info
+ * \return The tensorlist with the mask coordinates
*/
-extern "C" void ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context, float *buf);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetMaskCoordinates(RocalContext p_context, int* bufcount);
-/*!
- * \brief rocalGetImageSizes
+/*! \brief get bounding box label
* \ingroup group_rocal_meta_data
- * \param rocal_context
+ * \param [in] rocal_context rocal context
+ * \note No user buffer is passed; the bounding box labels for the output batch are returned as a RocalTensorList.
+ * \return RocalTensorList of labels associated with bounding box coordinates
*/
-extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int *buf);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context);
-/*!
- * \brief rocalCreateTextCifar10LabelReader
+/*! \brief get bounding box coordinates
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param source_path path to the file that contains the metadata file
- * \param filename_prefix: look only files with prefix ( needed for cifar10)
- * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
+ * \param [in] rocal_context rocal context
+ * \note No user buffer is passed; the bounding box coordinates for the output batch are returned as a RocalTensorList.
+ * \return RocalTensorList of bounding box co-ordinates
*/
-extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char *source_path, const char *file_prefix);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context);
-/*!
- * \brief rocalGetOneHotImageLabels
+/*! \brief get image sizes
* \ingroup group_rocal_meta_data
- * \param meta_data RocalMetaData object that contains info about the images and labels
- * \param numOfClasses the number of classes for a image dataset
- * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size.
- * \param dest destination can be host=0 / device=1
+ * \param [in] rocal_context rocal context
+ * \param [out] buf The user's buffer that will be filled with images sizes info for the images in the output batch
*/
-extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void *buf, int numOfClasses, int dest);
+extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int* buf);
-/*!
- * \brief rocalRandomBBoxCrop
+/*! \brief get ROI image sizes
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * */
+ * \param [in] rocal_context rocal context
+ * \param [out] buf The user's buffer that will be filled with ROI image size info for the images in the output batch
+ */
+extern "C" void ROCAL_API_CALL rocalGetROIImageSizes(RocalContext rocal_context, int* buf);
+
+/*! \brief create text cifar10 label reader
+ * \ingroup group_rocal_meta_data
+ * \param [in] rocal_context rocal context
+ * \param [in] source_path path to the file that contains the metadata file
+ * \param [in] file_prefix look only at files with this prefix (needed for cifar10)
+ * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
+ */
+extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char* source_path, const char* file_prefix);
+
+/*! \brief get one hot image labels
+ * \ingroup group_rocal_meta_data
+ * \param [in] rocal_context rocal context
+ * \param [out] buf user's buffer that will be filled with labels. It needs to be at least of size batch_size.
+ * \param [in] numOfClasses the number of classes for an image dataset
+ * \param [in] output_mem_type memory type of the destination buffer (host or device)
+ */
+extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void* buf, int numOfClasses, RocalOutputMemType output_mem_type);
+
extern "C" void ROCAL_API_CALL rocalRandomBBoxCrop(RocalContext p_context, bool all_boxes_overlap, bool no_crop, RocalFloatParam aspect_ratio = NULL, bool has_shape = false, int crop_width = 0, int crop_height = 0, int num_attempts = 1, RocalFloatParam scaling = NULL, int total_num_attempts = 0, int64_t seed = 0);
-/*!
- * \brief rocalGetSequenceStartFrameNumber
+/*! \brief get sequence starting frame number
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf The user's buffer that will be filled with starting frame numbers of the output batch sequences.
+ * \param [in] rocal_context rocal context
+ * \param [out] buf The user's buffer that will be filled with starting frame numbers of the output batch sequences.
*/
-extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int *buf);
+extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int* buf);
-/*!
- * \brief rocalGetSequenceFrameTimestamps
+/*! \brief get sequence time stamps
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences.
+ * \param [in] rocal_context rocal context
+ * \param [out] buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences.
*/
-extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float *buf);
+extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float* buf);
-/*!
- * \brief rocalBoxEncoder
+/*! \brief rocal box encoder
* \ingroup group_rocal_meta_data
- * \param anchors Anchors to be used for encoding, as the array of floats is in the ltrb format.
- * \param criteria Threshold IoU for matching bounding boxes with anchors.
- * The value needs to be between 0 and 1.
- * \param offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments
- * \param means [x y w h] mean values for normalization.
- * \param stds [x y w h] standard deviations for offset normalization.
- * \param scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values).
+ * \param [in] anchors Anchors to be used for encoding, as the array of floats is in the ltrb format.
+ * \param [in] criteria Threshold IoU for matching bounding boxes with anchors. The value needs to be between 0 and 1.
+ * \param [in] offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments
+ * \param [in] means [x y w h] mean values for normalization.
+ * \param [in] stds [x y w h] standard deviations for offset normalization.
+ * \param [in] scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values).
*/
-extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector &anchors, float criteria,
- std::vector &means, std::vector &stds, bool offset = false, float scale = 1.0);
+extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria,
+ std::vector& means, std::vector& stds, bool offset = false, float scale = 1.0);
-/*!
- * \brief rocalCopyEncodedBoxesAndLables
+/*! \brief copy encoded boxes and labels
* \ingroup group_rocal_meta_data
- * \param boxes_buf user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size.
- * \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size.
+ * \param [in] p_context rocal context
+ * \param [out] boxes_buf user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size.
+ * \param [out] labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size.
*/
-extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float *boxes_buf, int *labels_buf);
+extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float* boxes_buf, int* labels_buf);
-/*!
- * \brief rocalGetEncodedBoxesAndLables
+/*! \brief get encoded boxes and labels
* \ingroup group_rocal_meta_data
* \param boxes_buf ptr to user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size.
* \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size.
*/
-extern "C" void ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, float **boxes_buf_ptr, int **labels_buf_ptr, int num_encoded_boxes);
+extern "C" RocalMetaData ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, int num_encoded_boxes);
-/*!
- * \brief rocalGetImageId
+/*! \brief get image id
* \ingroup group_rocal_meta_data
- * \param rocal_context
+ * \param p_context rocal context
* \param buf The user's buffer that will be filled with image id info for the images in the output batch.
*/
-extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int *buf);
+extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int* buf);
-/*!
- * \brief rocalGetJointsDataPtr
+/*! \brief get joints data pointer
+ * \ingroup group_rocal_meta_data
+ * \param [in] p_context rocal context
+ * \param [out] joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer
+ */
+extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData** joints_data);
+
+/*! \brief API to enable box IOU matcher and pass required params to pipeline
+ * \ingroup group_rocal_meta_data
+ * \param [in] p_context rocAL context
+ * \param [in] anchors The anchors / ground truth bounding box coordinates
+ * \param [in] high_threshold The max threshold for IOU
+ * \param [in] low_threshold The min threshold for IOU
+ * \param [in] allow_low_quality_matches bool value when set to true allows low quality matches
+ */
+extern "C" void ROCAL_API_CALL rocalBoxIouMatcher(RocalContext p_context, std::vector& anchors,
+ float high_threshold, float low_threshold, bool allow_low_quality_matches = true);
+
+/*! \brief API to return the matched indices for the bounding box and anchors
* \ingroup group_rocal_meta_data
- * \param rocal_context
- * \param joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer
+ * \param [in] p_context rocAL context
+ * \return RocalTensorList of matched indices
*/
-extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData **joints_data);
+extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext p_context);
-#endif // MIVISIONX_ROCAL_API_META_DATA_H
+#endif // MIVISIONX_ROCAL_API_META_DATA_H
diff --git a/rocAL/include/api/rocal_api_parameters.h b/rocAL/include/api/rocal_api_parameters.h
index bc2e5907f..d79abc49b 100644
--- a/rocAL/include/api/rocal_api_parameters.h
+++ b/rocAL/include/api/rocal_api_parameters.h
@@ -32,163 +32,132 @@ THE SOFTWARE.
* \brief The AMD rocAL Parameters.
*/
-/*!
- * \brief rocalSetSeed
+/*! \brief set seed for random number generation
* \ingroup group_rocal_parameters
- *
- * \param seed
+ * \param [in] seed seed for the random number generation
*/
extern "C" void ROCAL_API_CALL rocalSetSeed(unsigned seed);
-/*!
- * \brief rocalGetSeed
+/*! \brief gets the seed value
* \ingroup group_rocal_parameters
- *
- * \return
+ * \return seed value
*/
extern "C" unsigned ROCAL_API_CALL rocalGetSeed();
-/*!
- * \brief rocalCreateIntUniformRand
+/*! \brief Creates a new uniform random integer parameter within a specified range.
* \ingroup group_rocal_parameters
- *
- * \param start
- * \param end
- * \return
+ * \param start start value of the integer range
+ * \param end end value of the integer range
+ * \return RocalIntParam representing the uniform random integer parameter.
*/
extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntUniformRand(int start, int end);
-/*!
- * \brief rocalUpdateIntUniformRand
+/*! \brief updates uniform random integer parameter within a specified range.
* \ingroup group_rocal_parameters
- *
- * \param start
- * \param end
- * \param input_obj
- * \return
+ * \param start start value of the integer range
+ * \param end end value of the integer range
+ * \param updating_obj RocalIntParam to be updated.
+ * \return rocal status value
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntUniformRand(int start, int end, RocalIntParam updating_obj);
-/*!
- * \brief rocalGetIntValue
+/*! \brief gets the value of a RocalIntParam.
* \ingroup group_rocal_parameters
- *
- * \param obj
- * \return
+ * \param [in] obj The RocalIntParam from which to retrieve the value.
+ * \return integer value of the RocalIntParam.
*/
extern "C" int ROCAL_API_CALL rocalGetIntValue(RocalIntParam obj);
-/*!
- * \brief rocalGetFloatValue
+/*! \brief gets the value of a RocalFloatParam.
* \ingroup group_rocal_parameters
- *
- * \param obj
- * \return
+ * \param [in] obj The RocalFloatParam from which to retrieve the value.
+ * \return float value of the RocalFloatParam.
*/
extern "C" float ROCAL_API_CALL rocalGetFloatValue(RocalFloatParam obj);
-/*!
- * \brief rocalCreateFloatUniformRand
+/*! \brief Creates a new uniform random float parameter within a specified range.
* \ingroup group_rocal_parameters
- *
- * \param start
- * \param end
- * \return
+ * \param start start value of the float range
+ * \param end end value of the float range
+ * \return RocalFloatParam representing the uniform random float parameter.
*/
extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatUniformRand(float start, float end);
-/*!
- * \brief rocalCreateFloatParameter
+/*! \brief Creates a new float parameter with a specified value.
* \ingroup group_rocal_parameters
- *
- * \param val
- * \return
+ * \param [in] val value to create float param
+ * \return A new RocalFloatParam representing the float parameter.
*/
extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatParameter(float val);
-/*!
- * \brief rocalCreateIntParameter
+/*! \brief Creates a new int parameter with a specified value.
* \ingroup group_rocal_parameters
- *
- * \param val
- * \return
+ * \param [in] val value to create integer param
+ * \return A new RocalIntParam representing the integer parameter.
*/
extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntParameter(int val);
-/*!
- * \brief rocalUpdateFloatParameter
+/*! \brief Updates a float parameter with a new value.
* \ingroup group_rocal_parameters
- *
- * \param new_val
- * \param input_obj
- * \return
+ * \param[in] new_val The new value to update the float parameter.
+ * \param[in] input_obj The RocalFloatParam to be updated.
+ * \return RocalStatus value.
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatParameter(float new_val, RocalFloatParam input_obj);
-/*!
- * \brief rocalUpdateIntParameter
+/*! \brief Updates an integer parameter with a new value.
* \ingroup group_rocal_parameters
- *
- * \param new_val
- * \param input_obj
- * \return
+ * \param[in] new_val The new value to update the integer parameter.
+ * \param[in] input_obj The RocalIntParam to be updated.
+ * \return RocalStatus value.
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntParameter(int new_val, RocalIntParam input_obj);
-/*!
- * \brief rocalUpdateFloatUniformRand
+/*! \brief updates uniform random float parameter within a specified range.
* \ingroup group_rocal_parameters
- *
- * \param start
- * \param end
- * \param input_obj
- * \return
+ * \param start start value of the float range
+ * \param end end value of the float range
+ * \param updating_obj RocalFloatParam to be updated.
+ * \return rocal status value
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatUniformRand(float start, float end, RocalFloatParam updating_obj);
-/*!
- * \brief rocalCreateIntRand
+/*! \brief Sets the parameters for a new or existing RocalIntRandGen object
* \ingroup group_rocal_parameters
- *
- * \param values
- * \param frequencies
- * \param size
- * \return
+ * \param [in] values random int values
+ * \param [in] frequencies frequencies of the values
+ * \param size size of the array
+ * \return random int parameter
*/
extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntRand(const int *values, const double *frequencies, unsigned size);
-/*!
- * \brief rocalUpdateIntRand
+/*! \brief update the int random value
* \ingroup group_rocal_parameters
- *
- * \param values
- * \param frequencies
- * \param size
- * \param updating_obj
- * \return
+ * \param [in] values random int values
+ * \param [in] frequencies frequencies of the values
+ * \param [in] size size of the array
+ * \param [in] updating_obj Rocal int Param to update
+ * \return rocal status value
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntRand(const int *values, const double *frequencies, unsigned size, RocalIntParam updating_obj);
-/*!
- * \brief Sets the parameters for a new or existing RocalFloatRandGen object
+/*! \brief Sets the parameters for a new or existing RocalFloatRandGen object
* \ingroup group_rocal_parameters
- * \param values
- * \param frequencies
- * \param size
- * \return
+ * \param [in] values random float values
+ * \param [in] frequencies frequencies of the values
+ * \param size size of the array
+ * \return random float parameter
*/
extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatRand(const float *values, const double *frequencies, unsigned size);
-/*!
- * \brief rocalUpdateFloatRand
+/*! \brief update the float random value
* \ingroup group_rocal_parameters
- *
- * \param values
- * \param frequencies
- * \param size
- * \param updating_obj
- * \return
+ * \param [in] values random float values
+ * \param [in] frequencies frequencies of the values
+ * \param [in] size size of the array
+ * \param [in] updating_obj Rocal Float Param to update
+ * \return rocal status value
*/
extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatRand(const float *values, const double *frequencies, unsigned size, RocalFloatParam updating_obj);
-#endif // MIVISIONX_ROCAL_API_PARAMETERS_H
+#endif // MIVISIONX_ROCAL_API_PARAMETERS_H
diff --git a/rocAL/include/api/rocal_api_tensor.h b/rocAL/include/api/rocal_api_tensor.h
new file mode 100644
index 000000000..5faf0e0ee
--- /dev/null
+++ b/rocAL/include/api/rocal_api_tensor.h
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef MIVISIONX_ROCAL_API_TENSOR_H
+#define MIVISIONX_ROCAL_API_TENSOR_H
+#include "rocal_api_types.h"
+
+/*!
+ * \file
+ * \brief The AMD rocAL Library - Tensor
+ *
+ * \defgroup group_rocal_tensor API: AMD rocAL - Tensor API
+ * \brief The AMD rocAL Tensor.
+ */
+
+/*!
+ * \brief Abstract interface representing a rocal tensor; apart from the defaulted virtual destructor every member is pure virtual
+ */
+class rocalTensor {
+ public:
+ virtual ~rocalTensor() = default;
+ virtual void* buffer() = 0;
+ virtual unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type = ROCAL_MEMCPY_HOST) = 0;  // external_mem_type defaults to ROCAL_MEMCPY_HOST when omitted
+ virtual unsigned num_of_dims() = 0;
+ virtual unsigned batch_size() = 0;
+ virtual std::vector dims() = 0;
+ virtual std::vector strides() = 0;
+ virtual RocalTensorLayout layout() = 0;
+ virtual RocalTensorBackend backend() = 0;
+ virtual RocalTensorOutputType data_type() = 0;
+ virtual size_t data_size() = 0;
+ virtual RocalROICordsType roi_type() = 0;
+ virtual size_t get_roi_dims_size() = 0;
+ virtual void copy_roi(void* roi_buffer) = 0;
+ virtual std::vector shape() = 0;
+ virtual void set_dims(std::vector dims) = 0;
+ virtual void set_mem_handle(void* buffer) = 0;
+};
+
+/*! \brief Abstract interface representing a rocal tensor list
+ */
+class rocalTensorList {
+ public:
+ virtual ~rocalTensorList() = default;  // allows implementations to be destroyed safely through this base type
+ virtual uint64_t size() = 0;
+ virtual rocalTensor* at(size_t index) = 0;
+ // isDenseTensor
+};
+
+typedef rocalTensor* RocalTensor;
+typedef rocalTensorList* RocalTensorList;
+typedef std::vector RocalMetaData;
+
+#endif // MIVISIONX_ROCAL_API_TENSOR_H
diff --git a/rocAL/include/api/rocal_api_types.h b/rocAL/include/api/rocal_api_types.h
index 13af3671c..e63a5a915 100644
--- a/rocAL/include/api/rocal_api_types.h
+++ b/rocAL/include/api/rocal_api_types.h
@@ -50,23 +50,17 @@ using half_float::half;
/*! \brief typedef void* Float Param
* \ingroup group_rocal_types
*/
-typedef void *RocalFloatParam;
+typedef void* RocalFloatParam;
+
/*! \brief typedef void* rocAL Int Param
* \ingroup group_rocal_types
*/
-typedef void *RocalIntParam;
+typedef void* RocalIntParam;
+
/*! \brief typedef void* rocAL Context
* \ingroup group_rocal_types
*/
-typedef void *RocalContext;
-/*! \brief typedef void* rocAL Image
- * \ingroup group_rocal_types
- */
-typedef void *RocalImage;
-/*! \brief typedef void* rocAL Meta Data
- * \ingroup group_rocal_types
- */
-typedef void *RocalMetaData;
+typedef void* RocalContext;
/*! \brief typedef std::vectors
* \ingroup group_rocal_types
@@ -82,19 +76,18 @@ typedef std::vector>> JointsBatch, JointsVisibili
/*! \brief Timing Info struct
* \ingroup group_rocal_types
*/
-struct TimingInfo
-{
+struct TimingInfo {
long long unsigned load_time;
long long unsigned decode_time;
long long unsigned process_time;
long long unsigned transfer_time;
};
+// HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user
/*! \brief rocAL Joints Data struct - HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user
* \ingroup group_rocal_types
*/
-struct RocalJointsData
-{
+struct RocalJointsData {
ImageIDBatch image_id_batch;
AnnotationIDBatch annotation_id_batch;
ImagePathBatch image_path_batch;
@@ -106,11 +99,17 @@ struct RocalJointsData
RotationBatch rotation_batch;
};
+struct ROIxywh {
+ unsigned x;
+ unsigned y;
+ unsigned w;
+ unsigned h;
+};
+
/*! \brief rocAL Status enum
* \ingroup group_rocal_types
*/
-enum RocalStatus
-{
+enum RocalStatus {
/*! \brief AMD ROCAL_OK
*/
ROCAL_OK = 0,
@@ -131,8 +130,7 @@ enum RocalStatus
/*! \brief rocAL Image Color enum
* \ingroup group_rocal_types
*/
-enum RocalImageColor
-{
+enum RocalImageColor {
/*! \brief AMD ROCAL_COLOR_RGB24
*/
ROCAL_COLOR_RGB24 = 0,
@@ -150,8 +148,7 @@ enum RocalImageColor
/*! \brief rocAL Process Mode enum
* \ingroup group_rocal_types
*/
-enum RocalProcessMode
-{
+enum RocalProcessMode {
/*! \brief AMD ROCAL_PROCESS_GPU
*/
ROCAL_PROCESS_GPU = 0,
@@ -163,8 +160,7 @@ enum RocalProcessMode
/*! \brief rocAL Flip Axis enum
* \ingroup group_rocal_types
*/
-enum RocalFlipAxis
-{
+enum RocalFlipAxis {
/*! \brief AMD ROCAL_FLIP_HORIZONTAL
*/
ROCAL_FLIP_HORIZONTAL = 0,
@@ -176,8 +172,7 @@ enum RocalFlipAxis
/*! \brief rocAL Image Size Evaluation Policy enum
* \ingroup group_rocal_types
*/
-enum RocalImageSizeEvaluationPolicy
-{
+enum RocalImageSizeEvaluationPolicy {
/*! \brief AMD ROCAL_USE_MAX_SIZE
*/
ROCAL_USE_MAX_SIZE = 0,
@@ -189,17 +184,16 @@ enum RocalImageSizeEvaluationPolicy
ROCAL_USE_MOST_FREQUENT_SIZE = 2,
/*! \brief Use the given size only if the actual decoded size is greater than the given size
*/
- ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3,
+ ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3, // use the given size only if the actual decoded size is greater than the given size
/*! \brief Use max size if the actual decoded size is greater than max
*/
- ROCAL_USE_MAX_SIZE_RESTRICTED = 4,
+ ROCAL_USE_MAX_SIZE_RESTRICTED = 4, // use max size if the actual decoded size is greater than max
};
/*! \brief rocAL Decode Device enum
* \ingroup group_rocal_types
*/
-enum RocalDecodeDevice
-{
+enum RocalDecodeDevice {
/*! \brief AMD ROCAL_HW_DECODE
*/
ROCAL_HW_DECODE = 0,
@@ -211,37 +205,63 @@ enum RocalDecodeDevice
/*! \brief rocAL Tensor Layout enum
* \ingroup group_rocal_types
*/
-enum RocalTensorLayout
-{
+enum RocalTensorLayout {
/*! \brief AMD ROCAL_NHWC
*/
ROCAL_NHWC = 0,
/*! \brief AMD ROCAL_NCHW
*/
- ROCAL_NCHW = 1
+ ROCAL_NCHW = 1,
+ /*! \brief AMD ROCAL_NFHWC
+ */
+ ROCAL_NFHWC = 2,
+ /*! \brief AMD ROCAL_NFCHW
+ */
+ ROCAL_NFCHW = 3,
+ /*! \brief AMD ROCAL_NHW
+ */
+ ROCAL_NHW = 4,
+ /*! \brief AMD ROCAL_NFT
+ * Spectrogram Layout FT
+ */
+ ROCAL_NFT = 5,
+ /*! \brief AMD ROCAL_NTF
+ * Spectrogram Layout TF
+ */
+ ROCAL_NTF = 6,
+ /*! \brief AMD ROCAL_NONE
+ */
+ ROCAL_NONE = 7 // Layout for generic tensors (Non-Image or Non-Video)
};
/*! \brief rocAL Tensor Output Type enum
* \ingroup group_rocal_types
*/
-enum RocalTensorOutputType
-{
+enum RocalTensorOutputType {
/*! \brief AMD ROCAL_FP32
*/
ROCAL_FP32 = 0,
/*! \brief AMD ROCAL_FP16
*/
ROCAL_FP16 = 1,
- /*! \brief AMD ROCAL_U8
+ /*! \brief AMD ROCAL_UINT8
+ */
+ ROCAL_UINT8 = 2,
+ /*! \brief AMD ROCAL_INT8
*/
- ROCAL_U8 = 2,
+ ROCAL_INT8 = 3,
+ /*! \brief AMD ROCAL_UINT32
+ */
+ ROCAL_UINT32 = 4,
+ /*! \brief AMD ROCAL_INT32
+ */
+ ROCAL_INT32 = 5
};
/*! \brief rocAL Decoder Type enum
* \ingroup group_rocal_types
*/
-enum RocalDecoderType
-{
+enum RocalDecoderType {
/*! \brief AMD ROCAL_DECODER_TJPEG
*/
ROCAL_DECODER_TJPEG = 0,
@@ -256,14 +276,14 @@ enum RocalDecoderType
ROCAL_DECODER_VIDEO_FFMPEG_SW = 3,
/*! \brief AMD ROCAL_DECODER_VIDEO_FFMPEG_HW
*/
- ROCAL_DECODER_VIDEO_FFMPEG_HW = 4
+ ROCAL_DECODER_VIDEO_FFMPEG_HW = 4,
+ /*! \brief AMD ROCAL_DECODER_AUDIO_GENERIC
+ * Uses SndFile library to read audio files
+ */
+ ROCAL_DECODER_AUDIO_GENERIC = 5
};
-/*! \brief rocAL Output Mem Type enum
- * \ingroup group_rocal_types
- */
-enum RocalOutputMemType
-{
+enum RocalOutputMemType {
/*! \brief AMD ROCAL_MEMCPY_HOST
*/
ROCAL_MEMCPY_HOST = 0,
@@ -275,24 +295,24 @@ enum RocalOutputMemType
ROCAL_MEMCPY_PINNED = 2
};
+// rocal external memcpy flags
/*! \brief AMD rocAL external memcpy flags - force copy to user provided host memory
* \ingroup group_rocal_types
*/
-#define ROCAL_MEMCPY_TO_HOST 1
+#define ROCAL_MEMCPY_TO_HOST 1 // force copy to user provided host memory
/*! \brief AMD rocAL external memcpy flags - force copy to user provided device memory (gpu)
* \ingroup group_rocal_types
*/
-#define ROCAL_MEMCPY_TO_DEVICE 2
+#define ROCAL_MEMCPY_TO_DEVICE 2 // force copy to user provided device memory (gpu)
/*! \brief AMD rocAL external memcpy flags - for future use
* \ingroup group_rocal_types
*/
-#define ROCAL_MEMCPY_IS_PINNED 4
+#define ROCAL_MEMCPY_IS_PINNED 4 // for future use
/*! \brief rocAL Resize Scaling Mode enum
* \ingroup group_rocal_types
*/
-enum RocalResizeScalingMode
-{
+enum RocalResizeScalingMode {
/*! \brief scales wrt specified size, if only resize width/height is provided the other dimension is scaled according to aspect ratio
*/
ROCAL_SCALING_MODE_DEFAULT = 0,
@@ -304,7 +324,10 @@ enum RocalResizeScalingMode
ROCAL_SCALING_MODE_NOT_SMALLER = 2,
/*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified size
*/
- ROCAL_SCALING_MODE_NOT_LARGER = 3
+ ROCAL_SCALING_MODE_NOT_LARGER = 3,
+ /*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified min and max size
+ */
+ ROCAL_SCALING_MODE_MIN_MAX = 4
};
/*! \brief rocAL Resize Interpolation Type enum
@@ -332,4 +355,102 @@ enum RocalResizeInterpolationType
ROCAL_TRIANGULAR_INTERPOLATION = 5
};
-#endif // MIVISIONX_ROCAL_API_TYPES_H
+/*! \brief Tensor Backend
+ * \ingroup group_rocal_types
+ */
+enum RocalTensorBackend {
+ /*! \brief ROCAL_CPU
+ */
+ ROCAL_CPU = 0,
+ /*! \brief ROCAL_GPU
+ */
+ ROCAL_GPU = 1
+};
+
+/*! \brief Tensor ROI type
+ * \ingroup group_rocal_types
+ */
+enum class RocalROICordsType {
+ /*! \brief ROCAL_LTRB
+ */
+ ROCAL_LTRB = 0,
+ /*! \brief ROCAL_XYWH
+ */
+ ROCAL_XYWH = 1
+};
+
+/*! \brief RocalExternalSourceMode enum - form in which external source input is passed
+ * \ingroup group_rocal_types
+ */
+enum RocalExternalSourceMode {
+ /*! \brief list of filenames passed as input
+ */
+ ROCAL_EXTSOURCE_FNAME = 0,
+ /*! \brief compressed raw buffer passed as input
+ */
+ ROCAL_EXTSOURCE_RAW_COMPRESSED = 1,
+ /*! \brief uncompressed raw buffer passed as input
+ */
+ ROCAL_EXTSOURCE_RAW_UNCOMPRESSED = 2,
+};
+
+/*! \brief rocAL Audio Border Type enum
+ * \ingroup group_rocal_types
+ */
+enum RocalAudioBorderType {
+ /*! \brief AMD ROCAL_ZERO
+ */
+ ROCAL_ZERO = 0,
+ /*! \brief AMD ROCAL_CLAMP
+ */
+ ROCAL_CLAMP = 1,
+ /*! \brief AMD ROCAL_REFLECT
+ */
+ ROCAL_REFLECT = 2
+};
+
+/*! \brief rocAL out-of-bounds policy enum
+ * \ingroup group_rocal_types
+ */
+enum RocalOutOfBoundsPolicy {
+ /*! \brief AMD ROCAL_PAD
+ */
+ ROCAL_PAD = 0,
+ /*! \brief AMD ROCAL_TRIMTOSHAPE
+ */
+ ROCAL_TRIMTOSHAPE = 1,
+ /*! \brief AMD ROCAL_ERROR
+ */
+ ROCAL_ERROR = 2
+};
+
+/*! \brief rocAL mel scale formula enum
+ * \ingroup group_rocal_types
+ */
+enum RocalMelScaleFormula {
+ /*! \brief AMD ROCAL_MELSCALE_SLANEY
+ * Slaney scale - follows the behavior of Slaney's MATLAB Auditory Modelling Work
+ */
+ ROCAL_MELSCALE_SLANEY = 0,
+ /*! \brief AMD ROCAL_MELSCALE_HTK
+ * HTK scale - O'Shaughnessy's book formula, consistent with the Hidden Markov Toolkit (HTK): m = 2595 * log10(1 + (f/700))
+ */
+ ROCAL_MELSCALE_HTK = 1
+};
+
+/*! \brief Tensor Last Batch Policies
+ * \ingroup group_rocal_types
+ */
+enum RocalLastBatchPolicy {
+ /*! \brief ROCAL_LAST_BATCH_FILL - The last batch is filled by either repeating the last sample or by wrapping up the data set.
+ */
+ ROCAL_LAST_BATCH_FILL = 0,
+ /*! \brief ROCAL_LAST_BATCH_DROP - The last batch is dropped if there are not enough samples from the current epoch.
+ */
+ ROCAL_LAST_BATCH_DROP = 1,
+ /*! \brief ROCAL_LAST_BATCH_PARTIAL - The last batch is partially filled with the remaining data from the current epoch, keeping the rest of the samples empty. (currently this policy works similar to FILL in rocAL, PARTIAL policy needs to be handled from python end)
+ */
+ ROCAL_LAST_BATCH_PARTIAL = 2
+};
+
+#endif // MIVISIONX_ROCAL_API_TYPES_H
diff --git a/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h
new file mode 100644
index 000000000..ec62cddc8
--- /dev/null
+++ b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_add_tensor.h
@@ -0,0 +1,36 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "rocal_api_types.h"
+
+class TensorAddTensorNode : public Node {
+ public:
+ TensorAddTensorNode(const std::vector &inputs, const std::vector &outputs);
+ TensorAddTensorNode() = delete;
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+};
diff --git a/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h
new file mode 100644
index 000000000..49d79c89b
--- /dev/null
+++ b/rocAL/include/augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h
@@ -0,0 +1,40 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "rocal_api_types.h"
+
+class TensorMulScalarNode : public Node {
+ public:
+ TensorMulScalarNode(const std::vector &inputs, const std::vector &outputs);
+ TensorMulScalarNode() = delete;
+ void init(float scalar);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ float _scalar;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_downmix.h b/rocAL/include/augmentations/audio_augmentations/node_downmix.h
new file mode 100644
index 000000000..19bb40cc2
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_downmix.h
@@ -0,0 +1,34 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+class DownmixNode : public Node {
+ public:
+ DownmixNode(const std::vector &inputs, const std::vector &outputs);
+ DownmixNode() = delete;
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h b/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h
new file mode 100644
index 000000000..995392edc
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_mel_filter_bank.h
@@ -0,0 +1,45 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "rocal_api_types.h"
+
+class MelFilterBankNode : public Node {
+ public:
+ MelFilterBankNode(const std::vector &inputs, const std::vector &outputs);
+ MelFilterBankNode() = delete;
+ void init(float freq_high, float freq_low, RocalMelScaleFormula mel_formula, int nfilter, bool normalize, float sample_rate);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ float _freq_high = 0;
+ float _freq_low = 0;
+ int _mel_formula = 0;
+ int _nfilter = 128;
+ float _sample_rate = 44100;
+ bool _normalize = true;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h b/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h
new file mode 100644
index 000000000..7bbe7182f
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_non_silent_region_detection.h
@@ -0,0 +1,42 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+
+class NonSilentRegionDetectionNode : public Node {
+ public:
+ NonSilentRegionDetectionNode(const std::vector &inputs, const std::vector &outputs);
+ NonSilentRegionDetectionNode() = delete;
+ void init(float cutoff_db, float reference_power, int reset_interval, int window_length);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ float _cutoff_db = -60.0;
+ float _reference_power = 0.0;
+ int _window_length = 2048;
+ int _reset_interval = 8192;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h b/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h
new file mode 100644
index 000000000..e3ba6c516
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_preemphasis_filter.h
@@ -0,0 +1,44 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
+#include "rocal_api_types.h"
+
+class PreemphasisFilterNode : public Node {
+ public:
+ PreemphasisFilterNode(const std::vector &inputs, const std::vector &outputs);
+ PreemphasisFilterNode() = delete;
+ void init(FloatParam *preemph_coeff, RocalAudioBorderType preemph_border);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ ParameterVX _preemph_coeff;
+ constexpr static float PREEMPH_COEFF_RANGE[2] = {0.97, 0.97};
+ RocalAudioBorderType _preemph_border;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_resample.h b/rocAL/include/augmentations/audio_augmentations/node_resample.h
new file mode 100644
index 000000000..32163b473
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_resample.h
@@ -0,0 +1,42 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "rocal_api_types.h"
+
+class ResampleNode : public Node {
+ public:
+ ResampleNode(const std::vector &inputs, const std::vector &outputs);
+ ResampleNode() = delete;
+ void init(Tensor *resample_rate, float quality);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ Tensor *_output_resample_rate;
+ float _quality;
+ vx_array _src_sample_rate_array;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h b/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h
new file mode 100644
index 000000000..d79576ff6
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_spectrogram.h
@@ -0,0 +1,60 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "rocal_api_types.h"
+
+/// @brief Generates hann window for spectrogram: output[t] = 0.5 * (1 - cos(2 * pi * (t + 0.5) / window_size))
+/// @param output destination buffer; elements [0, window_size) are written, so it must hold at least window_size floats
+/// @param window_size number of window coefficients to generate; THROW is invoked when it is <= 0
+inline void hann_window(float *output, int window_size) {
+ if (window_size <= 0)
+ THROW("Invalid window size, for Hann window")
+ double a = (2.0 * M_PI) / window_size;  // phase increment per sample
+ for (int t = 0; t < window_size; t++) {
+ double phase = a * (t + 0.5);  // evaluated at half-sample offsets (t + 0.5) rather than at integer t
+ output[t] = (0.5 * (1.0 - std::cos(phase)));  // computed in double, stored as float
+ }
+}
+
+class SpectrogramNode : public Node {
+ public:
+ SpectrogramNode(const std::vector &inputs, const std::vector &outputs);
+ SpectrogramNode() = delete;
+ void init(bool is_center_windows, bool is_reflect_padding, int power, int nfft,
+ int window_length, int window_step, std::vector &window_fn);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ std::vector _window_fn;
+ int _power = 2;
+ int _nfft = 2048;
+ int _window_length = 512;
+ int _window_step = 256;
+ bool _is_center_windows = true;
+ bool _is_reflect_padding = true;
+};
diff --git a/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h b/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h
new file mode 100644
index 000000000..e6a5b28af
--- /dev/null
+++ b/rocAL/include/augmentations/audio_augmentations/node_to_decibels.h
@@ -0,0 +1,41 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+
+class ToDecibelsNode : public Node {
+ public:
+ ToDecibelsNode(const std::vector &inputs, const std::vector &outputs);
+ ToDecibelsNode() = delete;
+ void init(float cutoff_db, float multiplier, float reference_magnitude);
+
+ protected:
+ void create_node() override;
+ void update_node() override;
+
+ private:
+ float _cutoff_db = -200.0;
+ float _multiplier = 10.0;
+ float _reference_magnitude = 0.0;
+};
diff --git a/rocAL/include/augmentations/augmentations_nodes.h b/rocAL/include/augmentations/augmentations_nodes.h
index 34bc1d6a8..6f9def1c0 100644
--- a/rocAL/include/augmentations/augmentations_nodes.h
+++ b/rocAL/include/augmentations/augmentations_nodes.h
@@ -22,36 +22,48 @@ THE SOFTWARE.
#pragma once
-#include "node_warp_affine.h"
-#include "node_exposure.h"
-#include "node_vignette.h"
-#include "node_jitter.h"
-#include "node_snp_noise.h"
-#include "node_snow.h"
-#include "node_rain.h"
-#include "node_color_temperature.h"
-#include "node_fog.h"
-#include "node_pixelate.h"
-#include "node_lens_correction.h"
-#include "node_gamma.h"
-#include "node_flip.h"
-#include "node_crop_resize.h"
-#include "node_brightness.h"
-#include "node_contrast.h"
-#include "node_blur.h"
-#include "node_fisheye.h"
-#include "node_blend.h"
-#include "node_resize.h"
-#include "node_rotate.h"
-#include "node_color_twist.h"
-#include "node_hue.h"
-#include "node_saturation.h"
-#include "node_crop_mirror_normalize.h"
-#include "node_resize_mirror_normalize.h"
-#include "node_resize_crop_mirror.h"
-#include "node_ssd_random_crop.h"
-#include "node_crop.h"
-#include "node_random_crop.h"
-#include "node_copy.h"
-#include "node_nop.h"
-#include "node_sequence_rearrange.h"
+#include "augmentations/geometry_augmentations/node_warp_affine.h"
+#include "augmentations/color_augmentations/node_exposure.h"
+#include "augmentations/color_augmentations/node_vignette.h"
+#include "augmentations/effects_augmentations/node_jitter.h"
+#include "augmentations/effects_augmentations/node_snp_noise.h"
+#include "augmentations/effects_augmentations/node_snow.h"
+#include "augmentations/effects_augmentations/node_rain.h"
+#include "augmentations/color_augmentations/node_color_temperature.h"
+#include "augmentations/effects_augmentations/node_fog.h"
+#include "augmentations/effects_augmentations/node_pixelate.h"
+#include "augmentations/geometry_augmentations/node_lens_correction.h"
+#include "augmentations/color_augmentations/node_gamma.h"
+#include "augmentations/geometry_augmentations/node_flip.h"
+#include "augmentations/geometry_augmentations/node_crop_resize.h"
+#include "augmentations/color_augmentations/node_brightness.h"
+#include "augmentations/color_augmentations/node_contrast.h"
+#include "augmentations/color_augmentations/node_blur.h"
+#include "augmentations/geometry_augmentations/node_fisheye.h"
+#include "augmentations/color_augmentations/node_blend.h"
+#include "augmentations/geometry_augmentations/node_resize.h"
+#include "augmentations/geometry_augmentations/node_rotate.h"
+#include "augmentations/color_augmentations/node_color_twist.h"
+#include "augmentations/color_augmentations/node_hue.h"
+#include "augmentations/color_augmentations/node_saturation.h"
+#include "augmentations/geometry_augmentations/node_crop_mirror_normalize.h"
+#include "augmentations/geometry_augmentations/node_resize_mirror_normalize.h"
+#include "augmentations/geometry_augmentations/node_resize_crop_mirror.h"
+#include "augmentations/node_ssd_random_crop.h"
+#include "augmentations/geometry_augmentations/node_crop.h"
+#include "augmentations/geometry_augmentations/node_random_crop.h"
+#include "augmentations/node_copy.h"
+#include "augmentations/node_nop.h"
+#include "augmentations/node_sequence_rearrange.h"
+#include "augmentations/audio_augmentations/node_preemphasis_filter.h"
+#include "augmentations/audio_augmentations/node_spectrogram.h"
+#include "augmentations/audio_augmentations/node_to_decibels.h"
+#include "augmentations/audio_augmentations/node_resample.h"
+#include "augmentations/node_uniform_distribution.h"
+#include "augmentations/node_normal_distribution.h"
+#include "augmentations/arithmetic_augmentations/node_tensor_mul_scalar.h"
+#include "augmentations/arithmetic_augmentations/node_tensor_add_tensor.h"
+#include "augmentations/audio_augmentations/node_non_silent_region_detection.h"
+#include "augmentations/geometry_augmentations/node_slice.h"
+#include "augmentations/effects_augmentations/node_normalize.h"
+#include "augmentations/audio_augmentations/node_mel_filter_bank.h"
diff --git a/rocAL/include/augmentations/color_augmentations/node_blend.h b/rocAL/include/augmentations/color_augmentations/node_blend.h
index cee6ac60f..46d8d94e2 100644
--- a/rocAL/include/augmentations/color_augmentations/node_blend.h
+++ b/rocAL/include/augmentations/color_augmentations/node_blend.h
@@ -21,23 +21,23 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-
-class BlendNode : public Node
-{
-public:
- explicit BlendNode(const std::vector &inputs, const std::vector &outputs);
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
+
+class BlendNode : public Node {
+ public:
+ explicit BlendNode(const std::vector &inputs, const std::vector &outputs);
BlendNode() = delete;
void init(float ratio);
- void init(FloatParam* ratio);
+ void init(FloatParam *ratio);
-protected:
+ protected:
void update_node() override;
void create_node() override;
-private:
+
+ private:
ParameterVX _ratio;
- constexpr static float RATIO_RANGE [2] = {0.1, 0.9};
+ constexpr static float RATIO_RANGE[2] = {0.1, 0.9};
};
\ No newline at end of file
diff --git a/rocAL/include/augmentations/color_augmentations/node_blur.h b/rocAL/include/augmentations/color_augmentations/node_blur.h
index 341c109ff..d26751fc8 100644
--- a/rocAL/include/augmentations/color_augmentations/node_blur.h
+++ b/rocAL/include/augmentations/color_augmentations/node_blur.h
@@ -21,24 +21,23 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-class BlurNode : public Node
-{
-public:
- BlurNode(const std::vector &inputs, const std::vector &outputs);
+class BlurNode : public Node {
+ public:
+ BlurNode(const std::vector &inputs, const std::vector &outputs);
BlurNode() = delete;
- void init(int sdev);
- void init(IntParam *sdev);
+ void init(int kernel_size);
+ void init(IntParam *kernel_size_param);
-protected:
+ protected:
void update_node() override;
void create_node() override;
-private:
- ParameterVX _sdev;
- constexpr static int SDEV_RANGE [2] = {3, 9};
+ private:
+ ParameterVX _kernel_size;
+ constexpr static int KERNEL_SIZE_RANGE[2] = {3, 9};
};
diff --git a/rocAL/include/augmentations/color_augmentations/node_brightness.h b/rocAL/include/augmentations/color_augmentations/node_brightness.h
index b672c81eb..5825fb8ad 100644
--- a/rocAL/include/augmentations/color_augmentations/node_brightness.h
+++ b/rocAL/include/augmentations/color_augmentations/node_brightness.h
@@ -21,27 +21,26 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
-
-class BrightnessNode : public Node
-{
-public:
- BrightnessNode(const std::vector &inputs, const std::vector &outputs);
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
+
+class BrightnessNode : public Node {
+ public:
+ BrightnessNode(const std::vector &inputs, const std::vector &outputs);
BrightnessNode() = delete;
- void init( float alpha, float beta);
- void init( FloatParam* alpha_param, FloatParam* beta_param);
+ void init(float alpha, float beta);
+ void init(FloatParam *alpha_param, FloatParam *beta_param);
-protected:
- void create_node() override ;
+ protected:
+ void create_node() override;
void update_node() override;
-private:
+ private:
ParameterVX _alpha;
ParameterVX _beta;
- constexpr static float ALPHA_RANGE [2] = {0.1, 1.95};
- constexpr static float BETA_RANGE [2] = {0, 25};
+ constexpr static float ALPHA_RANGE[2] = {0.1, 1.95};
+ constexpr static float BETA_RANGE[2] = {0, 25};
};
\ No newline at end of file
diff --git a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h
index ab1bd172f..19f393ba0 100644
--- a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h
+++ b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h
@@ -21,24 +21,24 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-class ColorTemperatureNode : public Node
-{
-public:
- ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs);
+class ColorTemperatureNode : public Node {
+ public:
+ ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs);
ColorTemperatureNode() = delete;
void init(int adjustment);
void init(IntParam *adjustment);
-protected:
- void create_node() override ;
+ protected:
+ void create_node() override;
void update_node() override;
-private:
+
+ private:
ParameterVX _adj_value_param;
- constexpr static int ADJUSTMENT_RANGE [2] = {-99, 99};
+ constexpr static int ADJUSTMENT_RANGE[2] = {-99, 99};
};
\ No newline at end of file
diff --git a/rocAL/include/augmentations/color_augmentations/node_color_twist.h b/rocAL/include/augmentations/color_augmentations/node_color_twist.h
index eddbeb4f6..265abef36 100644
--- a/rocAL/include/augmentations/color_augmentations/node_color_twist.h
+++ b/rocAL/include/augmentations/color_augmentations/node_color_twist.h
@@ -21,31 +21,28 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
-
-class ColorTwistBatchNode : public Node
-{
-public:
- ColorTwistBatchNode(const std::vector &inputs, const std::vector &outputs);
- ColorTwistBatchNode() = delete;
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
+
+class ColorTwistNode : public Node {
+ public:
+ ColorTwistNode(const std::vector &inputs, const std::vector &outputs);
+ ColorTwistNode() = delete;
void init(float alpha, float beta, float hue, float sat);
- void init(FloatParam *alpha, FloatParam *beta, FloatParam *hue, FloatParam *sat);
+ void init(FloatParam *alpha_param, FloatParam *beta_param, FloatParam *hue_param, FloatParam *sat_param);
-protected:
+ protected:
void create_node() override;
void update_node() override;
-private:
+ private:
ParameterVX _alpha;
ParameterVX _beta;
ParameterVX _hue;
ParameterVX _sat;
-
- constexpr static float ALPHA_RANGE [2] = {0.1, 1.95};
- constexpr static float BETA_RANGE [2] = {0.1, 25.0};
- constexpr static float HUE_RANGE [2] = {5.0, 170.0};
- constexpr static float SAT_RANGE [2] = {0.1, 0.4};
-};
\ No newline at end of file
+ constexpr static float ALPHA_RANGE[2] = {0.1, 1.95};
+ constexpr static float BETA_RANGE[2] = {0.1, 25.0};
+ constexpr static float HUE_RANGE[2] = {5.0, 170.0};
+ constexpr static float SAT_RANGE[2] = {0.1, 0.4};
+};
diff --git a/rocAL/include/augmentations/color_augmentations/node_contrast.h b/rocAL/include/augmentations/color_augmentations/node_contrast.h
index 075673c90..e5e096dd7 100644
--- a/rocAL/include/augmentations/color_augmentations/node_contrast.h
+++ b/rocAL/include/augmentations/color_augmentations/node_contrast.h
@@ -22,25 +22,24 @@ THE SOFTWARE.
#pragma once
#include
-#include "node.h"
-#include "parameter_vx.h"
-#include "graph.h"
-
-class RocalContrastNode : public Node
-{
-public:
- RocalContrastNode(const std::vector &inputs, const std::vector &outputs);
- RocalContrastNode() = delete;
- void init(int min, int max);
- void init(IntParam *min, IntParam * max);
-
-protected:
- void create_node() override ;
+
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_vx.h"
+
+class ContrastNode : public Node {
+ public:
+ ContrastNode(const std::vector &inputs, const std::vector &outputs);
+ ContrastNode() = delete;
+ void init(float contrast_factor, float contrast_center);
+ void init(FloatParam *contrast_factor_param, FloatParam *contrast_center_param);
+
+ protected:
+ void create_node() override;
void update_node() override;
-private:
- ParameterVX _min;
- ParameterVX _max;
- constexpr static int CONTRAST_MIN_RANGE [2] = {0, 30};
- constexpr static int CONTRAST_MAX_RANGE [2] = {60, 90};
-};
\ No newline at end of file
+ private:
+ ParameterVX _factor, _center;
+ constexpr static float CONTRAST_FACTOR_RANGE[2] = {0.1, 1.95};
+ constexpr static float CONTRAST_CENTER_RANGE[2] = {60, 90};
+};
diff --git a/rocAL/include/augmentations/color_augmentations/node_exposure.h b/rocAL/include/augmentations/color_augmentations/node_exposure.h
index 4f1cb95f4..825edfa57 100644
--- a/rocAL/include/augmentations/color_augmentations/node_exposure.h
+++ b/rocAL/include/augmentations/color_augmentations/node_exposure.h
@@ -21,23 +21,23 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-class ExposureNode : public Node
-{
-public:
- ExposureNode(const std::vector &inputs, const std::vector &outputs);
+class ExposureNode : public Node {
+ public:
+ ExposureNode(const std::vector &inputs, const std::vector &outputs);
ExposureNode() = delete;
- void init(float shift);
- void init(FloatParam *shift);
-protected:
+ void init(float exposure_factor);
+ void init(FloatParam *exposure_factor_param);
+
+ protected:
void create_node() override;
void update_node() override;
-private:
- ParameterVX _shift;
- vx_array _width_array ,_height_array;
- constexpr static float SHIFT_RANGE [2] = {0.15, 0.95};
-};
\ No newline at end of file
+
+ private:
+ ParameterVX _exposure_factor;
+ constexpr static float EXPOSURE_FACTOR_RANGE[2] = {0.15, 0.95};
+};
diff --git a/rocAL/include/augmentations/color_augmentations/node_gamma.h b/rocAL/include/augmentations/color_augmentations/node_gamma.h
index b113fd96b..3fd62417e 100644
--- a/rocAL/include/augmentations/color_augmentations/node_gamma.h
+++ b/rocAL/include/augmentations/color_augmentations/node_gamma.h
@@ -21,23 +21,22 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-
-class GammaNode : public Node
-{
-public:
- GammaNode(const std::vector &inputs, const std::vector &outputs);
+class GammaNode : public Node {
+ public:
+ GammaNode(const std::vector &inputs, const std::vector &outputs);
GammaNode() = delete;
- void init(float shift);
- void init(FloatParam *shift);
+ void init(float gamma);
+ void init(FloatParam *gamma_param);
-protected:
+ protected:
void update_node() override;
void create_node() override;
-private:
- ParameterVX _shift;
- constexpr static float SHIFT_RANGE [2] = {0.3, 7.00};
+
+ private:
+ ParameterVX _gamma;
+ constexpr static float GAMMA_RANGE[2] = {0.3, 7.00};
};
diff --git a/rocAL/include/augmentations/color_augmentations/node_hue.h b/rocAL/include/augmentations/color_augmentations/node_hue.h
index 79f1639bd..9d76e9dc5 100644
--- a/rocAL/include/augmentations/color_augmentations/node_hue.h
+++ b/rocAL/include/augmentations/color_augmentations/node_hue.h
@@ -21,22 +21,22 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-
-class HueNode : public Node
-{
-public:
- HueNode(const std::vector &inputs, const std::vector &outputs);
+class HueNode : public Node {
+ public:
+ HueNode(const std::vector &inputs, const std::vector &outputs);
HueNode() = delete;
void init(float hue);
void init(FloatParam *hue);
-protected:
+
+ protected:
void create_node() override;
void update_node() override;
-private:
+
+ private:
ParameterVX _hue;
- constexpr static float HUE_RANGE [2] = {-359.0, 359.0};
+ constexpr static float HUE_RANGE[2] = {-359.0, 359.0};
};
diff --git a/rocAL/include/augmentations/color_augmentations/node_saturation.h b/rocAL/include/augmentations/color_augmentations/node_saturation.h
index a503c4c85..e8a085032 100644
--- a/rocAL/include/augmentations/color_augmentations/node_saturation.h
+++ b/rocAL/include/augmentations/color_augmentations/node_saturation.h
@@ -21,22 +21,22 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-
-class SatNode : public Node
-{
-public:
- SatNode(const std::vector &inputs, const std::vector &outputs);
- SatNode() = delete;
+class SaturationNode : public Node {
+ public:
+ SaturationNode(const std::vector &inputs, const std::vector &outputs);
+ SaturationNode() = delete;
void init(float sat);
void init(FloatParam *sat);
-protected:
+
+ protected:
void create_node() override;
void update_node() override;
-private:
- ParameterVX _sat; // For saturation
- constexpr static float SAT_RANGE [2] = {-0.5, 0.5};
+
+ private:
+ ParameterVX _saturation;
+ constexpr static float SAT_RANGE[2] = {-0.5, 0.5};
};
diff --git a/rocAL/include/augmentations/color_augmentations/node_vignette.h b/rocAL/include/augmentations/color_augmentations/node_vignette.h
index 9af231b09..0a93f759f 100644
--- a/rocAL/include/augmentations/color_augmentations/node_vignette.h
+++ b/rocAL/include/augmentations/color_augmentations/node_vignette.h
@@ -21,22 +21,23 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-#include "graph.h"
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-class VignetteNode : public Node
-{
-public:
- VignetteNode(const std::vector &inputs, const std::vector &outputs);
- VignetteNode () = delete;
+class VignetteNode : public Node {
+ public:
+ VignetteNode(const std::vector &inputs, const std::vector &outputs);
+ VignetteNode() = delete;
void init(float sdev);
void init(FloatParam *sdev);
-protected:
+
+ protected:
void create_node() override;
void update_node() override;
-private:
+
+ private:
ParameterVX _sdev;
- constexpr static float SDEV_RANGE [2] = {40 , 60};
+ constexpr static float SDEV_RANGE[2] = {40, 60};
};
diff --git a/rocAL/include/augmentations/effects_augmentations/node_fog.h b/rocAL/include/augmentations/effects_augmentations/node_fog.h
index ea0309dc8..58554adf2 100644
--- a/rocAL/include/augmentations/effects_augmentations/node_fog.h
+++ b/rocAL/include/augmentations/effects_augmentations/node_fog.h
@@ -21,22 +21,22 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
-class FogNode : public Node
-{
-public:
- FogNode(const std::vector &inputs, const std::vector &outputs);
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
+
+class FogNode : public Node {
+ public:
+ FogNode(const std::vector &inputs, const std::vector &outputs);
FogNode() = delete;
void init(float fog_param);
void init(FloatParam *fog_param);
-protected:
+
+ protected:
void create_node() override;
void update_node() override;
-private:
+
+ private:
ParameterVX _fog_param;
- constexpr static float FOG_VALUE_RANGE [2] = {0.2, 0.8};
+ constexpr static float FOG_VALUE_RANGE[2] = {0.2, 0.8};
};
-
-
diff --git a/rocAL/include/augmentations/effects_augmentations/node_jitter.h b/rocAL/include/augmentations/effects_augmentations/node_jitter.h
index 2ddc58645..971d51d95 100644
--- a/rocAL/include/augmentations/effects_augmentations/node_jitter.h
+++ b/rocAL/include/augmentations/effects_augmentations/node_jitter.h
@@ -21,22 +21,23 @@ THE SOFTWARE.
*/
#pragma once
-#include "node.h"
-#include "parameter_factory.h"
-#include "parameter_vx.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_factory.h"
+#include "parameters/parameter_vx.h"
-
-class JitterNode : public Node
-{
-public:
- JitterNode(const std::vector &inputs, const std::vector &outputs);
+class JitterNode : public Node {
+ public:
+ JitterNode(const std::vector &inputs, const std::vector &outputs);
JitterNode() = delete;
- void init(int kernel_size);
- void init(IntParam *kernel_size);
-protected:
+ void init(int kernel_size, int seed);
+ void init(IntParam *kernel_size, int seed);
+
+ protected:
void create_node() override;
void update_node() override;
-private:
+
+ private:
ParameterVX _kernel_size;
- constexpr static int KERNEL_SIZE [2] = {2, 5};
+ int _seed;
+ constexpr static int KERNEL_SIZE[2] = {2, 5};
};
diff --git a/rocAL/include/augmentations/effects_augmentations/node_normalize.h b/rocAL/include/augmentations/effects_augmentations/node_normalize.h
new file mode 100644
index 000000000..a4b76d99c
--- /dev/null
+++ b/rocAL/include/augmentations/effects_augmentations/node_normalize.h
@@ -0,0 +1,51 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+#include "pipeline/graph.h"
+#include "pipeline/node.h"
+#include "parameters/parameter_vx.h"
+
+class NormalizeNode : public Node {
+ public:
+ NormalizeNode(const std::vector &inputs, const std::vector &outputs);
+ NormalizeNode() = delete;
+ void init(std::vector &axes, std::vector &mean, std::vector &std_dev, float scale, float shift);
+
+ protected:
+ void create_node() override;
+ void update_node() override {};
+
+ private:
+ int _axis_mask = 0;
+ vx_array _mean_vx_array, _stddev_vx_array;
+ std::vector _axes;
+ std::vector