diff --git a/CMakeLists.txt b/CMakeLists.txt index e7e68ce848..7d50dc2ae4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -341,7 +341,7 @@ if(MIOPEN_USE_MLIR) find_library(LIBMLIRMIOPEN MLIRMIOpen REQUIRED) if(NOT LIBMLIRMIOPEN) message(FATAL_ERROR "library libMLIRMIOpen not found, please reinstall dependencies. \ - Refer to https://github.com/ROCmSoftwarePlatform/MIOpen#installing-the-dependencies") + Refer to https://github.com/ROCm/MIOpen#installing-the-dependencies") else() message(STATUS "Build with library libMLIRMIOpen: " ${LIBMLIRMIOPEN}) set(rocMLIR_VERSION 0.0.1) diff --git a/README.md b/README.md index f73af33a46..440cd7bbc4 100755 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # MIOpen AMD's library for high performance machine learning primitives. -Sources and binaries can be found at [MIOpen's GitHub site](https://github.com/ROCmSoftwarePlatform/MIOpen). +Sources and binaries can be found at [MIOpen's GitHub site](https://github.com/ROCm/MIOpen). The latest released documentation can be read online [here](https://rocm.docs.amd.com/projects/MIOpen/en/latest/index.html). -MIOpen supports two programming models +MIOpen supports two programming models, or backends: -1. [HIP](https://github.com/ROCm-Developer-Tools/HIP) (Primary Support). -2. OpenCL. +1. [HIP](https://github.com/ROCm-Developer-Tools/HIP) +2. OpenCL (deprecated). ## Documentation @@ -44,11 +44,11 @@ python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html * [SQLite3](https://sqlite.org/index.html) - reading and writing performance database * lbzip2 - multi-threaded compress or decompress utility * [MIOpenTENSILE](https://github.com/ROCmSoftwarePlatform/MIOpenTensile) - users can enable this library using the cmake configuration flag`-DMIOPEN_USE_MIOPENTENSILE=On`. (deprecated after ROCm 5.1.1) -* [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) - AMD library for Basic Linear Algebra Subprograms (BLAS) on the ROCm platform. 
- * Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCmSoftwarePlatform/rocBLAS/tree/master-rocm-2.10) - * Minimum version branch for post-ROCm 3.5 [master-rocm-3.5](https://github.com/ROCmSoftwarePlatform/rocBLAS/releases/tag/rocm-3.5.0) -* [MLIR](https://github.com/ROCmSoftwarePlatform/llvm-project-mlir) - (Multi-Level Intermediate Representation) with its MIOpen dialect to support and complement kernel development. -* [Composable Kernel](https://github.com/ROCmSoftwarePlatform/composable_kernel) - C++ templated device library for GEMM-like and reduction-like operators. +* [rocBLAS](https://github.com/ROCm/rocBLAS) - AMD library for Basic Linear Algebra Subprograms (BLAS) on the ROCm platform. + * Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCm/rocBLAS/tree/master-rocm-2.10) + * Minimum version branch for post-ROCm 3.5 [master-rocm-3.5](https://github.com/ROCm/rocBLAS/releases/tag/rocm-3.5.0) +* [MLIR](https://github.com/ROCm/llvm-project-mlir) - (Multi-Level Intermediate Representation) with its MIOpen dialect to support and complement kernel development. +* [Composable Kernel](https://github.com/ROCm/composable_kernel) - C++ templated device library for GEMM-like and reduction-like operators. ## Installing MIOpen with pre-built packages @@ -84,7 +84,7 @@ The script `utils/install_precompiled_kernels.sh` provided as part of MIOpen aut The above script depends on the *rocminfo* package to query the GPU architecture. -More info can be found [here](https://github.com/ROCmSoftwarePlatform/MIOpen/blob/develop/docs/cache.md#installing-pre-compiled-kernels). +More info can be found [here](https://github.com/ROCm/MIOpen/blob/develop/docs/cache.md#installing-pre-compiled-kernels). 
## Installing the dependencies @@ -104,7 +104,7 @@ cmake -P install_deps.cmake --minimum --prefix /root/MIOpen/install_dir This prefix can used to specify the dependency path during the configuration phase using the `CMAKE_PREFIX_PATH`. -* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend. +* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCm/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend. * MIOpen's OpenCL backend uses [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) by default. Users can install MIOpenGEMM minimum release by using `apt-get install miopengemm`. @@ -212,7 +212,7 @@ This will install the library to the `CMAKE_INSTALL_PREFIX` path that was set. ## Building the driver -MIOpen provides an [application-driver](https://github.com/ROCmSoftwarePlatform/MIOpen/tree/master/driver) which can be used to execute any one particular layer in isolation and measure performance and verification of the library. +MIOpen provides an [application-driver](https://github.com/ROCm/MIOpen/tree/master/driver) which can be used to execute any one particular layer in isolation and measure performance and verification of the library. 
The driver can be built using the `MIOpenDriver` target: @@ -338,5 +338,5 @@ MIOpen's paper is freely available and can be accessed on arXiv: ## Porting from cuDNN to MIOpen The [porting -guide](https://github.com/ROCmSoftwarePlatform/MIOpen/tree/develop/docs/MIOpen_Porting_Guide.md) +guide](https://github.com/ROCm/MIOpen/tree/develop/docs/MIOpen_Porting_Guide.md) highlights the key differences between the current cuDNN and MIOpen APIs. diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md index 3ae5db123a..ffa30cbed8 100644 --- a/docs/DebugAndLogging.md +++ b/docs/DebugAndLogging.md @@ -172,14 +172,14 @@ Additionally, using environment variable "MIOPEN_GEMM_ENFORCE_BACKEND", can over both MIOpenGEMM and rocBlas depending on the input configuration: * `MIOPEN_GEMM_ENFORCE_BACKEND=1`, use rocBLAS if enabled -* `MIOPEN_GEMM_ENFORCE_BACKEND=2`, use MIOpenGEMM for FP32, use rocBLAS for FP16 if enabled +* `MIOPEN_GEMM_ENFORCE_BACKEND=2`, reserved * `MIOPEN_GEMM_ENFORCE_BACKEND=3`, no gemm will be called -* `MIOPEN_GEMM_ENFORCE_BACKEND=4`, use MIOpenTensile for FP32, use rocBLAS for FP16 if enabled +* `MIOPEN_GEMM_ENFORCE_BACKEND=4`, reserved * `MIOPEN_GEMM_ENFORCE_BACKEND=`, use default behavior To disable using rocBlas entirely, set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off` during MIOpen configuration. -More information on logging with rocBlas can be found [here](https://github.com/ROCmSoftwarePlatform/rocBLAS/wiki/5.Logging). +More information on logging with rocBlas can be found [here](https://github.com/ROCm/rocBLAS/wiki/5.Logging). ## Numerical Checking diff --git a/docs/Getting_Started_FusionAPI.md b/docs/Getting_Started_FusionAPI.md index ed437ea4ee..44140cb0ef 100644 --- a/docs/Getting_Started_FusionAPI.md +++ b/docs/Getting_Started_FusionAPI.md @@ -3,7 +3,7 @@ Fusion API: Getting Started ## Introduction Increasing depth of deep learning networks necessitate the need for novel mechanisms to improve performance on GPUs. 
One mechanism to achieve higher efficiency is to _fuse_ separate kernels into a single kernel to reduce off-chip memory access and avoid kernel launch overhead. This document outlines the addition of a Fusion API to the MIOpen library. The fusion API would allow users to specify operators that they wants to fuse in a single kernel, compile it and then launch the kernel. While not all combinations might be supported by the library, the API is flexible enough to allow the specification of many operations in any order from a finite set of supported operations. The API provides a mechanism to report unsupported combinations. -A complete example of the Fusion API in the context of MIOpen is given [here](https://github.com/ROCmSoftwarePlatform/MIOpenExamples/tree/master/fusion). We will use code from the example project as we go along. The example project creates a fusion plan to merge the convolution, bias and activation operations. For a list of supported fusion operations and associated constraints please refer to the [Supported Fusions](#supported-fusions) section. The example depicts bare-bones code without any error checking or even populating the tensors with meaningful data in the interest of simplicity. +A complete example of the Fusion API in the context of MIOpen is given [here](https://github.com/ROCm/MIOpenExamples/tree/master/fusion). We will use code from the example project as we go along. The example project creates a fusion plan to merge the convolution, bias and activation operations. For a list of supported fusion operations and associated constraints please refer to the [Supported Fusions](#supported-fusions) section. The example depicts bare-bones code without any error checking or even populating the tensors with meaningful data in the interest of simplicity. 
The following list outlines the steps required @@ -48,7 +48,7 @@ The fusion API introduces the notion of **operators** which represent different Notice that _Bias_ is a separate operator, although it is typically only used with convolution. This list is expected to grow as support for more operators is added to the API, moreover, operators for backward passes are in the works as well. -The fusion API provides calls for the creation of the supported operators, here we would describe the process for the convolution operator, details for other operators may be found in the [miopen header file](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/fusion.html) +The fusion API provides calls for the creation of the supported operators, here we would describe the process for the convolution operator, details for other operators may be found in the [miopen header file](https://rocm.docs.amd.com/projects/MIOpen/en/latest/fusion.html) Once the fusion plan descriptor is created, two or more operators can be added to it by using the individual operator creation API calls. Creation of an operator might fail if the API does not support the fusion of the operations being added and report back immediately to the user. For our example we need to add the Convolution, Bias and Activation operations to our freshly minted fusion plan. This is done using the following calls for the Convolution, Bias and Activation operations respectively: @@ -77,7 +77,7 @@ miopenCreateOpBiasForward(fusePlanDesc, &biasOp, bias.desc); miopenCreateOpActivationForward(fusePlanDesc, &activOp, miopenActivationRELU); ``` -It may be noted that `conv_desc` is the regular MIOpen Convolution descriptor and is created in the standard way before it is referenced here. For more details on creating and setting the convolution descriptor please refer to the example code as well as the [MIOpen documentation](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/convolution.html). 
In the above snippet `weights.desc` refers to the `miopenTensorDescriptor_t` for the convolution operations and `bias.desc` refers to the object of the same type for the bias operation. The order of insertion of operators indicates the order in which the operations would be performed on the data. Therefore, the above code implies that the convolution operation would be the first operation to execute on the incoming data, followed by the bias and activation operations. +It may be noted that `conv_desc` is the regular MIOpen Convolution descriptor and is created in the standard way before it is referenced here. For more details on creating and setting the convolution descriptor please refer to the example code as well as the [MIOpen documentation](https://rocm.docs.amd.com/projects/MIOpen/en/latest/convolution.html). In the above snippet `weights.desc` refers to the `miopenTensorDescriptor_t` for the convolution operations and `bias.desc` refers to the object of the same type for the bias operation. The order of insertion of operators indicates the order in which the operations would be performed on the data. Therefore, the above code implies that the convolution operation would be the first operation to execute on the incoming data, followed by the bias and activation operations. During this process, it is important that the returned codes be checked to make sure that the operations as well as their order is supported. The operator insertion might fail for a number of reasons such as unsupported sequence of operations, unsupported dimensions of the input or in case of convolution unsupported dimensions for the filters. In the above example, these aspects are ignored for the sake of simplicity. diff --git a/docs/cache.md b/docs/cache.md index e8a08ff5c7..7180cfc024 100644 --- a/docs/cache.md +++ b/docs/cache.md @@ -15,7 +15,7 @@ The are several ways to disable the cache. 
This is generally useful for developm Updating MIOpen and removing the cache -------------------------------------- -For MIOpen version 2.3 and earlier, if the compiler changes, or the user modifies the kernels then the cache must be deleted for the MIOpen version in use; e.g., `rm -rf $HOME/.cache/miopen/`. More information about the cache can be found [here](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/cache.html). +For MIOpen version 2.3 and earlier, if the compiler changes, or the user modifies the kernels then the cache must be deleted for the MIOpen version in use; e.g., `rm -rf $HOME/.cache/miopen/`. More information about the cache can be found [here](https://rocm.docs.amd.com/projects/MIOpen/en/latest/cache.html). For MIOpen version 2.4 and later, MIOpen's kernel cache directory is versioned so that users' cached kernels will not collide when upgrading from earlier version. @@ -30,4 +30,4 @@ If MIOpen kernels package is not installed, or if we do not deliver the kernels The performance degradation mentioned in the warning only affects the network start-up time (aka "initial iteration time") and thus can be safely ignored. -Please refer to the MIOpen installation instructions: [installing MIOpen kernels package](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/install.html#installing-miopen-kernels-package) for guidance on installing the MIOpen kernels package. +Please refer to the MIOpen installation instructions: [installing MIOpen kernels package](https://rocm.docs.amd.com/projects/MIOpen/en/latest/install.html#installing-miopen-kernels-package) for guidance on installing the MIOpen kernels package. 
diff --git a/docs/driver.md b/docs/driver.md index df090b4366..d17b578813 100644 --- a/docs/driver.md +++ b/docs/driver.md @@ -1,9 +1,9 @@ ## Building the driver -MIOpen provides an [application-driver](https://github.com/ROCmSoftwarePlatform/MIOpen/tree/master/driver) which can be used to execute any one particular layer in isolation and measure performance and verification of the library. +MIOpen provides an [application-driver](https://github.com/ROCm/MIOpen/tree/master/driver) which can be used to execute any one particular layer in isolation and measure performance and verification of the library. The driver can be built using the `MIOpenDriver` target: ` cmake --build . --config Release --target MIOpenDriver ` **OR** ` make MIOpenDriver ` -Documentation on how to run the driver is [here](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/driver.html). +Documentation on how to run the driver is [here](https://rocm.docs.amd.com/projects/MIOpen/en/latest/driver.html). diff --git a/docs/find_and_immediate.md b/docs/find_and_immediate.md index 313ea9c26e..69d4d999dd 100644 --- a/docs/find_and_immediate.md +++ b/docs/find_and_immediate.md @@ -64,7 +64,7 @@ The results of Find() are returned in an array of `miopenConvAlgoPerf_t` structs This call sequence is executed once per session as it is inherently expensive. Of those, `miopenFindConvolution*()` is the most expensive call. It caches its own results on disk, so the subsequent calls during the same MIOpen session will execute faster. However, it is better to remember results of `miopenFindConvolution*()` in the application, as recommended above. -Internally MIOpen's Find calls will compile and benchmark a set of `solvers` contained in `miopenConvAlgoPerf_t` this is done in parallel per `miopenConvAlgorithm_t`. The level of parallelism can be controlled using an environment variable. 
See the debugging section [controlling parallel compilation](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/DebugAndLogging.html#controlling-parallel-compilation) for more details. +Internally MIOpen's Find calls will compile and benchmark a set of `solvers` contained in `miopenConvAlgoPerf_t` this is done in parallel per `miopenConvAlgorithm_t`. The level of parallelism can be controlled using an environment variable. See the debugging section [controlling parallel compilation](https://rocm.docs.amd.com/projects/MIOpen/en/latest/DebugAndLogging.html#controlling-parallel-compilation) for more details. ## Immediate Mode API @@ -139,7 +139,7 @@ miopenConvolutionForwardCompileSolution(handle, ## Immediate Mode Fallback -The immediate mode is underpinned by the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html), however it may not contain every configuration of interest. If Find-Db encounters a database miss it has two fallback paths it can take, depending on whether the cmake variable MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK is set to ON or OFF. However, if the user requires the best possible performance they should run the Find stage at least once. +The immediate mode is underpinned by the [Find-Db](https://rocm.docs.amd.com/projects/MIOpen/en/latest/finddb.html), however it may not contain every configuration of interest. If Find-Db encounters a database miss it has two fallback paths it can take, depending on whether the cmake variable MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK is set to ON or OFF. However, if the user requires the best possible performance they should run the Find stage at least once. ### 1. AI-based Heuristic Fallback (Default) @@ -173,10 +173,10 @@ OpenCL support for immediate mode via the fallback is limited to fp32 datatypes. MIOpen provides a set of Find modes which are used to accelerate the Find calls. 
The different modes are set by using the environment variable `MIOPEN_FIND_MODE`, and setting it to one of the values: - `NORMAL`, or `1`: Normal Find: This is the full Find mode call, which will benchmark all the solvers and return a list. -- `FAST`, or `2`: Fast Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, utilize the Immediate mode fallback. If Start-up times are expected to be faster, but worse GPU performance. -- `HYBRID`, or `3`, or unset `MIOPEN_FIND_MODE`: Hybrid Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, use the existing Find machinery. Slower start-up times than Fast Find, but no GPU performance drop. +- `FAST`, or `2`: Fast Find: Checks the [Find-Db](https://rocm.docs.amd.com/projects/MIOpen/en/latest/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, utilize the Immediate mode fallback. Start-up times are expected to be faster, but GPU performance is expected to be worse. +- `HYBRID`, or `3`, or unset `MIOPEN_FIND_MODE`: Hybrid Find: Checks the [Find-Db](https://rocm.docs.amd.com/projects/MIOpen/en/latest/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, use the existing Find machinery. Slower start-up times than Fast Find, but no GPU performance drop. - `4`: This value is reserved and should not be used. -- `DYNAMIC_HYBRID`, or `5`: Dynamic Hybrid Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, uses that entry. If there is a miss, uses the existing Find machinery with skipping non-dynamic kernels. Faster start-up times than Hybrid Find, but GPU performance may be a bit worse.
+- `DYNAMIC_HYBRID`, or `5`: Dynamic Hybrid Find: Checks the [Find-Db](https://rocm.docs.amd.com/projects/MIOpen/en/latest/finddb.html) for an entry. If there is a Find-Db hit, uses that entry. If there is a miss, uses the existing Find machinery with skipping non-dynamic kernels. Faster start-up times than Hybrid Find, but GPU performance may be a bit worse. Currently, the default Find mode is `DYNAMIC_HYBRID`. To run the full `NORMAL` Find mode, set the environment as: ``` diff --git a/docs/finddb.md b/docs/finddb.md index e6e21b901b..602835cea0 100644 --- a/docs/finddb.md +++ b/docs/finddb.md @@ -1,7 +1,7 @@ Find-Db Database ================ -Prior to MIOpen 2.0, users utilized calls such as `miopenFindConvolution*Algorithm()` to gather a set of convolution algorithms in the form of an array of `miopenConvSolution_t` structs. This process is time consuming because it requires online benchmarking of competing algorithms. In MIOpen 2.0 an [immediate mode](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/find_and_immediate.html) is introduced. +Prior to MIOpen 2.0, users utilized calls such as `miopenFindConvolution*Algorithm()` to gather a set of convolution algorithms in the form of an array of `miopenConvSolution_t` structs. This process is time consuming because it requires online benchmarking of competing algorithms. In MIOpen 2.0 an [immediate mode](https://rocm.docs.amd.com/projects/MIOpen/en/latest/find_and_immediate.html) is introduced. Immediate mode is based on a database which contains the results of calls to the legacy Find() stage. This database is called `Find-Db`. It consists of two parts: - **System Find-Db**, a system-wide storage which holds the pre-run values for the most applicable configurations, @@ -9,7 +9,7 @@ Immediate mode is based on a database which contains the results of calls to the The User Find-Db **always takes precedence** over System Find-Db. 
-By default, System Find-Db resides within MIOpen's install location, while User Find-Db resides in the user's home directory. See [Setting up locations](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/install.html#setting-up-locations) for more information. +By default, System Find-Db resides within MIOpen's install location, while User Find-Db resides in the user's home directory. See [Setting up locations](https://github.com/ROCm/MIOpen#setting-up-locations) for more information. * The System Find-Db is *not* modified upon installation of MIOpen. * There are separate Find databases for HIP and OpenCL backends. diff --git a/docs/index.rst b/docs/index.rst index bbab6c4b9d..a9687765e6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,7 @@ Welcome to MIOpen ================= **Advanced Micro Devices's open source deep learning library.** -Sources and binaries can be found at `MIOpen's GitHub site <https://github.com/ROCmSoftwarePlatform/MIOpen>`_. +Sources and binaries can be found at `MIOpen's GitHub site <https://github.com/ROCm/MIOpen>`_. Indices and tables ================== diff --git a/docs/install.md b/docs/install.md index 0932cd2563..de8cd2d4e4 100644 --- a/docs/install.md +++ b/docs/install.md @@ -12,15 +12,14 @@ * [ROCm cmake](https://github.com/RadeonOpenCompute/rocm-cmake) - provide cmake modules for common build tasks needed for the ROCM software stack. * [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library * [Boost](http://www.boost.org/) - * MIOpen uses `boost-system` and `boost-filesystem` packages to enable persistent [kernel cache](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/cache.html) - * Version 1.79 is recommended, older version may need patches to work on newer systems, e.g.
boost1{69,70,72} w/glibc-2.34 -* [SQLite3](https://sqlite.org/index.html) - reading and writing performance database -* [MIOpenTENSILE](https://github.com/ROCmSoftwarePlatform/MIOpenTensile) - users can enable this library using the cmake configuration flag`-DMIOPEN_USE_MIOPENTENSILE=On`. (deprecated after ROCm 5.1.1) -* [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) - AMD library for Basic Linear Algebra Subprograms (BLAS) on the ROCm platform. - * Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCmSoftwarePlatform/rocBLAS/tree/master-rocm-2.10) - * Minimum version branch for post-ROCm 3.5 [master-rocm-3.5](https://github.com/ROCmSoftwarePlatform/rocBLAS/releases/tag/rocm-3.5.0) -* [MLIR](https://github.com/ROCmSoftwarePlatform/llvm-project-mlir) - (Multi-Level Intermediate Representation) with its MIOpen dialect to support and complement kernel development. -* [Composable Kernel](https://github.com/ROCmSoftwarePlatform/composable_kernel) - C++ templated device library for GEMM-like and reduction-like operators. + * MIOpen uses `boost-system` and `boost-filesystem` packages + * Version 1.83 is recommended +* [SQLite3](https://sqlite.org/index.html) - reading and writing performance database, enabling persistent [kernel cache](https://rocm.docs.amd.com/projects/MIOpen/en/latest/cache.html) +* [rocBLAS](https://github.com/ROCm/rocBLAS) - AMD library for Basic Linear Algebra Subprograms (BLAS) on the ROCm platform. + * Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCm/rocBLAS/tree/master-rocm-2.10) + * Minimum version branch for post-ROCm 3.5 [master-rocm-3.5](https://github.com/ROCm/rocBLAS/releases/tag/rocm-3.5.0) +* [MLIR](https://github.com/ROCm/llvm-project-mlir) - (Multi-Level Intermediate Representation) with its MIOpen dialect to support and complement kernel development. 
+* [Composable Kernel](https://github.com/ROCm/composable_kernel) - C++ templated device library for GEMM-like and reduction-like operators. ## Installing MIOpen with pre-built packages @@ -56,7 +55,7 @@ The script `utils/install_precompiled_kernels.sh` provided as part of MIOpen aut The above script depends on the __rocminfo__ package to query the GPU architecture. -More info can be found [here](https://github.com/ROCmSoftwarePlatform/MIOpen/blob/develop/docs/cache.md#installing-pre-compiled-kernels). +More info can be found [here](https://github.com/ROCm/MIOpen/blob/develop/docs/cache.md#installing-pre-compiled-kernels). ## Installing the dependencies @@ -72,6 +71,6 @@ cmake -P install_deps.cmake --minimum --prefix /root/MIOpen/install_dir ``` This prefix can used to specify the dependency path during the configuration phase using the `CMAKE_PREFIX_PATH`. -* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCmSoftwarePlatform/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend. +* MIOpen's HIP backend uses [rocBLAS](https://github.com/ROCm/rocBLAS) by default. Users can install rocBLAS minimum release by using `apt-get install rocblas`. To disable using rocBLAS set the configuration flag `-DMIOPEN_USE_ROCBLAS=Off`. rocBLAS is *not* available for the OpenCL backend. -* MIOpen's OpenCL backend uses [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) by default. Users can install MIOpenGEMM minimum release by using `apt-get install miopengemm`. +* MIOpen's OpenCL backend uses [MIOpenGEMM](https://github.com/ROCm/MIOpenGEMM) by default. Users can install MIOpenGEMM minimum release by using `apt-get install miopengemm`. 
diff --git a/docs/perfdatabase.md b/docs/perfdatabase.md index f28671b1dc..5644334b2a 100644 --- a/docs/perfdatabase.md +++ b/docs/perfdatabase.md @@ -11,7 +11,7 @@ User PerfDb **always takes precedence** over System PerfDb. MIOpen also has auto-tuning functionality, which is able to find optimized kernel parameter values for a specific configuration. The auto-tune process may take a substantial amount of time, however, once the optimized values are found, they are stored in the User PerfDb. MIOpen then will automatically read and use these parameter values when needed again instead of running the expensive auto-tuning search. -By default, System PerfDb resides within MIOpen's install location, while User PerfDb resides in the user's home directory. See [Setting up locations](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/install.html#setting-up-locations) for more information. +By default, System PerfDb resides within MIOpen's install location, while User PerfDb resides in the user's home directory. See [Setting up locations](https://github.com/ROCm/MIOpen#setting-up-locations) for more information. The System PerfDb is not modified upon installation of MIOpen. diff --git a/docs/releasenotes.md b/docs/releasenotes.md index 4c0547f30c..1f399320fc 100644 --- a/docs/releasenotes.md +++ b/docs/releasenotes.md @@ -58,30 +58,30 @@ This release provides additional bug fixes and support for embedded build using - Added cmake flag for embedding system databases when building a static library - Added a way to disable building MIOpenDriver when building a static library - Added CC compiler detection in ROCm environment -- Known issue: This release may show warnings for "obsolete configs" in the performance database. 
This can be fixed by rerunning tuning on a specfic network; [see tuning documentation](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/perfdatabase.html#miopen-find-enforce) +- Known issue: This release may show warnings for "obsolete configs" in the performance database. This can be fixed by rerunning tuning on a specific network; [see tuning documentation](https://rocm.docs.amd.com/projects/MIOpen/en/latest/perfdatabase.html#miopen-find-enforce) ### 09/18/2020 [ 2.7.0 ] -- This release contains a new reduction API; see [API documentation](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/apireference.html) for more information. Additional features for embedded builds have been added, and further support for 3D convolutional networks. +- This release contains a new reduction API; see [API documentation](https://rocm.docs.amd.com/projects/MIOpen/en/latest/apireference.html) for more information. Additional features for embedded builds have been added, and further support for 3D convolutional networks. - Added additional tunings into performance database - Added general reduction API - Added cmake flag for embedding binary database into a static MIOpen build - Added cmake flag for embedding system find-db text files into static MIOpen build -- Fixed issue with GEMM workspace size calculation for backwards data convolutions [#381](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/381) -- Fixed issue with 3D pooling indexing [#365](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/365) +- Fixed issue with GEMM workspace size calculation for backwards data convolutions [#381](https://github.com/ROCm/MIOpen/issues/381) +- Fixed issue with 3D pooling indexing [#365](https://github.com/ROCm/MIOpen/issues/365) ### 08/20/2020 [ 2.6.0 ] - This release contains convolution performance improvements, improved multi-threading behavior, and improved stability for half precision convolutions. Initial iteration time has been reduced with the introduction of hybrid find mode.
Builds for a static library have been refined for this release. -- Added MIOPEN_FIND_MODE=3 as the new default convolution Find mode; see documentation [here](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/find_and_immediate.html#find-modes) for details +- Added MIOPEN_FIND_MODE=3 as the new default convolution Find mode; see documentation [here](https://rocm.docs.amd.com/projects/MIOpen/en/latest/find_and_immediate.html#find-modes) for details - Added a more runtime-parameterized version of pooling to reduce the number of online compilations - Improved the performance of backwards spatial batch normalization for small images -- Fixed issue with std::logic_error in SQLite deleter [#306](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/306) +- Fixed issue with std::logic_error in SQLite deleter [#306](https://github.com/ROCm/MIOpen/issues/306) - Fixed issues with half precision stability for convolutions - Fixed issues with multi-threaded SQLite database accesses - Fixed issues with 3-D convolutions and incorrect parameters @@ -112,7 +112,7 @@ This release provides additional bug fixes and support for embedded build using - Added support for AMD's rocclr runtime and compiler - Improved performance for implicitGEMM and Winograd algorithms - Improved database locking -- Fixed issue with GPU memory segmentation fault on asymmetric padding [#142](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/142) +- Fixed issue with GPU memory segmentation fault on asymmetric padding [#142](https://github.com/ROCm/MIOpen/issues/142) ### 03/01/2020 [ 2.3.0 ] @@ -125,7 +125,7 @@ This release provides additional bug fixes and support for embedded build using - Added full CO v3 support for all kernels in MIOpen - Added new Winograd group convolution kernels - Added an API to query MIOpen's version -- Added parallel compilation in initial convolutional algorithm search; partial solution to [#130](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/130) +- Added parallel compilation
in initial convolutional algorithm search; partial solution to [#130](https://github.com/ROCm/MIOpen/issues/130) - Added SQLite binary program cache - Improved logging across all layers - Improved MIOpen's internal design for calling convolutional solvers @@ -141,7 +141,7 @@ Changes: - Added support for multiple ROCm installations - Added additional support for code object v3 -- Fixed issue with incorrect LRN calculation [#127](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/127) +- Fixed issue with incorrect LRN calculation [#127](https://github.com/ROCm/MIOpen/issues/127) - Fixed incorrect performance database documentation - Fixed issue with incorrect workspace calculation in group convolutions - Fixed issue with unsupported hardware instructions used with inline assembly @@ -151,7 +151,7 @@ Changes: - This release contains bug fixes, performance improvements, and expanded applicability for specific convolutional algorithms. - MIOpen has posted a citable paper on ArXiv [here](https://arxiv.org/abs/1910.00078). -- An SQLite database has been added to replace the text-based performance database. While the text file still exists, by default SQLite is used over the text-based performance database; see [documentation](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/perfdatabase.html) from more details. +- An SQLite database has been added to replace the text-based performance database. While the text file still exists, by default SQLite is used over the text-based performance database; see [documentation](https://ROCm.github.io/MIOpen/doc/html/perfdatabase.html) for more details. Changes: @@ -183,7 +183,7 @@ Changes: - Added further support for bfp16 in convolutions - Added a [docker hub link](https://hub.docker.com/r/rocm/miopen/tags) for MIOpen docker images. 
- Fixed issue with NaN appearing on batch normalization backwards pass in fp16 -- Fixed softmax kernel bug in log mode [#112](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/112) +- Fixed softmax kernel bug in log mode [#112](https://github.com/ROCm/MIOpen/issues/112) - Fixed ROCm gfx803 support issue [#869](https://github.com/RadeonOpenCompute/ROCm/issues/869) - Improved performance of batch normalization fp16 forward training layers - Improved performance of convolutions layers @@ -235,16 +235,16 @@ Changes: - Added a shipped System Find-Db containing offline run Find() results - Added an additional, faster batch norm assembly kernel for fp16 - Added CTC loss layer -- Added MIOpenDriver as a default component in MIOpen's build [#34](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/34) -- Fixed C compatability for boolean types in C API [#103](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/103) -- Fixed incorrect calculation in per-activation batch norm backwards pass [#104](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/104) -- Fixed bug [#95](https://github.com/ROCmSoftwarePlatform/MIOpen/issues/95) with asm batch norm ISA +- Added MIOpenDriver as a default component in MIOpen's build [#34](https://github.com/ROCm/MIOpen/issues/34) +- Fixed C compatibility for boolean types in C API [#103](https://github.com/ROCm/MIOpen/issues/103) +- Fixed incorrect calculation in per-activation batch norm backwards pass [#104](https://github.com/ROCm/MIOpen/issues/104) +- Fixed bug [#95](https://github.com/ROCm/MIOpen/issues/95) with asm batch norm ISA - Fixed IsApplicable bug in Conv3x3Asm for group convolutions - Improved performance of 1x1 stride 2 fp32 convolutions in the forward and backwards data passes - Improved 3-D convolution stability - Improved applicability of direct convolution backwards weights for 2x2, 5x10, and 5x20 filter sizes - Improved maintainability in kernels and cpp code -- Updated rocBLAS minimum version to branch 
[master-rocm-2.6](https://github.com/ROCmSoftwarePlatform/rocBLAS/tree/master-rocm-2.6) +- Updated rocBLAS minimum version to branch [master-rocm-2.6](https://github.com/ROCm/rocBLAS/tree/master-rocm-2.6) ### 05/03/2019 [ 1.8.1 ] @@ -419,7 +419,7 @@ Known Issues: - RNNs do not support fp16 - Training with CNNs does not support fp16 - Users may encounter a warning that their performance database is out of date. The performance database can be updated by setting the environment variable for just the initial run of an application: `MIOPEN_FIND_ENFORCE=search` -For more information on the performance database, see: https://rocmsoftwareplatform.github.io/MIOpen/doc/html/perfdatabase.html# +For more information on the performance database, see: https://ROCm.github.io/MIOpen/doc/html/perfdatabase.html# ### 07/19/2018 [ 1.4.1 ] @@ -433,7 +433,7 @@ Known Issues: - RNNs do not support fp16 - Training with CNNs does not support fp16 - Users may encounter a warning that their performance database is out of date. 
The performance database can be updated by setting the environment variable for just the initial run of an application: `MIOPEN_FIND_ENFORCE=search` -For more information on the performance database, see: https://rocmsoftwareplatform.github.io/MIOpen/doc/html/perfdatabase.html# +For more information on the performance database, see: https://ROCm.github.io/MIOpen/doc/html/perfdatabase.html# ### 07/06/2018 [ 1.4.0 ] diff --git a/driver/dropout_gpu_emulator.hpp b/driver/dropout_gpu_emulator.hpp index 522cffde56..dd29aed5e4 100644 --- a/driver/dropout_gpu_emulator.hpp +++ b/driver/dropout_gpu_emulator.hpp @@ -110,7 +110,7 @@ void xorwow_lite_init_emu(prngStates* cur_state, cur_state->d = 6615241; - // Adopt constants choice of rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) + // Adopt constants choice of rocRAND (https://github.com/ROCm/rocRAND) const unsigned int s0 = static_cast(seed) ^ 0x2c7f967fU; const unsigned int s1 = static_cast(seed >> 32) ^ 0xa03697cbU; const unsigned int t0 = 1228688033 * s0; diff --git a/include/miopen/config.h.in b/include/miopen/config.h.in index 734d19f263..d87f5e105d 100644 --- a/include/miopen/config.h.in +++ b/include/miopen/config.h.in @@ -148,8 +148,8 @@ /// WORKAROUND_BOOST_ISSUE_392 /// Workaround for https://github.com/boostorg/config/issues/392#issuecomment-1109889533 -/// See also https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1490#issuecomment-1109928102, -/// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1543 +/// See also https://github.com/ROCm/MIOpen/pull/1490#issuecomment-1109928102, +/// https://github.com/ROCm/MIOpen/pull/1543 /// TODO: Remove the W/A as soon we switch to the properly fixed boost. #if MIOPEN_BACKEND_HIP #include diff --git a/src/comgr.cpp b/src/comgr.cpp index 4ee5f635be..4040881e09 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -58,7 +58,7 @@ /// Correctness problems on MI200 with base driver 5.11.14 (~ROCm 4.3.1). /// With base driver 5.11.32 the errors disappear. 
-/// More info at https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1257. +/// More info at https://github.com/ROCm/MIOpen/issues/1257. #define WORKAROUND_ISSUE_1257 (HIP_PACKAGE_VERSION_FLAT >= 4003021331ULL) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_LOG_CALLS) @@ -1131,7 +1131,7 @@ static void PrintVersion() } /// \ref -/// https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/blob/21193e875fe2133b38872decb7b2d0f985f48496/src/targets/gpu/compile_hip.cpp#L44 +/// https://github.com/ROCm/AMDMIGraphX/blob/21193e875fe2133b38872decb7b2d0f985f48496/src/targets/gpu/compile_hip.cpp#L44 /// Workaround hiprtc's broken API static void hiprtc_program_destroy(hiprtcProgram prog) { hiprtcDestroyProgram(&prog); } using hiprtc_program_ptr = MIOPEN_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy); diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp index 52d00346cf..f5fa35adfb 100644 --- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp +++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp @@ -67,7 +67,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) cvt_bf16_fp32_t target_val; target_val.f32 = src_val; // BF16 round and NaN preservation code matches - // https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h + // https://github.com/ROCm/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h if((~target_val.u32 & 0x7f800000) == 0) // Inf or NaN { // When all of the exponent bits are 1, the value is Inf or NaN. diff --git a/src/include/miopen/batchnorm/solvers.hpp b/src/include/miopen/batchnorm/solvers.hpp index 70d64bb204..7edba36b49 100644 --- a/src/include/miopen/batchnorm/solvers.hpp +++ b/src/include/miopen/batchnorm/solvers.hpp @@ -32,7 +32,7 @@ #include /// W/A for build error for OCL BN kernels when datatype is FP16 and MIO_BN_VARIANT=1. 
See: -/// https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1549#issuecomment-1152644636 +/// https://github.com/ROCm/MIOpen/issues/1549#issuecomment-1152644636 #define WORKAROUND_ISSUE_1549_FP16_BUILD_ERROR 1 namespace miopen { diff --git a/src/include/miopen/bfloat16.hpp b/src/include/miopen/bfloat16.hpp index 57726b1463..708c168ea2 100644 --- a/src/include/miopen/bfloat16.hpp +++ b/src/include/miopen/bfloat16.hpp @@ -42,7 +42,7 @@ class bfloat16 : boost::totally_ordered> } bits_st = {rhs}; // BF16 round and NaN preservation code matches - // https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h + // https://github.com/ROCm/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h if((~bits_st.bf16_st & 0x7f800000) == 0) // Inf or NaN { // When all of the exponent bits are 1, the value is Inf or NaN. diff --git a/src/include/miopen/conv/asm_implicit_gemm.hpp b/src/include/miopen/conv/asm_implicit_gemm.hpp index 1ca9f49a5c..35a8a24fe6 100644 --- a/src/include/miopen/conv/asm_implicit_gemm.hpp +++ b/src/include/miopen/conv/asm_implicit_gemm.hpp @@ -36,7 +36,7 @@ #include /// W/A for issue 1979: igemm solver does not support group conv. See: -/// https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1979 +/// https://github.com/ROCm/MIOpen/issues/1979 #define WORKAROUND_ISSUE_1979 1 namespace miopen { diff --git a/src/include/miopen/solver.hpp b/src/include/miopen/solver.hpp index 577a7e3387..972cd2906a 100644 --- a/src/include/miopen/solver.hpp +++ b/src/include/miopen/solver.hpp @@ -111,7 +111,7 @@ struct SolverBase /// * Value 1.0 corresponds to the 100% utilization of HW capabilities as /// if Direct computational algorithm is used. /// * [Notice] WTI may exceed 1.0 for highly optimized algorithms like Winograd. 
- /// * @see https://github.com/ROCmSoftwarePlatform/MIOpen/issues/410 + /// * @see https://github.com/ROCm/MIOpen/issues/410 virtual float GetWti(const ExecutionContext& ctx, const boost::any& problem) const = 0; // Returns the workspace size required by the solver for a given ExecutionContext @@ -1019,7 +1019,7 @@ struct PerformanceConvMlirIgemm : PerfConfigBase int GemmNPerThread; bool use_spare_set; - /// \ref https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1154 + /// \ref https://github.com/ROCm/MIOpen/issues/1154 static PerformanceConvMlirIgemm& MlirHeuristicInitRequest() { static PerformanceConvMlirIgemm heur; @@ -1093,7 +1093,7 @@ struct PerformanceConvMlirIgemmXdlops : PerfConfigBased = 6615241; - // Adopt constants choice of rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) + // Adopt constants choice of rocRAND (https://github.com/ROCm/rocRAND) const uint s0 = (uint)(seed) ^ 0x2c7f967fU; const uint s1 = (uint)(seed >> 32) ^ 0xa03697cbU; const uint t0 = 1228688033 * s0; diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp index c1a77c90db..f5f24baa81 100644 --- a/src/kernels/bfloat16_dev.hpp +++ b/src/kernels/bfloat16_dev.hpp @@ -67,7 +67,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) cvt_bf16_fp32_t target_val; target_val.f32 = src_val; // BF16 round and NaN preservation code matches - // https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h + // https://github.com/ROCm/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h if((~target_val.u32 & 0x7f800000) == 0) // Inf or NaN { // When all of the exponent bits are 1, the value is Inf or NaN. diff --git a/src/ocl/handleocl.cpp b/src/ocl/handleocl.cpp index 9f36a8b5c5..ce5c680c26 100644 --- a/src/ocl/handleocl.cpp +++ b/src/ocl/handleocl.cpp @@ -195,7 +195,7 @@ static bool PrintOpenCLDeprecateMsg() MIOPEN_LOG_W( "please port your application to the better supported and functional HIP backend. 
"); MIOPEN_LOG_W("If you have any questions, please reach out to the MIOpen developers at "); - MIOPEN_LOG_W("https://github.com/ROCmSoftwarePlatform/MIOpen"); + MIOPEN_LOG_W("https://github.com/ROCm/MIOpen"); return true; } diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 4f4c362bf3..ac515b6b05 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -31,7 +31,7 @@ #include /// Disable ConvHipImplicitGemmBwdDataV4R1Xdlops for FP32 by default. -/// \ref https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1206. +/// \ref https://github.com/ROCm/MIOpen/issues/1206. #define WORKAROUND_ISSUE_1206 1 #define WORKAROUND_SWDEV_329642 1 diff --git a/src/solver/conv_mlir_igemm_bwd.cpp b/src/solver/conv_mlir_igemm_bwd.cpp index b6d7d5a40b..85f58c4282 100644 --- a/src/solver/conv_mlir_igemm_bwd.cpp +++ b/src/solver/conv_mlir_igemm_bwd.cpp @@ -61,7 +61,7 @@ bool ConvMlirIgemmBwd::IsApplicable(const ExecutionContext& ctx, // save compilation overhead if(IsXdlopsSupport(ctx)) return false; - // Refer to https://github.com/ROCmSoftwarePlatform/llvm-project-private/issues/389 + // Refer to https://github.com/ROCm/llvm-project-private/issues/389 const auto device_name = ctx.GetStream().GetDeviceName(); if(StartsWith(device_name, "gfx900")) return false; diff --git a/src/solver/conv_mlir_igemm_fwd.cpp b/src/solver/conv_mlir_igemm_fwd.cpp index ccc27ccbd0..08f30cc2d7 100644 --- a/src/solver/conv_mlir_igemm_fwd.cpp +++ b/src/solver/conv_mlir_igemm_fwd.cpp @@ -181,7 +181,7 @@ bool ConvMlirIgemmFwd::IsApplicable(const ExecutionContext& ctx, // save compilation overhead if(IsXdlopsSupport(ctx)) return false; - // Refer to https://github.com/ROCmSoftwarePlatform/llvm-project-private/issues/389 + // Refer to https://github.com/ROCm/llvm-project-private/issues/389 const auto device_name = ctx.GetStream().GetDeviceName(); 
if(StartsWith(device_name, "gfx900")) return false; diff --git a/src/solver/conv_mlir_igemm_wrw.cpp b/src/solver/conv_mlir_igemm_wrw.cpp index 1c8a303388..50088e1af1 100644 --- a/src/solver/conv_mlir_igemm_wrw.cpp +++ b/src/solver/conv_mlir_igemm_wrw.cpp @@ -62,7 +62,7 @@ bool ConvMlirIgemmWrW::IsApplicable(const ExecutionContext& ctx, // save compilation overhead if(IsXdlopsSupport(ctx)) return false; - // Refer to https://github.com/ROCmSoftwarePlatform/llvm-project-private/issues/389 + // Refer to https://github.com/ROCm/llvm-project-private/issues/389 const auto device_name = ctx.GetStream().GetDeviceName(); if(StartsWith(device_name, "gfx900")) return false; diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 10dfe96a20..46a65f5c4d 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -370,7 +370,7 @@ bool PerformanceConfigConvOclBwdWrw2::IsValid( return false; size_t n_wei_blk = workgroup_size / wei_blk_sz; if(n_wei_blk == 0) - { /// \todo This is quickfix for DIV/0, see ROCmSoftwarePlatform/MIOpen/issues/70. + { /// \todo This is quickfix for DIV/0, see ROCm/MIOpen/issues/70. MIOPEN_LOG_I2("ConvOClBwdWrW2: GRP_SZ < wei_blk_sz, not applicable?"); return false; } diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 5d0e1b24d4..0fef47f5af 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -195,7 +195,7 @@ auto PerfFieldRules() // Winograd v21 is preferred on Vega10/Vega20 ASICs due to ~25% performance regression with Winograd // v30. The exception is Winograd F(3,2) stride2 as this mode is unsupported in v21. 
Details: -// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1927#issuecomment-1412741130 +// https://github.com/ROCm/MIOpen/pull/1927#issuecomment-1412741130 template inline bool IsWinogradV21Preferred(const std::string& asic, const ProblemDescription& problem) { diff --git a/src/solver/conv_winoRxS_fused.cpp b/src/solver/conv_winoRxS_fused.cpp index 0893ca1afb..19881d4e58 100644 --- a/src/solver/conv_winoRxS_fused.cpp +++ b/src/solver/conv_winoRxS_fused.cpp @@ -60,7 +60,7 @@ namespace { // Winograd v21 is preferred on Vega10/Vega20 ASICs due to ~25% performance regression with Winograd // v30. The exception is Winograd F(3,2) stride2 as this mode is unsupported in v21. Details: -// https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1927#issuecomment-1412741130 +// https://github.com/ROCm/MIOpen/pull/1927#issuecomment-1412741130 template inline bool IsWinogradV21Preferred(const std::string& asic, const miopen::conv::ProblemDescription& problem) diff --git a/src/solver/gemm_common.cpp b/src/solver/gemm_common.cpp index 22d4b3a2e6..be0294ad48 100644 --- a/src/solver/gemm_common.cpp +++ b/src/solver/gemm_common.cpp @@ -31,7 +31,7 @@ /// This W/A disables all GEMM convolution solvers for xDLOPs /// targets when MIOpenGEMM is used (OCL BE). More info at -/// https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1315. +/// https://github.com/ROCm/MIOpen/issues/1315. /// /// W/A affects ROCm releases starting from 4.5 and also /// pre-5.0 Mainline HIP builds, e.g. 9148. 
diff --git a/src/solver/pooling/backwardNd.cpp b/src/solver/pooling/backwardNd.cpp index 77dc917d2a..4095dfe14d 100644 --- a/src/solver/pooling/backwardNd.cpp +++ b/src/solver/pooling/backwardNd.cpp @@ -31,7 +31,7 @@ #include #include -#define WORKAROUND_ISSUE_MIFIN_80 1 // https://github.com/ROCmSoftwarePlatform/MIFin/issues/80 +#define WORKAROUND_ISSUE_MIFIN_80 1 // https://github.com/ROCm/MIFin/issues/80 namespace miopen { diff --git a/src/solver/pooling/forwardNaive.cpp b/src/solver/pooling/forwardNaive.cpp index c0d0ccb5b2..6ee893617e 100644 --- a/src/solver/pooling/forwardNaive.cpp +++ b/src/solver/pooling/forwardNaive.cpp @@ -30,7 +30,7 @@ #include #include -#define WORKAROUND_ISSUE_MIFIN_80 1 // https://github.com/ROCmSoftwarePlatform/MIFin/issues/80 +#define WORKAROUND_ISSUE_MIFIN_80 1 // https://github.com/ROCm/MIFin/issues/80 namespace miopen { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index aec30828eb..afb6b20f94 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -683,7 +683,7 @@ endif() if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_issue_1576 SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED FLOAT_DISABLED HALF_ENABLED - # Regression test for https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1576 + # Regression test for https://github.com/ROCm/MIOpen/issues/1576 ENVIRONMENT MIOPEN_FIND_MODE=1 MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvDirectNaiveConvBwd COMMAND $ ${MIOPENDRIVER_MODE_CONV} --forw 2 --in_layout NCHW --out_layout NCHW --fil_layout NCHW -n 256 -c 1024 -H 14 -W 14 -k 256 -y 1 -x 1 -p 0 -q 0 -u 1 -v 1 -l 1 -j 1 -m conv -g 1 -t 1 ) @@ -691,9 +691,9 @@ if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_half SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED FLOAT_DISABLED HALF_ENABLED # WORKAROUND_ISSUE_2110_2: tests for 2110 and 2160 shall be added to "test_pooling3d --all" but this is # impossible until backward pooling limitation 
(issue #2110 (2)) is fully fixed. - # Partial (3D only) regression test for https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2160. + # Partial (3D only) regression test for https://github.com/ROCm/MIOpen/issues/2160. COMMAND $ ${MIOPENDRIVER_MODE_POOL} -M 0 --input 1x64x41x40x70 -y 41 -x 40 -Z 70 -m avg -F 1 -t 1 -i 1 - # Partial (3D only) regression test for https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2110 (1). + # Partial (3D only) regression test for https://github.com/ROCm/MIOpen/issues/2110 (1). COMMAND $ ${MIOPENDRIVER_MODE_POOL} -M 0 --input 1x64x41x40x100 -y 4 -x 4 -Z 100 -m max -F 1 -t 1 -i 1 ) @@ -705,14 +705,14 @@ if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_float_half_gfx10 SKIP_UNLESS_ALL GFX900_DISABLED GFX906_DISABLED GFX908_DISABLED GFX90A_DISABLED GFX103X_ENABLED HALF_ENABLED # Regression test for: # [Navi21] Fixing Batchnorm backward precision issues by adjusting workgroup size (SWDEV-292187, SWDEV-319919) - # https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1386 + # https://github.com/ROCm/MIOpen/pull/1386 COMMAND $ ${MIOPENDRIVER_MODE_BN} -n 256 -c 512 -H 18 -W 18 -m 1 --forw 0 -b 1 -r 1 COMMAND $ ${MIOPENDRIVER_MODE_BN} -n 256 -c 512 -H 28 -W 28 -m 1 --forw 0 -b 1 -r 1 ) # Disabled for gfx908 due to WORKAROUND_ISSUE_1787. 
add_custom_test(test_miopendriver_regression_big_tensor GFX900_DISABLED GFX906_DISABLED GFX908_DISABLED SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED - # Regression test for https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1661 + # Regression test for https://github.com/ROCm/MIOpen/issues/1661 # Issue #1697: this is large test which has to run in serial and not enabled on gfx900/gfx906 COMMAND $ ${MIOPENDRIVER_MODE_CONV} -W 5078 -H 4903 -c 24 -n 5 -k 1 --fil_w 3 --fil_h 3 --pad_w 6 --pad_h 4 -F 1 ) @@ -724,7 +724,7 @@ if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) add_custom_test(test_miopendriver_regression_half_gfx9 SKIP_UNLESS_ALL GFX900_DISABLED GFX906_DISABLED GFX908_DISABLED GFX94X_ENABLED GFX103X_DISABLED FLOAT_DISABLED HALF_ENABLED # Regression test for: # [SWDEV-375617] Fix 3d convolution Host API bug - # https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1935 + # https://github.com/ROCm/MIOpen/pull/1935 COMMAND $ ${MIOPENDRIVER_MODE_CONV} -n 2 -c 64 --in_d 128 -H 128 -W 128 -k 32 --fil_d 3 -y 3 -x 3 --pad_d 1 -p 1 -q 1 --conv_stride_d 1 -u 1 -v 1 --dilation_d 1 -l 1 -j 1 --spatial_dim 3 -m conv -g 1 -F 1 -t 1 ) @@ -735,7 +735,7 @@ if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) # Why we have to use the driver: # The transposed convolutions are paritally implemented in the convolution_api layer, # but test apps (including test_conv*) were designed as unit tests and, therefore, do not use the public API. - # Also serves as a regression test for https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2459. + # Also serves as a regression test for https://github.com/ROCm/MIOpen/issues/2459. 
COMMAND $ ${MIOPENDRIVER_MODE_CONV} -m trans -x 1 -y 1 -W 112 -H 112 -c 64 -n 8 -k 32 -p 0 -q 0 -u 1 -v 1 -l 1 -j 1 -g 1 -F 0 -V 1 COMMAND $ ${MIOPENDRIVER_MODE_CONV} -m trans -x 1 -y 7 -W 17 -H 17 -c 32 -n 128 -k 16 -p 3 -q 0 -u 1 -v 1 -l 1 -j 1 -g 2 -F 0 -V 1 COMMAND $ ${MIOPENDRIVER_MODE_CONV} -m trans -x 10 -y 5 -W 341 -H 79 -c 32 -n 4 -k 8 -p 0 -q 0 -u 2 -v 2 -l 1 -j 1 -g 4 -F 0 -V 1 @@ -747,7 +747,7 @@ if(${MIOPEN_TEST_WITH_MIOPENDRIVER}) ) add_custom_test(test_miopendriver_regression_issue_2047 GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED HALF_ENABLED BF16_ENABLED INT8_ENABLED - # Regression test for: MIOpenIm3d2Col stuck with ROCm update, https://github.com/ROCmSoftwarePlatform/MIOpen/issues/2047 + # Regression test for: MIOpenIm3d2Col stuck with ROCm update, https://github.com/ROCm/MIOpen/issues/2047 ENVIRONMENT MIOPEN_FIND_MODE=normal MIOPEN_DEBUG_FIND_ONLY_SOLVER=GemmFwdRest COMMAND $ ${MIOPENDRIVER_MODE_CONV} -n 1 -c 1 --in_d 2 -H 1 -W 2 -k 2 --fil_d 2 -y 1 -x 2 @@ -1682,7 +1682,7 @@ set(CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW_ENV MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvCkIgemmFwdV6r1DlopsNchw MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW=1) -# gfx908 disabled as a workaround for https://github.com/ROCmSoftwarePlatform/MIOpen/pull/1790/files?diff=split&w=1#r982923610 +# gfx908 disabled as a workaround for https://github.com/ROCm/MIOpen/pull/1790/files?diff=split&w=1#r982923610 add_custom_test(test_conv_ck_igemm_fwd_v6r1_dlops_nchw FLOAT_ENABLED HALF_ENABLED BF16_DISABLED GFX908_DISABLED GFX103X_ENABLED SKIP_UNLESS_ALL ENVIRONMENT ${CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW_ENV} COMMAND $ ${MIOPEN_TEST_FLOAT_ARG} --verbose --input 128 1024 14 14 --weights 2048 1024 1 1 --pads_strides_dilations 0 0 2 2 1 1 --disable-backward-data --disable-backward-weights @@ -2019,7 +2019,7 @@ add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1WrW GFX103X_ENABLED HALF_ENA # which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. 
# smoke_solver_ConvHipImplicitGemmV4R1Fwd is split to BF16+FP16 and FP32 tests because of # WORKAROUND_ISSUE_2038, which disables validation of FP16 and BF16 datatypes in this test, -# see https://github.com/ROCmSoftwarePlatform/MIOpen/pull/2043#issuecomment-1482657160. +# see https://github.com/ROCm/MIOpen/pull/2043#issuecomment-1482657160. add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp16_bf16 GFX103X_ENABLED FLOAT_DISABLED HALF_ENABLED BF16_ENABLED TEST_TUNING ENVIRONMENT MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 MIOPEN_FIND_MODE=normal MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmV4R1Fwd COMMAND $ ${TEST_CONV_VERBOSE_F} --input 256 32 27 27 --weights 128 32 1 1 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} --disable-validation diff --git a/test/dropout_util.hpp b/test/dropout_util.hpp index 33a73f590f..c17da2857e 100644 --- a/test/dropout_util.hpp +++ b/test/dropout_util.hpp @@ -197,7 +197,7 @@ inline void xorwow_lite_init_emu(prngStates* cur_state, cur_state->d = 6615241; - // Adopt constants choice of rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) + // Adopt constants choice of rocRAND (https://github.com/ROCm/rocRAND) const unsigned int s0 = static_cast(seed) ^ 0x2c7f967fU; const unsigned int s1 = static_cast(seed >> 32) ^ 0xa03697cbU; const unsigned int t0 = 1228688033 * s0;