From b607105d5ad7214aa21d906e5519bfd48e72fcc8 Mon Sep 17 00:00:00 2001
From: Anerudhan Gopal <agopal@nvidia.com>
Date: Tue, 25 Jun 2024 15:58:13 -0700
Subject: [PATCH] Release notes for cudnn-frontend 1.5.2:

[Enhancement] Allow a stride value of 0, indicating that the tensor is repeated along those dimensions.
For SDPA tensors, the stride of the sequence-length dimension must still be non-zero.
---
 CMakeLists.txt                                |  2 +-
 include/cudnn_frontend.h                      |  2 +-
 .../node/scaled_dot_product_flash_attention.h | 66 +++++++++++--------
 include/cudnn_frontend_Tensor.h               |  2 +-
 python/cudnn/__init__.py                      |  2 +-
 5 files changed, 42 insertions(+), 32 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28eb961..adf22fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.17)
 
-project(cudnn_frontend VERSION 1.5.1)
+project(cudnn_frontend VERSION 1.5.2)
 
 option(CUDNN_FRONTEND_SKIP_JSON_LIB "Defines whether FE should not include nlohmann/json.hpp." OFF)
 option(CUDNN_FRONTEND_BUILD_SAMPLES "Defines if samples are built or not." ON)
diff --git a/include/cudnn_frontend.h b/include/cudnn_frontend.h
index 7bca425..21d2e95 100644
--- a/include/cudnn_frontend.h
+++ b/include/cudnn_frontend.h
@@ -125,7 +125,7 @@
 
 #define CUDNN_FRONTEND_MAJOR_VERSION 1
 #define CUDNN_FRONTEND_MINOR_VERSION 5
-#define CUDNN_FRONTEND_PATCH_VERSION 1
+#define CUDNN_FRONTEND_PATCH_VERSION 2
 #define CUDNN_FRONTEND_VERSION \
     ((CUDNN_FRONTEND_MAJOR_VERSION * 10000) + (CUDNN_FRONTEND_MINOR_VERSION * 100) + CUDNN_FRONTEND_PATCH_VERSION)
 
diff --git a/include/cudnn_frontend/node/scaled_dot_product_flash_attention.h b/include/cudnn_frontend/node/scaled_dot_product_flash_attention.h
index 94de2dc..eb44313 100644
--- a/include/cudnn_frontend/node/scaled_dot_product_flash_attention.h
+++ b/include/cudnn_frontend/node/scaled_dot_product_flash_attention.h
@@ -38,20 +38,25 @@ class SDPANode : public NodeCRTP<SDPANode> {
 
         // check that Q, K, V, O tensors has been assigned
         // check that dim and strides has been assigned and last stride is 1
-#define CUDNN_FE_SDPA_VALIDATE_DIM_STRIDE(port, port_map)                                                       \
-    {                                                                                                           \
-        std::shared_ptr<Tensor_attributes> tensor_ptr = port_map.at(port);                                      \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_dim().size() != 4,                                       \
-                                       error_code_t::ATTRIBUTE_NOT_SET,                                         \
-                                       "The dim for " + std::string(#port) + " is invalid");                    \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_stride().size() != 4,                                    \
-                                       error_code_t::ATTRIBUTE_NOT_SET,                                         \
-                                       "The stride for " + std::string(#port) + " is invalid");                 \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                         \
-            tensor_ptr->get_stride()[3] != 1,                                                                   \
-            error_code_t::GRAPH_NOT_SUPPORTED,                                                                  \
-            "The stride for the last dimension corresponding to the embedding size per head should be 1 for " + \
-                std::string(#port));                                                                            \
+#define CUDNN_FE_SDPA_VALIDATE_DIM_STRIDE(port, port_map)                                                        \
+    {                                                                                                            \
+        std::shared_ptr<Tensor_attributes> tensor_ptr = port_map.at(port);                                       \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_dim().size() != 4,                                        \
+                                       error_code_t::ATTRIBUTE_NOT_SET,                                          \
+                                       "The dim for " + std::string(#port) + " is invalid");                     \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_stride().size() != 4,                                     \
+                                       error_code_t::ATTRIBUTE_NOT_SET,                                          \
+                                       "The stride for " + std::string(#port) + " is invalid");                  \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                          \
+            tensor_ptr->get_stride()[3] != 1,                                                                    \
+            error_code_t::GRAPH_NOT_SUPPORTED,                                                                   \
+            "The stride for the last dimension corresponding to the embedding size per head should be 1 for " +  \
+                std::string(#port));                                                                             \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                          \
+            tensor_ptr->get_stride()[2] == 0,                                                                    \
+            error_code_t::GRAPH_NOT_SUPPORTED,                                                                   \
+            "The stride for the dimension corresponding to the sequence lengths per head should not be 0 for " + \
+                std::string(#port));                                                                             \
     }
 
         CUDNN_FE_VALIDATE_INPUT_TENSOR(input_names::Q);
@@ -681,20 +686,25 @@ class SDPABackwardNode : public NodeCRTP<SDPABackwardNode> {
 
         // check that Q, K, V, O, stats, dO, dQ, dK, dV tensors has been assigned
         // check that dim and strides has been assigned and last stride is 1
-#define CUDNN_FE_SDPA_VALIDATE_DIM_STRIDE(port, port_map)                                                       \
-    {                                                                                                           \
-        std::shared_ptr<Tensor_attributes> tensor_ptr = port_map.at(port);                                      \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_dim().size() != 4,                                       \
-                                       error_code_t::ATTRIBUTE_NOT_SET,                                         \
-                                       "The dim for " + std::string(#port) + " is invalid");                    \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_stride().size() != 4,                                    \
-                                       error_code_t::ATTRIBUTE_NOT_SET,                                         \
-                                       "The stride for " + std::string(#port) + " is invalid");                 \
-        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                         \
-            tensor_ptr->get_stride()[3] != 1,                                                                   \
-            error_code_t::GRAPH_NOT_SUPPORTED,                                                                  \
-            "The stride for the last dimension corresponding to the embedding size per head should be 1 for " + \
-                std::string(#port));                                                                            \
+#define CUDNN_FE_SDPA_VALIDATE_DIM_STRIDE(port, port_map)                                                        \
+    {                                                                                                            \
+        std::shared_ptr<Tensor_attributes> tensor_ptr = port_map.at(port);                                       \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_dim().size() != 4,                                        \
+                                       error_code_t::ATTRIBUTE_NOT_SET,                                          \
+                                       "The dim for " + std::string(#port) + " is invalid");                     \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(tensor_ptr->get_stride().size() != 4,                                     \
+                                       error_code_t::ATTRIBUTE_NOT_SET,                                          \
+                                       "The stride for " + std::string(#port) + " is invalid");                  \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                          \
+            tensor_ptr->get_stride()[3] != 1,                                                                    \
+            error_code_t::GRAPH_NOT_SUPPORTED,                                                                   \
+            "The stride for the last dimension corresponding to the embedding size per head should be 1 for " +  \
+                std::string(#port));                                                                             \
+        RETURN_CUDNN_FRONTEND_ERROR_IF(                                                                          \
+            tensor_ptr->get_stride()[2] == 0,                                                                    \
+            error_code_t::GRAPH_NOT_SUPPORTED,                                                                   \
+            "The stride for the dimension corresponding to the sequence lengths per head should not be 0 for " + \
+                std::string(#port));                                                                             \
     }
 
         CUDNN_FE_VALIDATE_INPUT_TENSOR(input_names::Q);
diff --git a/include/cudnn_frontend_Tensor.h b/include/cudnn_frontend_Tensor.h
index 68070ad..73f797c 100644
--- a/include/cudnn_frontend_Tensor.h
+++ b/include/cudnn_frontend_Tensor.h
@@ -305,7 +305,7 @@ class TensorBuilder_v8 {
                 "CUDNN_BACKEND_TENSOR_DESCRIPTOR: Check and Set the CUDNN_ATTR_TENSOR_UNIQUE_ID as a valid value");
             return std::move(m_tensor);
         }
-        if (m_tensor.btensor_strA[0] <= 0) {
+        if (m_tensor.btensor_strA[0] < 0) {
             set_error_and_throw_exception(
                 &m_tensor,
                 CUDNN_STATUS_BAD_PARAM,
diff --git a/python/cudnn/__init__.py b/python/cudnn/__init__.py
index ad29d10..35eb883 100644
--- a/python/cudnn/__init__.py
+++ b/python/cudnn/__init__.py
@@ -25,7 +25,7 @@
 
 from .datatypes import _library_type, _is_torch_tensor
 
-__version__ = "1.5.1"
+__version__ = "1.5.2"
 
 
 def _tensor(