ARM-software · christophe0606 · Sep 13, 2024 · Jul 18, 2024 · Jul 22, 2024 · Jul 22, 2024
diff --git a/Documentation/Doxygen/dsp.dxy.in b/Documentation/Doxygen/dsp.dxy.in
@@ -923,6 +923,7 @@ INPUT                  = ./src/mainpage.md   \
                          ./src/introduction.md   \
                          ./src/template.md   \
                          ./src/guidelines.md \
+                         ./src/cpp_complex.md \
                          ./src/vectorop.md   \
                          ./src/memory_allocator.md   \
                          ./src/memory_static_dynamic.md   \

diff --git a/Documentation/Doxygen/src/cpp_complex.md b/Documentation/Doxygen/src/cpp_complex.md
@@ -0,0 +1,37 @@
+# Complex numbers {#dsppp_complex}
+
+Complex numbers are supported. They benefit from vector instructions only with Helium. When only DSP extensions are available, a scalar version of the algorithm is used.
+
+To use them, you just need to use the `std::complex` datatype.
+
+For instance, you can use:
+
+```C
+std::complex<float32_t>
+std::complex<float16_t>
+std::complex<Q31>
+std::complex<Q15>
+```
+
+`Q7` complex numbers have not been implemented because there is no equivalent function in the C CMSIS-DSP to compare with.
+
+With `std::complex`, you can declare a vector with:
+
+```C
+Vector<std::complex<Q31>,10> a;
+```
+
+Operator `conjugate` is available. 
+
+If you want a Hermitian product, you can just write:
+
+```C
+auto result = dot(a,conjugate(b));
+```
+
+Mixed complex / real arithmetic is supported but may not yet be very efficient. In some cases, it may be faster to convert the real object to complex before doing the computation.
+
+It is a temporary limitation.
+
+Note that there is not yet any function to get the real or imaginary part of a vector or matrix. You'll need to loop over the elements and use the `std::complex` functions. As a consequence, those operations cannot yet participate in the fusion mechanism.
+
diff --git a/Documentation/Doxygen/src/dsppp_main.md b/Documentation/Doxygen/src/dsppp_main.md
@@ -15,4 +15,5 @@ The headers are not yet part of the CMSIS-DSP pack since they are experimental.
 * @subpage dsppp_matrix "Matrix operators"
 * @subpage dsppp_building "Building and running examples"
 * @subpage dsppp_guidelines "Usage guidelines"
+* @subpage dsppp_complex "Complex numbers"
 
diff --git a/Documentation/Doxygen/src/history.txt b/Documentation/Doxygen/src/history.txt
@@ -5,6 +5,85 @@
   <th>Version</th>
   <th>Description</th>
 </tr>
+<tr>
+  <td>v1.16.2</td>
+  <td>
+    * Improvements to documentation
+    * Added arm_fir_decimate_f64
+    * Removed some gcc workarounds that are no longer needed for Helium
+    * Removed redundant test in arm_atan2_q15 and arm_atan2_q31
+    * Replaced INFINITY by maximum positive float
+    * Replaced hex constants with 2's complement equivalents
+    * Added C++ template extension (outside of pack - headers available from github repository)
+    * Added new compilation defines (ARM_DSP_ATTRIBUTE, ARM_DSP_TABLE_ATTRIBUTE and ARM_DSP_CUSTOM_CONFIG)
+    * Solved several github issues
+    * Corrected wrong version number in pack for v1.16.0
+    * Corrected issue #201 in v1.16.1 (related to new ARM_DSP_ATTRIBUTE)
+  </td>
+</tr>
+<tr>
+  <td>v1.16.1</td>
+  <td>
+    * Improvements to documentation
+    * Added arm_fir_decimate_f64
+    * Removed some gcc workarounds that are no longer needed for Helium
+    * Removed redundant test in arm_atan2_q15 and arm_atan2_q31
+    * Replaced INFINITY by maximum positive float
+    * Replaced hex constants with 2's complement equivalents
+    * Added C++ template extension (outside of pack - headers available from github repository)
+    * Added new compilation defines (ARM_DSP_ATTRIBUTE, ARM_DSP_TABLE_ATTRIBUTE and ARM_DSP_CUSTOM_CONFIG)
+    * Solved several github issues
+  </td>
+</tr>
+<tr>
+  <td>v1.16.0</td>
+  <td>
+    v1.16.0 release
+
+    * Improvements to documentation
+    * Added arm_fir_decimate_f64
+    * Removed some gcc workarounds that are no longer needed for Helium
+    * Removed redundant test in arm_atan2_q15 and arm_atan2_q31
+    * Replaced INFINITY by maximum positive float
+    * Replaced hex constants with 2's complement equivalents
+    * Added C++ template extension (outside of pack - headers available from github repository)
+    * Solved issue #198
+    * Solved issue #197
+    * Solved issue #197
+    * Solved issue #98
+    * Solved issue #195
+    * Solved issue #180
+    * Solved issue #191
+    * Solved issue #189
+    * Solved issue #188
+    * Solved issue #187
+    * Solved issue #178
+    * Solved issue #184
+    * Solved issue #183
+    * Solved issue #172
+    * Solved issue #170
+    * Solved issue #165
+    * Solved issue #156
+    * Solved issue #164
+    * Solved issue #163
+    * Solved issue #160
+    * Solved issue #159
+    * Solved issue #146
+    * Solved issue #145
+    * Solved issue #133
+    * Solved issue #144
+    * Solved issue #142
+    * Solved issue #141
+    * Solved issue #140
+    * Solved issue #139
+    * Solved issue #138
+    * Solved issue #135
+    * Solved issue #131
+    * Solved issue #118
+    * Solved issue #124
+    * Solved issue #99
+  </td>
+</tr>
 <tr>
   <td>v1.15.0</td>
   <td>

diff --git a/dsppp/Include/dsppp/DSP/basic.hpp b/dsppp/Include/dsppp/DSP/basic.hpp
@@ -20,10 +20,12 @@
 
 #define DSP_UNROLL 1
 
+// No vector instruction for complex number implementation
 template<typename T,typename DST,
 typename std::enable_if<has_vector_inst<DST>() &&
                         IsVector<DST>::value &&
-         SameElementType<DST,T>::value,bool>::type = true>
+         compatible_element<DST,T>() &&
+         !is_complex<DST>(),bool>::type = true>
 inline void _Fill(DST &v,
                   const T val, 
                   vector_length_t l,
@@ -50,7 +52,8 @@ inline void _Fill(DST &v,
 template<typename T,typename DST,
 typename std::enable_if<has_vector_inst<DST>() &&
          must_use_matrix_idx<DST>() &&
-         SameElementType<DST,T>::value,bool>::type = true>
+         SameElementType<DST,T>() &&
+         !is_complex<DST>(),bool>::type = true>
 inline void _Fill2D(DST &v,
                     const T val, 
                     const vector_length_t rows,
@@ -72,7 +75,7 @@ inline void _Fill2D(DST &v,
               }
           }
 
-          for(; col < cols;col += nb_lanes)
+          for(; col < cols;col ++)
           {
              for(int k=0;k<(1<<DSP_UNROLL);k++)
              {
@@ -90,7 +93,7 @@ inline void _Fill2D(DST &v,
               v.matrix_store(row,col,inner::vconst(val));
           }
 
-          for(; col < cols;col += nb_lanes)
+          for(; col < cols;col ++)
           {
               v(row,col) = val;
           }
@@ -105,7 +108,9 @@ Evaluation : used when result is a vector
 */
 template<typename DA,typename DB,
 typename std::enable_if<has_vector_inst<DA>() &&
-                        vector_idx_pair<DA,DB>(),bool>::type = true>
+                        vector_idx_pair<DA,DB>() &&
+                        !is_complex<DA>() &&
+                        !is_complex<DB>(),bool>::type = true>
 inline void eval(DA &v,
                  const DB& other,
                  const vector_length_t l,
@@ -132,7 +137,9 @@ inline void eval(DA &v,
 
 template<typename DA,typename DB,
 typename std::enable_if<has_vector_inst<DA>() &&
-                        must_use_matrix_idx_pair<DA,DB>(),bool>::type = true>
+                        must_use_matrix_idx_pair<DA,DB>() &&
+                        !is_complex<DA>() &&
+                        !is_complex<DB>(),bool>::type = true>
 inline void eval2D(DA &v,
                    const DB& other,
                    const vector_length_t rows,
@@ -155,7 +162,7 @@ inline void eval2D(DA &v,
               }
           }
 
-          for(; col < cols;col += nb_lanes)
+          for(; col < cols;col ++)
           {
              for(int k=0;k<(1<<DSP_UNROLL);k++)
              {
@@ -173,7 +180,7 @@ inline void eval2D(DA &v,
               v.matrix_store(row,col,other.matrix_op(row,col));
           }
 
-          for(; col < cols;col += nb_lanes)
+          for(; col < cols;col ++)
           {
               v(row,col) = other(row,col);
           }
@@ -182,21 +189,23 @@ inline void eval2D(DA &v,
 
 template<typename DA,typename DB,
          typename std::enable_if<has_vector_inst<DA>() &&
+                                !is_complex<DA>() &&
+                                !is_complex<DB>() &&
                                 vector_idx_pair<DA,DB>(),bool>::type = true>
-inline DotResult<DA> _dot(const DA& a,
-                         const DB& b,
-                         const vector_length_t l,
-                         const DSP* = nullptr)
+inline DotResult<DotFieldResult<DA,DB>> _dot(const DA& a,
+                                              const DB& b,
+                                              const vector_length_t l,
+                                              const DSP* = nullptr)
 {
-    using Acc = DotResult<DA>;
-    using T = typename traits<DA>::Scalar;
-    using Temp = typename vector_traits<T>::temp_accumulator;
-    constexpr int nb_lanes = vector_traits<T>::nb_lanes;
+    using ScalarResult = DotFieldResult<DA,DB> ;
+    using Acc = DotResult<ScalarResult>;
+    using Temp = typename vector_traits<ScalarResult>::temp_accumulator;
+    constexpr int nb_lanes = vector_traits<ScalarResult>::nb_lanes;
     constexpr unsigned int U = DSP_UNROLL;
     index_t i;
 
     Acc acc = Acc{};
-    Temp vacc = vector_traits<T>::temp_acc_zero();
+    Temp vacc = vector_traits<ScalarResult>::temp_acc_zero();
 
     for(i=0 ; i <= l-(nb_lanes<<U); i += (nb_lanes<<U))
     {
@@ -218,16 +227,18 @@ inline DotResult<DA> _dot(const DA& a,
 
 template<typename DA,typename DB,
          typename std::enable_if<has_vector_inst<DA>() &&
-                                 vector_idx_pair<DA,DB>(),bool>::type = true>
+                                 vector_idx_pair<DA,DB>() &&
+                                 !is_complex<DA>() &&
+                                 !is_complex<DB>(),bool>::type = true>
 inline void _swap(DA&& a,
                   DB&& b,
                   const vector_length_t l,
                   const DSP* = nullptr)
 {
-      using Scalar = typename ElementType<DA>::type;
+      using Scalar = typename ElementType<remove_constref_t<DA>>::type;
       using Vector = typename vector_traits<Scalar>::vector;
 
-      constexpr int nb_lanes = vector_traits<typename ElementType<DA>::type>::nb_lanes;
+      constexpr int nb_lanes = vector_traits<typename ElementType<remove_constref_t<DA>>::type>::nb_lanes;
       index_t i=0;
       Vector tmpa,tmpb;
 

diff --git a/dsppp/Include/dsppp/DSP/matrix_multiply.hpp b/dsppp/Include/dsppp/DSP/matrix_multiply.hpp
@@ -19,6 +19,8 @@
 template<typename MA,
          typename MB,
          typename std::enable_if<
+         !is_complex<MA>() &&
+         !is_complex<MB>() &&
          std::is_same<typename traits<MA>::Scalar,Q15>::value &&
          number_traits<typename traits<MA>::Scalar>::is_fixed,bool>::type = true>
 __STATIC_INLINE void _arm_mat_trans(
@@ -115,6 +117,9 @@ template<typename M,
          typename V,
          typename RES,
          typename std::enable_if<
+         !is_complex<M>() &&
+         !is_complex<V>() &&
+         !is_complex<RES>() &&
          !std::is_same<typename traits<M>::Scalar,Q31>::value &&
          number_traits<typename traits<M>::Scalar>::is_fixed,bool>::type = true>
 inline void _dot_m_v(RES &res,
@@ -253,6 +258,10 @@ template<typename MA,
          typename RES,
          typename TMP,
          typename std::enable_if<
+         !is_complex<MA>() &&
+         !is_complex<MB>() &&
+         !is_complex<RES>() &&
+         !is_complex<TMP>() &&
          !std::is_same<typename traits<MA>::Scalar,Q31>::value &&
          number_traits<typename traits<MA>::Scalar>::is_fixed,bool>::type = true>
 __STATIC_INLINE void _dot_m_m(const MA&pSrcA,const MB&pSrcB,

diff --git a/dsppp/Include/dsppp/DSP/q15.hpp b/dsppp/Include/dsppp/DSP/q15.hpp
@@ -126,7 +126,7 @@ struct vector_traits<Q15,DSP,typename std::enable_if<true>::type>
 
 
 /**
- * \ingroup DSPNumber
+ * \ingroup DSPQ15Number
  */
 namespace inner {
 
@@ -157,6 +157,11 @@ namespace inner {
        return(Q15DSPVector(__QSUB16(0, a.v)));
     };
 
+    __STATIC_FORCEINLINE Q15DSPVector vconjugate(const Q15DSPVector a)
+    {
+       return(a); // Identity: the argument is returned unchanged (presumably because the packed Q15 lanes are real-valued - TODO confirm)
+    };
+
     __STATIC_FORCEINLINE Q15DSPVector vadd(const Q15DSPVector a,
                                            const Q15DSPVector b)
     {

diff --git a/dsppp/Include/dsppp/DSP/q7.hpp b/dsppp/Include/dsppp/DSP/q7.hpp
@@ -93,6 +93,11 @@ namespace inner {
        return(Q7DSPVector(__QSUB8(0, a.v)));
     };
 
+    __STATIC_FORCEINLINE Q7DSPVector vconjugate(const Q7DSPVector a)
+    {
+       return(a); // Identity: the argument is returned unchanged (presumably because the packed Q7 lanes are real-valued - TODO confirm)
+    };
+
     __STATIC_FORCEINLINE Q7DSPVector vadd(const Q7DSPVector a,
                                            const Q7DSPVector b)
     {