From f7638715ddb19b1eee9baac0d51c87d13b4c0963 Mon Sep 17 00:00:00 2001 From: Yvan Mokwinski Date: Mon, 27 Mar 2023 10:45:39 -0600 Subject: [PATCH 1/5] Adding spmv mixed precisions (#528) * Adding spmv with matrix in single precision and vectors in double precisions * complex case * update changelog --- CHANGELOG.md | 1 + clients/common/rocsparse_host.cpp | 348 ++++---- clients/include/rocsparse_common.yaml | 4 + clients/testings/testing_spmv_bsr.cpp | 26 +- clients/testings/testing_spmv_coo.cpp | 18 +- clients/testings/testing_spmv_coo_aos.cpp | 18 +- clients/testings/testing_spmv_csc.cpp | 26 +- clients/testings/testing_spmv_csr.cpp | 27 +- clients/testings/testing_spmv_ell.cpp | 19 +- clients/tests/test_spmv_bsr.yaml | 54 +- clients/tests/test_spmv_coo.yaml | 68 ++ clients/tests/test_spmv_coo_aos.yaml | 69 +- clients/tests/test_spmv_csc.yaml | 65 ++ clients/tests/test_spmv_csr.yaml | 70 +- clients/tests/test_spmv_ell.yaml | 55 ++ library/include/rocsparse-complex-types.h | 13 +- library/include/rocsparse-functions.h | 22 + library/src/level2/rocsparse_bsrmv.cpp | 761 ++++++++++++++++++ .../level2/rocsparse_bsrxmv_spzl_16x16.cpp | 26 +- .../level2/rocsparse_bsrxmv_spzl_17_32.cpp | 26 +- .../src/level2/rocsparse_bsrxmv_spzl_2x2.cpp | 24 + .../src/level2/rocsparse_bsrxmv_spzl_3x3.cpp | 26 +- .../src/level2/rocsparse_bsrxmv_spzl_4x4.cpp | 26 +- .../src/level2/rocsparse_bsrxmv_spzl_5x5.cpp | 26 +- .../src/level2/rocsparse_bsrxmv_spzl_8x8.cpp | 26 +- .../level2/rocsparse_bsrxmv_spzl_general.cpp | 27 +- library/src/level2/rocsparse_coomv.cpp | 15 + library/src/level2/rocsparse_coomv_aos.cpp | 17 +- library/src/level2/rocsparse_cscmv.cpp | 27 +- library/src/level2/rocsparse_csrmv.cpp | 25 + library/src/level2/rocsparse_ellmv.cpp | 15 + 31 files changed, 1814 insertions(+), 156 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index addef5d6..28649be8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ Full documentation for rocSPARSE is available at 
[rocsparse.readthedocs.io](https://rocsparse.readthedocs.io/en/latest/). ## rocSPARSE 2.5.2 for ROCm 5.6.0 + ### Improved - Fixed a memory leak in csritsv diff --git a/clients/common/rocsparse_host.cpp b/clients/common/rocsparse_host.cpp index 4a7dc294..ba45564f 100644 --- a/clients/common/rocsparse_host.cpp +++ b/clients/common/rocsparse_host.cpp @@ -10352,50 +10352,51 @@ template void host_coosort_by_column(rocsparse_int M, TTYPE* A, \ ITYPE ld); -#define INSTANTIATE5(ITYPE, JTYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ - template void host_bsrmv(rocsparse_direction dir, \ - rocsparse_operation trans, \ - JTYPE mb, \ - JTYPE nb, \ - ITYPE nnzb, \ - TTYPE alpha, \ - const ITYPE* bsr_row_ptr, \ - const JTYPE* bsr_col_ind, \ - const ATYPE* bsr_val, \ - JTYPE bsr_dim, \ - const XTYPE* x, \ - TTYPE beta, \ - YTYPE* y, \ - rocsparse_index_base base); \ - template void host_cscmv(rocsparse_operation trans, \ - JTYPE M, \ - JTYPE N, \ - ITYPE nnz, \ - TTYPE alpha, \ - const ITYPE* csc_col_ptr, \ - const JTYPE* csc_row_ind, \ - const ATYPE* csc_val, \ - const XTYPE* x, \ - TTYPE beta, \ - YTYPE* y, \ - rocsparse_index_base base, \ - rocsparse_matrix_type matrix_type, \ - rocsparse_spmv_alg algo); \ - template void host_csrmv(rocsparse_operation trans, \ - JTYPE M, \ - JTYPE N, \ - ITYPE nnz, \ - TTYPE alpha, \ - const ITYPE* csr_row_ptr, \ - const JTYPE* csr_col_ind, \ - const ATYPE* csr_val, \ - const XTYPE* x, \ - TTYPE beta, \ - YTYPE* y, \ - rocsparse_index_base base, \ - rocsparse_matrix_type matrix_type, \ - rocsparse_spmv_alg algo, \ - bool force_conj); +#define INSTANTIATE_IJAXYT(ITYPE, JTYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ + template void host_bsrmv(rocsparse_direction dir, \ + rocsparse_operation trans, \ + JTYPE mb, \ + JTYPE nb, \ + ITYPE nnzb, \ + TTYPE alpha, \ + const ITYPE* bsr_row_ptr, \ + const JTYPE* bsr_col_ind, \ + const ATYPE* bsr_val, \ + JTYPE bsr_dim, \ + const XTYPE* x, \ + TTYPE beta, \ + YTYPE* y, \ + rocsparse_index_base base); \ + template void 
host_cscmv(rocsparse_operation trans, \ + JTYPE M, \ + JTYPE N, \ + ITYPE nnz, \ + TTYPE alpha, \ + const ITYPE* csc_col_ptr, \ + const JTYPE* csc_row_ind, \ + const ATYPE* csc_val, \ + const XTYPE* x, \ + TTYPE beta, \ + YTYPE* y, \ + rocsparse_index_base base, \ + rocsparse_matrix_type matrix_type, \ + rocsparse_spmv_alg algo); \ + template void host_csrmv(rocsparse_operation trans, \ + JTYPE M, \ + JTYPE N, \ + ITYPE nnz, \ + TTYPE alpha, \ + const ITYPE* csr_row_ptr, \ + const JTYPE* csr_col_ind, \ + const ATYPE* csr_val, \ + const XTYPE* x, \ + TTYPE beta, \ + YTYPE* y, \ + rocsparse_index_base base, \ + rocsparse_matrix_type matrix_type, \ + rocsparse_spmv_alg algo, \ + bool force_conj) + #define INSTANTIATE6(ITYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ template void host_coomv(rocsparse_operation trans, \ @@ -10485,103 +10486,142 @@ INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, double); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_double_complex); -INSTANTIATE5(int32_t, int32_t, int8_t, int8_t, int32_t, int32_t); -INSTANTIATE5(int64_t, int32_t, int8_t, int8_t, int32_t, int32_t); -INSTANTIATE5(int64_t, int64_t, int8_t, int8_t, int32_t, int32_t); -INSTANTIATE5(int32_t, int32_t, int8_t, int8_t, float, float); -INSTANTIATE5(int64_t, int32_t, int8_t, int8_t, float, float); -INSTANTIATE5(int64_t, int64_t, int8_t, int8_t, float, float); -INSTANTIATE5(int32_t, - int32_t, - float, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int64_t, - int32_t, - float, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int64_t, - int64_t, - float, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int32_t, - int32_t, - double, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE5(int64_t, 
- int32_t, - double, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE5(int64_t, - int64_t, - double, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE5(int32_t, int32_t, float, float, float, float); -INSTANTIATE5(int64_t, int32_t, float, float, float, float); -INSTANTIATE5(int64_t, int64_t, float, float, float, float); -INSTANTIATE5(int32_t, int32_t, double, double, double, double); -INSTANTIATE5(int64_t, int32_t, double, double, double, double); -INSTANTIATE5(int64_t, int64_t, double, double, double, double); -INSTANTIATE5(int32_t, - int32_t, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int64_t, - int32_t, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int64_t, - int64_t, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE5(int32_t, - int32_t, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE5(int64_t, - int32_t, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE5(int64_t, - int64_t, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); + INSTANTIATE6(int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int32_t, int8_t, int8_t, float, float); INSTANTIATE6(int64_t, int8_t, int8_t, float, float); -INSTANTIATE6( - int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); -INSTANTIATE6( - int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); -INSTANTIATE6( - int32_t, double, rocsparse_double_complex, 
rocsparse_double_complex, rocsparse_double_complex); -INSTANTIATE6( - int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE6(int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + + +INSTANTIATE_IJAXYT(int32_t, int32_t, int8_t, int8_t, int32_t, int32_t); +INSTANTIATE_IJAXYT(int64_t, int32_t, int8_t, int8_t, int32_t, int32_t); +INSTANTIATE_IJAXYT(int64_t, int64_t, int8_t, int8_t, int32_t, int32_t); +INSTANTIATE_IJAXYT(int32_t, int32_t, int8_t, int8_t, float, float); +INSTANTIATE_IJAXYT(int64_t, int32_t, int8_t, int8_t, float, float); +INSTANTIATE_IJAXYT(int64_t, int64_t, int8_t, int8_t, float, float); + +INSTANTIATE_IJAXYT(int32_t, int32_t, float, double, double, double); +INSTANTIATE_IJAXYT(int64_t, int32_t, float, double, double, double); +INSTANTIATE_IJAXYT(int64_t, int64_t, float, double, double, double); + +INSTANTIATE_IJAXYT(int32_t, + int32_t, + float, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int64_t, + int32_t, + float, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int64_t, + int64_t, + float, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int32_t, + int32_t, + double, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int32_t, + double, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int64_t, + double, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + +INSTANTIATE_IJAXYT(int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + 
rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + +INSTANTIATE_IJAXYT(int32_t, int32_t, float, float, float, float); +INSTANTIATE_IJAXYT(int64_t, int32_t, float, float, float, float); +INSTANTIATE_IJAXYT(int64_t, int64_t, float, float, float, float); +INSTANTIATE_IJAXYT(int32_t, int32_t, double, double, double, double); +INSTANTIATE_IJAXYT(int64_t, int32_t, double, double, double, double); +INSTANTIATE_IJAXYT(int64_t, int64_t, double, double, double, double); +INSTANTIATE_IJAXYT(int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IJAXYT(int32_t, + int32_t, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int32_t, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IJAXYT(int64_t, + int64_t, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + +INSTANTIATE_IAXYT(int32_t, int8_t, int8_t, int32_t, int32_t); +INSTANTIATE_IAXYT(int64_t, int8_t, int8_t, int32_t, int32_t); +INSTANTIATE_IAXYT(int32_t, int8_t, int8_t, float, float); +INSTANTIATE_IAXYT(int64_t, int8_t, int8_t, float, float); +INSTANTIATE_IAXYT(int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); +INSTANTIATE_IAXYT(int32_t, float, double, double, double); +INSTANTIATE_IAXYT(int64_t, float, double, double, double); 
+INSTANTIATE_IAXYT(int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + + +INSTANTIATE6(int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); +INSTANTIATE6(int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + + +INSTANTIATE6(int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + + INSTANTIATE6(int32_t, float, float, float, float); INSTANTIATE6(int64_t, float, float, float, float); INSTANTIATE6(int32_t, double, double, double, double); @@ -10606,3 +10646,39 @@ INSTANTIATE6(int64_t, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_IAXYT(int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IAXYT(int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IAXYT(int32_t, float, float, float, float); +INSTANTIATE_IAXYT(int64_t, float, float, float, float); +INSTANTIATE_IAXYT(int32_t, double, double, double, double); +INSTANTIATE_IAXYT(int64_t, double, double, double, double); +INSTANTIATE_IAXYT(int32_t, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IAXYT(int64_t, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_IAXYT(int32_t, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_IAXYT(int64_t, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + diff --git a/clients/include/rocsparse_common.yaml b/clients/include/rocsparse_common.yaml index 9924dc62..cdfec98d 100644 --- a/clients/include/rocsparse_common.yaml +++ 
b/clients/include/rocsparse_common.yaml @@ -237,6 +237,8 @@ Real precisions: &real_precisions { a_type: f32_r, b_type: f32_r, c_type: f32_r, x_type: f32_r, y_type: f32_r, compute_type: f32_r } - &double_precision { a_type: f64_r, b_type: f64_r, c_type: f64_r, x_type: f64_r, y_type: f64_r, compute_type: f64_r } + - &float32_float64_float64_float64 + { a_type: f32_r, x_type: f64_r, y_type: f64_r, compute_type: f64_r } Complex precisions: &complex_precisions - &float32_cmplx32_cmplx32_cmplx32_precision @@ -247,6 +249,8 @@ Complex precisions: &complex_precisions { a_type: f32_c, b_type: f32_c, c_type: f32_c, x_type: f32_c, y_type: f32_c, compute_type: f32_c } - &double_precision_complex { a_type: f64_c, b_type: f64_c, c_type: f64_c, x_type: f64_c, y_type: f64_c, compute_type: f64_c } + - &cmplx32_cmplx64_cmplx64_cmplx64 + { a_type: f32_c, x_type: f64_c, y_type: f64_c, compute_type: f64_c } C precisions real: &single_only_precisions - *single_precision diff --git a/clients/testings/testing_spmv_bsr.cpp b/clients/testings/testing_spmv_bsr.cpp index a2a7bc11..54498d67 100644 --- a/clients/testings/testing_spmv_bsr.cpp +++ b/clients/testings/testing_spmv_bsr.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -83,6 +83,11 @@ INSTANTIATE_MIXED(int64_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(int32_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int64_t, float, double, double, double); + INSTANTIATE_MIXED(int32_t, int32_t, double, @@ -102,4 +107,23 @@ INSTANTIATE_MIXED(int64_t, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE_MIXED(int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_bsr_extra(const Arguments& arg) {} diff --git a/clients/testings/testing_spmv_coo.cpp b/clients/testings/testing_spmv_coo.cpp index 6bef4ba1..7d4459a6 100644 --- a/clients/testings/testing_spmv_coo.cpp +++ b/clients/testings/testing_spmv_coo.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -64,8 +64,24 @@ INSTANTIATE_MIXED( int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_MIXED( int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, float, double, double, double); + INSTANTIATE_MIXED( int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_coo_extra(const Arguments& arg) {} diff --git a/clients/testings/testing_spmv_coo_aos.cpp b/clients/testings/testing_spmv_coo_aos.cpp index 74093e38..60a2dc53 100644 --- a/clients/testings/testing_spmv_coo_aos.cpp +++ b/clients/testings/testing_spmv_coo_aos.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -63,8 +63,24 @@ INSTANTIATE_MIXED( int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_MIXED( int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, float, double, double, double); + INSTANTIATE_MIXED( int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_coo_aos_extra(const Arguments& arg) {} diff --git a/clients/testings/testing_spmv_csc.cpp b/clients/testings/testing_spmv_csc.cpp index d980f21b..c6235a3c 100644 --- a/clients/testings/testing_spmv_csc.cpp +++ b/clients/testings/testing_spmv_csc.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -83,6 +83,11 @@ INSTANTIATE_MIXED(int64_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(int32_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int64_t, float, double, double, double); + INSTANTIATE_MIXED(int32_t, int32_t, double, @@ -102,4 +107,23 @@ INSTANTIATE_MIXED(int64_t, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE_MIXED(int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_csc_extra(const Arguments& arg) {} diff --git a/clients/testings/testing_spmv_csr.cpp b/clients/testings/testing_spmv_csr.cpp index aba2752c..7603e3e1 100644 --- a/clients/testings/testing_spmv_csr.cpp +++ b/clients/testings/testing_spmv_csr.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -65,6 +65,11 @@ INSTANTIATE_MIXED(int64_t, int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_MIXED(int32_t, int32_t, int8_t, int8_t, float, float); INSTANTIATE_MIXED(int64_t, int32_t, int8_t, int8_t, float, float); INSTANTIATE_MIXED(int64_t, int64_t, int8_t, int8_t, float, float); + +INSTANTIATE_MIXED(int32_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, int64_t, float, double, double, double); + INSTANTIATE_MIXED(int32_t, int32_t, float, @@ -83,6 +88,7 @@ INSTANTIATE_MIXED(int64_t, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + INSTANTIATE_MIXED(int32_t, int32_t, double, @@ -102,4 +108,23 @@ INSTANTIATE_MIXED(int64_t, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE_MIXED(int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_csr_extra(const Arguments& arg) {} diff --git a/clients/testings/testing_spmv_ell.cpp b/clients/testings/testing_spmv_ell.cpp index e9776c94..ed4018d3 100644 --- a/clients/testings/testing_spmv_ell.cpp +++ b/clients/testings/testing_spmv_ell.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -59,12 +59,29 @@ INSTANTIATE_MIXED(int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_MIXED(int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_MIXED(int32_t, int8_t, int8_t, float, float); INSTANTIATE_MIXED(int64_t, int8_t, int8_t, float, float); + INSTANTIATE_MIXED( int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_MIXED( int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(int32_t, float, double, double, double); +INSTANTIATE_MIXED(int64_t, float, double, double, double); + INSTANTIATE_MIXED( int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); + void testing_spmv_ell_extra(const Arguments& arg) {} diff --git a/clients/tests/test_spmv_bsr.yaml b/clients/tests/test_spmv_bsr.yaml index 6ea697ec..b189e7f7 100644 --- a/clients/tests/test_spmv_bsr.yaml +++ b/clients/tests/test_spmv_bsr.yaml @@ -218,7 +218,31 @@ Tests: category: quick function: spmv_bsr indextype: *i32_i64 - precision: *int8_int8_int32_int32_precision + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + block_dim: [12] + alpha_beta: *alpha_beta_range_checkin + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + +- name: spmv_bsr + category: pre_checkin + function: spmv_bsr + indextype: *i32_i64 + precision: 
*float32_float64_float64_float64 + M: [534, 1604, 3413, 75196] + N: [578, 4109, 9458, 34254] + block_dim: [16] + alpha_beta: *alpha_beta_range_quick + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_random] + +- name: spmv_bsr + category: quick + function: spmv_bsr + indextype: *i32_i64 + precision: *int8_int8_int32_int32_axyt_precision M: [34, 104, 343, 5196] N: [57, 109, 458, 3425] block_dim: [12] @@ -238,6 +262,7 @@ Tests: baseA: [rocsparse_index_base_one] matrix: [rocsparse_matrix_random] + - name: spmv_bsr_file category: pre_checkin function: spmv_bsr @@ -263,3 +288,30 @@ Tests: matrix: [rocsparse_matrix_file_rocalution] filename: [Chevron3, bmwcra_1] + + +- name: spmv_bsr_file + category: pre_checkin + function: spmv_bsr + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [48374] + N: [42846] + block_dim: [4] + alpha_beta: *alpha_beta_range_checkin + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + +- name: spmv_bsr_file + category: nightly + function: spmv_bsr + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + block_dim: [7] + alpha_beta: *alpha_beta_range_nightly + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + filename: [Chevron3, + bmwcra_1] diff --git a/clients/tests/test_spmv_coo.yaml b/clients/tests/test_spmv_coo.yaml index 83111c36..48c4b8b3 100644 --- a/clients/tests/test_spmv_coo.yaml +++ b/clients/tests/test_spmv_coo.yaml @@ -259,6 +259,39 @@ Tests: # # mixed precision # +- name: spmv_coo_file + category: quick + function: spmv_coo + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_quick + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo] + filename: [bmwcra_1, + amazon0312, + sme3Dc] + +- name: spmv_coo + category: 
pre_checkin + function: spmv_coo + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo_atomic] + + + - name: spmv_coo_file category: quick @@ -291,6 +324,41 @@ Tests: storage: [rocsparse_storage_mode_sorted] spmv_alg: [rocsparse_spmv_alg_coo_atomic] + +- name: spmv_coo + category: pre_checkin + function: spmv_coo + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [16, 78, 294, 482, 68302] + N: [16, 93, 297, 657, 46342] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo] + +- name: spmv_coo_file + category: nightly + function: spmv_coo + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_nightly + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo_atomic] + filename: [Chebyshev4, + shipsec1, + scircuit] + + + + - name: spmv_coo category: pre_checkin function: spmv_coo diff --git a/clients/tests/test_spmv_coo_aos.yaml b/clients/tests/test_spmv_coo_aos.yaml index ba275e41..756a77d6 100644 --- a/clients/tests/test_spmv_coo_aos.yaml +++ b/clients/tests/test_spmv_coo_aos.yaml @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. +# Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -241,6 +241,37 @@ Tests: # # mixed precision # +- name: spmv_coo_file + category: quick + function: spmv_coo + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_quick + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo] + filename: [bmwcra_1, + amazon0312, + sme3Dc] + +- name: spmv_coo + category: pre_checkin + function: spmv_coo + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo_atomic] + - name: spmv_coo_file category: quick @@ -303,3 +334,39 @@ Tests: filename: [Chebyshev4, shipsec1, scircuit] + + +- name: spmv_coo + category: pre_checkin + function: spmv_coo + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [16, 78, 294, 482, 68302] + N: [16, 93, 297, 657, 46342] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo] + +- name: spmv_coo_file + category: nightly + function: spmv_coo + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_nightly + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + storage: [rocsparse_storage_mode_sorted] + spmv_alg: [rocsparse_spmv_alg_coo_atomic] + 
filename: [Chebyshev4, + shipsec1, + scircuit] + + + + diff --git a/clients/tests/test_spmv_csc.yaml b/clients/tests/test_spmv_csc.yaml index 05072e3c..b26c2c7d 100644 --- a/clients/tests/test_spmv_csc.yaml +++ b/clients/tests/test_spmv_csc.yaml @@ -406,6 +406,37 @@ Tests: # # mixed precision # +- name: spmv_csc_file + category: quick + function: spmv_csc + indextype: *i32i32_i64i32_i64i64 + precision: *float32_float64_float64_float64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_quick + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_stream] + filename: [bmwcra_1, + amazon0312, + sme3Dc] + +- name: spmv_csc + category: pre_checkin + function: spmv_csc + indextype: *i32i32_i64i32_i64i64 + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + - name: spmv_csc_file category: quick @@ -438,6 +469,40 @@ Tests: matrix_type: [rocsparse_matrix_type_general] spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + + + +- name: spmv_csc + category: pre_checkin + function: spmv_csc + indextype: *i32i32_i64i32_i64i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [16, 78, 294, 482, 68302] + N: [16, 93, 297, 657, 46342] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_stream] + +- name: spmv_csc_file + category: nightly + function: spmv_csc + indextype: *i32i32_i64i32_i64i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_nightly 
+ transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + filename: [Chebyshev4, + shipsec1] + + - name: spmv_csc category: pre_checkin function: spmv_csc diff --git a/clients/tests/test_spmv_csr.yaml b/clients/tests/test_spmv_csr.yaml index 9df2601d..c24f6af2 100644 --- a/clients/tests/test_spmv_csr.yaml +++ b/clients/tests/test_spmv_csr.yaml @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. +# Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -412,7 +412,39 @@ Tests: category: quick function: spmv_csr indextype: *i32i32_i64i32_i64i64 - precision: *int8_int8_int32_int32_precision + precision: *float32_float64_float64_float64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_quick + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_stream] + filename: [bmwcra_1, + amazon0312, + sme3Dc] + +- name: spmv_csr + category: pre_checkin + function: spmv_csr + indextype: *i32i32_i64i32_i64i64 + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + + +- name: spmv_csr_file + category: quick + function: spmv_csr + indextype: *i32i32_i64i32_i64i64 + precision: *int8_int8_int32_int32_axyt_precision M: 
1 N: 1 alpha_beta: *alpha_beta_range_quick @@ -439,6 +471,40 @@ Tests: matrix_type: [rocsparse_matrix_type_general] spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + +- name: spmv_csr + category: pre_checkin + function: spmv_csr + indextype: *i32i32_i64i32_i64i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [16, 78, 294, 482, 68302] + N: [16, 93, 297, 657, 46342] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_stream] + +- name: spmv_csr_file + category: nightly + function: spmv_csr + indextype: *i32i32_i64i32_i64i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_nightly + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + matrix_type: [rocsparse_matrix_type_general] + spmv_alg: [rocsparse_spmv_alg_csr_adaptive] + filename: [Chebyshev4, + shipsec1, + scircuit] + + + - name: spmv_csr category: pre_checkin function: spmv_csr diff --git a/clients/tests/test_spmv_ell.yaml b/clients/tests/test_spmv_ell.yaml index ea01c202..7cc67d1c 100644 --- a/clients/tests/test_spmv_ell.yaml +++ b/clients/tests/test_spmv_ell.yaml @@ -214,6 +214,35 @@ Tests: # # mixed precision # +- name: spmv_ell_file + category: quick + function: spmv_ell + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_quick + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + filename: [bmwcra_1, + amazon0312, + sme3Dc] + +- name: spmv_ell + category: pre_checkin + function: spmv_ell + indextype: *i32_i64 + precision: *float32_float64_float64_float64 + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: 
[rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + + + - name: spmv_ell_file category: quick @@ -267,3 +296,29 @@ Tests: matrix: [rocsparse_matrix_file_rocalution] filename: [shipsec1, scircuit] + +- name: spmv_ell + category: pre_checkin + function: spmv_ell + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: [16, 78, 294, 482, 68302] + N: [16, 93, 297, 657, 46342] + alpha_beta: *alpha_beta_range_checkin + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + +- name: spmv_ell_file + category: nightly + function: spmv_ell + indextype: *i32_i64 + precision: *cmplx32_cmplx64_cmplx64_cmplx64 + M: 1 + N: 1 + alpha_beta: *alpha_beta_range_nightly + transA: [rocsparse_operation_none] + baseA: [rocsparse_index_base_one] + matrix: [rocsparse_matrix_file_rocalution] + filename: [shipsec1, + scircuit] diff --git a/library/include/rocsparse-complex-types.h b/library/include/rocsparse-complex-types.h index e4810384..6dbaf6e4 100644 --- a/library/include/rocsparse-complex-types.h +++ b/library/include/rocsparse-complex-types.h @@ -60,7 +60,9 @@ class rocsparse_complex_num public: __device__ __host__ rocsparse_complex_num(void) = default; __device__ __host__ rocsparse_complex_num(const rocsparse_complex_num&) = default; - __device__ __host__ rocsparse_complex_num(rocsparse_complex_num&&) = default; + template + __device__ __host__ rocsparse_complex_num(const rocsparse_complex_num& a); + __device__ __host__ rocsparse_complex_num(rocsparse_complex_num&&) = default; __device__ __host__ rocsparse_complex_num& operator=(const rocsparse_complex_num& rhs) = default; __device__ __host__ rocsparse_complex_num& operator=(rocsparse_complex_num&& rhs) = default; @@ -279,6 +281,15 @@ template class rocsparse_complex_num_check; using rocsparse_float_complex = rocsparse_complex_num; using rocsparse_double_complex = rocsparse_complex_num; +template +template +__device__ __host__ + 
rocsparse_complex_num::rocsparse_complex_num(const rocsparse_complex_num& a) + : x(std::real(a)) + , y(std::imag(a)) +{ +} + #endif /* __cplusplus < 201402L || (!defined(__HIPCC__)) */ #endif /* _ROCSPARSE_COMPLEX_TYPES_H_ */ diff --git a/library/include/rocsparse-functions.h b/library/include/rocsparse-functions.h index eb1eb470..8c08d42b 100644 --- a/library/include/rocsparse-functions.h +++ b/library/include/rocsparse-functions.h @@ -20193,6 +20193,17 @@ rocsparse_status rocsparse_spvv(rocsparse_handle handle, * |-------------------------|--------------------------|--------------------------| * \endverbatim * +* Mixed-regular real/complex precisions (A in single precision, X / Y / compute_type in double precision) +* \verbatim +* |----------------------------|----------------------------| +* | A | X / Y / compute_type | +* |----------------------------|----------------------------| +* | rocsparse_datatype_f32_r | rocsparse_datatype_f64_r | +* |----------------------------|----------------------------| +* | rocsparse_datatype_f32_c | rocsparse_datatype_f64_c | +* |----------------------------|----------------------------| +* \endverbatim +* * Mixed-regular Complex precisions * \verbatim * |----------------------------|----------------------------| @@ -20312,6 +20323,17 @@ __attribute__((deprecated("This function is deprecated and will be removed in a * |----------------------------------------------------| * \endverbatim * +* Mixed-regular real/complex precisions (A in single precision, X / Y / compute_type in double precision) +* \verbatim +* |----------------------------|----------------------------| +* | A | X / Y / compute_type | +* |----------------------------|----------------------------| +* | rocsparse_datatype_f32_r | rocsparse_datatype_f64_r | +* |----------------------------|----------------------------| +* | rocsparse_datatype_f32_c | rocsparse_datatype_f64_c | +* |----------------------------|----------------------------| +* \endverbatim +* * Mixed precisions: * \verbatim * |-------------------------|--------------------------|--------------------------| diff --git
a/library/src/level2/rocsparse_bsrmv.cpp b/library/src/level2/rocsparse_bsrmv.cpp index f8ea6c33..f9391c63 100644 --- a/library/src/level2/rocsparse_bsrmv.cpp +++ b/library/src/level2/rocsparse_bsrmv.cpp @@ -23,6 +23,767 @@ * ************************************************************************ */ #include "rocsparse_bsrmv_ex.hpp" +#include "rocsparse_bsrxmv_spzl.hpp" +#include "rocsparse_csrmv.hpp" + +template +rocsparse_status rocsparse_bsrmv_analysis_template(rocsparse_handle handle, + rocsparse_direction dir, + rocsparse_operation trans, + J mb, + J nb, + I nnzb, + const rocsparse_mat_descr descr, + const A* bsr_val, + const I* bsr_row_ptr, + const J* bsr_col_ind, + J block_dim, + rocsparse_mat_info info) +{ + // Check for valid handle + if(handle == nullptr) + { + return rocsparse_status_invalid_handle; + } + + // Check for valid matrix descriptor and info struct + if(descr == nullptr || info == nullptr) + { + return rocsparse_status_invalid_pointer; + } + + // Logging + log_trace(handle, + replaceX("rocsparse_Xbsrmv_analysis"), + dir, + trans, + mb, + nb, + nnzb, + (const void*&)descr, + (const void*&)bsr_val, + (const void*&)bsr_row_ptr, + (const void*&)bsr_col_ind, + block_dim, + (const void*&)info); + + if(rocsparse_enum_utils::is_invalid(dir)) + { + return rocsparse_status_invalid_value; + } + + if(rocsparse_enum_utils::is_invalid(trans)) + { + return rocsparse_status_invalid_value; + } + + if(trans != rocsparse_operation_none) + { + return rocsparse_status_not_implemented; + } + + // Check matrix type + if(descr->type != rocsparse_matrix_type_general) + { + return rocsparse_status_not_implemented; + } + + // Check sizes + if(mb < 0 || nb < 0 || nnzb < 0 || block_dim < 0) + { + return rocsparse_status_invalid_size; + } + + // Quick return if possible + if(mb == 0 || nb == 0 || block_dim == 0) + { + return rocsparse_status_success; + } + + // Check the rest of pointer arguments + if(mb > 0 && bsr_row_ptr == nullptr) + { + return 
rocsparse_status_invalid_pointer; + } + + // value arrays and column indices arrays must both be null (zero matrix) or both not null + if((bsr_val == nullptr && bsr_col_ind != nullptr) + || (bsr_val != nullptr && bsr_col_ind == nullptr)) + { + return rocsparse_status_invalid_pointer; + } + + if(nnzb != 0 && (bsr_val == nullptr && bsr_col_ind == nullptr)) + { + return rocsparse_status_invalid_pointer; + } + + if(descr->storage_mode == rocsparse_storage_mode_sorted) + { + if(block_dim == 1) + { + return rocsparse_csrmv_analysis_template( + handle, trans, mb, nb, nnzb, descr, bsr_val, bsr_row_ptr, bsr_col_ind, info); + } + } + + return rocsparse_status_success; +} + +template +rocsparse_status rocsparse_bsrmv_template_dispatch(rocsparse_handle handle, + rocsparse_direction dir, + rocsparse_operation trans, + J mb, + J nb, + I nnzb, + U alpha_device_host, + const rocsparse_mat_descr descr, + const A* bsr_val, + const I* bsr_row_ptr, + const J* bsr_col_ind, + J block_dim, + const X* x, + U beta_device_host, + Y* y) +{ + if(trans != rocsparse_operation_none) + { + return rocsparse_status_not_implemented; + } + + // + // block_dim == 1 is the CSR case + // + if(block_dim == 1) + { + return rocsparse_csrmv_template_dispatch(handle, + trans, + mb, + nb, + nnzb, + alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_row_ptr + 1, + bsr_col_ind, + x, + beta_device_host, + y, + false); + } + + // LCOV_EXCL_START + // Run different bsrmv kernels + if(handle->wavefront_size == 32) + { + bsrxmvn_general(handle, + dir, + mb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + bsr_col_ind, + bsr_val, + block_dim, + x, + beta_device_host, + y, + descr->base); + return rocsparse_status_success; + } + // LCOV_EXCL_STOP + + if(block_dim == 2) + { + bsrxmvn_2x2(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim == 3) + { + 
bsrxmvn_3x3(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim == 4) + { + bsrxmvn_4x4(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim == 5) + { + bsrxmvn_5x5(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim == 8) + { + bsrxmvn_8x8(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim == 16) + { + bsrxmvn_16x16(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + x, + beta_device_host, + y, + descr->base); + } + else if(block_dim > 16 && block_dim <= 32) + { + + bsrxmvn_17_32(handle, + dir, + mb, + nnzb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + + bsr_col_ind, + bsr_val, + block_dim, + x, + beta_device_host, + y, + descr->base); + } + else + { + bsrxmvn_general(handle, + dir, + mb, + alpha_device_host, + 0, + nullptr, + bsr_row_ptr, + nullptr, + bsr_col_ind, + bsr_val, + block_dim, + x, + beta_device_host, + y, + descr->base); + } + + return rocsparse_status_success; +} + +template +rocsparse_status rocsparse_bsrmv_adaptive_template_dispatch(rocsparse_handle handle, + rocsparse_direction dir, + rocsparse_operation trans, + J mb, + J nb, + I nnzb, + U alpha_device_host, + const rocsparse_mat_descr descr, + const A* bsr_val, + const I* bsr_row_ptr, + const J* bsr_col_ind, + J block_dim, + rocsparse_csrmv_info info, + const X* x, + U beta_device_host, + Y* y) +{ + if(trans != rocsparse_operation_none || descr->storage_mode != 
rocsparse_storage_mode_sorted) + { + return rocsparse_status_not_implemented; + } + + // block_dim == 1 is the CSR case + if(block_dim == 1) + { + return rocsparse_csrmv_adaptive_template_dispatch(handle, + trans, + mb, + nb, + nnzb, + alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_col_ind, + info, + x, + beta_device_host, + y, + false); + } + + return rocsparse_bsrmv_template_dispatch(handle, + dir, + trans, + mb, + nb, + nnzb, + alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_col_ind, + block_dim, + x, + beta_device_host, + y); +} + +template +rocsparse_status rocsparse_bsrmv_template(rocsparse_handle handle, + rocsparse_direction dir, + rocsparse_operation trans, + J mb, + J nb, + I nnzb, + const T* alpha_device_host, + const rocsparse_mat_descr descr, + const A* bsr_val, + const I* bsr_row_ptr, + const J* bsr_col_ind, + J block_dim, + rocsparse_mat_info info, + const X* x, + const T* beta_device_host, + Y* y) +{ + // + // Check for valid handle and matrix descriptor + // + if(handle == nullptr) + { + return rocsparse_status_invalid_handle; + } + + if(descr == nullptr) + { + return rocsparse_status_invalid_pointer; + } + + // + // Logging + // + log_trace(handle, + replaceX("rocsparse_Xbsrmv"), + dir, + trans, + mb, + nb, + nnzb, + LOG_TRACE_SCALAR_VALUE(handle, alpha_device_host), + (const void*&)descr, + (const void*&)bsr_val, + (const void*&)bsr_row_ptr, + (const void*&)bsr_col_ind, + block_dim, + (const void*&)x, + LOG_TRACE_SCALAR_VALUE(handle, beta_device_host), + (const void*&)y); + + log_bench(handle, + "./rocsparse-bench -f bsrmv -r", + replaceX("X"), + "--mtx " + "--blockdim", + block_dim, + "--alpha", + LOG_BENCH_SCALAR_VALUE(handle, alpha_device_host), + "--beta", + LOG_BENCH_SCALAR_VALUE(handle, beta_device_host)); + + if(rocsparse_enum_utils::is_invalid(dir)) + { + return rocsparse_status_invalid_value; + } + + if(rocsparse_enum_utils::is_invalid(trans)) + { + return rocsparse_status_invalid_value; + } + + if(trans != 
rocsparse_operation_none) + { + return rocsparse_status_not_implemented; + } + + // Check matrix type + if(descr->type != rocsparse_matrix_type_general) + { + return rocsparse_status_not_implemented; + } + + // + // Check sizes + // + if(mb < 0 || nb < 0 || nnzb < 0 || block_dim < 0) + { + return rocsparse_status_invalid_size; + } + + // + // Quick return if possible + // + if(mb == 0 || nb == 0 || block_dim == 0) + { + return rocsparse_status_success; + } + + // + // Check pointer arguments + // + if(alpha_device_host == nullptr || beta_device_host == nullptr) + { + return rocsparse_status_invalid_pointer; + } + + // + // Another quick return. + // + if(handle->pointer_mode == rocsparse_pointer_mode_host + && *alpha_device_host == static_cast(0) && *beta_device_host == static_cast(1)) + { + return rocsparse_status_success; + } + + // + // Check the rest of pointer arguments + // + if((mb > 0 && bsr_row_ptr == nullptr) || x == nullptr || y == nullptr) + { + return rocsparse_status_invalid_pointer; + } + + // value arrays and column indices arrays must both be null (zero matrix) or both not null + if((bsr_val == nullptr && bsr_col_ind != nullptr) + || (bsr_val != nullptr && bsr_col_ind == nullptr)) + { + return rocsparse_status_invalid_pointer; + } + + if(nnzb != 0 && (bsr_val == nullptr && bsr_col_ind == nullptr)) + { + return rocsparse_status_invalid_pointer; + } + + if(info == nullptr || info->csrmv_info == nullptr || trans != rocsparse_operation_none + || descr->storage_mode != rocsparse_storage_mode_sorted) + { + // If bsrmv info is not available, call bsrmv general + if(handle->pointer_mode == rocsparse_pointer_mode_device) + { + return rocsparse_bsrmv_template_dispatch(handle, + dir, + trans, + mb, + nb, + nnzb, + alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_col_ind, + block_dim, + x, + beta_device_host, + y); + } + else + { + return rocsparse_bsrmv_template_dispatch(handle, + dir, + trans, + mb, + nb, + nnzb, + *alpha_device_host, + descr, + 
bsr_val, + bsr_row_ptr, + bsr_col_ind, + block_dim, + x, + *beta_device_host, + y); + } + } + else + { + // If bsrmv info is available, call bsrmv adaptive + if(handle->pointer_mode == rocsparse_pointer_mode_device) + { + return rocsparse_bsrmv_adaptive_template_dispatch(handle, + dir, + trans, + mb, + nb, + nnzb, + alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_col_ind, + block_dim, + info->csrmv_info, + x, + beta_device_host, + y); + } + else + { + return rocsparse_bsrmv_adaptive_template_dispatch(handle, + dir, + trans, + mb, + nb, + nnzb, + *alpha_device_host, + descr, + bsr_val, + bsr_row_ptr, + bsr_col_ind, + block_dim, + info->csrmv_info, + x, + *beta_device_host, + y); + } + } +} + +#define INSTANTIATE(TTYPE, ITYPE, JTYPE) \ + template rocsparse_status rocsparse_bsrmv_analysis_template(rocsparse_handle handle, \ + rocsparse_direction dir, \ + rocsparse_operation trans, \ + JTYPE mb, \ + JTYPE nb, \ + ITYPE nnzb, \ + const rocsparse_mat_descr descr, \ + const TTYPE* bsr_val, \ + const ITYPE* bsr_row_ptr, \ + const JTYPE* bsr_col_ind, \ + JTYPE block_dim, \ + rocsparse_mat_info info); \ + template rocsparse_status rocsparse_bsrmv_template(rocsparse_handle handle, \ + rocsparse_direction dir, \ + rocsparse_operation trans, \ + JTYPE mb, \ + JTYPE nb, \ + ITYPE nnzb, \ + const TTYPE* alpha_device_host, \ + const rocsparse_mat_descr descr, \ + const TTYPE* bsr_val, \ + const ITYPE* bsr_row_ptr, \ + const JTYPE* bsr_col_ind, \ + JTYPE block_dim, \ + rocsparse_mat_info info, \ + const TTYPE* x, \ + const TTYPE* beta_device_host, \ + TTYPE* y); + +INSTANTIATE(float, int32_t, int32_t); +INSTANTIATE(float, int64_t, int32_t); +INSTANTIATE(float, int64_t, int64_t); +INSTANTIATE(double, int32_t, int32_t); +INSTANTIATE(double, int64_t, int32_t); +INSTANTIATE(double, int64_t, int64_t); +INSTANTIATE(rocsparse_float_complex, int32_t, int32_t); +INSTANTIATE(rocsparse_float_complex, int64_t, int32_t); +INSTANTIATE(rocsparse_float_complex, int64_t, int64_t); 
+INSTANTIATE(rocsparse_double_complex, int32_t, int32_t); +INSTANTIATE(rocsparse_double_complex, int64_t, int32_t); +INSTANTIATE(rocsparse_double_complex, int64_t, int64_t); +#undef INSTANTIATE + +#define INSTANTIATE_MIXED_ANALYSIS(ITYPE, JTYPE, ATYPE) \ + template rocsparse_status rocsparse_bsrmv_analysis_template(rocsparse_handle handle, \ + rocsparse_direction dir, \ + rocsparse_operation trans, \ + JTYPE mb, \ + JTYPE nb, \ + ITYPE nnzb, \ + const rocsparse_mat_descr descr, \ + const ATYPE* bsr_val, \ + const ITYPE* bsr_row_ptr, \ + const JTYPE* bsr_col_ind, \ + JTYPE block_dim, \ + rocsparse_mat_info info) + +INSTANTIATE_MIXED_ANALYSIS(int32_t, int32_t, int8_t); +INSTANTIATE_MIXED_ANALYSIS(int64_t, int32_t, int8_t); +INSTANTIATE_MIXED_ANALYSIS(int64_t, int64_t, int8_t); +#undef INSTANTIATE_MIXED_ANALYSIS + +#define INSTANTIATE_MIXED(TTYPE, ITYPE, JTYPE, ATYPE, XTYPE, YTYPE) \ + template rocsparse_status rocsparse_bsrmv_template(rocsparse_handle handle, \ + rocsparse_direction dir, \ + rocsparse_operation trans, \ + JTYPE mb, \ + JTYPE nb, \ + ITYPE nnzb, \ + const TTYPE* alpha_device_host, \ + const rocsparse_mat_descr descr, \ + const ATYPE* bsr_val, \ + const ITYPE* bsr_row_ptr, \ + const JTYPE* bsr_col_ind, \ + JTYPE block_dim, \ + rocsparse_mat_info info, \ + const XTYPE* x, \ + const TTYPE* beta_device_host, \ + YTYPE* y) + +INSTANTIATE_MIXED(int32_t, int32_t, int32_t, int8_t, int8_t, int32_t); +INSTANTIATE_MIXED(int32_t, int64_t, int32_t, int8_t, int8_t, int32_t); +INSTANTIATE_MIXED(int32_t, int64_t, int64_t, int8_t, int8_t, int32_t); +INSTANTIATE_MIXED(float, int32_t, int32_t, int8_t, int8_t, float); +INSTANTIATE_MIXED(float, int64_t, int32_t, int8_t, int8_t, float); +INSTANTIATE_MIXED(float, int64_t, int64_t, int8_t, int8_t, float); +INSTANTIATE_MIXED(rocsparse_float_complex, + int32_t, + int32_t, + float, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_MIXED(rocsparse_float_complex, + int64_t, + int32_t, + float, + 
rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_MIXED(rocsparse_float_complex, + int64_t, + int64_t, + float, + rocsparse_float_complex, + rocsparse_float_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + double, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + double, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + double, + rocsparse_double_complex, + rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + +#undef INSTANTIATE_MIXED + /* * =========================================================================== diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_16x16.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_16x16.cpp index 666b1ff7..cc9f9bde 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_16x16.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_16x16.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -354,4 +354,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_17_32.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_17_32.cpp index fa4cb99e..4e98d683 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_17_32.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_17_32.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -682,4 +682,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_2x2.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_2x2.cpp index b00e2157..eed090e0 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_2x2.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_2x2.cpp @@ -450,4 +450,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + 
rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_3x3.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_3x3.cpp index d257f890..8d16315e 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_3x3.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_3x3.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -458,4 +458,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_4x4.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_4x4.cpp index aa150c25..45d3487a 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_4x4.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_4x4.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. 
+ * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -488,4 +488,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_5x5.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_5x5.cpp index 56660f57..7a1901d7 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_5x5.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_5x5.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -617,4 +617,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_8x8.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_8x8.cpp index 4712cfba..d5d5ddd8 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_8x8.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_8x8.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -606,4 +606,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_bsrxmv_spzl_general.cpp b/library/src/level2/rocsparse_bsrxmv_spzl_general.cpp index c9550a38..0fc056e1 100644 --- a/library/src/level2/rocsparse_bsrxmv_spzl_general.cpp +++ b/library/src/level2/rocsparse_bsrxmv_spzl_general.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -355,6 +355,11 @@ INSTANTIATE_MIXED(rocsparse_float_complex, float, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + INSTANTIATE_MIXED(rocsparse_double_complex, int32_t, int32_t, @@ -373,4 +378,24 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_coomv.cpp b/library/src/level2/rocsparse_coomv.cpp index 0e8e4665..6ec872d9 100644 --- a/library/src/level2/rocsparse_coomv.cpp +++ b/library/src/level2/rocsparse_coomv.cpp @@ -853,6 +853,21 @@ INSTANTIATE_MIXED( rocsparse_double_complex, int32_t, double, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( rocsparse_double_complex, int64_t, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + rocsparse_float_complex, + 
rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED /* diff --git a/library/src/level2/rocsparse_coomv_aos.cpp b/library/src/level2/rocsparse_coomv_aos.cpp index a2d241d9..a3d06e2a 100644 --- a/library/src/level2/rocsparse_coomv_aos.cpp +++ b/library/src/level2/rocsparse_coomv_aos.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -622,4 +622,19 @@ INSTANTIATE_MIXED( rocsparse_double_complex, int32_t, double, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( rocsparse_double_complex, int64_t, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_cscmv.cpp b/library/src/level2/rocsparse_cscmv.cpp index 1b6bbef0..dc9f8fc4 100644 --- a/library/src/level2/rocsparse_cscmv.cpp +++ b/library/src/level2/rocsparse_cscmv.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -251,6 +251,7 @@ INSTANTIATE_MIXED(rocsparse_float_complex, float, rocsparse_float_complex, rocsparse_float_complex); + INSTANTIATE_MIXED(rocsparse_double_complex, int32_t, int32_t, @@ -269,4 +270,28 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED diff --git a/library/src/level2/rocsparse_csrmv.cpp b/library/src/level2/rocsparse_csrmv.cpp index 8b219f34..27817385 100644 --- a/library/src/level2/rocsparse_csrmv.cpp +++ b/library/src/level2/rocsparse_csrmv.cpp @@ -1518,6 +1518,11 @@ INSTANTIATE_MIXED(rocsparse_float_complex, float, rocsparse_float_complex, rocsparse_float_complex); + +INSTANTIATE_MIXED(double, int32_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, int64_t, float, double, double); + INSTANTIATE_MIXED(rocsparse_double_complex, int32_t, int32_t, @@ -1536,6 +1541,26 @@ INSTANTIATE_MIXED(rocsparse_double_complex, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + int32_t, + rocsparse_float_complex, 
+ rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED /* diff --git a/library/src/level2/rocsparse_ellmv.cpp b/library/src/level2/rocsparse_ellmv.cpp index ffce8c79..0a5e5f8c 100644 --- a/library/src/level2/rocsparse_ellmv.cpp +++ b/library/src/level2/rocsparse_ellmv.cpp @@ -314,6 +314,21 @@ INSTANTIATE_MIXED( rocsparse_double_complex, int32_t, double, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE_MIXED( rocsparse_double_complex, int64_t, double, rocsparse_double_complex, rocsparse_double_complex); + +INSTANTIATE_MIXED(double, int32_t, float, double, double); +INSTANTIATE_MIXED(double, int64_t, float, double, double); + +INSTANTIATE_MIXED(rocsparse_double_complex, + int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE_MIXED(rocsparse_double_complex, + int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex); + #undef INSTANTIATE_MIXED /* From 4b65da686e70164a173848808bec61f7df89a317 Mon Sep 17 00:00:00 2001 From: Yvan Mokwinski Date: Thu, 25 May 2023 15:32:22 -0600 Subject: [PATCH 2/5] next --- clients/common/rocsparse_host.cpp | 148 ++++++++++--------------- clients/tests/test_spmv_bsr.yaml | 28 ++--- clients/tests/test_spmv_csr.yaml | 2 +- library/src/level2/rocsparse_bsrmv.cpp | 3 +- 4 files changed, 74 insertions(+), 107 deletions(-) diff --git a/clients/common/rocsparse_host.cpp b/clients/common/rocsparse_host.cpp index ba45564f..29dca548 100644 --- a/clients/common/rocsparse_host.cpp +++ b/clients/common/rocsparse_host.cpp @@ -1,6 +1,6 @@ /*! 
\file */ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -7410,7 +7410,7 @@ void host_gebsr_to_csr(rocsparse_direction direction, { for(rocsparse_int c = 0; c < col_block_dim; ++c) { - rocsparse_int col = col_block_dim * j + c; + rocsparse_int col = col_block_dim * j + c; rocsparse_int index = start * row_block_dim * col_block_dim + (end - start) * col_block_dim * r + (k - start) * col_block_dim + c; @@ -9958,41 +9958,41 @@ template void host_coosort_by_column(rocsparse_int M, std::vector& coo_col_ind, std::vector& coo_val); -#define INSTANTIATE1(TYPE) \ - template void host_bsr_to_csr(rocsparse_direction direction, \ - rocsparse_int mb, \ - rocsparse_int nb, \ - rocsparse_int nnzb, \ - const std::vector& bsr_val, \ - const std::vector& bsr_row_ptr, \ - const std::vector& bsr_col_ind, \ - rocsparse_int block_dim, \ - rocsparse_index_base bsr_base, \ - std::vector& csr_val, \ - std::vector& csr_row_ptr, \ - std::vector& csr_col_ind, \ - rocsparse_index_base csr_base); \ - template void host_csr_to_bsr(rocsparse_direction direction, \ - rocsparse_int m, \ - rocsparse_int n, \ - rocsparse_int nnz, \ - const std::vector& csr_val, \ - const std::vector& csr_row_ptr, \ - const std::vector& csr_col_ind, \ - rocsparse_int block_dim, \ - rocsparse_index_base csr_base, \ - std::vector& bsr_val, \ - std::vector& bsr_row_ptr, \ - std::vector& bsr_col_ind, \ - rocsparse_index_base bsr_base); \ - template void host_bsrpad_value(rocsparse_int m, \ - rocsparse_int mb, \ - rocsparse_int nnzb, \ - rocsparse_int block_dim, \ - TYPE value, \ - TYPE * bsr_val, \ - const rocsparse_int* bsr_row_ptr, \ - const rocsparse_int* bsr_col_ind, \ 
+#define INSTANTIATE1(TYPE) \ + template void host_bsr_to_csr(rocsparse_direction direction, \ + rocsparse_int mb, \ + rocsparse_int nb, \ + rocsparse_int nnzb, \ + const std::vector& bsr_val, \ + const std::vector& bsr_row_ptr, \ + const std::vector& bsr_col_ind, \ + rocsparse_int block_dim, \ + rocsparse_index_base bsr_base, \ + std::vector& csr_val, \ + std::vector& csr_row_ptr, \ + std::vector& csr_col_ind, \ + rocsparse_index_base csr_base); \ + template void host_csr_to_bsr(rocsparse_direction direction, \ + rocsparse_int m, \ + rocsparse_int n, \ + rocsparse_int nnz, \ + const std::vector& csr_val, \ + const std::vector& csr_row_ptr, \ + const std::vector& csr_col_ind, \ + rocsparse_int block_dim, \ + rocsparse_index_base csr_base, \ + std::vector& bsr_val, \ + std::vector& bsr_row_ptr, \ + std::vector& bsr_col_ind, \ + rocsparse_index_base bsr_base); \ + template void host_bsrpad_value(rocsparse_int m, \ + rocsparse_int mb, \ + rocsparse_int nnzb, \ + rocsparse_int block_dim, \ + TYPE value, \ + TYPE * bsr_val, \ + const rocsparse_int* bsr_row_ptr, \ + const rocsparse_int* bsr_col_ind, \ rocsparse_index_base bsr_base); #define INSTANTIATE2(ITYPE, TTYPE) \ @@ -10397,7 +10397,6 @@ template void host_coosort_by_column(rocsparse_int M, rocsparse_spmv_alg algo, \ bool force_conj) - #define INSTANTIATE6(ITYPE, ATYPE, XTYPE, YTYPE, TTYPE) \ template void host_coomv(rocsparse_operation trans, \ ITYPE M, \ @@ -10486,14 +10485,12 @@ INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, double); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_float_complex); INSTANTIATE4(rocsparse_direction_column, int64_t, int64_t, rocsparse_double_complex); - - INSTANTIATE6(int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int64_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE6(int32_t, int8_t, int8_t, float, float); INSTANTIATE6(int64_t, int8_t, int8_t, float, float); -INSTANTIATE6(int32_t, float, rocsparse_float_complex, rocsparse_float_complex, 
rocsparse_float_complex); - +INSTANTIATE6( + int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); INSTANTIATE_IJAXYT(int32_t, int32_t, int8_t, int8_t, int32_t, int32_t); INSTANTIATE_IJAXYT(int64_t, int32_t, int8_t, int8_t, int32_t, int32_t); @@ -10605,22 +10602,13 @@ INSTANTIATE_IJAXYT(int64_t, rocsparse_double_complex, rocsparse_double_complex); -INSTANTIATE_IAXYT(int32_t, int8_t, int8_t, int32_t, int32_t); -INSTANTIATE_IAXYT(int64_t, int8_t, int8_t, int32_t, int32_t); -INSTANTIATE_IAXYT(int32_t, int8_t, int8_t, float, float); -INSTANTIATE_IAXYT(int64_t, int8_t, int8_t, float, float); -INSTANTIATE_IAXYT(int32_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); -INSTANTIATE_IAXYT(int32_t, float, double, double, double); -INSTANTIATE_IAXYT(int64_t, float, double, double, double); -INSTANTIATE_IAXYT(int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); - - -INSTANTIATE6(int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); -INSTANTIATE6(int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); - - -INSTANTIATE6(int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE6( + int64_t, float, rocsparse_float_complex, rocsparse_float_complex, rocsparse_float_complex); +INSTANTIATE6( + int32_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); +INSTANTIATE6( + int64_t, double, rocsparse_double_complex, rocsparse_double_complex, rocsparse_double_complex); INSTANTIATE6(int32_t, float, float, float, float); INSTANTIATE6(int64_t, float, float, float, float); @@ -10647,38 +10635,16 @@ INSTANTIATE6(int64_t, rocsparse_double_complex, rocsparse_double_complex); -INSTANTIATE_IAXYT(int32_t, - rocsparse_float_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); 
-INSTANTIATE_IAXYT(int64_t, - rocsparse_float_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE_IAXYT(int32_t, float, float, float, float); -INSTANTIATE_IAXYT(int64_t, float, float, float, float); -INSTANTIATE_IAXYT(int32_t, double, double, double, double); -INSTANTIATE_IAXYT(int64_t, double, double, double, double); -INSTANTIATE_IAXYT(int32_t, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE_IAXYT(int64_t, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex, - rocsparse_float_complex); -INSTANTIATE_IAXYT(int32_t, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); -INSTANTIATE_IAXYT(int64_t, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex, - rocsparse_double_complex); +INSTANTIATE6(int32_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE6(int64_t, + rocsparse_float_complex, + rocsparse_double_complex, + rocsparse_double_complex, + rocsparse_double_complex); +INSTANTIATE6(int32_t, float, double, double, double); +INSTANTIATE6(int64_t, float, double, double, double); diff --git a/clients/tests/test_spmv_bsr.yaml b/clients/tests/test_spmv_bsr.yaml index b189e7f7..79389c37 100644 --- a/clients/tests/test_spmv_bsr.yaml +++ b/clients/tests/test_spmv_bsr.yaml @@ -1,5 +1,5 @@ # ######################################################################## -# Copyright (C) 2022 Advanced Micro Devices, Inc. All rights Reserved. +# Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -214,6 +214,19 @@ Tests: # mixed precision # +- name: spmv_bsr + category: quick + function: spmv_bsr + indextype: *i32_i64 + precision: *int8_int8_int32_int32_precision + M: [34, 104, 343, 5196] + N: [57, 109, 458, 3425] + block_dim: [12] + alpha_beta: *alpha_beta_range_checkin + baseA: [rocsparse_index_base_zero] + matrix: [rocsparse_matrix_random] + + - name: spmv_bsr category: quick function: spmv_bsr @@ -226,6 +239,7 @@ Tests: baseA: [rocsparse_index_base_zero] matrix: [rocsparse_matrix_random] + - name: spmv_bsr category: pre_checkin function: spmv_bsr @@ -238,18 +252,6 @@ Tests: baseA: [rocsparse_index_base_one] matrix: [rocsparse_matrix_random] -- name: spmv_bsr - category: quick - function: spmv_bsr - indextype: *i32_i64 - precision: *int8_int8_int32_int32_axyt_precision - M: [34, 104, 343, 5196] - N: [57, 109, 458, 3425] - block_dim: [12] - alpha_beta: *alpha_beta_range_checkin - baseA: [rocsparse_index_base_zero] - matrix: [rocsparse_matrix_random] - - name: spmv_bsr category: pre_checkin function: spmv_bsr diff --git a/clients/tests/test_spmv_csr.yaml b/clients/tests/test_spmv_csr.yaml index c24f6af2..2b411412 100644 --- a/clients/tests/test_spmv_csr.yaml +++ b/clients/tests/test_spmv_csr.yaml @@ -444,7 +444,7 @@ Tests: category: quick function: spmv_csr indextype: *i32i32_i64i32_i64i64 - precision: *int8_int8_int32_int32_axyt_precision + precision: *int8_int8_int32_int32_precision M: 1 N: 1 alpha_beta: *alpha_beta_range_quick diff --git a/library/src/level2/rocsparse_bsrmv.cpp b/library/src/level2/rocsparse_bsrmv.cpp index f9391c63..f3ff13a3 100644 --- a/library/src/level2/rocsparse_bsrmv.cpp +++ b/library/src/level2/rocsparse_bsrmv.cpp @@ -1,6 +1,6 @@ /*! 
\file */ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -783,7 +783,6 @@ INSTANTIATE_MIXED(rocsparse_double_complex, rocsparse_double_complex); #undef INSTANTIATE_MIXED ->>>>>>> bb80b364 (Adding spmv mixed precisions (#528)) /* * =========================================================================== From 53eb6e6c8b19a5453ffd2fe109cb95724981fda6 Mon Sep 17 00:00:00 2001 From: Yvan Mokwinski Date: Thu, 25 May 2023 15:37:43 -0600 Subject: [PATCH 3/5] spmv_ex not deprecated. --- library/include/rocsparse-functions.h | 30 +++++++++++++-------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/library/include/rocsparse-functions.h b/library/include/rocsparse-functions.h index 8c08d42b..ee5dcbe8 100644 --- a/library/include/rocsparse-functions.h +++ b/library/include/rocsparse-functions.h @@ -1,5 +1,5 @@ /* ************************************************************************ -* Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. +* Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21112,21 +21112,19 @@ rocsparse_status rocsparse_spsm(rocsparse_handle handle, * \endcode */ /**@{*/ -__attribute__((deprecated("This function is deprecated and will be removed in a future release. 
" - "Use rocsparse_spmm_ex instead."))) ROCSPARSE_EXPORT rocsparse_status - rocsparse_spmm_ex(rocsparse_handle handle, - rocsparse_operation trans_A, - rocsparse_operation trans_B, - const void* alpha, - const rocsparse_spmat_descr mat_A, - const rocsparse_dnmat_descr mat_B, - const void* beta, - const rocsparse_dnmat_descr mat_C, - rocsparse_datatype compute_type, - rocsparse_spmm_alg alg, - rocsparse_spmm_stage stage, - size_t* buffer_size, - void* temp_buffer); +ROCSPARSE_EXPORT rocsparse_status rocsparse_spmm_ex(rocsparse_handle handle, + rocsparse_operation trans_A, + rocsparse_operation trans_B, + const void* alpha, + const rocsparse_spmat_descr mat_A, + const rocsparse_dnmat_descr mat_B, + const void* beta, + const rocsparse_dnmat_descr mat_C, + rocsparse_datatype compute_type, + rocsparse_spmm_alg alg, + rocsparse_spmm_stage stage, + size_t* buffer_size, + void* temp_buffer); ROCSPARSE_EXPORT rocsparse_status rocsparse_spmm(rocsparse_handle handle, rocsparse_operation trans_A, From eacfa534117bb1ccca697833f7d79ee225189436 Mon Sep 17 00:00:00 2001 From: Yvan Mokwinski Date: Tue, 30 May 2023 08:43:26 -0600 Subject: [PATCH 4/5] format --- clients/testings/testing_bsr2csr.cpp | 4 ++-- clients/testings/testing_prune_csr2csr.cpp | 4 ++-- clients/testings/testing_prune_csr2csr_by_percentage.cpp | 4 ++-- library/src/conversion/rocsparse_coo2dense.cpp | 3 +-- library/src/include/common.h | 6 +++--- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/clients/testings/testing_bsr2csr.cpp b/clients/testings/testing_bsr2csr.cpp index e0cc926f..5922203a 100644 --- a/clients/testings/testing_bsr2csr.cpp +++ b/clients/testings/testing_bsr2csr.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -68,7 +68,7 @@ void testing_bsr2csr_bad_arg(const Arguments& arg) // Check block_dim == 0 block_dim = 0; EXPECT_ROCSPARSE_STATUS(rocsparse_bsr2csr(PARAMS), rocsparse_status_invalid_size); - + #undef PARAMS } diff --git a/clients/testings/testing_prune_csr2csr.cpp b/clients/testings/testing_prune_csr2csr.cpp index 24b821bf..0cc74f04 100644 --- a/clients/testings/testing_prune_csr2csr.cpp +++ b/clients/testings/testing_prune_csr2csr.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -128,7 +128,7 @@ void testing_prune_csr2csr(const Arguments& arg) device_vector d_csr_row_ptr_A(M + 1); device_vector d_csr_col_ind_A(nnz_A); device_vector d_csr_val_A(nnz_A); - device_scalar d_threshold(h_threshold); + device_scalar d_threshold(h_threshold); // Copy data from CPU to device CHECK_HIP_ERROR(hipMemcpy( diff --git a/clients/testings/testing_prune_csr2csr_by_percentage.cpp b/clients/testings/testing_prune_csr2csr_by_percentage.cpp index e92619ee..886b0a36 100644 --- a/clients/testings/testing_prune_csr2csr_by_percentage.cpp +++ b/clients/testings/testing_prune_csr2csr_by_percentage.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -94,7 +94,7 @@ void testing_prune_csr2csr_by_percentage_bad_arg(const Arguments& arg) rocsparse_status_not_implemented); EXPECT_ROCSPARSE_STATUS(rocsparse_prune_csr2csr_by_percentage(PARAMS), rocsparse_status_not_implemented); - + CHECK_ROCSPARSE_ERROR( rocsparse_set_mat_storage_mode(csr_descr_A, rocsparse_storage_mode_sorted)); CHECK_ROCSPARSE_ERROR( diff --git a/library/src/conversion/rocsparse_coo2dense.cpp b/library/src/conversion/rocsparse_coo2dense.cpp index fff9b945..39636999 100644 --- a/library/src/conversion/rocsparse_coo2dense.cpp +++ b/library/src/conversion/rocsparse_coo2dense.cpp @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ -* Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights Reserved. +* Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,6 @@ #include "rocsparse_coo2dense.hpp" - #include "common.h" #include "coo2dense_device.h" diff --git a/library/src/include/common.h b/library/src/include/common.h index 7573f148..74c4a3a2 100644 --- a/library/src/include/common.h +++ b/library/src/include/common.h @@ -1,6 +1,6 @@ /*! \file */ /* ************************************************************************ - * Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights Reserved. + * Copyright (C) 2018-2023 Advanced Micro Devices, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -961,7 +961,7 @@ __launch_bounds__(BLOCKSIZE) ROCSPARSE_KERNEL template __launch_bounds__(BLOCKSIZE) ROCSPARSE_KERNEL -void memset2d_kernel(I m, I n, T value, T* __restrict__ data, I ld, rocsparse_order order) + void memset2d_kernel(I m, I n, T value, T* __restrict__ data, I ld, rocsparse_order order) { I gid = hipBlockIdx_x * BLOCKSIZE + hipThreadIdx_x; @@ -974,4 +974,4 @@ void memset2d_kernel(I m, I n, T value, T* __restrict__ data, I ld, rocsparse_or I lid = (order == rocsparse_order_column) ? gid % m : gid % n; data[lid + ld * wid] = value; -} \ No newline at end of file +} From 742957a764728f3ea9c3d2fd3b3412d718660bd1 Mon Sep 17 00:00:00 2001 From: Yvan Mokwinski Date: Tue, 30 May 2023 10:32:27 -0600 Subject: [PATCH 5/5] putting back deprecation --- library/include/rocsparse-functions.h | 28 ++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/library/include/rocsparse-functions.h b/library/include/rocsparse-functions.h index ee5dcbe8..0e060aec 100644 --- a/library/include/rocsparse-functions.h +++ b/library/include/rocsparse-functions.h @@ -21112,19 +21112,21 @@ rocsparse_status rocsparse_spsm(rocsparse_handle handle, * \endcode */ /**@{*/ -ROCSPARSE_EXPORT rocsparse_status rocsparse_spmm_ex(rocsparse_handle handle, - rocsparse_operation trans_A, - rocsparse_operation trans_B, - const void* alpha, - const rocsparse_spmat_descr mat_A, - const rocsparse_dnmat_descr mat_B, - const void* beta, - const rocsparse_dnmat_descr mat_C, - rocsparse_datatype compute_type, - rocsparse_spmm_alg alg, - rocsparse_spmm_stage stage, - size_t* buffer_size, - void* temp_buffer); +__attribute__((deprecated("This function is deprecated and will be removed in a future release. 
" + "Use rocsparse_spmm_ex instead."))) ROCSPARSE_EXPORT rocsparse_status + rocsparse_spmm_ex(rocsparse_handle handle, + rocsparse_operation trans_A, + rocsparse_operation trans_B, + const void* alpha, + const rocsparse_spmat_descr mat_A, + const rocsparse_dnmat_descr mat_B, + const void* beta, + const rocsparse_dnmat_descr mat_C, + rocsparse_datatype compute_type, + rocsparse_spmm_alg alg, + rocsparse_spmm_stage stage, + size_t* buffer_size, + void* temp_buffer); ROCSPARSE_EXPORT rocsparse_status rocsparse_spmm(rocsparse_handle handle, rocsparse_operation trans_A,