diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
index d28c8fd48c8..c5b451f7b40 100644
--- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
+++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
@@ -100,9 +100,9 @@ int main(int argc, char *argv[])
     b->copy_from(host_x.get());
 
     // Calculate initial residual by overwriting b
-    auto initres = 0.0;
+    auto initres = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&initres);
+    b->compute_norm2(lend(initres));
 
     // copy b again
     b->copy_from(host_x.get());
@@ -144,15 +144,15 @@ int main(int argc, char *argv[])
     time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     auto impl_res = gko::as(logger->get_implicit_sq_resnorm());
 
     std::cout << "Initial residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, initres);
+    write(std::cout, lend(initres));
     std::cout << "Final residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
     std::cout << "Implicit residual norm squared (r^2):\n";
     gko::write(std::cout, lend(impl_res));
diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp
index 033bf72b0be..89cf8dd984d 100644
--- a/examples/cb-gmres/cb-gmres.cpp
+++ b/examples/cb-gmres/cb-gmres.cpp
@@ -138,8 +138,8 @@ int main(int argc, char *argv[])
         b_host->at(i, 0) =
             ValueType{1} / std::sqrt(static_cast<ValueType>(A_size[0]));
     }
-    auto b_norm = 0.0;
-    b_host->compute_norm2(&b_norm);
+    auto b_norm = gko::initialize({0.0}, exec->get_master());
+    b_host->compute_norm2(lend(b_norm));
     auto b = clone(exec, lend(b_host));
 
     // As an initial guess, use the right-hand side
@@ -195,21 +195,21 @@ int main(int argc, char *argv[])
     // To measure if your solution has actually converged, the error of the
     // solution is measured.
     // To compute the residual, the (advanced) apply method is used.
-    auto res_norm_keep = 0.0;
-    auto res_norm_reduce = 0.0;
+    auto res_norm_keep = gko::initialize({0.0}, exec->get_master());
+    auto res_norm_reduce = gko::initialize({0.0}, exec->get_master());
     auto tmp = gko::clone(gko::lend(b));
 
     // tmp = Ax - tmp
     A->apply(1.0, lend(x_keep), -1.0, lend(tmp));
-    tmp->compute_norm2(&res_norm_keep);
+    tmp->compute_norm2(lend(res_norm_keep));
 
     std::cout << "\nResidual norm without compression:\n";
-    gko::write(std::cout, res_norm_keep);
+    gko::write(std::cout, lend(res_norm_keep));
 
     tmp->copy_from(lend(b));
     A->apply(1.0, lend(x_reduce), -1.0, lend(tmp));
-    tmp->compute_norm2(&res_norm_reduce);
+    tmp->compute_norm2(lend(res_norm_reduce));
 
     std::cout << "\nResidual norm with compression:\n";
-    gko::write(std::cout, res_norm_reduce);
+    write(std::cout, lend(res_norm_reduce));
 }
diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp
index d03fe20c2a5..77b1d930afa 100644
--- a/examples/custom-logger/custom-logger.cpp
+++ b/examples/custom-logger/custom-logger.cpp
@@ -71,11 +71,13 @@ gko::remove_complex<ValueType> compute_norm(
     // Get the executor of the vector
     auto exec = b->get_executor();
     // Initialize a result scalar containing the value 0.0.
-    auto b_norm = gko::remove_complex<ValueType>{0.0};
+    auto b_norm =
+        gko::initialize<gko::matrix::Dense<gko::remove_complex<ValueType>>>(
+            {0.0}, exec);
     // Use the dense `compute_norm2` function to compute the norm.
-    b->compute_norm2(&b_norm);
+    b->compute_norm2(gko::lend(b_norm));
     // Use the other utility function to return the norm contained in `b_norm`
-    return b_norm;
+    return get_first_element(gko::lend(b_norm));
 }
 
 // Custom logger class which intercepts the residual norm scalar and solution
@@ -345,10 +347,10 @@ int main(int argc, char *argv[])
     // call the apply method, which in this case is an spmv and equivalent to
     // the LAPACK z_spmv routine. Finally, you compute the euclidean 2-norm with
     // the compute_norm2 function.
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, gko::lend(x), -1.0, gko::lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
index f73a67b2ff4..f209d3611b9 100644
--- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
+++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
@@ -127,12 +127,12 @@ void run_solver(volatile bool *stop_iteration_process,
     write(std::cout, lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r): \n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
 
 
diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
index 44f22ef45dd..559a95d9a7b 100644
--- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
+++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
@@ -134,10 +134,10 @@ int main(int argc, char *argv[])
     write(std::cout, gko::lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, gko::lend(x), -1.0, gko::lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
index 1a233ce1331..458d16adf2b 100644
--- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
+++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
@@ -185,14 +185,14 @@ int main(int argc, char *argv[])
     write(std::cout, gko::lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, gko::lend(x), -1.0, gko::lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "GMRES iteration count: " << logger->get_num_iterations()
               << "\n";
     std::cout << "GMRES execution time [ms]: "
               << static_cast<double>(time.count()) / 100000000.0 << "\n";
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp
index 2b5a808339a..b26045c0d74 100644
--- a/examples/iterative-refinement/iterative-refinement.cpp
+++ b/examples/iterative-refinement/iterative-refinement.cpp
@@ -101,9 +101,9 @@ int main(int argc, char *argv[])
     b->copy_from(host_x.get());
 
     // Calculate initial residual by overwriting b
-    auto initres = 0.0;
+    auto initres = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&initres);
+    b->compute_norm2(lend(initres));
 
     // copy b again
     b->copy_from(host_x.get());
@@ -146,14 +146,14 @@ int main(int argc, char *argv[])
     time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Initial residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, initres);
+    write(std::cout, lend(initres));
     std::cout << "Final residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 
     // Print solver statistics
     std::cout << "IR iteration count: " << logger->get_num_iterations()
diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp
index 2fc24885537..58d6e6f8ad5 100644
--- a/examples/mixed-precision-ir/mixed-precision-ir.cpp
+++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp
@@ -108,9 +108,9 @@ int main(int argc, char *argv[])
     b->copy_from(host_x.get());
 
     // Calculate initial residual by overwriting b
-    auto initres = 0.0;
+    auto initres = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&initres);
+    b->compute_norm2(lend(initres));
 
     // Build lower-precision system matrix and residual
     auto solver_A = solver_mtx::create(exec);
@@ -137,7 +137,7 @@ int main(int argc, char *argv[])
     // Solve system
     exec->synchronize();
     std::chrono::nanoseconds time(0);
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     auto inner_solution = solver_vec::create(exec);
     auto outer_delta = vec::create(exec);
     auto tic = std::chrono::steady_clock::now();
@@ -147,10 +147,11 @@ int main(int argc, char *argv[])
 
         // convert residual to inner precision
        outer_residual->convert_to(lend(inner_residual));
-        outer_residual->compute_norm2(&res);
+        outer_residual->compute_norm2(lend(res));
 
        // break if we exceed the number of iterations or have converged
-        if (iter > max_outer_iters || res / initres < outer_reduction_factor) {
+        if (iter > max_outer_iters ||
+            res->at(0) / initres->at(0) < outer_reduction_factor) {
             break;
         }
 
@@ -176,12 +177,12 @@ int main(int argc, char *argv[])
 
     // Calculate residual
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Initial residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, initres);
+    write(std::cout, lend(initres));
     std::cout << "Final residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 
     // Print solver statistics
     std::cout << "MPIR iteration count: " << iter << std::endl;
diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp
index c6a9edb4270..f709dd2a757 100644
--- a/examples/mixed-spmv/mixed-spmv.cpp
+++ b/examples/mixed-spmv/mixed-spmv.cpp
@@ -244,41 +244,42 @@ int main(int argc, char *argv[])
     // method, which in this case is an axpy and equivalent to the LAPACK axpy
     // routine. Finally, you compute the euclidean 2-norm with the compute_norm2
     // function.
-    auto hp_x_norm = 0.0;
-    auto lp_diff_norm = 0.0;
-    auto hplp_diff_norm = 0.0;
-    auto lplp_diff_norm = 0.0;
-    auto lphp_diff_norm = 0.0;
+    auto hp_x_norm = gko::initialize({0.0}, exec->get_master());
+    auto lp_diff_norm = gko::initialize({0.0}, exec->get_master());
+    auto hplp_diff_norm = gko::initialize({0.0}, exec->get_master());
+    auto lplp_diff_norm = gko::initialize({0.0}, exec->get_master());
+    auto lphp_diff_norm = gko::initialize({0.0}, exec->get_master());
     auto lp_diff = hp_x->clone();
     auto hplp_diff = hp_x->clone();
     auto lplp_diff = hp_x->clone();
     auto lphp_diff = hp_x->clone();
 
-    hp_x->compute_norm2(&hp_x_norm);
+    hp_x->compute_norm2(lend(hp_x_norm));
     lp_diff->add_scaled(-1.0, lend(lp_x));
-    lp_diff->compute_norm2(&lp_diff_norm);
+    lp_diff->compute_norm2(lend(lp_diff_norm));
     hplp_diff->add_scaled(-1.0, lend(hplp_x));
-    hplp_diff->compute_norm2(&hplp_diff_norm);
+    hplp_diff->compute_norm2(lend(hplp_diff_norm));
     lplp_diff->add_scaled(-1.0, lend(lplp_x));
-    lplp_diff->compute_norm2(&lplp_diff_norm);
+    lplp_diff->compute_norm2(lend(lplp_diff_norm));
     lphp_diff->add_scaled(-1.0, lend(lphp_x));
-    lphp_diff->compute_norm2(&lphp_diff_norm);
+    lphp_diff->compute_norm2(lend(lphp_diff_norm));
 
     exec->synchronize();
     std::cout.precision(10);
     std::cout << std::scientific;
     std::cout << "High Precision time(s): " << hp_sec << std::endl;
-    std::cout << "High Precision result norm: " << hp_x_norm << std::endl;
+    std::cout << "High Precision result norm: " << hp_x_norm->get_values()[0]
+              << std::endl;
     std::cout << "Low Precision time(s): " << lp_sec << std::endl;
-    std::cout << "Low Precision relative error: " << lp_diff_norm / hp_x_norm
+    std::cout << "Low Precision relative error: "
+              << lp_diff_norm->get_values()[0] / hp_x_norm->get_values()[0]
               << "\n";
     std::cout << "Hp * Lp -> Hp time(s): " << hplp_sec << std::endl;
-    std::cout << "Hp * Lp -> Hp relative error: " << hplp_diff_norm / hp_x_norm
+    std::cout << "Hp * Lp -> Hp relative error: "
+              << hplp_diff_norm->get_values()[0] / hp_x_norm->get_values()[0]
               << "\n";
     std::cout << "Lp * Lp -> Hp time(s): " << lplp_sec << std::endl;
-    std::cout << "Lp * Lp -> Hp relative error: " << lplp_diff_norm / hp_x_norm
-              << "\n";
-    std::cout << "Lp * Hp -> Hp time(s): " << lplp_sec << std::endl;
-    std::cout << "Lp * Hp -> Hp relative error: " << lphp_diff_norm / hp_x_norm
+    std::cout << "Lp * Lp -> Hp relative error: "
+              << lplp_diff_norm->get_values()[0] / hp_x_norm->get_values()[0]
               << "\n";
 }
diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp
index eb45b26adab..c05747d21e3 100644
--- a/examples/papi-logging/papi-logging.cpp
+++ b/examples/papi-logging/papi-logging.cpp
@@ -213,10 +213,10 @@ int main(int argc, char *argv[])
     write(std::cout, lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r): \n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp
index 9961e418ec8..214977c864a 100644
--- a/examples/preconditioned-solver/preconditioned-solver.cpp
+++ b/examples/preconditioned-solver/preconditioned-solver.cpp
@@ -116,10 +116,10 @@ int main(int argc, char *argv[])
     write(std::cout, lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp
index 35cf5cb46b6..3541f70b280 100644
--- a/examples/simple-solver-logging/simple-solver-logging.cpp
+++ b/examples/simple-solver-logging/simple-solver-logging.cpp
@@ -187,10 +187,10 @@ int main(int argc, char *argv[])
     write(std::cout, lend(x));
 
     // Calculate residual
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp
index 72ee9eab9a6..d925a18d049 100644
--- a/examples/simple-solver/simple-solver.cpp
+++ b/examples/simple-solver/simple-solver.cpp
@@ -161,10 +161,10 @@ int main(int argc, char *argv[])
     // which in this case is an spmv and equivalent to the LAPACK z_spmv
     // routine. Finally, you compute the euclidean 2-norm with the compute_norm2
     // function.
-    auto res = 0.0;
+    auto res = gko::initialize({0.0}, exec->get_master());
     A->apply(1.0, lend(x), -1.0, lend(b));
-    b->compute_norm2(&res);
+    b->compute_norm2(lend(res));
 
     std::cout << "Residual norm sqrt(r^T r):\n";
-    gko::write(std::cout, res);
+    write(std::cout, lend(res));
 }
diff --git a/include/ginkgo/core/base/mtx_io.hpp b/include/ginkgo/core/base/mtx_io.hpp
index d9a47771f85..545a312bf01 100644
--- a/include/ginkgo/core/base/mtx_io.hpp
+++ b/include/ginkgo/core/base/mtx_io.hpp
@@ -141,26 +141,6 @@ inline void write(StreamType &&os, MatrixType *matrix,
     write_raw(os, data, layout);
 }
 
-/**
- * Helper to write a single value in the same format as @ref write
- *
- * @note Cannot use ADL to find this overload.
- *
- * @tparam ValueType  a scalar type.
- * @tparam StreamType  type of stream used to write the data to
- *
- * @param os  output stream where the data is to be written
- * @param val  the value to write
- * @param layout  the layout used in the output
- */
-template <typename ValueType, typename StreamType>
-inline void write(StreamType &&os, ValueType val,
-                  layout_type layout = layout_type::array)
-{
-    matrix_data<ValueType> data{{val}};
-    write_raw(os, data, layout);
-}
-
 
 
 }  // namespace gko
diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp
index ba64969e900..bf28d51e9f7 100644
--- a/include/ginkgo/core/matrix/dense.hpp
+++ b/include/ginkgo/core/matrix/dense.hpp
@@ -682,20 +682,6 @@ class Dense
                                make_temporary_output_clone(exec, result).get());
     }
 
-    /**
-     * Overload for scalar result, see @ref compute_dot
-     * Assumes that the result is placed in host memory
-     */
-    void compute_dot(const LinOp *b, ValueType *result) const
-    {
-        GKO_ASSERT_EQUAL_COLS(b, dim<2>(1, 1));
-        auto exec = this->get_executor();
-        auto dense_result = Dense::create(
-            exec->get_master(), dim<2>{1, 1},
-            Array<ValueType>::view(exec->get_master(), 1, result), 1);
-        this->compute_dot(b, dense_result.get());
-    }
-
     /**
      * Computes the column-wise dot product of `conj(this matrix)` and `b`.
     *
@@ -712,21 +698,6 @@ class Dense
                                make_temporary_output_clone(exec, result).get());
     }
 
-    /**
-     * Overload for scalar result, see @ref compute_conj_dot
-     * Assumes that the result is placed in host memory
-     */
-    void compute_conj_dot(const LinOp *b, ValueType *result) const
-    {
-        GKO_ASSERT_EQUAL_COLS(b, dim<2>(1, 1));
-        auto exec = this->get_executor();
-        auto dense_result = Dense::create(
-            exec->get_master(), dim<2>{1, 1},
-            Array<ValueType>::view(exec->get_master(), 1, result), 1);
-        this->compute_conj_dot(b, dense_result.get());
-    }
-
-
     /**
      * Computes the column-wise Euclidian (L^2) norm of this matrix.
     *
@@ -741,20 +712,6 @@ class Dense
                                make_temporary_output_clone(exec, result).get());
     }
 
-    /**
-     * Overload for scalar result, see @ref compute_norm2
-     * Assumes that the result is placed in host memory
-     */
-    void compute_norm2(ValueType *result) const
-    {
-        GKO_ASSERT_EQUAL_COLS(this, dim<2>(1, 1));
-        auto exec = this->get_executor();
-        auto dense_result = Dense::create(
-            exec->get_master(), dim<2>{1, 1},
-            Array<ValueType>::view(exec->get_master(), 1, result), 1);
-        this->compute_norm2(dense_result.get());
-    }
-
     /**
     * Create a submatrix from the original matrix.
     * Warning: defining stride for this create_submatrix method might cause
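Below is a minimal, self-contained sketch (not part of the patch) of the usage pattern the hunks above converge on: the norm is computed into a 1x1 gko::matrix::Dense held on the host executor, printed through the generic matrix-market writer, and read back as a plain value only where a scalar is genuinely needed. The `vec` alias and the 3x1 test vector are illustrative assumptions, not code taken from the Ginkgo examples.

#include <ginkgo/ginkgo.hpp>

#include <iostream>

int main()
{
    // Assumed alias for this sketch; the examples use similar aliases.
    using vec = gko::matrix::Dense<double>;

    // Keep everything on the host for simplicity.
    auto exec = gko::ReferenceExecutor::create();

    // A small column vector standing in for a residual / right-hand side.
    auto b = gko::initialize<vec>({1.0, -2.0, 2.0}, exec);

    // The norm result is itself a 1x1 Dense on the host, replacing the
    // former `auto res = 0.0;` scalar used before this patch.
    auto res = gko::initialize<vec>({0.0}, exec->get_master());
    b->compute_norm2(gko::lend(res));

    // Printing goes through the generic matrix writer; the scalar overload
    // of gko::write removed from mtx_io.hpp is no longer needed.
    std::cout << "norm2(b):\n";
    gko::write(std::cout, gko::lend(res));

    // When a plain value is required (e.g. for a convergence check), read it
    // out of the Dense result, as the mixed-precision-ir hunk does.
    double norm_value = res->at(0, 0);
    std::cout << "as a scalar: " << norm_value << "\n";
}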