From 697783f143b0d2ed1d9893a94aad866bb55507e9 Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Wed, 15 May 2024 11:31:24 -0700 Subject: [PATCH] more blas --- CMakeLists.txt | 4 +++- Justfile | 3 +++ README.md | 4 ++++ docs/reference.md | 11 +++++++++++ src/array.h | 7 +++++++ src/blas.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/niko.h | 4 ++-- src/words.c | 32 +++++++++++++++++++++++--------- test_suite | 26 ++++++++++++++++++++++++++ 9 files changed, 120 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e295c8e..8d5174f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,10 @@ project(niko VERSION 0.1 DESCRIPTION "niko programming language" LANGUAGES C) set(CMAKE_COMPILE_WARNING_AS_ERROR ON) find_package(BLAS REQUIRED) +find_package(LAPACK REQUIRED) add_compile_options( + -DLAPACK_ILP64 -mavx2 -std=gnu2x -mmemset-strategy=vector_loop:-1:noalign @@ -30,7 +32,7 @@ add_custom_command( add_executable(niko src/main.c src/words.c src/print.c src/blas.c ${CMAKE_CURRENT_BINARY_DIR}/lexer.c) -target_link_libraries(niko m blas) +target_link_libraries(niko m blas lapack) execute_process( diff --git a/Justfile b/Justfile index 4ad7839..fcc92c9 100644 --- a/Justfile +++ b/Justfile @@ -17,6 +17,9 @@ clean: valgrind: valgrind --leak-check=full --track-origins=yes --show-reachable=yes bin/niko -t test_suite -v +valgrind-expr EXPR="10000000 zeros": build + valgrind --leak-check=full --track-origins=yes --show-reachable=yes bin/niko -e "{{EXPR}}" + callgrind EXPR="10000000 zeros": release rm -f callgrind.out.* cachegrind.out.* # diff --git a/README.md b/README.md index 3cc9a81..4e590e7 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ Niko is a stack-oriented array programming language. It is primarily inspired by Forth, Lisp and only then by modern array languages. +# Building and Running + +Dependencies: `gcc`, `glibc`, `re2c`, `openblas` (or another BLAS/LAPACK package). 
+ ## Syntax Niko program consists of a sequence of words separated by whitespaces. diff --git a/docs/reference.md b/docs/reference.md index 993f516..81397b4 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -26,6 +26,7 @@ |`ones`|`( s -> x )`|creates array with shape `s` filled with ones |`index`|`( s -> x )`|creates array with shape `s` filled values from `0` to the length-1 |`reshape`|`( x s -> y)`|reshapes `x` according to the shape `s` repeating or truncating if necessary +|`pascal`|`( n -> x )`|creates `n*n` Pascal matrix ## Array Introspection @@ -65,3 +66,13 @@ ### `.` `x ->` + +## BLAS/LAPACK + +### Low-level words + +|Word|Signature|Description| +|---|---|---| +|`blas_gemm`|`( x y -> x * y )`|matrix multiplication +|`lapack_getrf`|`( x -> x ipiv info )`|LU factorization with partial pivoting; `ipiv` holds the pivot indices and `info` is the LAPACK status code +|`lapack_getri`|`( x ipiv -> x )`|inverts a square matrix from the LU factors and pivot indices produced by `lapack_getrf` diff --git a/src/array.h b/src/array.h index 7ab778a..96460c1 100644 --- a/src/array.h +++ b/src/array.h @@ -181,6 +181,13 @@ INLINE array_t* array_new_slice(array_t* x, size_t n, shape_t s, const void* p) memcpy(y + 1, s.d, dims_sizeof(s.r)); return y; } +INLINE array_t* array_new_copy(const array_t* a) { return array_new(a->t, a->n, array_shape(a), array_data(a)); } +INLINE array_t* array_cow(array_t* a) { + if (a->rc == 1) return (array_t*)a; + array_t* y = array_new_copy(a); + array_dec_ref(a); + return y; +} INLINE bool array_is_scalar(const array_t* a) { return a->r == 0; } INLINE array_t* array_new_scalar(type_t t, const void* x) { return array_new(t, 1, shape_scalar(), x); } diff --git a/src/blas.c b/src/blas.c index b53b6e0..ada071b 100644 --- a/src/blas.c +++ b/src/blas.c @@ -1,6 +1,7 @@ #include "niko.h" #include +#include DEF_WORD("blas_gemm", blas_gemm) { STATUS_CHECK(stack_len(stack) > 1, "stack underflow: 2 values expected"); @@ -28,4 +29,44 @@ DEF_WORD("blas_gemm", blas_gemm) { stack_push(stack, z); STATUS_OK; +} + +static_assert(sizeof(lapack_int) == sizeof(i64)); + +DEF_WORD("lapack_getrf", lapack_getrf) { + 
borrow(array_t) x = array_cow(stack_pop(stack)); + STATUS_CHECK(x->r == 2, "rank 2 expected"); + STATUS_CHECK(x->t == T_F64, "(todo) f64 expected"); + + dim_t m = array_dims(x)[0]; + dim_t n = array_dims(x)[1]; + + borrow(array_t) ipiv = array_alloc(T_I64, n, shape_1d(&n)); + t_i64 status = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, m, n, array_mut_data(x), n, array_mut_data(ipiv)); + + stack_push(stack, x); + stack_push(stack, ipiv); + stack_push(stack, array_new_scalar_t_i64(status)); + STATUS_OK; +} + + +DEF_WORD("lapack_getri", lapack_getri) { + own(array_t) ipiv = stack_pop(stack); + STATUS_CHECK(ipiv->t == T_I64, "i64 expected"); + + borrow(array_t) x = array_cow(stack_pop(stack)); + STATUS_CHECK(x->r == 2, "rank 2 expected"); + STATUS_CHECK(x->t == T_F64, "(todo) f64 expected"); + + dim_t m = array_dims(x)[0]; + dim_t n = array_dims(x)[1]; + + STATUS_CHECK(m == n, "square matrix expected"); + STATUS_CHECK(n == ipiv->n, "n elements expected"); + + LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, array_mut_data(x), n, array_data(ipiv)); + + stack_push(stack, x); + STATUS_OK; } \ No newline at end of file diff --git a/src/niko.h b/src/niko.h index 22764b7..6b00203 100644 --- a/src/niko.h +++ b/src/niko.h @@ -52,7 +52,7 @@ INLINE result_t result_err(string_t msg) { return (result_t){.ok = false, .eithe INLINE PRINTF(1, 2) result_t result_errf(const char* format, ...) 
{ return result_err(VA_ARGS_FWD(format, string_vnewf(format, args))); } -#define RESULT_UNWRAP(r) \ +#define RESULT_UNWRAP(r) \ ({ \ result_t __result = (r); \ if (!__result.ok) return status_err(__result.either.e); \ @@ -152,7 +152,7 @@ STATUS_T interpreter_dict_entry(interpreter_t* inter, dict_entry_t* e); #define REGISTER_WORD(w, n) \ STATUS_T w_##n(interpreter_t* inter, stack_t* stack); \ - CONSTRUCTOR void w_##n##_register() { \ + CONSTRUCTOR void __register_w_##n() { \ entry_vector_add(&global_dict, (dict_entry_t){string_newf(w), array_new_scalar_t_ffi(w_##n)}); \ } diff --git a/src/words.c b/src/words.c index 9a9c53e..abaa262 100644 --- a/src/words.c +++ b/src/words.c @@ -237,7 +237,7 @@ STATUS_T w_binop(stack_t* stack, type_t t, binop_t kernel, binop_t x_scalar_kern GEN_BINOP_SPECIALIZATION(name, t_f64, t_i64, t_f64, op) \ GEN_BINOP_SPECIALIZATION(name, t_f64, t_f64, t_f64, op) \ t_ffi name##_table[T_MAX][T_MAX] = {}; \ - CONSTRUCTOR void reg_##name() { \ + CONSTRUCTOR void __register_w_##name() { \ name##_table[T_I64][T_I64] = name##_t_i64_t_i64; \ name##_table[T_F64][T_I64] = name##_t_f64_t_i64; \ name##_table[T_I64][T_F64] = name##_t_i64_t_f64; \ @@ -307,9 +307,7 @@ DEF_WORD("reshape", reshape) { shape_t s = create_shape(x); array_t* z = array_alloc(y->t, shape_len(s), s); size_t ys = type_sizeof(y->t, y->n); - DO(i, type_sizeof(y->t, z->n)) { - ((char*)array_mut_data(z))[i] = ((char*)array_data(y))[i % ys]; - } + DO(i, type_sizeof(y->t, z->n)) { ((char*)array_mut_data(z))[i] = ((char*)array_data(y))[i % ys]; } stack_push(stack, z); STATUS_OK; } @@ -335,7 +333,6 @@ DEF_WORD("\\c", slash_clear) { STATUS_OK; } - // fold DEF_WORD("fold", fold) { @@ -384,12 +381,29 @@ DEF_WORD("fold_rank", fold_rank) { DO(i, x->n / l) { array_t* y = array_new_slice(x, l, cell, ptr + stride * i); stack_push(stack, y); - if (i > 0) { - STATUS_UNWRAP(interpreter_dict_entry(inter, e)); - } + if (i > 0) STATUS_UNWRAP(interpreter_dict_entry(inter, e)); } STATUS_OK; } 
-DEF_WORD("+'fold", plus_fold) { NOT_IMPLEMENTED; } \ No newline at end of file +DEF_WORD("+'fold", plus_fold) { NOT_IMPLEMENTED; } + +DEF_WORD("pascal", pascal) { + own(array_t) x = stack_pop(stack); + size_t n; + STATUS_UNWRAP(as_size_t(x, &n)); + + dim_t dims[2] = {n, n}; + array_t* y = array_alloc(T_I64, n * n, shape_create(2, dims)); + t_i64* ptr = array_mut_data(y); + + DO(i, n) { + ptr[i] = 1; + ptr[i * n] = 1; + } + DO(i, n) DO(j, n) if (i > 0 && j > 0) ptr[i * n + j] = ptr[i * n + j - 1] + ptr[(i - 1) * n + j]; + + stack_push(stack, y); + STATUS_OK; +} \ No newline at end of file diff --git a/test_suite b/test_suite index e880936..6c80d02 100644 --- a/test_suite +++ b/test_suite @@ -129,6 +129,9 @@ [ [ 0 1 2 3 ] [ 4 0 1 2 ] [ 3 4 0 1 ] ] > 5 index [ 2 2 ] reshape . [ [ 0 1 ] [ 2 3 ] ] +# pascal +> 4 pascal . +[ [ 1 1 1 1 ] [ 1 2 3 4 ] [ 1 3 6 10 ] [ 1 4 10 20 ] ] ## information about arrays # shape > 10 zeros shape . @@ -323,3 +326,26 @@ ERROR: stack underflow: 1 value expected # blas_gemm > [ 2 4 ] index 1. + [ 4 3 ] index 1. + blas_gemm . [ [ 70. 80. 90. ] [ 158. 184. 210. ] ] +## lapack +# lapack_getrf +> [ 2 2 ] index 0. + lapack_getrf . . . +0 +[ 2 2 ] +[ [ 2. 3. ] [ 0. 1. ] ] +# lapack_getri +# 2x2 +> : m_inverse lapack_getrf drop lapack_getri ; +> [ 2 2 ] index 0. + m_inverse . +[ [ -1.5 0.5 ] [ 1. 0. ] ] +> [ 2 2 ] index 0. + dup m_inverse blas_gemm . +[ [ 1. 0. ] [ 0. 1. ] ] +# 3x3 +> 3 pascal 0. + m_inverse . +[ [ 3. -3. 1. ] [ -3. 5. -2. ] [ 1. -2. 1. ] ] +> 3 pascal 0. + dup m_inverse blas_gemm . +[ [ 1. 0. 0. ] [ 0. 1. 0. ] [ 0. 0. 1. ] ] +# 4x4 +> 4 pascal 0. + m_inverse . +[ [ 4. -6. 4. -1. ] [ -6. 14. -11. 3. ] [ 4. -11. 10. -3. ] [ -1. 3. -3. 1. ] ] +> 4 pascal 0. + dup m_inverse blas_gemm . +[ [ 1. -8.88178419700125e-16 0. 0. ] [ 8.88178419700125e-16 1. 0. -8.88178419700125e-16 ] [ 3.5527136788005e-15 5.32907051820075e-15 0.999999999999998 -1.77635683940025e-15 ] [ 3.5527136788005e-15 1.06581410364015e-14 -1.06581410364015e-14 1. ] ]