Skip to content

Commit

Permalink
more blas
Browse files Browse the repository at this point in the history
  • Loading branch information
mikea committed May 15, 2024
1 parent 4a73b73 commit 697783f
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 12 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ project(niko VERSION 0.1 DESCRIPTION "niko programming language" LANGUAGES C)
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)

find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

add_compile_options(
-DLAPACK_ILP64
-mavx2
-std=gnu2x
-mmemset-strategy=vector_loop:-1:noalign
Expand All @@ -30,7 +32,7 @@ add_custom_command(
add_executable(niko
src/main.c src/words.c src/print.c src/blas.c
${CMAKE_CURRENT_BINARY_DIR}/lexer.c)
target_link_libraries(niko m blas)
target_link_libraries(niko m blas lapack)


execute_process(
Expand Down
3 changes: 3 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ clean:
valgrind:
valgrind --leak-check=full --track-origins=yes --show-reachable=yes bin/niko -t test_suite -v

# Runs bin/niko on a single expression (EXPR, passed via -e) under valgrind with
# full leak checking and origin tracking. Depends on the `build` recipe.
valgrind-expr EXPR="10000000 zeros": build
valgrind --leak-check=full --track-origins=yes --show-reachable=yes bin/niko -e "{{EXPR}}"

callgrind EXPR="10000000 zeros": release
rm -f callgrind.out.* cachegrind.out.*
#
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Niko is a stack-oriented array programming language.
It is inspired primarily by Forth and Lisp, and only then by modern array languages.

# Building and Running

Dependencies: `gcc`, `glibc`, `re2c`, `openblas` (or another package that provides BLAS and LAPACK).

## Syntax

A Niko program consists of a sequence of words separated by whitespace.
Expand Down
11 changes: 11 additions & 0 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
|`ones`|`( s -> x )`|creates array with shape `s` filled with ones
|`index`|`( s -> x )`|creates array with shape `s` filled with values from `0` to length-1
|`reshape`|`( x s -> y)`|reshapes `x` according to the shape `s` repeating or truncating if necessary
|`pascal`|`( n -> x )`|creates an `n*n` Pascal matrix


## Array Introspection
Expand Down Expand Up @@ -65,3 +66,13 @@
### `.`

`x ->`

## BLAS/LAPACK

### Low-level words

|Word|Signature|Description|
|---|---|---|
|`blas_gemm`|`( x y -> x * y )`|matrix multiplication
|`lapack_getrf`|`( x -> x ipiv info )`|LU factorization of a matrix (`LAPACKE_dgetrf`); `info` is the LAPACK return status
|`lapack_getri`|`( x ipiv -> x )`|matrix inversion (`LAPACKE_dgetri`) from the factors and pivots produced by `lapack_getrf`
7 changes: 7 additions & 0 deletions src/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@ INLINE array_t* array_new_slice(array_t* x, size_t n, shape_t s, const void* p)
memcpy(y + 1, s.d, dims_sizeof(s.r));
return y;
}
// Builds a new array through array_new() from the type, element count, shape and
// data of `a`. `a` itself is not modified.
// NOTE(review): presumably array_new copies the data it is given — confirm.
INLINE array_t* array_new_copy(const array_t* a) {
  const shape_t shape = array_shape(a);
  return array_new(a->t, a->n, shape, array_data(a));
}
// Returns an array the caller can modify in place.
// When the reference count of `a` is exactly 1 the same pointer is handed back;
// otherwise a duplicate is made with array_new_copy() and the caller's reference
// to `a` is dropped with array_dec_ref().
INLINE array_t* array_cow(array_t* a) {
  if (a->rc != 1) {
    array_t* duplicate = array_new_copy(a);
    array_dec_ref(a);
    return duplicate;
  }
  return a;
}

INLINE bool array_is_scalar(const array_t* a) { return a->r == 0; }
INLINE array_t* array_new_scalar(type_t t, const void* x) { return array_new(t, 1, shape_scalar(), x); }
Expand Down
41 changes: 41 additions & 0 deletions src/blas.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "niko.h"

#include <cblas.h>
#include <lapacke.h>

DEF_WORD("blas_gemm", blas_gemm) {
STATUS_CHECK(stack_len(stack) > 1, "stack underflow: 2 values expected");
Expand Down Expand Up @@ -28,4 +29,44 @@ DEF_WORD("blas_gemm", blas_gemm) {

stack_push(stack, z);
STATUS_OK;
}

// The lapack words below hand the storage of T_I64 arrays (the pivot vector) directly
// to LAPACKE where it expects lapack_int buffers, so both types must have the same size.
static_assert(sizeof(lapack_int) == sizeof(i64));

// lapack_getrf ( x -> x ipiv info )
//
// Factorizes the rank-2 f64 matrix on top of the stack with LAPACKE_dgetrf
// (row-major layout). Pushes, in this order: the matrix overwritten with its
// factors, the i64 pivot vector, and LAPACKE's return status as an i64 scalar.
// A non-zero status is not converted into an error; it is left on the stack for
// the caller to inspect.
DEF_WORD("lapack_getrf", lapack_getrf) {
  STATUS_CHECK(stack_len(stack) > 0, "stack underflow: 1 value expected");

  // dgetrf overwrites its input, so obtain an array that is safe to modify.
  // NOTE(review): if `borrow` carries no cleanup, x is not released when one of
  // the checks below fails — confirm against the own/borrow definitions.
  borrow(array_t) x = array_cow(stack_pop(stack));
  STATUS_CHECK(x->r == 2, "rank 2 expected");
  STATUS_CHECK(x->t == T_F64, "(todo) f64 expected");

  dim_t m = array_dims(x)[0];
  dim_t n = array_dims(x)[1];

  // LAPACK defines ipiv as min(m, n) entries. Allocating exactly that many avoids
  // handing back trailing elements that dgetrf never writes when m < n.
  dim_t k = m < n ? m : n;
  borrow(array_t) ipiv = array_alloc(T_I64, k, shape_1d(&k));

  // Row-major storage: the leading dimension is the number of columns.
  t_i64 status = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, m, n, array_mut_data(x), n, array_mut_data(ipiv));

  stack_push(stack, x);
  stack_push(stack, ipiv);
  stack_push(stack, array_new_scalar_t_i64(status));
  STATUS_OK;
}


// lapack_getri ( x ipiv -> x )
//
// Inverts a square f64 matrix with LAPACKE_dgetri (row-major layout). `x` must hold
// the factors and `ipiv` the i64 pivot vector produced by lapack_getrf. The matrix
// is overwritten with its inverse and pushed back.
//
// Fails with an error status when the stack holds fewer than two values, when the
// operands have the wrong type/rank/size, when a pivot index is outside 1..n, or
// when LAPACKE_dgetri reports a failure (e.g. a singular matrix).
DEF_WORD("lapack_getri", lapack_getri) {
  STATUS_CHECK(stack_len(stack) > 1, "stack underflow: 2 values expected");

  own(array_t) ipiv = stack_pop(stack);
  STATUS_CHECK(ipiv->t == T_I64, "i64 expected");

  // dgetri overwrites its input, so obtain an array that is safe to modify.
  // NOTE(review): if `borrow` carries no cleanup, x is not released when one of
  // the checks below fails — confirm against the own/borrow definitions.
  borrow(array_t) x = array_cow(stack_pop(stack));
  STATUS_CHECK(x->r == 2, "rank 2 expected");
  STATUS_CHECK(x->t == T_F64, "(todo) f64 expected");

  dim_t m = array_dims(x)[0];
  dim_t n = array_dims(x)[1];

  STATUS_CHECK(m == n, "square matrix expected");
  STATUS_CHECK(n == ipiv->n, "n elements expected");

  // ipiv comes from the user's stack. LAPACK uses the entries as 1-based indices
  // for swaps inside the matrix, so reject anything outside 1..n before calling it.
  const t_i64* pivots = array_data(ipiv);
  DO(i, n) { STATUS_CHECK(pivots[i] >= 1 && pivots[i] <= (t_i64)n, "pivot index out of range"); }

  t_i64 status = LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, array_mut_data(x), n, array_data(ipiv));
  // A positive status means a zero on the diagonal of U: the matrix has no inverse.
  STATUS_CHECK(status <= 0, "matrix is singular");
  // A negative status means an invalid argument or an internal allocation failure.
  STATUS_CHECK(status == 0, "lapack_getri failed");

  stack_push(stack, x);
  STATUS_OK;
}
4 changes: 2 additions & 2 deletions src/niko.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ INLINE result_t result_err(string_t msg) { return (result_t){.ok = false, .eithe
INLINE PRINTF(1, 2) result_t result_errf(const char* format, ...) {
return result_err(VA_ARGS_FWD(format, string_vnewf(format, args)));
}
#define RESULT_UNWRAP(r) \
#define RESULT_UNWRAP(r) \
({ \
result_t __result = (r); \
if (!__result.ok) return status_err(__result.either.e); \
Expand Down Expand Up @@ -152,7 +152,7 @@ STATUS_T interpreter_dict_entry(interpreter_t* inter, dict_entry_t* e);

#define REGISTER_WORD(w, n) \
STATUS_T w_##n(interpreter_t* inter, stack_t* stack); \
CONSTRUCTOR void w_##n##_register() { \
CONSTRUCTOR void __register_w_##n() { \
entry_vector_add(&global_dict, (dict_entry_t){string_newf(w), array_new_scalar_t_ffi(w_##n)}); \
}

Expand Down
32 changes: 23 additions & 9 deletions src/words.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ STATUS_T w_binop(stack_t* stack, type_t t, binop_t kernel, binop_t x_scalar_kern
GEN_BINOP_SPECIALIZATION(name, t_f64, t_i64, t_f64, op) \
GEN_BINOP_SPECIALIZATION(name, t_f64, t_f64, t_f64, op) \
t_ffi name##_table[T_MAX][T_MAX] = {}; \
CONSTRUCTOR void reg_##name() { \
CONSTRUCTOR void __register_w_##name() { \
name##_table[T_I64][T_I64] = name##_t_i64_t_i64; \
name##_table[T_F64][T_I64] = name##_t_f64_t_i64; \
name##_table[T_I64][T_F64] = name##_t_i64_t_f64; \
Expand Down Expand Up @@ -307,9 +307,7 @@ DEF_WORD("reshape", reshape) {
shape_t s = create_shape(x);
array_t* z = array_alloc(y->t, shape_len(s), s);
size_t ys = type_sizeof(y->t, y->n);
DO(i, type_sizeof(y->t, z->n)) {
((char*)array_mut_data(z))[i] = ((char*)array_data(y))[i % ys];
}
DO(i, type_sizeof(y->t, z->n)) { ((char*)array_mut_data(z))[i] = ((char*)array_data(y))[i % ys]; }
stack_push(stack, z);
STATUS_OK;
}
Expand All @@ -335,7 +333,6 @@ DEF_WORD("\\c", slash_clear) {
STATUS_OK;
}


// fold

DEF_WORD("fold", fold) {
Expand Down Expand Up @@ -384,12 +381,29 @@ DEF_WORD("fold_rank", fold_rank) {
DO(i, x->n / l) {
array_t* y = array_new_slice(x, l, cell, ptr + stride * i);
stack_push(stack, y);
if (i > 0) {
STATUS_UNWRAP(interpreter_dict_entry(inter, e));
}
if (i > 0) STATUS_UNWRAP(interpreter_dict_entry(inter, e));
}

STATUS_OK;
}

DEF_WORD("+'fold", plus_fold) { NOT_IMPLEMENTED; }
DEF_WORD("+'fold", plus_fold) { NOT_IMPLEMENTED; }

// pascal ( n -> x )
//
// Pops a size `n` and pushes the n*n symmetric Pascal matrix as a T_I64 array:
// the first row and first column are ones, and every other cell is the sum of the
// cell to its left and the cell above it (e.g. 4 pascal gives
// [ [ 1 1 1 1 ] [ 1 2 3 4 ] [ 1 3 6 10 ] [ 1 4 10 20 ] ]).
//
// NOTE(review): cell (i, j) equals the binomial coefficient C(i+j, i); assuming
// t_i64 is a signed 64-bit integer, the largest cell stops fitting for n > 34.
DEF_WORD("pascal", pascal) {
  STATUS_CHECK(stack_len(stack) > 0, "stack underflow: 1 value expected");

  own(array_t) x = stack_pop(stack);
  size_t n;
  STATUS_UNWRAP(as_size_t(x, &n));

  dim_t dims[2] = {n, n};
  array_t* y = array_alloc(T_I64, n * n, shape_create(2, dims));
  t_i64* ptr = array_mut_data(y);

  // Border: first row and first column.
  DO(i, n) {
    ptr[i] = 1;
    ptr[i * n] = 1;
  }

  // Interior, row by row: left neighbour + upper neighbour. Starting both
  // indices at 1 skips the border without testing it on every cell.
  for (size_t i = 1; i < n; i++) {
    const t_i64* above = ptr + (i - 1) * n;
    t_i64* row = ptr + i * n;
    for (size_t j = 1; j < n; j++) row[j] = row[j - 1] + above[j];
  }

  stack_push(stack, y);
  STATUS_OK;
}
26 changes: 26 additions & 0 deletions test_suite
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@
[ [ 0 1 2 3 ] [ 4 0 1 2 ] [ 3 4 0 1 ] ]
> 5 index [ 2 2 ] reshape .
[ [ 0 1 ] [ 2 3 ] ]
# pascal
> 4 pascal .
[ [ 1 1 1 1 ] [ 1 2 3 4 ] [ 1 3 6 10 ] [ 1 4 10 20 ] ]
## information about arrays
# shape
> 10 zeros shape .
Expand Down Expand Up @@ -323,3 +326,26 @@ ERROR: stack underflow: 1 value expected
# blas_gemm
> [ 2 4 ] index 1. + [ 4 3 ] index 1. + blas_gemm .
[ [ 70. 80. 90. ] [ 158. 184. 210. ] ]
## lapack
# lapack_getrf
> [ 2 2 ] index 0. + lapack_getrf . . .
0
[ 2 2 ]
[ [ 2. 3. ] [ 0. 1. ] ]
# lapack_getri
# 2x2
> : m_inverse lapack_getrf drop lapack_getri ;
> [ 2 2 ] index 0. + m_inverse .
[ [ -1.5 0.5 ] [ 1. 0. ] ]
> [ 2 2 ] index 0. + dup m_inverse blas_gemm .
[ [ 1. 0. ] [ 0. 1. ] ]
# 3x3
> 3 pascal 0. + m_inverse .
[ [ 3. -3. 1. ] [ -3. 5. -2. ] [ 1. -2. 1. ] ]
> 3 pascal 0. + dup m_inverse blas_gemm .
[ [ 1. 0. 0. ] [ 0. 1. 0. ] [ 0. 0. 1. ] ]
# 4x4
> 4 pascal 0. + m_inverse .
[ [ 4. -6. 4. -1. ] [ -6. 14. -11. 3. ] [ 4. -11. 10. -3. ] [ -1. 3. -3. 1. ] ]
> 4 pascal 0. + dup m_inverse blas_gemm .
[ [ 1. -8.88178419700125e-16 0. 0. ] [ 8.88178419700125e-16 1. 0. -8.88178419700125e-16 ] [ 3.5527136788005e-15 5.32907051820075e-15 0.999999999999998 -1.77635683940025e-15 ] [ 3.5527136788005e-15 1.06581410364015e-14 -1.06581410364015e-14 1. ] ]

0 comments on commit 697783f

Please sign in to comment.