Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add basic JS support by using custom Tensor replacement #62

Merged
merged 13 commits into from
Feb 19, 2024
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
run: |
cd datamancer
nimble -y test
nimble -y testJs

- name: Build docs
if: >
Expand Down
18 changes: 16 additions & 2 deletions README.org
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@

* Datamancer
[[https://github.com/SciNim/datamancer/workflows/datamancer%20CI/badge.svg]]
[[https://matrix.to/#/#nim-science:envs.net][https://img.shields.io/static/v1?message=join%20chat&color=blue&label=nim-science&logo=matrix&logoColor=gold&style=flat-square&.svg]]
[[https://discord.gg/f5hA9UK3dY][https://img.shields.io/discord/371759389889003530?color=blue&label=nim-science&logo=discord&logoColor=gold&style=flat-square&.svg]]

~Datamancer~ is a DataFrame library for Nim, which is heavily inspired
by [[https://dplyr.tidyverse.org/][dplyr]].

** Comparison to other dataframe libraries

Check out the following gist for a comparison of this library with
dplyr (R) and pandas (Python):

https://gist.github.com/Vindaar/6908c038707c7d8293049edb3d204f84


** Documentation

The documentation is found at:
Expand All @@ -34,6 +35,19 @@ nimble install datamancer
#+END_SRC
away.

** Backend targets

The library supports Nim's C/C++ backends as well as the
JavaScript target. The latter was added in ~v0.4.2~ and is still
experimental. Certain features are currently not supported on the JS
backend (reading files from disk, reading CSVs from a URL).

Note also that on Nim versions older than the current devel (as of
<2024-02-19 Mon 14:47>), including the current stable release, the
formula macro ~f{}~ (see below) does not, in certain applications,
resolve types the way it should on the JS backend. You might need to
give explicit type hints in more cases than usual (also see below).

** Features and formulas

The data frame provides the "5 verbs" of [[https://dplyr.tidyverse.org/][dplyr]] and more. Main implemented functions:
Expand Down
5 changes: 4 additions & 1 deletion datamancer.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ srcDir = "src"
# Dependencies

requires "nim >= 1.2.0"
requires "https://github.com/Vindaar/seqmath >= 0.1.11"
requires "https://github.com/Vindaar/seqmath >= 0.2.1"
requires "arraymancer >= 0.7.28"

task test, "Run standard tests":
Expand All @@ -21,6 +21,9 @@ task test, "Run standard tests":
exec "nim c -r tests/testsFormula.nim"
exec "nim c -r tests/testParse.nim"

task testJs, "Run test for JS":
exec "nim js -r tests/testdf_js_simple.nim"

import os, strutils, strformat
const
pkgName = "datamancer"
Expand Down
8 changes: 6 additions & 2 deletions src/datamancer.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
## .. include:: ./docs/datamancer.rst

import datamancer / [dataframe, io]
export dataframe, io
when not defined(js):
import datamancer / [dataframe, io]
export dataframe, io
else:
import datamancer / [dataframe, io]
export dataframe, io
23 changes: 14 additions & 9 deletions src/datamancer/column.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import arraymancer/tensor
when not defined(js):
import arraymancer/tensor
else:
import seq_tensor

import std / [sugar, strformat, tables, macros, strutils]
import value

Expand Down Expand Up @@ -346,14 +350,15 @@ proc toColumn*[C: ColumnLike; T](_: typedesc[C], t: Tensor[T]): C =
doAssert false, "This should not happen!"
#{.error: "Cannot store " & $T & " in a regular `Column`.".}

proc toColumn*[T: not SupportedTypes](t: openArray[T] | Tensor[T]): auto =
## Tries to convert the given input data to a matching generic `Column*`
## type. Errors at CT if there is no matching `Column*` defined so far.
when typeof(t) is Tensor:
let x = t
else:
let x = t.toTensor()
result = colType(T).toColumn(t)
when not defined(js) or (NimMajor, NimMinor, NimPatch) >= (2, 1, 0):
proc toColumn*[T: not SupportedTypes](t: openArray[T] | Tensor[T]): auto =
## Tries to convert the given input data to a matching generic `Column*`
## type. Errors at CT if there is no matching `Column*` defined so far.
when typeof(t) is Tensor:
let x = t
else:
let x = t.toTensor()
result = colType(T).toColumn(t)

proc toColumn*[C: ColumnLike; T](_: typedesc[C], t: openArray[T]): C =
if t.len > 0:
Expand Down
10 changes: 7 additions & 3 deletions src/datamancer/dataframe.nim
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import std / [macros, tables, strutils, options, sets, hashes, math,
sequtils, stats, strformat, algorithm, typetraits]

import arraymancer/tensor
export tensor
when not defined(js):
import arraymancer/tensor
export tensor
else:
import seq_tensor
export seq_tensor

import value
export value
Expand Down Expand Up @@ -784,7 +788,7 @@
doAssert df.len == 3
doAssert df["x", int] == [1, 3, 5].toTensor
doAssert df["y", int] == [2, 5, 10].toTensor
{.warning: "Using `add` to add rows to a DF individually is very slow. Be " &

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / linux (version-2-0)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / linux (version-1-6)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / linux (devel)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / macos (version-2-0)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / windows (version-1-6)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / windows (version-2-0)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / macos (devel)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / windows (devel)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]

Check warning on line 791 in src/datamancer/dataframe.nim

View workflow job for this annotation

GitHub Actions / macos (version-1-6)

Using `add` to add rows to a DF individually is very slow. Be sure to only add very few rows using this proc! [User]
"sure to only add very few rows using this proc!".}
if args.tupleLen != df.ncols and df.ncols != 0:
raise newException(ValueError, "Input tuple `args` length must be equal " &
Expand Down Expand Up @@ -1983,7 +1987,7 @@
for k in getKeys(result):
if result[k].kind != colConstant: # if constant nothing to short
withNativeTensor(result[k], t):
result.asgn(k, toColumn(t[_ ..< result.len]))
result.asgn(k, toColumn(t[0 ..< result.len]))
result[k].len = result.len

proc innerJoin*[C: ColumnLike](dfs: varargs[DataTable[C]], by: string): DataTable[C] =
Expand Down
7 changes: 5 additions & 2 deletions src/datamancer/formulaExp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -709,8 +709,11 @@ proc convertLoop(p: Preface, dtype, fctColResType, loop: NimNode,
fnKind: FormulaKind,
generateLoop: bool): NimNode =
let memCopyable = ["float", "int", "bool"]
let isMemCopyable = dtype.strVal in memCopyable and
p.args.allIt(it.colType.strVal in memCopyable)
when defined(js):
let isMemcopyable = false
else:
let isMemCopyable = dtype.strVal in memCopyable and
p.args.allIt(it.colType.strVal in memCopyable)
proc genForLoop(p: Preface, loop: NimNode, fkKind: FormulaKind): NimNode =
var mpreface = p
let loopIndexed = fixupTensorIndices(loop, mpreface,
Expand Down
Loading
Loading