RNN classification example #19

Closed
mazko opened this issue Mar 5, 2019 · 6 comments

mazko commented Mar 5, 2019

When classifying a sequence, we would like the network to have a single output instead of a sequence of outputs. According to 01user.md, kad_avg was mentioned as the way to classify a sequence. I tried this on MNIST. It works, but I am not sure how to train such a network. During training we don't even know the output values other than the last one. In the line memcpy(&y[k][b * d->n_out], d->y[s], d->n_out * sizeof(float)); every y in the sequence of outputs gets the same value d->y[s], which looks strange.

#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "kann_extra/kann_data.h"
#include "kann.h"

typedef struct {
  int n_in, n_out, ulen, n;
  float **x, **y;
} train_data;

static void train(kann_t *ann, train_data *d, float lr, int mini_size, int max_epoch, const char *fn, int n_threads)
{
  float **x, **y, *r, best_cost = 1e30f;
  int epoch, j, n_var, *shuf;
  kann_t *ua;

  n_var = kann_size_var(ann);
  r = (float*)calloc(n_var, sizeof(float));
  x = (float**)malloc(d->ulen * sizeof(float*));
  y = (float**)malloc(d->ulen * sizeof(float*));
  for (j = 0; j < d->ulen; ++j) {
    x[j] = (float*)calloc(mini_size * d->n_in, sizeof(float));
    y[j] = (float*)calloc(mini_size * d->n_out, sizeof(float));
  }
  shuf = (int*)calloc(d->n, sizeof(int));

  ua = kann_unroll(ann, d->ulen);
  kann_set_batch_size(ua, mini_size);
  kann_mt(ua, n_threads, mini_size);
  kann_feed_bind(ua, KANN_F_IN,    0, x);
  kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
  kann_switch(ua, 1);
  for (epoch = 0; epoch < max_epoch; ++epoch) {
    kann_shuffle(d->n, shuf);
    double cost = 0.0;
    int tot = 0, tot_base = 0, n_cerr = 0;
    for (j = 0; j < d->n - mini_size; j += mini_size) {
      int b, k;
      for (k = 0; k < d->ulen; ++k) {
        for (b = 0; b < mini_size; ++b) {
          int s = shuf[j + b];
          memcpy(&x[k][b * d->n_in], &d->x[s][k * d->n_in], d->n_in * sizeof(float));
          memcpy(&y[k][b * d->n_out], d->y[s], d->n_out * sizeof(float));
        }
      }
      cost += kann_cost(ua, 0, 1) * d->ulen * mini_size;
      n_cerr += kann_class_error(ua, &k);
      tot_base += k;
      //kad_check_grad(ua->n, ua->v, ua->n-1);
      kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
      tot += d->ulen * mini_size;
    }
    if (cost < best_cost) {
      best_cost = cost;
      if (fn) kann_save(fn, ann);
    }
    fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0f * n_cerr / tot_base);
  }

  kann_delete_unrolled(ua);

  for (j = 0; j < d->ulen; ++j) {
    free(y[j]); free(x[j]);
  }
  free(y); free(x); free(r); free(shuf);
}

static train_data* create_train_data(kann_t *ann, kann_data_t *x, kann_data_t *y)
{
  train_data *d;
  d = (train_data*)malloc(sizeof(*d));
  assert(d);
  assert(x->n_row == y->n_row);
  d->x = x->x;
  d->y = y->x;
  d->ulen = 28; // 28x28
  d->n = x->n_row;
  d->n_in = kann_dim_in(ann);
  d->n_out = kann_dim_out(ann);
  return d;
}

int main(int argc, char *argv[])
{
  kann_t *ann;
  kann_data_t *x, *y;
  char *fn_in = 0, *fn_out = 0;
  int c, i, mini_size = 64, max_epoch = 50, seed = 84, n_h_layers = 1, n_h_neurons = 64, norm = 1, n_threads = 1;
  float lr = 0.001f, dropout = 0.2f;

  while ((c = getopt(argc, argv, "i:o:m:l:n:d:s:t:N")) >= 0) {
    if (c == 'i') fn_in = optarg;
    else if (c == 'o') fn_out = optarg;
    else if (c == 'm') max_epoch = atoi(optarg);
    else if (c == 'l') n_h_layers = atoi(optarg);
    else if (c == 'n') n_h_neurons = atoi(optarg);
    else if (c == 'd') dropout = atof(optarg);
    else if (c == 's') seed = atoi(optarg);
    else if (c == 't') n_threads = atoi(optarg);
    else if (c == 'N') norm = 0;
  }

  if (argc - optind == 0 || (argc - optind == 1 && fn_in == 0)) {
    FILE *fp = stdout;
    fprintf(fp, "Usage: mnist-cnn [-i model] [-o model] [-t nThreads] <x.knd> [y.knd]\n");
    return 1;
  }

  kad_trap_fe();
  kann_srand(seed);
  if (fn_in) {
    ann = kann_load(fn_in);
  } else {
    kad_node_t *t;
    int rnn_flag = KANN_RNN_VAR_H0;
    if (norm) rnn_flag |= KANN_RNN_NORM;
    t = kann_layer_input(28); // 28x28
    for (i = 0; i < n_h_layers; ++i) {
      t = kann_layer_gru(t, n_h_neurons, rnn_flag);
      t = kann_layer_dropout(t, dropout);
    }
    t = kad_avg(1, &t);
    ann = kann_new(kann_layer_cost(t, 10, KANN_C_CEB), 0);
  }

  x = kann_data_read(argv[optind]);
  assert(x->n_col == 28 * 28);
  y = argc - optind >= 2? kann_data_read(argv[optind+1]) : 0;

  if (y) { // training
    assert(y->n_col == 10);
    if (n_threads > 1) kann_mt(ann, n_threads, mini_size);
    train_data *d;
    d = create_train_data(ann, x, y);
    train(ann, d, lr, mini_size, max_epoch, fn_out, n_threads);
    free(d);
    kann_data_free(y);
  } else { // applying
    int i, j, k, n_out;
    kann_switch(ann, 0);
    n_out = kann_dim_out(ann);
    assert(n_out == 10);
    for (i = 0; i < x->n_row; ++i) {
      const float *y;
      kann_rnn_start(ann);
      for(k = 0; k < 28; ++k) {
        float x1[28];
        memcpy(x1, &x->x[i][k * 28], sizeof(x1));
        y = kann_apply1(ann, x1);
      }
      if (x->rname) printf("%s\t", x->rname[i]);
      for (j = 0; j < n_out; ++j) {
        if (j) putchar('\t');
        printf("%.3g", y[j] + 1.0f - 1.0f);
      }
      putchar('\n');
      kann_rnn_end(ann);
    }
  }

  kann_data_free(x);
  kann_delete(ann);
  return 0;
}

It would be great to see a simple RNN classification example.

attractivechaos (Owner) commented:

If I am right, you can replace kad_avg(1, &t) with kad_select(1, &t, -1), which selects the last hidden vector. I haven't touched kann for a while, so I could be wrong...

    kad_node_t *t;
    int rnn_flag = KANN_RNN_VAR_H0;
    if (norm) rnn_flag |= KANN_RNN_NORM;
    t = kann_layer_input(28); // 28x28
    for (i = 0; i < n_h_layers; ++i) {
      t = kann_layer_gru(t, n_h_neurons, rnn_flag);
      t = kann_layer_dropout(t, dropout);
    }
    // replace "t = kad_avg(1, &t);" with:
    t = kad_select(1, &t, -1);
    ann = kann_new(kann_layer_cost(t, 10, KANN_C_CEB), 0);

attractivechaos (Owner) commented Mar 5, 2019

I am not sure how to train such a network.

When you use kad_avg() or kad_select(), there will be a single output of shape (batch, 10) in the case of MNIST, independent of the unrolled length ulen. The following part needs to be changed to:

  n_var = kann_size_var(ann);
  r = (float*)calloc(n_var, sizeof(float));
  x = (float**)malloc(d->ulen * sizeof(float*));
  y = (float**)malloc(1 * sizeof(float*));
  for (j = 0; j < d->ulen; ++j)
    x[j] = (float*)calloc(mini_size * d->n_in, sizeof(float));
  y[0] = (float*)calloc(mini_size * d->n_out, sizeof(float));
  shuf = (int*)calloc(d->n, sizeof(int));
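
The mini-batch filling loop then copies the label only once per sample, into y[0]. A rough sketch, reusing the variable names and buffers from the code above (b, k, s, shuf and d are as in the original train function):

  for (b = 0; b < mini_size; ++b) {
    int s = shuf[j + b];
    for (k = 0; k < d->ulen; ++k) // one 28-float image row per time step
      memcpy(&x[k][b * d->n_in], &d->x[s][k * d->n_in], d->n_in * sizeof(float));
    memcpy(&y[0][b * d->n_out], d->y[s], d->n_out * sizeof(float)); // the label goes into y[0] only
  }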

every y in the sequence of outputs gets the same value d->y[s]

KANN only uses the first truth buffer, y[0], so your training is still successful.

It would be great to see a simple RNN classification example.

Yes, indeed. I should add such an example at some point...

mazko (Author) commented Mar 10, 2019

Thank you for clarifying. I changed the train function a little bit according to your suggestions.

static void train(kann_t *ann, train_data *d, float lr, int mini_size, int max_epoch, const char *fn, int n_threads)
{
  float **x, **y, *r, best_cost = 1e30f;
  int epoch, j, n_var, *shuf;
  kann_t *ua;

  n_var = kann_size_var(ann);
  r = (float*)calloc(n_var, sizeof(float));
  x = (float**)malloc(d->ulen * sizeof(float*));
  y = (float**)malloc(1 * sizeof(float*));
  for (j = 0; j < d->ulen; ++j) {
    x[j] = (float*)calloc(mini_size * d->n_in, sizeof(float));
  }
  y[0] = (float*)calloc(mini_size * d->n_out, sizeof(float));
  shuf = (int*)calloc(d->n, sizeof(int));

  ua = kann_unroll(ann, d->ulen);
  kann_set_batch_size(ua, mini_size);
  kann_mt(ua, n_threads, mini_size);
  kann_feed_bind(ua, KANN_F_IN,    0, x);
  kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
  kann_switch(ua, 1);
  for (epoch = 0; epoch < max_epoch; ++epoch) {
    kann_shuffle(d->n, shuf);
    double cost = 0.0;
    int tot = 0, tot_base = 0, n_cerr = 0;
    for (j = 0; j < d->n - mini_size; j += mini_size) {
      int b, k;
      for (b = 0; b < mini_size; ++b) {
        int s = shuf[j + b];
        for (k = 0; k < d->ulen; ++k) {
          memcpy(&x[k][b * d->n_in], &d->x[s][k * d->n_in], d->n_in * sizeof(float));
        }
        memcpy(&y[0][b * d->n_out], d->y[s], d->n_out * sizeof(float));
      }
      cost += kann_cost(ua, 0, 1) * d->ulen * mini_size;
      n_cerr += kann_class_error(ua, &k);
      tot_base += k;
      //kad_check_grad(ua->n, ua->v, ua->n-1);
      kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
      tot += d->ulen * mini_size;
    }
    if (cost < best_cost) {
      best_cost = cost;
      if (fn) kann_save(fn, ann);
    }
    fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0f * n_cerr / tot_base);
  }

  kann_delete_unrolled(ua);

  for (j = 0; j < d->ulen; ++j) {
    free(x[j]);
  }
  free(y[0]); free(y); free(x); free(r); free(shuf);
}

Both previous and current implementations produce exactly the same mnist-rnn.kan file.

I also tried kad_select instead of kad_avg, and kad_select seems to learn faster on MNIST.

~$ ./mnist-rnn -o mnist-rnn.kan kann-data/mnist-train-?.knd
~$ ./mnist-rnn -i mnist-rnn.kan kann-data/mnist-test-x.knd | kann-data/mnist-eval.pl
Error rate: 2.45% # kad_avg(1, &t);
Error rate: 1.42% # kad_select(1, &t, -1);

mazko closed this as completed Mar 10, 2019

lh3 (Collaborator) commented Mar 10, 2019

Thanks a lot for the confirmation. Do you mind if I modify your code a little bit and add it to the "examples/" directory? I will say something like "Code was written by Oleg Mazko (@mazko) and modified by Attractive Chaos". Is that ok?

mazko (Author) commented Mar 10, 2019

Thanks a lot for the confirmation. Do you mind if I modify your code a little bit and add it to the "examples/" directory? I will say something like "Code was written by Oleg Mazko (@mazko) and modified by Attractive Chaos". Is that ok?

Of course it's ok.

There is also a project https://github.com/42io/c_keyword_spotting which is based on kann. I am planning to add an RNN there too in a couple of weeks or so.

shipleyxie commented:

Have you finished this goal of applying GRU in KANN? @mazko
