
Some new extensions

mingodad committed 6 years ago
parent
commit
bdcd4db94f
6 changed files with 5130 additions and 0 deletions
  1. SquiLu-ext/nn.c (+1052 -0)
  2. SquiLu-ext/nn.h (+160 -0)
  3. SquiLu-ext/sq_ipc.cpp (+522 -0)
  4. SquiLu-ext/sq_lpsolve.cpp (+557 -0)
  5. SquiLu-ext/sq_nn.cpp (+1474 -0)
  6. SquiLu-ext/sq_subprocess.cpp (+1365 -0)

+ 1052 - 0
SquiLu-ext/nn.c

@@ -0,0 +1,1052 @@
+/* RPROP Neural Networks implementation
+ * See: http://deeplearning.cs.cmu.edu/pdfs/Rprop.pdf
+ *
+ * Copyright (c) 2003-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Disque nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <string.h>
+
+#include "nn.h"
+
+#if defined(USE_AVX512)
+#define USING_SIMD
+#include <immintrin.h>
+
+typedef __m512 simdf_t;
+#define  SIMDF_SIZE 16
+
+#define simdf_zero() _mm512_setzero_ps()
+#define simdf_set1f(x) _mm512_set1_ps(x)
+#define simdf_loadu(x) _mm512_loadu_ps(x)
+#define simdf_load(x) _mm512_load_ps(x)
+#define simdf_mul(a,b) _mm512_mul_ps(a,b)
+#define simdf_add(a,b) _mm512_add_ps(a,b)
+#define simdf_storeu(a,b) _mm512_storeu_ps(a,b)
+#define simdf_store(a,b) _mm512_store_ps(a,b)
+
+//let the compiler optimize this
+#define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + \
+                                x[8] + x[9] + x[10] + x[11] + x[12] + x[13] + x[14] + x[15])
+
+#define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", \
+                                __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], \
+                                x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]);
+#endif
+
+#if defined(USE_AVX)
+#define USING_SIMD
+#include <immintrin.h>
+
+typedef __m256 simdf_t;
+#define  SIMDF_SIZE 8
+
+#define simdf_zero() _mm256_setzero_ps()
+#define simdf_set1f(x) _mm256_set1_ps(x)
+#define simdf_loadu(x) _mm256_loadu_ps(x)
+#define simdf_load(x) _mm256_load_ps(x)
+#define simdf_mul(a,b) _mm256_mul_ps(a,b)
+#define simdf_add(a,b) _mm256_add_ps(a,b)
+#define simdf_storeu(a,b) _mm256_storeu_ps(a,b)
+#define simdf_store(a,b) _mm256_store_ps(a,b)
+
+//let the compiler optimize this
+#define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7])
+
+#define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f\n", \
+                                __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7]);
+#endif
+
+#if defined(USE_SSE)
+#define USING_SIMD
+#include <xmmintrin.h>
+#include <pmmintrin.h>
+
+typedef __m128 simdf_t;
+#define  SIMDF_SIZE 4
+
+#define simdf_zero() _mm_setzero_ps()
+#define simdf_set1f(x) _mm_set1_ps(x)
+#define simdf_loadu(x) _mm_loadu_ps(x)
+#define simdf_load(x) _mm_load_ps(x)
+#define simdf_mul(a,b) _mm_mul_ps(a,b)
+#define simdf_add(a,b) _mm_add_ps(a,b)
+#define simdf_storeu(a,b) _mm_storeu_ps(a,b)
+#define simdf_store(a,b) _mm_store_ps(a,b)
+
+//let the compiler optimize this
+#define simdf_sum(x) (x[0] + x[1] + x[2] + x[3])
+
+#define simdf_show(x) printf("%d : %f, %f, %f, %f\n", __LINE__, x[0], x[1], x[2], x[3]);
+#endif
+
+#if defined(USE_NEON)
+#define USING_SIMD
+#include <arm_neon.h>
+
+typedef float32x4_t simdf_t;
+#define  SIMDF_SIZE 4
+
+#define simdf_zero() vdupq_n_f32(0.0f)
+#define simdf_set1f(x) vdupq_n_f32(x)
+#define simdf_loadu(x) vld1q_f32(x)
+#define simdf_load(x) vld1q_f32(x)
+#define simdf_mul(a,b) vmulq_f32(a,b)
+#define simdf_add(a,b) vaddq_f32(a,b)
+#define simdf_storeu(a,b) vst1q_f32((float32_t*)a,b)
+#define simdf_store(a,b) vst1q_f32((float32_t*)a,b)
+
+//let the compiler optimize this
+#define simdf_sum(x) (x[0] + x[1] + x[2] + x[3])
+
+#define simdf_show(x) printf("%d : %f, %f, %f, %f\n", __LINE__, x[0], x[1], x[2], x[3]);
+#endif
+
+#ifndef SIMDF_SIZE
+#define SIMDF_SIZE 1
+#endif // SIMDF_SIZE
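Each SIMD path above is selected purely by a compile-time define; this commit ships no makefile, so presumably the AVX path is built with something like "cc -O2 -DUSE_AVX -mavx -c nn.c" (a guessed build line, not taken from the commit). Without any USE_* define the code falls back to the scalar tail loops and SIMDF_SIZE stays 1.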
+
+#define ANN_SIZEOF_ann_float_t sizeof(ann_float_t)
+#define ANN_ALIGN_BASE (SIMDF_SIZE * ANN_SIZEOF_ann_float_t)
+#define ANN_ALIGN_ROUND(x) ((x%ANN_ALIGN_BASE) ? (((x/ANN_ALIGN_BASE)+1)*ANN_ALIGN_BASE) : (size_t)x)
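A worked example of the rounding: under USE_SSE, SIMDF_SIZE is 4 and ann_float_t is a 4-byte float, so ANN_ALIGN_BASE is 16 bytes; a layer of 10 units requests 40 bytes, which ANN_ALIGN_ROUND rounds up to 48, so units_aligned becomes 12 in AnnInitLayer below.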
+
+#ifndef HAS_ANN_MALLOC
+#define ann_malloc(x) malloc(x)
+#define ann_free(x) free(x)
+#else
+extern void *ann_malloc(size_t sz);
+extern void ann_free(void *ptr);
+#endif
+/*
+void *nnpmalloc(int line, size_t sz) {
+	printf("%d : %zu : %zu\n", line, sz, ANN_ALIGN_ROUND(sz));
+	return malloc(sz);
+}
+#define ann_malloc(x) nnpmalloc(__LINE__, x)
+*/
+
+/* Node Transfer Function */
+ann_float_t AnnTransferFunctionSigmoid(ann_float_t x) {
+    //if(x < -15) return 0;
+    //else if(x > 15) return 1;
+    return ((ann_float_t)1.0)/(1.0+exp(-x));
+}
+
+ann_float_t AnnTransferFunctionRelu(ann_float_t x) {
+    return (x > 0.0) ? x : 0.0;
+}
+
+ann_float_t AnnTransferFunctionTanh(ann_float_t x) {
+    return tanh(x);
+}
+
+/*
+ann_float_t AnnDerivativeIdentity(ann_float_t x) {
+    return 1;
+}
+*/
+
+ann_float_t AnnDerivativeSigmoid(ann_float_t x) {
+    return x*(1-x);
+}
+
+ann_float_t AnnDerivativeTanh(ann_float_t x) {
+    return (1-x)*(1+x);
+}
+
+ann_float_t AnnDerivativeRelu(ann_float_t x) {
+    return (x > 0) ? 1 : 0;
+}
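Note that the derivative helpers take the unit's stored output o rather than the pre-activation x: sigmoid'(x) = sigmoid(x)*(1 - sigmoid(x)) = o*(1 - o), and tanh'(x) = 1 - tanh(x)^2 = (1 - o)*(1 + o). For the relu, o = max(x, 0) shares the sign of x, so testing the output works there too. This lets the backpropagation pass below reuse layer->output directly instead of recomputing the activations.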
+
+/* Reset layer data to zero-units */
+void AnnResetLayer(AnnLayer *layer) {
+    layer->units = 0;
+    layer->units_aligned = 0;
+    layer->output = NULL;
+    layer->error = NULL;
+    layer->weight = NULL;
+    layer->gradient = NULL;
+    layer->pgradient = NULL;
+    layer->delta = NULL;
+    layer->sgradient = NULL;
+}
+
+/* Allocate and return an initialized N-layers network */
+AnnRprop *AnnAlloc(int layers) {
+    AnnRprop *net;
+    int i;
+
+    /* Alloc the net structure */
+    if ((net = ann_malloc(sizeof(*net))) == NULL)
+        return NULL;
+    /* Alloc layers */
+    if ((net->layer = ann_malloc(sizeof(AnnLayer)*layers)) == NULL) {
+        ann_free(net);
+        return NULL;
+    }
+    net->layers = layers;
+    net->flags = 0;
+    net->rprop_nminus = ANN_DEFAULT_RPROP_NMINUS;
+    net->rprop_nplus = ANN_DEFAULT_RPROP_NPLUS;
+    net->rprop_maxupdate = ANN_DEFAULT_RPROP_MAXUPDATE;
+    net->rprop_minupdate = ANN_DEFAULT_RPROP_MINUPDATE;
+    net->node_transf_func = AnnTransferFunctionSigmoid;
+    net->derivative_func = AnnDerivativeSigmoid;
+    /* Init layers */
+    for (i = 0; i < layers; i++)
+        AnnResetLayer(&net->layer[i]);
+    return net;
+}
+
+/* Free a single layer */
+void AnnFreeLayer(AnnLayer *layer)
+{
+    ann_free(layer->output);
+    ann_free(layer->error);
+    ann_free(layer->weight);
+    ann_free(layer->gradient);
+    ann_free(layer->pgradient);
+    ann_free(layer->delta);
+    ann_free(layer->sgradient);
+    AnnResetLayer(layer);
+}
+
+/* Free the target net */
+void AnnFree(AnnRprop *net)
+{
+    int i;
+
+    /* Free layer data */
+    for (i = 0; i < net->layers; i++) AnnFreeLayer(&net->layer[i]);
+    /* Free allocated layers structures */
+    ann_free(net->layer);
+    /* And the main structure itself */
+    ann_free(net);
+}
+
+/* Init a layer of the net with the specified number of units.
+ * Return non-zero on out of memory. */
+int AnnInitLayer(AnnRprop *net, int i, int units, int bias) {
+    if (bias) units++; /* Account for the bias unit */
+    size_t ann_float_t_units = ANN_ALIGN_ROUND(units*ANN_SIZEOF_ann_float_t);
+    size_t units_aligned = ann_float_t_units/ANN_SIZEOF_ann_float_t;
+    size_t ann_float_t_units_units = 0;
+    AnnLayer *layer = &ANN_LAYER(net, i);
+    layer->units = units;
+    layer->units_aligned = units_aligned;
+    layer->output = ann_malloc(ann_float_t_units);
+    layer->error = ann_malloc(ann_float_t_units);
+    if (i) { /* not for output layer */
+        ann_float_t_units_units = ann_float_t_units*ANN_LAYER(net, i-1).units;
+        layer->weight = ann_malloc(ann_float_t_units_units);
+        layer->gradient = ann_malloc(ann_float_t_units_units);
+        layer->pgradient = ann_malloc(ann_float_t_units_units);
+        layer->delta = ann_malloc(ann_float_t_units_units);
+        layer->sgradient = ann_malloc(ann_float_t_units_units);
+    }
+    /* Check for out of memory conditions */
+    if (layer->output == NULL ||
+        layer->error == NULL ||
+        (i && layer->weight == NULL) ||
+        (i && layer->gradient == NULL) ||
+        (i && layer->pgradient == NULL) ||
+        (i && layer->sgradient == NULL) ||
+        (i && layer->delta == NULL))
+    {
+        AnnFreeLayer(layer);
+        AnnResetLayer(layer);
+        return 1;
+    }
+    /* Set all the values to zero */
+    memset(layer->output, 0, ann_float_t_units);
+    memset(layer->error, 0, ann_float_t_units);
+    if (i) {
+        memset(layer->weight, 0, ann_float_t_units_units);
+        memset(layer->gradient, 0, ann_float_t_units_units);
+        memset(layer->pgradient, 0, ann_float_t_units_units);
+        memset(layer->delta, 0, ann_float_t_units_units);
+        memset(layer->sgradient, 0, ann_float_t_units_units);
+    }
+    /* Set the bias unit output to 1 */
+    if (bias) layer->output[units-1] = 1;
+    return 0;
+}
+
+/* Clone a network. On out of memory NULL is returned. */
+AnnRprop *AnnClone(const AnnRprop* net) {
+    AnnRprop* copy;
+    int j;
+
+    if ((copy = AnnAlloc(ANN_LAYERS(net))) == NULL) return NULL;
+    for (j = 0; j < ANN_LAYERS(net); j++) {
+        AnnLayer *ldst;
+        const AnnLayer *lsrc;
+        int units = ANN_UNITS(net,j);
+        int bias = j > 0;
+        if (AnnInitLayer(copy, j, units-bias, bias)) {
+            AnnFree(copy);
+            return NULL;
+        }
+        int ann_float_t_units = units*ANN_SIZEOF_ann_float_t;
+        lsrc = &net->layer[j];
+        ldst = &copy->layer[j];
+        if (lsrc->output)
+            memcpy(ldst->output, lsrc->output, ann_float_t_units);
+        if (lsrc->error)
+            memcpy(ldst->error, lsrc->error, ann_float_t_units);
+        if (j) {
+            int weights = ANN_WEIGHTS(net,j);
+            ann_float_t_units = weights*ANN_SIZEOF_ann_float_t;
+            if (lsrc->weight)
+                memcpy(ldst->weight, lsrc->weight, ann_float_t_units);
+            if (lsrc->gradient)
+                memcpy(ldst->gradient, lsrc->gradient, ann_float_t_units);
+            if (lsrc->pgradient)
+                memcpy(ldst->pgradient, lsrc->pgradient, ann_float_t_units);
+            if (lsrc->delta)
+                memcpy(ldst->delta, lsrc->delta, ann_float_t_units);
+            if (lsrc->sgradient)
+                memcpy(ldst->sgradient, lsrc->sgradient, ann_float_t_units);
+        }
+    }
+    copy->rprop_nminus = net->rprop_nminus;
+    copy->rprop_nplus = net->rprop_nplus;
+    copy->rprop_maxupdate = net->rprop_maxupdate;
+    copy->rprop_minupdate = net->rprop_minupdate;
+    copy->flags = net->flags;
+    copy->node_transf_func = net->node_transf_func;
+    copy->derivative_func = net->derivative_func;
+    return copy;
+}
+
+/* Create a N-layer input/hidden/output net.
+ * The units array should specify the number of
+ * units in every layer from the output to the input layer. */
+AnnRprop *AnnCreateNet(int layers, int *units) {
+    AnnRprop *net;
+    int i;
+
+    if ((net = AnnAlloc(layers)) == NULL) return NULL;
+    for (i = 0; i < layers; i++) {
+        if (AnnInitLayer(net, i, units[i], i > 0)) {
+            AnnFree(net);
+            return NULL;
+        }
+    }
+    AnnSetRandomWeights(net);
+    AnnSetDeltas(net, ANN_RPROP_INITIAL_DELTA);
+    ANN_LEARN_RATE(net) = ANN_DEFAULT_LEARN_RATE;
+    return net;
+}
+
+/* Return the total number of weights this NN has. */
+size_t AnnCountWeights(AnnRprop *net) {
+    size_t weights = 0;
+    for (int i = ANN_LAYERS(net)-1; i > 0; i--) {
+        int nextunits = ANN_UNITS(net, i-1);
+        int units = ANN_UNITS(net, i);
+        if (i > 1) nextunits--; /* we don't output on bias units */
+        weights += units*nextunits;
+    }
+    return weights;
+}
+
+/* Create a 4-layer input/hidden/output net */
+AnnRprop *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits) {
+    int units[4];
+
+    units[0] = ounits;
+    units[1] = hunits2;
+    units[2] = hunits;
+    units[3] = iunits;
+    return AnnCreateNet(4, units);
+}
+
+/* Create a 3-layer input/hidden/output net */
+AnnRprop *AnnCreateNet3(int iunits, int hunits, int ounits) {
+    int units[3];
+
+    units[0] = ounits;
+    units[1] = hunits;
+    units[2] = iunits;
+    return AnnCreateNet(3, units);
+}
+
+
+/* Create a 2-layer "linear" network. */
+AnnRprop *AnnCreateNet2(int iunits, int ounits) {
+    int units[2];
+
+    units[0] = ounits;
+    units[1] = iunits;
+    return AnnCreateNet(2, units);
+}
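Note the reversed layer order: layer 0 is the output layer and layer layers-1 is the input layer, and AnnInitLayer appends a bias unit to every layer except the output one. For example, AnnCreateNet3(2, 4, 1) produces unit counts {1, 5, 3} from output to input (one output; four hidden plus bias; two inputs plus bias), which is why ANN_INPUT_UNITS in nn.h subtracts 1.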
+
+void AnnSimulate(AnnRprop *net) {
+    int i, j, k;
+
+    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
+        AnnLayer *layer = &ANN_LAYER(net, i);
+        int nextunits = ANN_UNITS(net, i-1);
+        int units_aligned = layer->units_aligned;
+        int units = layer->units;
+        if (i > 1) nextunits--; /* don't output on bias units */
+#ifdef USING_SIMD
+        int xps, psteps = units/SIMDF_SIZE;
+#endif // USING_SIMD
+        for (j = 0; j < nextunits; j++) {
+            ann_float_t A = 0; /* Activation final value. */
+            ann_float_t *w = layer->weight + j*units_aligned;
+            ann_float_t *o = layer->output;
+            k = 0;
+
+#ifdef USING_SIMD
+            if(psteps)
+            {
+                simdf_t sumA = simdf_zero();
+                for (xps = 0; xps < psteps; xps++) {
+                    simdf_t weights = simdf_load(w);
+                    simdf_t outputs = simdf_load(o);
+                    simdf_t prod = simdf_mul(weights,outputs);
+                    sumA = simdf_add(sumA, prod);
+                    w += SIMDF_SIZE;
+                    o += SIMDF_SIZE;
+                }
+                A += simdf_sum(sumA);
+                k += psteps*SIMDF_SIZE;
+            }
+#endif
+
+            /* Handle final piece shorter than SIMDF_SIZE. */
+            for (; k < units; k++) {
+                A += (*w++) * (*o++);
+            }
+            ANN_OUTPUT(net, i-1, j) = (*net->node_transf_func)(A); //sigmoid(A);
+        }
+    }
+}
+
+/* Create a Tcl procedure that simulates the neural network */
+void Ann2Tcl(const AnnRprop *net) {
+    int i, j, k;
+
+    printf("proc ann input {\n");
+    printf("    set output {");
+    for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) {
+        printf("0 ");
+    }
+    printf("}\n");
+    printf("    proc sigmoid x {return [expr {1/(1+exp(-$x))}]}\n");
+    for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) {
+      printf("    set input_%d [lindex $input %d]\n", i, i);
+    }
+    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
+        int nextunits = ANN_UNITS(net, i-1);
+        int units = ANN_UNITS(net, i);
+        //if (i > 1) nextunits--; /* don't output on bias units */
+        for (j = 0; j < nextunits; j++) {
+            ann_float_t W;
+            if (i == 1) {
+                printf("    lset output %d ", j);
+            } else {
+                printf("    set O_%d_%d", i-1, j);
+            }
+            printf(" [sigmoid [expr { \\\n");
+            for (k = 0; k < units; k++) {
+                W = ANN_WEIGHT(net, i, k, j);
+                if (i > 1 && k == units-1) {
+                    printf("        (%.9f)", W);
+                } else if (i == ANN_LAYERS(net)-1) {
+                    printf("        (%.9f*$input_%d)", W, k);
+                } else {
+                    printf("        (%.9f*$O_%d_%d)", W, i, k);
+                }
+                if ((k+1) < units) printf("+ \\\n");
+            }
+            printf("}]]\n");
+        }
+    }
+    printf("    return $output\n");
+    printf("}\n");
+}
+
+/* Create a Javascript procedure that simulates the neural network */
+void Ann2Js(const AnnRprop *net) {
+    int i, j, k;
+
+    printf("function ann( input ) {\n");
+    printf("    var output = [");
+    for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) {
+	if(i) printf(", ");
+        printf("0");
+    }
+    printf("];\n");
+    printf("    var sigmoid = function(x) {return 1.0/(1.0+Math.exp(-x));};\n");
+    for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) {
+      printf("    var input_%d = input[%d];\n", i, i);
+    }
+    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
+        int nextunits = ANN_UNITS(net, i-1);
+        int units = ANN_UNITS(net, i);
+        //if (i > 1) nextunits--; /* don't output on bias units */
+        for (j = 0; j < nextunits; j++) {
+            ann_float_t W;
+            if (i == 1) {
+                printf("    output[%d]", j);
+            } else {
+                printf("    var O_%d_%d", i-1, j);
+            }
+            printf(" = sigmoid(\n");
+            for (k = 0; k < units; k++) {
+                W = ANN_WEIGHT(net, i, k, j);
+                if (i > 1 && k == units-1) {
+                    printf("        (%.9f)", W);
+                } else if (i == ANN_LAYERS(net)-1) {
+                    printf("        (%.9f*input_%d)", W, k);
+                } else {
+                    printf("        (%.9f*O_%d_%d)", W, i, k);
+                }
+                if ((k+1) < units) printf("+\n");
+            }
+            printf(");\n");
+        }
+    }
+    printf("    return output;\n");
+    printf("}\n");
+}
+
+/* Print a network representation */
+void AnnPrint(const AnnRprop *net) {
+    int i, j, k;
+
+    for (i = 0; i < ANN_LAYERS(net); i++) {
+        char *layertype = "Hidden";
+        if (i == 0) layertype = "Output";
+        if (i == ANN_LAYERS(net)-1) layertype = "Input";
+        printf("%s layer %d, units %d\n", layertype, i, ANN_UNITS(net,i));
+        if (i) {
+            /* Don't compute the bias unit as a target. */
+            int targets = ANN_UNITS(net,i-1) - (i-1>0);
+            /* Weights */
+            printf("\tW");
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                printf("(");
+                for (k = 0; k < targets; k++) {
+                    printf("%f", ANN_WEIGHT(net,i,j,k));
+                    if (k != targets-1) printf(" ");
+                }
+                printf(") ");
+            }
+            printf("\n");
+            /* Gradients */
+            printf("\tg");
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                printf("[");
+                for (k = 0; k < targets; k++) {
+                    printf("%f", ANN_GRADIENT(net,i,j,k));
+                    if (k != targets-1) printf(" ");
+                }
+                printf("] ");
+            }
+            printf("\n");
+            /* SGradients */
+            printf("\tG");
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                printf("[");
+                for (k = 0; k < targets; k++) {
+                    printf("%f", ANN_SGRADIENT(net,i,j,k));
+                    if (k != targets-1) printf(" ");
+                }
+                printf("] ");
+            }
+            printf("\n");
+            /* Gradients at t-1 */
+            printf("\tP");
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                printf("[");
+                for (k = 0; k < targets; k++) {
+                    printf("%f", ANN_PGRADIENT(net,i,j,k));
+                    if (k != targets-1) printf(" ");
+                }
+                printf("] ");
+            }
+            printf("\n");
+            /* Delta */
+            printf("\tD");
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                printf("|");
+                for (k = 0; k < targets; k++) {
+                    printf("%f", ANN_DELTA(net,i,j,k));
+                    if (k != targets-1) printf(" ");
+                }
+                printf("| ");
+            }
+            printf("\n");
+        }
+        for (j = 0; j < ANN_UNITS(net,i); j++) {
+            printf("\tO: %f ", ANN_OUTPUT(net,i,j));
+        }
+        printf("\n");
+        printf("\tE /");
+        for (j = 0; j < ANN_UNITS(net,i); j++) {
+            printf("%f ", ANN_ERROR(net,i,j));
+        }
+        printf("/\n");
+    }
+}
+
+/* Calculate the global error of the net. This is half the sum of the
+ * squared errors over the output units (a sum-of-squares error, not a
+ * true root mean square). */
+ann_float_t AnnGlobalError(AnnRprop *net, ann_float_t *desired) {
+    ann_float_t e, t;
+    int i, outputs = ANN_OUTPUT_UNITS(net);
+
+    e = 0;
+    for (i = 0; i < outputs; i++) {
+        t = desired[i] - ANN_OUTPUT_NODE(net,i);
+        e += t*t; /* No need for fabs(t), t*t will always be positive. */
+    }
+    return .5*e;
+}
+
+/* Set the network input */
+void AnnSetInput(AnnRprop *net, ann_float_t *input)
+{
+    int i, inputs = ANN_INPUT_UNITS(net);
+
+    for (i = 0; i < inputs; i++) ANN_INPUT_NODE(net,i) = input[i];
+}
+
+/* Simulate the net, and return the global error */
+ann_float_t AnnSimulateError(AnnRprop *net, ann_float_t *input, ann_float_t *desired) {
+    AnnSetInput(net, input);
+    AnnSimulate(net);
+    return AnnGlobalError(net, desired);
+}
+
+/* Compute the error vector y-t in the output layer. This error depends
+ * on the loss function we use. */
+void AnnCalculateOutputError(AnnRprop *net, ann_float_t *desired) {
+    int units = ANN_OUTPUT_UNITS(net);
+    ann_float_t factor = (ann_float_t)2/units;
+    AnnLayer *layer = &ANN_LAYER(net, 0);
+    for (int j = 0; j < units; j++) {
+        layer->error[j] = factor * (layer->output[j] - desired[j]);
+    }
+}
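The 2/units factor here is the derivative of a mean-squared-error loss: with E = (1/units) * sum_j (o_j - d_j)^2 over the output units, dE/do_j = (2/units) * (o_j - d_j), which is exactly the value stored in layer->error[j].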
+
+/* Calculate gradients with a trivial and slow algorithm, this
+ * is useful to check that the real implementation is working
+ * well, comparing the results.
+ *
+ * The algorithm used is: compute the error function at two points
+ * (E1 with the real weight, and E2 with the weight W + GTRIVIAL_DELTA),
+ * then approximate the gradient as G = (E2-E1)/GTRIVIAL_DELTA. */
+#define GTRIVIAL_DELTA 0.001
+void AnnCalculateGradientsTrivial(AnnRprop *net, ann_float_t *desired) {
+    int j, i, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        for (i = 0; i < weights; i++) {
+            ann_float_t t, e1, e2;
+            AnnLayer *layer = &ANN_LAYER(net,j);
+
+            /* Calculate the value of the error function
+             * in this point. */
+            AnnSimulate(net);
+            e1 = AnnGlobalError(net, desired);
+            t = layer->weight[i];
+            /* Calculate the error a bit on the right */
+            layer->weight[i] += GTRIVIAL_DELTA;
+            AnnSimulate(net);
+            e2 = AnnGlobalError(net, desired);
+            /* Restore the original weight */
+            layer->weight[i] = t;
+            /* Calculate the gradient */
+            layer->gradient[i] = (e2-e1)/GTRIVIAL_DELTA;
+        }
+    }
+}
+
+/* Calculate gradients using the back propagation algorithm */
+void AnnCalculateGradients(AnnRprop *net, ann_float_t *desired) {
+    int j, layers = ANN_LAYERS(net)-1;
+
+    /* Populate the error vector net->layer[0]->error according
+     * to the loss function. */
+    AnnCalculateOutputError(net,desired);
+
+    /* Back-propagate the error and compute the gradient
+     * for every weight in the net. */
+    for (j = 0; j < layers; j++) {
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        AnnLayer *prev_layer = &ANN_LAYER(net, j+1);
+        int i, units = layer->units;
+        int prevunits = prev_layer->units;
+        int prevunits_aligned = prev_layer->units_aligned;
+#ifdef USING_SIMD
+        int xps, psteps = prevunits/SIMDF_SIZE;
+        simdf_t es;
+#endif // USING_SIMD
+        /* Skip bias units, they have no connections with the previous
+         * layers. */
+        if (j > 1) units--;
+        /* Reset the next layer errors array */
+        //for (i = 0; i < prevunits; i++) prev_layer->error[i] = 0;
+        memset(prev_layer->error, 0, ANN_SIZEOF_ann_float_t*prevunits);
+        /* For every node in this layer ... */
+        for (i = 0; i < units; i++) {
+            ann_float_t error_signal, ei, oi, derivative;
+            int k;
+
+            /* Compute gradient. */
+            ei = layer->error[i];
+            oi = layer->output[i];
+
+            /* Common derivatives:
+             *
+             * identity: 1
+             * sigmoid: oi*(1-oi)
+             * softmax: oi*(1-oi)
+             * tanh:    (1-oi)*(1+oi), that's 1-(oi*oi)
+             * relu:    (oi > 0) ? 1 : 0
+             */
+            //derivative = oi*(1-oi);
+            derivative = (*net->derivative_func)(oi);
+            error_signal = ei*derivative;
+
+            /* For every weight between this node and
+             * the previous layer's nodes: */
+            ann_float_t *g = prev_layer->gradient + i*prevunits_aligned;
+            ann_float_t *w = prev_layer->weight + i*prevunits_aligned;
+            ann_float_t *o = prev_layer->output;
+            ann_float_t *e = prev_layer->error;
+
+            /* 1. Calculate the gradient */
+            k = 0;
+
+#ifdef USING_SIMD
+            if(psteps)
+            {
+                es = simdf_set1f(error_signal);
+//printf("%d : %ld\n", __LINE__, ((long)o & 15));
+                for (xps = 0; xps < psteps; xps++) {
+                    simdf_t outputs = simdf_load(o);
+                    simdf_t gradients = simdf_mul(es,outputs);
+                    simdf_store(g, gradients);
+                    o += SIMDF_SIZE;
+                    g += SIMDF_SIZE;
+                }
+                k += psteps*SIMDF_SIZE;
+            }
+#endif
+
+            /* Handle final piece shorter than SIMDF_SIZE. */
+            for (; k < prevunits; k++) *g++ = error_signal*(*o++);
+
+            /* 2. And back-propagate the error to the previous layer */
+            k = 0;
+
+#ifdef USING_SIMD
+            if(psteps)
+            {
+//printf("%d : %ld\n", __LINE__, ((long)w & 15));
+                for (xps = 0; xps < psteps; xps++) {
+                    simdf_t weights = simdf_load(w);
+                    simdf_t errors = simdf_load(e);
+                    simdf_t prod = simdf_mul(es, weights);
+                    simdf_store(e, simdf_add(prod , errors));
+                    e += SIMDF_SIZE;
+                    w += SIMDF_SIZE;
+                }
+                k += psteps*SIMDF_SIZE;
+            }
+#endif
+            /* Handle final piece shorter than SIMDF_SIZE. */
+            for (; k < prevunits; k++) {
+                (*e++) += error_signal * (*w++);
+            }
+        }
+    }
+}
+
+/* Set the delta values of the net to a given value */
+void AnnSetDeltas(AnnRprop *net, ann_float_t val) {
+    int j, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        int i;
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        for (i = 0; i < weights; i++) layer->delta[i] = val;
+    }
+}
+
+/* Set the sgradient values to zero */
+void AnnResetSgradient(AnnRprop *net) {
+    int j, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net, j);
+        memset(ANN_LAYER(net, j).sgradient, 0, ANN_SIZEOF_ann_float_t*weights);
+    }
+}
+
+/* Set random weights in the range -0.05,+0.05 */
+void AnnSetRandomWeights(AnnRprop *net) {
+    int i, j, k;
+
+    for (i = 1; i < ANN_LAYERS(net); i++) {
+        for (k = 0; k < ANN_UNITS(net, i-1); k++) {
+            for (j = 0; j < ANN_UNITS(net, i); j++) {
+                ANN_WEIGHT(net,i,j,k) = -0.05+.1*(rand()/(RAND_MAX+1.0));
+            }
+        }
+    }
+}
+
+/* Scale the net weights of the given factor */
+void AnnScaleWeights(AnnRprop *net, ann_float_t factor) {
+    int j, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        int i;
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        for (i = 0; i < weights; i++)
+            layer->weight[i] *= factor;
+    }
+}
+
+/* Update the sgradient, that's the sum of the weight's gradient for every
+ * element of the training set. This is used for the RPROP algorithm
+ * that works with the sign of the derivative for the whole set. */
+void AnnUpdateSgradient(AnnRprop *net) {
+    int j, i, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        ann_float_t *sg = net->layer[j].sgradient;
+        ann_float_t *g = net->layer[j].gradient;
+        i = 0;
+#ifdef USING_SIMD
+            int psteps = weights/SIMDF_SIZE;
+            if(psteps)
+            {
+                int xps;
+                for (xps = 0; xps < psteps; xps++) {
+                    simdf_t sgradient = simdf_load(sg);
+                    simdf_t gradient = simdf_load(g);
+                    simdf_store(sg, simdf_add( sgradient, gradient));
+                    sg += SIMDF_SIZE;
+                    g += SIMDF_SIZE;
+                }
+                i += psteps*SIMDF_SIZE;
+            }
+#endif
+        /* Handle final piece shorter than SIMDF_SIZE. */
+        for (; i < weights; i++)
+            (*sg++) += (*g++);
+    }
+}
+
+/* Helper function for RPROP, returns -1 if n < 0, +1 if n > 0, 0 if n == 0 */
+static inline ann_float_t sign(ann_float_t n) {
+    if (n > 0) return +1.0;
+    if (n < 0) return -1.0;
+    return 0.0;
+}
+
+/* The core of the RPROP algorithm.
+ *
+ * Note that:
+ * sgradient is the set-wise gradient.
+ * delta is the per-weight update value. */
+void AnnAdjustWeightsResilientBP(AnnRprop *net) {
+    int j, i, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j) - (j-1>0);
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        for (i = 0; i < weights; i++) {
+            ann_float_t sgradient = layer->sgradient[i];
+            ann_float_t t = layer->pgradient[i] * sgradient;
+            ann_float_t delta = layer->delta[i];
+            if (t > 0) {
+                delta = ANN_MIN(delta*ANN_RPROP_NPLUS(net),ANN_RPROP_MAXUPDATE(net));
+                ann_float_t wdelta = -sign(sgradient) * delta;
+                layer->weight[i] += wdelta;
+                layer->delta[i] = delta;
+                layer->pgradient[i] = sgradient;
+            } else if (t < 0) {
+                ann_float_t past_wdelta = -sign(layer->pgradient[i]) * delta;
+                delta = ANN_MAX(delta*ANN_RPROP_NMINUS(net),ANN_RPROP_MINUPDATE(net));
+                layer->weight[i] -= past_wdelta;
+                layer->delta[i] = delta;
+                layer->pgradient[i] = 0;
+            } else { /* t == 0 */
+                ann_float_t wdelta = -sign(sgradient) * delta;
+                layer->weight[i] += wdelta;
+                layer->pgradient[i] = sgradient;
+            }
+        }
+    }
+}
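This is the sign-based rule from the Rprop paper linked at the top of the file: while the set-wise gradient keeps its sign (t > 0), the per-weight step delta grows by rprop_nplus (default 1.2, capped at rprop_maxupdate); when the sign flips (t < 0), the previous step is rolled back, delta shrinks by rprop_nminus (default 0.5, floored at rprop_minupdate), and pgradient is zeroed so the next epoch takes the t == 0 branch. In the other two branches the weight then moves one step of size delta against the sign of the set-wise gradient.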
+
+/* Resilient Backpropagation Epoch */
+ann_float_t AnnResilientBPEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen) {
+    ann_float_t error = 0;
+    int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
+
+    AnnResetSgradient(net);
+    for (j = 0; j < setlen; j++) {
+        error += AnnSimulateError(net, input, desired);
+        AnnCalculateGradients(net, desired);
+        AnnUpdateSgradient(net);
+        input += inputs;
+        desired += outputs;
+    }
+    AnnAdjustWeightsResilientBP(net);
+    return error / setlen;
+}
+
+/* Update the deltas using the gradient descent algorithm.
+ * Gradients should already have been computed with AnnCalculateGradients(). */
+void AnnUpdateDeltasGD(AnnRprop *net) {
+    int j, i, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        for (i = 0; i < weights; i++)
+            layer->delta[i] += layer->gradient[i];
+    }
+}
+
+/* Adjust net weights using the (already) calculated deltas. */
+void AnnAdjustWeights(AnnRprop *net, int setlen) {
+    int j, i, layers = ANN_LAYERS(net);
+
+    for (j = 1; j < layers; j++) {
+        int weights = ANN_WEIGHTS(net,j);
+        AnnLayer *layer = &ANN_LAYER(net, j);
+        for (i = 0; i < weights; i++) {
+            layer->weight[i] -= ANN_LEARN_RATE(net)/setlen*layer->delta[i];
+        }
+    }
+}
+
+/* Gradient descent training */
+ann_float_t AnnGDEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen) {
+    ann_float_t error = 0;
+    int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
+
+    for (j = 0; j < setlen; j++) {
+        AnnSetDeltas(net, 0);
+        error += AnnSimulateError(net, input, desired);
+        AnnCalculateGradients(net, desired);
+        AnnUpdateDeltasGD(net);
+        input += inputs;
+        desired += outputs;
+        AnnAdjustWeights(net,setlen);
+    }
+    return error / setlen;
+}
+
+/* This function, called after AnnSimulate(), will return 1 if there is
+ * an error in the detected class (compared to the desired output),
+ * otherwise 0 is returned. */
+int AnnTestClassError(AnnRprop *net, ann_float_t *desired) {
+    int i, outputs = ANN_OUTPUT_UNITS(net);
+    int classid, outid;
+    ann_float_t max = 0;
+
+    /* Get the class ID from the test dataset output. */
+    classid = 0;
+    for (i = 0; i < outputs; i++)
+        if (desired[i] == 1) break;
+    classid = i;
+
+    /* Get the network classification. */
+    max = ANN_OUTPUT_NODE(net,0);
+    outid = 0;
+    for (i = 1; i < outputs; i++) {
+        ann_float_t o = ANN_OUTPUT_NODE(net,i);
+        if (o > max) {
+            outid = i;
+            max = o;
+        }
+    }
+    return outid != classid;
+}
+
+/* Simulate the entire test dataset with the neural network and return the
+ * average error over all the entries tested. */
+void AnnTestError(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen, ann_float_t *avgerr, ann_float_t *classerr) {
+    ann_float_t error = 0;
+    int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
+    int class_errors = 0;
+
+    for (j = 0; j < setlen; j++) {
+        error += AnnSimulateError(net, input, desired);
+        if (classerr)
+            class_errors += AnnTestClassError(net, desired);
+        input += inputs;
+        desired += outputs;
+    }
+    if (avgerr) *avgerr = error/setlen;
+    if (classerr) *classerr = (ann_float_t)class_errors*100/setlen;
+}
+
+/* Train the net */
+ann_float_t AnnTrainWithAlgoFunc(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr,
+                                        int maxepochs, int setlen, AnnTrainAlgoFunc algo_func) {
+    int i = 0;
+    ann_float_t e = maxerr+1;
+
+    while (i++ < maxepochs && e >= maxerr) {
+        e = (*algo_func)(net, input, desired, setlen);
+    }
+    return e;
+}
+
+
+ann_float_t AnnTrain(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, int maxepochs,
+                                                                                int setlen, int algo) {
+    AnnTrainAlgoFunc algo_func;
+    if(algo == ANN_ALGO_BPROP) algo_func = AnnResilientBPEpoch;
+    else if(algo == ANN_ALGO_GD) algo_func = AnnGDEpoch;
+    else return -1;
+
+    return AnnTrainWithAlgoFunc(net, input, desired, maxerr, maxepochs, setlen, algo_func);
+}
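The commit adds no driver for this trainer, so the following is only a sketch of how the API above fits together; the XOR topology, error target, and epoch budget are illustrative choices, not values from the commit:

#include <stdio.h>
#include "nn.h"

int main(void) {
    /* 2 inputs, 4 hidden units, 1 output; bias units are added internally. */
    AnnRprop *net = AnnCreateNet3(2, 4, 1);
    if (net == NULL) return 1;
    ann_float_t inputs[]  = {0,0,  0,1,  1,0,  1,1};
    ann_float_t desired[] = {0, 1, 1, 0};
    /* Run RPROP epochs until the average error drops below 0.001
     * or 10000 epochs elapse, whichever comes first. */
    ann_float_t err = AnnTrain(net, inputs, desired, 0.001, 10000, 4, ANN_ALGO_BPROP);
    printf("training error: %f\n", (double)err);
    for (int i = 0; i < 4; i++) {
        AnnSetInput(net, inputs + i*2);
        AnnSimulate(net);
        printf("%g xor %g -> %f\n", (double)inputs[i*2], (double)inputs[i*2+1],
               (double)ANN_OUTPUT_NODE(net, 0));
    }
    AnnFree(net);
    return 0;
}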

+ 160 - 0
SquiLu-ext/nn.h

@@ -0,0 +1,160 @@
+/* RPROP Neural Networks implementation
+ * See: http://deeplearning.cs.cmu.edu/pdfs/Rprop.pdf
+ *
+ * Copyright (c) 2003-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Disque nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SQNN_H
+#define __SQNN_H
+
+//#include <assert.h>
+
+typedef float ann_float_t;
+typedef ann_float_t (*AnnDerivativeFunc)(ann_float_t v);
+
+/* Data structures.
+ * Nets are not so 'dynamic', but enough to support
+ * an arbitrary number of layers, with arbitrary units for layer.
+ * Only fully connected feed-forward networks are supported. */
+typedef struct {
+	ann_float_t *output;		/* output[i], output of i-th unit */
+	ann_float_t *error;		/* error[i], output error of i-th unit*/
+	ann_float_t *weight;		/* weight[(i*units)+j] */
+				/* weight between unit i-th and next j-th */
+	ann_float_t *gradient;	/* gradient[(i*units)+j] gradient */
+	ann_float_t *sgradient;	/* gradient for the full training set */
+				/* only used for RPROP */
+	ann_float_t *pgradient;	/* pastgradient[(i*units)+j] t-1 gradient */
+				/* (t-1 sgradient for resilient BP) */
+	ann_float_t *delta;		/* delta[(i*units)+j] cumulative update */
+				/* (per-weight delta for RPROP) */
+	int units;	/*moved to last position for alignment purposes*/
+	int units_aligned; /*units rounded up for alignment*/
+} AnnLayer;
+
+/* Feed forward network structure */
+typedef struct {
+	AnnLayer *layer;
+	int flags;
+	int layers;
+	AnnDerivativeFunc node_transf_func;
+	AnnDerivativeFunc derivative_func;
+	ann_float_t rprop_nminus;
+	ann_float_t rprop_nplus;
+	ann_float_t rprop_maxupdate;
+	ann_float_t rprop_minupdate;
+	ann_float_t learn_rate; /* Used for GD training. */
+} AnnRprop;
+
+typedef ann_float_t (*AnnTrainAlgoFunc)(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen);
+
+/* Raw interface to data structures */
+#define ANN_LAYERS(net) (net)->layers
+#define ANN_LAYER(net, l) (net)->layer[/*assert(l >= 0),*/l]
+#define ANN_OUTPUT(net,l,i) ANN_LAYER(net, l).output[i]
+#define ANN_ERROR(net,l,i) ANN_LAYER(net, l).error[i]
+#define ANN_LAYER_IDX(net,l,i,j) (((j)*ANN_LAYER(net, l).units_aligned)+(i))
+#define ANN_WEIGHT(net,l,i,j) ANN_LAYER(net, l).weight[ANN_LAYER_IDX(net,l,i,j)]
+#define ANN_GRADIENT(net,l,i,j) ANN_LAYER(net, l).gradient[ANN_LAYER_IDX(net,l,i,j)]
+#define ANN_SGRADIENT(net,l,i,j) ANN_LAYER(net, l).sgradient[ANN_LAYER_IDX(net,l,i,j)]
+#define ANN_PGRADIENT(net,l,i,j) ANN_LAYER(net, l).pgradient[ANN_LAYER_IDX(net,l,i,j)]
+#define ANN_DELTA(net,l,i,j) ANN_LAYER(net, l).delta[ANN_LAYER_IDX(net,l,i,j)]
+#define ANN_UNITS(net,l) ANN_LAYER(net, l).units
+#define ANN_UNITS_ALLOCATED(net,l) ANN_LAYER(net, l).units_aligned
+#define ANN_WEIGHTS(net,l) (ANN_UNITS(net,l)*ANN_UNITS(net,l-1))
+#define ANN_OUTPUT_NODE(net,i) ANN_OUTPUT(net,0,i)
+#define ANN_INPUT_NODE(net,i) ANN_OUTPUT(net,(ANN_LAYERS(net))-1,i)
+#define ANN_OUTPUT_UNITS(net) ANN_UNITS(net,0)
+#define ANN_INPUT_UNITS(net) (ANN_UNITS(net,(ANN_LAYERS(net))-1)-1)
+#define ANN_RPROP_NMINUS(net) (net)->rprop_nminus
+#define ANN_RPROP_NPLUS(net) (net)->rprop_nplus
+#define ANN_RPROP_MAXUPDATE(net) (net)->rprop_maxupdate
+#define ANN_RPROP_MINUPDATE(net) (net)->rprop_minupdate
+#define ANN_LEARN_RATE(net) (net)->learn_rate
+
+/* Constants */
+#define ANN_DEFAULT_RPROP_NMINUS 0.5
+#define ANN_DEFAULT_RPROP_NPLUS 1.2
+#define ANN_DEFAULT_RPROP_MAXUPDATE 50
+#define ANN_DEFAULT_RPROP_MINUPDATE 0.000001
+#define ANN_RPROP_INITIAL_DELTA 0.1
+#define ANN_DEFAULT_LEARN_RATE 0.1
+#define ANN_ALGO_BPROP 0
+#define ANN_ALGO_GD 1
+
+/* Misc */
+#define ANN_MAX(a,b) (((a)>(b))?(a):(b))
+#define ANN_MIN(a,b) (((a)<(b))?(a):(b))
+
+/* Prototypes */
+ann_float_t AnnTransferFunctionSigmoid(ann_float_t x);
+ann_float_t AnnTransferFunctionRelu(ann_float_t x);
+ann_float_t AnnTransferFunctionTanh(ann_float_t x);
+//ann_float_t AnnDerivativeIdentity(ann_float_t x);
+ann_float_t AnnDerivativeSigmoid(ann_float_t x);
+ann_float_t AnnDerivativeTanh(ann_float_t x);
+ann_float_t AnnDerivativeRelu(ann_float_t x);
+
+void AnnResetLayer(AnnLayer *layer);
+AnnRprop *AnnAlloc(int layers);
+void AnnFreeLayer(AnnLayer *layer);
+void AnnFree(AnnRprop *net);
+int AnnInitLayer(AnnRprop *net, int i, int units, int bias);
+AnnRprop *AnnCreateNet(int layers, int *units);
+AnnRprop *AnnCreateNet2(int iunits, int ounits);
+AnnRprop *AnnCreateNet3(int iunits, int hunits, int ounits);
+AnnRprop *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits);
+AnnRprop *AnnClone(const AnnRprop* net);
+size_t AnnCountWeights(AnnRprop *net);
+void AnnSimulate(AnnRprop *net);
+void Ann2Tcl(const AnnRprop *net);
+void Ann2Js(const AnnRprop *net);
+void AnnPrint(const AnnRprop *net);
+ann_float_t AnnGlobalError(AnnRprop *net, ann_float_t *desired);
+void AnnSetInput(AnnRprop *net, ann_float_t *input);
+ann_float_t AnnSimulateError(AnnRprop *net, ann_float_t *input, ann_float_t *desired);
+void AnnCalculateGradientsTrivial(AnnRprop *net, ann_float_t *desired);
+void AnnCalculateGradients(AnnRprop *net, ann_float_t *desired);
+void AnnSetDeltas(AnnRprop *net, ann_float_t val);
+void AnnResetDeltas(AnnRprop *net);
+void AnnResetSgradient(AnnRprop *net);
+void AnnSetRandomWeights(AnnRprop *net);
+void AnnScaleWeights(AnnRprop *net, ann_float_t factor);
+void AnnUpdateDeltasGD(AnnRprop *net);
+void AnnUpdateDeltasGDM(AnnRprop *net);
+void AnnUpdateSgradient(AnnRprop *net);
+void AnnAdjustWeights(AnnRprop *net, int setlen);
+ann_float_t AnnBatchGDEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen);
+ann_float_t AnnBatchGDMEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen);
+void AnnAdjustWeightsResilientBP(AnnRprop *net);
+ann_float_t AnnResilientBPEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen);
+ann_float_t AnnTrainWithAlgoFunc(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, int maxepochs, int setlen, AnnTrainAlgoFunc algo_func);
+ann_float_t AnnTrain(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, int maxepochs, int setlen, int algo);
+void AnnTestError(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen, ann_float_t *avgerr, ann_float_t *classerr);
+
+#endif /* __SQNN_H */

+ 522 - 0
SquiLu-ext/sq_ipc.cpp

@@ -0,0 +1,522 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#ifdef USE_IPC
+
+#include "squirrel.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>  /* for malloc */
+#include <assert.h>  /* for a few sanity tests */
+#include <sqstdio.h>
+#include <sqstdfile.h>
+
+//#include "FileLock.h"
+/* check for POSIX */
+#if defined( unix ) || defined( __unix ) || defined( __unix__ ) || \
+    (defined( __APPLE__ ) && defined( __MACH__ )) || \
+    HAVE_UNISTD_H
+#  include <unistd.h>
+#  if defined( _POSIX_VERSION ) && _POSIX_VERSION >= 200112L
+#    define HAVE_FLOCK
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+typedef off_t ipc_flock_off_t;
+#  endif
+#endif
+
+
+/* check for Windows */
+#if !defined( HAVE_FLOCK ) && \
+    defined( _WIN32 ) && !defined( __CYGWIN__ )
+#  define HAVE_FLOCK
+#include <stddef.h>
+#include <ctype.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <io.h>
+
+
+typedef ULONGLONG ipc_flock_off_t;
+#endif
+
+static int ipc_err( char const* file, int line, char const* func,
+                       int code ) {
+  if( code != 0 ) {
+    if( func != NULL )
+      fprintf( stderr, "[%s:%d] error return (%d) in function '%s'\n",
+               file, line, code, func );
+    else
+      fprintf( stderr, "[%s:%d]: error return (%d)\n",
+               file, line, code );
+    fflush( stderr );
+  }
+  return code;
+}
+
+#ifndef NDEBUG
+#  if (defined( __STDC_VERSION__ ) && __STDC_VERSION__  >= 199901L) || \
+      defined( __GNUC__ ) || defined( __clang__ )
+#    define IPC_ERR( code ) (ipc_err( __FILE__, __LINE__, __func__, (int)(code) ))
+#  elif defined( _MSC_VER ) && _MSC_VER >= 1100L
+#    define IPC_ERR( code ) (ipc_err( __FILE__, __LINE__, __FUNCTION__, (int)(code) ))
+#  else
+#    define IPC_ERR( code ) (ipc_err( __FILE__, __LINE__, NULL, (int)(code) ))
+#  endif
+#else
+#  define IPC_ERR( code ) ((int)(code))
+#endif
+
+#define IPC_EINTR( _rv, _call ) \
+  do { \
+    _rv = _call; \
+  } while( _rv < 0 && errno == EINTR )
+
+#if 0
+//memmap
+#include <stddef.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+
+typedef off_t ipc_mmap_off_t;
+
+#define MEMFILE_R  1
+#define MEMFILE_W  2
+#define MEMFILE_RW (MEMFILE_R|MEMFILE_W)
+
+typedef struct {
+  void* addr;
+  size_t len;
+} ipc_mmap_handle;
+
+static void* ipc_mmap_addr( ipc_mmap_handle* h ) {
+  return h->addr;
+}
+
+static size_t ipc_mmap_size( ipc_mmap_handle* h ) {
+  return h->len;
+}
+
+
+static void ipc_mmap_error( char* buf, size_t len, int code ) {
+  if( len > 0 && strerror_r( code, buf, len ) != (int)0 ) {
+    strncpy( buf, "unknown error", len-1 );
+    buf[ len-1 ] = '\0';
+  }
+}
+
+
+static size_t ipc_mmap_pagesize( void ) {
+  long result = sysconf( _SC_PAGESIZE );
+  if( result < 1 )
+    result = 4096;
+  return (size_t)result;
+}
+
+
+static int ipc_mmap_open( ipc_mmap_handle* h, char const* name,
+                          int mode, off_t offset, size_t size ) {
+  int fd, oflags = 0, mmflags = 0;
+  if( (mode & MEMFILE_RW) == MEMFILE_RW ) {
+    oflags = O_RDWR;
+    mmflags = PROT_READ | PROT_WRITE;
+  } else if( mode & MEMFILE_R ) {
+    oflags = O_RDONLY;
+    mmflags = PROT_READ;
+  } else if( mode & MEMFILE_W ) {
+    oflags = O_RDWR;
+    mmflags = PROT_WRITE;
+  }
+#ifdef O_CLOEXEC
+  oflags |= O_CLOEXEC;
+#endif
+  fd = open( name, oflags );
+  if( fd < 0 )
+    return IPC_ERR( errno );
+  h->len = size;
+  if( size == 0 ) { /* figure out its size */
+    struct stat buf;
+    if( fstat( fd, &buf ) < 0 ) {
+      int saved_errno = errno;
+      close( fd );
+      return IPC_ERR( saved_errno );
+    }
+    if( buf.st_size < offset ) {
+      close( fd );
+      return IPC_ERR( EINVAL );
+    }
+    if( buf.st_size - offset > (size_t)-1 )
+      h->len = (size_t)-1;
+    else
+      h->len = buf.st_size - offset;
+  }
+  /* create mmap */
+  h->addr = mmap( NULL, h->len, mmflags, MAP_SHARED, fd, offset );
+  if( h->addr == MAP_FAILED ) {
+    int saved_errno = errno;
+    close( fd );
+    return IPC_ERR( saved_errno );
+  }
+  close( fd ); /* we don't need it anymore! */
+  return 0;
+}
+
+
+static int ipc_mmap_close( ipc_mmap_handle* h ) {
+  int rv = munmap( h->addr, h->len );
+  if( rv < 0 )
+    return IPC_ERR( errno );
+  return 0;
+}
+
+
+#if defined( _POSIX_SYNCHRONIZED_IO ) && _POSIX_SYNCHRONIZED_IO > 0
+#  define IPC_MMAP_HAVE_FLUSH
+static int ipc_mmap_flush( ipc_mmap_handle* h, size_t pos ) {
+  int rv = msync( h->addr, pos, MS_ASYNC|MS_INVALIDATE );
+  if( rv < 0 )
+    return IPC_ERR( errno );
+  return 0;
+}
+#endif
+
+//#ifdef HAVE_MMAP
+
+#define NAME "ipc.mmap"
+
+typedef struct {
+  ipc_mmap_handle h; /* platform specific data */
+  /* extra management info: */
+  char is_valid;
+} sq_mmap_handle;
+
+
+static SQRESULT mmap_pusherror( HSQUIRRELVM v, int code ) {
+  char buf[ IPC_MAXERRMSG ];
+  ipc_mmap_error( buf, sizeof( buf ), code );
+  return sq_throwerror(v, _SC("%s"), buf);
+}
+
+
+static SQRESULT sq_mmap_close(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS(v);
+	GET_file_INSTANCE();
+    int rv = 0;
+    if(!self->is_valid) return sq_throwerror(v, _SC("attempt to use invalid mmap object"));
+    rv = ipc_mmap_close( &self->h );
+    if( rv != 0 ) return mmap_pusherror(v, rv);
+    self->is_valid = 0;
+    sq_pushbool(v, SQTrue);
+    return 1;
+}
+
+
+static SQRESULT sq_mmap_gc( HSQUIRRELVM v ) {
+  l_mmap_handle* h = lua_touserdata( L, 1 );
+  if( h->is_valid )
+    ipc_mmap_close( &h->h );
+  return 0;
+}
+
+
+#ifdef IPC_MMAP_HAVE_FLUSH
+static SQRESULT sq_mmap_flush( HSQUIRRELVM v ) {
+	SQ_FUNC_VARS(v);
+	GET_file_INSTANCE();
+	SQ_GET_INTEGER(v, 2, pos);
+    int rv = 0;
+    if( !self->is_valid ) return sq_throwerror(v, _SC("attempt to use invalid mmap object"));
+    rv = ipc_mmap_flush( &self->h, pos );
+    if( rv != 0 ) return mmap_pusherror(v, rv);
+    sq_pushbool(v, SQTrue);
+    return 1;
+}
+#endif
+
+static SQRESULT getMmapMode(HSQUIRRELVM v, const char *mode)
+{
+    if(mode)
+    {
+        switch(mode[0])
+        {
+        case 'r':
+            if(mode[1] == 'w') return MEMFILE_RW;
+            return MEMFILE_R;
+        break;
+
+        case 'w':
+            return MEMFILE_W;
+            break;
+        }
+    }
+    return sq_throwerror(v, _SC("invalid parameter mode '%s'"), mode);;
+}
+
+
+static SQRESULT sq_mmap_open(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS(v);
+	GET_file_INSTANCE();
+
+	SQ_GET_STRING(v, 2, name);
+	SQ_GET_STRING(v, 3, mode);
+	SQ_OPT_INTEGER(v, 4, offset, 0);
+	SQ_OPT_INTEGER(v, 5, nbytes, 0);
+
+    int mmode = getMmapMode(v, mode);
+    if(mmode < 0) return mmode;
+    ipc_mmap_off_t moffset = offset;
+    size_t size = nbytes;
+    l_mmap_handle* h = lua_newuserdata( L, sizeof( *h ) );
+    int rv = 0;
+    h->is_valid = 0;
+    luaL_getmetatable( L, NAME );
+    lua_setmetatable( L, -2 );
+    rv = ipc_mmap_open( &h->h, name, mmode, moffset, size );
+    if( rv != 0 ) return mmap_pusherror( L, rv );
+    h->is_valid = 1;
+    sq_pushcfunction(v, sq_mmap_close );
+#ifdef IPC_MMAP_HAVE_FLUSH
+    sq_pushcfunction(v, l_mmap_flush );
+    memfile_new( L, ipc_mmap_addr( &h->h ), ipc_mmap_size( &h->h ),
+               mode, -3, -2, -1 );
+#else
+    memfile_new( L, ipc_mmap_addr( &h->h ), ipc_mmap_size( &h->h ),
+               mode, -2, -1, 0 );
+#endif
+  return 1;
+}
+
+//#endif // HAVE_MMAP
+#endif // 0
+
+//file lock
+static void ipc_flock_error( char* buf, size_t len, int code ) {
+#ifdef _WIN32
+    if( len > 0 ) {
+        if( 0 == FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM |
+                                 FORMAT_MESSAGE_IGNORE_INSERTS,
+                                 NULL,
+                                 code,
+                                 0,
+                                 buf,
+                                 len,
+                                 NULL ) ) {
+              strncpy( buf, "unknown error", len-1 );
+              buf[ len-1 ] = '\0';
+        } else { /* Windows puts an extra newline in there! */
+              size_t n = strlen( buf );
+              while( n > 0 && isspace( (unsigned char)buf[ --n ] ) )
+                buf[ n ] = '\0';
+        }
+    }
+#else
+    if( len > 0 && strerror_r( code, buf, len ) != (int)0 ) {
+        strncpy( buf, "unknown error", len-1 );
+        buf[ len-1 ] = '\0';
+    }
+#endif // _WIN32
+}
+
+
+static int ipc_flock_lock( FILE* f, int is_wlock, int* could_lock,
+                           ipc_flock_off_t start,
+                           ipc_flock_off_t len ) {
+#ifdef _WIN32
+    HANDLE fh = (HANDLE)_get_osfhandle( _fileno( f ) );
+    DWORD flags = is_wlock ? LOCKFILE_EXCLUSIVE_LOCK : 0;
+    DWORD lenlo = (DWORD)len, lenhi = (DWORD)(len >> 32);
+    OVERLAPPED ov;
+    if( fh == (HANDLE)INVALID_HANDLE_VALUE ) return IPC_ERR( ERROR_INVALID_HANDLE );
+    if( could_lock != NULL ) flags |= LOCKFILE_FAIL_IMMEDIATELY;
+    ov.Offset = (DWORD)start;
+    ov.OffsetHigh = (DWORD)(start >> 32);
+    ov.hEvent = NULL;
+    if( len == 0 ) lenhi = lenlo = (DWORD)-1;
+    if( !LockFileEx( fh, flags, 0, lenlo, lenhi, &ov ) ) {
+        int code = GetLastError();
+        if( could_lock != NULL && (code == ERROR_LOCK_VIOLATION || code == ERROR_IO_PENDING) )
+        {
+            *could_lock = 0;
+            return 0;
+        }
+        return IPC_ERR( code );
+    }
+#else
+    int rv = 0;
+    int fd = fileno( f );
+    int op = could_lock != NULL ? F_SETLK : F_SETLKW;
+    struct flock fl;
+    fl.l_type = is_wlock ? F_WRLCK : F_RDLCK;
+    fl.l_whence = SEEK_SET;
+    fl.l_start = start;
+    fl.l_len = len;
+    IPC_EINTR( rv, fcntl( fd, op, &fl ) );
+    if( rv < 0 ) {
+        if( could_lock != NULL && (errno == EACCES || errno == EAGAIN) ) {
+            *could_lock = 0;
+            return 0;
+        }
+        return IPC_ERR( errno );
+    }
+#endif
+    if( could_lock != NULL ) *could_lock = 1;
+    return 0;
+}
+
+
+static int ipc_flock_unlock( FILE* f, ipc_flock_off_t start,
+                             ipc_flock_off_t len ) {
+#ifdef _WIN32
+    HANDLE fh = (HANDLE)_get_osfhandle( _fileno( f ) );
+    DWORD lenlo = (DWORD)len, lenhi = (DWORD)(len >> 32);
+    DWORD offlo = (DWORD)start, offhi = (DWORD)(start >> 32);
+    if( fh == (HANDLE)INVALID_HANDLE_VALUE ) return IPC_ERR( ERROR_INVALID_HANDLE );
+    if( len == 0 ) lenhi = lenlo = (DWORD)-1;
+    if( !UnlockFile( fh, offlo, offhi, lenlo, lenhi ) ) return IPC_ERR( GetLastError() );
+#else
+    struct flock fl;
+    fl.l_type = F_UNLCK;
+    fl.l_whence = SEEK_SET;
+    fl.l_start = start;
+    fl.l_len = len;
+    if( fcntl( fileno( f ), F_SETLK, &fl ) < 0 ) return IPC_ERR( errno );
+#endif // _WIN32
+    return 0;
+}
+
+/* maximum expected length of error messages */
+#define IPC_MAXERRMSG 200
+
+static SQRESULT flock_pusherror(HSQUIRRELVM v, int code) {
+  char buf[ IPC_MAXERRMSG ];
+  ipc_flock_error( buf, sizeof( buf ), code );
+  return sq_throwerror(v, _SC("%s"), buf);
+}
+
+static void invalidate_input_buffer( FILE* f ) {
+  /* Linux (and apparently many other implementations) discard
+   * unread characters from the input buffer if fflush is called on
+   * an input file, but this is not guaranteed by ISO C. */
+  fflush( f );
+  /* This should also invalidate the input buffer unless the
+   * implementation checks for that specific case. */
+  fseek( f, 0, SEEK_CUR );
+  /* If both methods don't work, we are out of luck. But using
+   * low-level file locking with buffered IO is a bad idea
+   * anyway! */
+}
+
+SQ_OPT_STRING_STRLEN();
+
+#define GET_file_INSTANCE() SQ_GET_INSTANCE(v, 1, SQFile, SQSTD_FILE_TYPE_TAG) \
+	if(self == NULL) return sq_throwerror(v, _SC("file object already closed"));
+
+
+static SQRESULT getFlockMode(HSQUIRRELVM v, const char *mode)
+{
+    if(mode)
+    {
+        switch(mode[0])
+        {
+        case 'r':
+            if(mode[1] == 'w') return 1;
+            return 0;
+        break;
+
+        case 'w':
+            return 1;
+            break;
+        }
+    }
+    return sq_throwerror(v, _SC("invalid parameter mode '%s'"), mode);;
+}
+
+static SQRESULT sq_FileLock_lock0(HSQUIRRELVM v, int isTry){
+	SQ_FUNC_VARS(v);
+	GET_file_INSTANCE();
+
+	SQ_GET_STRING(v, 2, mode);
+	SQ_OPT_INTEGER(v, 3, offset, 0);
+	SQ_OPT_INTEGER(v, 4, nbytes, 0);
+
+    FILE *fp = (FILE*)self->GetHandle();
+    int is_wlock = getFlockMode(v, mode);
+    if(is_wlock < 0) return is_wlock;
+    ipc_flock_off_t start = offset;
+    ipc_flock_off_t len = nbytes;
+    int could_lock = isTry ? 0 : 1;
+    int rv = ipc_flock_lock(fp, is_wlock, (isTry ? &could_lock : NULL), start, len);
+    if( rv != 0 ) return flock_pusherror(v, rv);
+    /* try to flush input buffer */
+    if(could_lock) invalidate_input_buffer(fp);
+    sq_pushbool(v, could_lock);
+    return 1;
+}
+
+static SQRESULT sq_FileLock_lock(HSQUIRRELVM v){
+    return sq_FileLock_lock0(v, 0);
+}
+
+static SQRESULT sq_FileLock_trylock(HSQUIRRELVM v){
+    return sq_FileLock_lock0(v, 1);
+}
+
+static SQRESULT sq_FileLock_unlock(HSQUIRRELVM v){
+	SQ_FUNC_VARS(v);
+	GET_file_INSTANCE();
+
+	SQ_OPT_INTEGER(v, 2, offset, 0);
+	SQ_OPT_INTEGER(v, 3, nbytes, 0);
+
+    FILE *fp = (FILE*)self->GetHandle();
+    ipc_flock_off_t start = offset;
+    ipc_flock_off_t len = nbytes;
+    int rv = 0;
+    fflush(fp); /* flush output buffer */
+    rv = ipc_flock_unlock(fp, start, len);
+    if( rv != 0 ) return flock_pusherror(v, rv);
+    sq_pushbool(v, SQTrue);
+    return 1;
+}
+
+#define _DECL_FILELOCK_FUNC(name,nparams,pmask) {_SC(#name),sq_FileLock_##name,nparams,pmask}
+static SQRegFunction FileLock_obj_funcs[]={
+	_DECL_FILELOCK_FUNC(lock, -2, _SC("xsii")),
+	_DECL_FILELOCK_FUNC(trylock, -2, _SC("xsii")),
+	_DECL_FILELOCK_FUNC(unlock, -1, _SC("xii")),
+	{0,0}
+};
+#undef _DECL_FILELOCK_FUNC
+
+/* This defines a function that opens up your library. */
+SQRESULT sqext_register_ipc (HSQUIRRELVM v) {
+	//add a namespace FileLock
+    sq_pushstring(v, SQSTD_FILE_CLASS_TYPE_TAG, -1);
+    if(sq_getonregistrytable(v) == SQ_OK){
+        sq_insert_reg_funcs(v, FileLock_obj_funcs);
+        sq_poptop(v);
+        return SQ_OK;
+    }
+    return sq_throwerror(v, _SC("file class not found"));
+}
+
+#ifdef __cplusplus
+}
+
+//#endif //USE_IPC
+
+#endif
+

+ 557 - 0
SquiLu-ext/sq_lpsolve.cpp

@@ -0,0 +1,557 @@
+#if defined(SQ_USE_LPSOLVE) || defined(SQ_USE_LPSOLVE_STATIC)
+
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "squirrel.h"
+#include "sqstdblobimpl.h"
+
+#include "lp_lib.h"
+
+////////
+#include "dynamic_library.h"
+
+/*SquiLu
+
+local library_functions = [
+    ["void", "lp_solve_version", "int *majorversion, int *minorversion, int *release, int *build"],
+    ["lprec*", "make_lp", "int rows, int columns"],
+    ["MYBOOL", "resize_lp", "lprec *lp, int rows, int columns"],
+    ["lprec*", "read_LP", "char *filename, int verbose, char *lp_name"],
+    ["lprec*", "read_MPS", "char *filename, int options"],
+    ["lprec*", "read_freeMPS", "char *filename, int options"],
+    ["void", "delete_lp", "lprec *plp"],
+
+    //the next entry must stay last so that the check
+    //made in load_library() remains valid
+    ["void", "free_lp", "lprec **plp"],
+];
+
+function write_library_functions_declaration(){
+    foreach(k,v in library_functions) {
+        putsnl("typedef " + v[0] + " (*" + v[1] + "_t)(" + v[2] + ");");
+        putsnl("static " + v[1] + "_t dl" + v[1] + " = 0;");
+    }
+}
+
+function write_library_functions_load(){
+    foreach(k,v in library_functions){
+        putsnl("dl" + v[1] + " = (" + v[1] + "_t) libdyn.dlsym(\"" + v[1] + "\");");
+        putsnl("if(!dl" + v[1] + ") return false;");
+    }
+}
+
+function write_library_functions_static_defines(){
+    foreach(k,v in library_functions){
+        putsnl("#define dl" + v[1] + " " + v[1]);
+    }
+}
+SquiLu*/
+
+#ifdef SQ_USE_LPSOLVE_STATIC
+
+#define load_library(x) true
+
+//@write_library_functions_static_defines()
+// generated-code:begin
+#define dllp_solve_version lp_solve_version
+#define dlmake_lp make_lp
+#define dlresize_lp resize_lp
+#define dlread_LP read_LP
+#define dlread_MPS read_MPS
+#define dlread_freeMPS read_freeMPS
+#define dldelete_lp delete_lp
+#define dlfree_lp free_lp
+// generated-code:end
+
+#else
+
+static DynamicLibrary libdyn;
+
+//@write_library_functions_declaration();
+// generated-code:begin
+typedef void (*lp_solve_version_t)(int *majorversion, int *minorversion, int *release, int *build);
+static lp_solve_version_t dllp_solve_version = 0;
+typedef lprec* (*make_lp_t)(int rows, int columns);
+static make_lp_t dlmake_lp = 0;
+typedef MYBOOL (*resize_lp_t)(lprec *lp, int rows, int columns);
+static resize_lp_t dlresize_lp = 0;
+typedef lprec* (*read_LP_t)(char *filename, int verbose, char *lp_name);
+static read_LP_t dlread_LP = 0;
+typedef lprec* (*read_MPS_t)(char *filename, int options);
+static read_MPS_t dlread_MPS = 0;
+typedef lprec* (*read_freeMPS_t)(char *filename, int options);
+static read_freeMPS_t dlread_freeMPS = 0;
+typedef void (*delete_lp_t)(lprec *plp);
+static delete_lp_t dldelete_lp = 0;
+typedef void (*free_lp_t)(lprec **plp);
+static free_lp_t dlfree_lp = 0;
+// generated-code:end
+
+static const char *dynamicLibName = DYNLIB_FOR_OS(liblpsolve55);
+
+static bool load_library(const char *libname)
+{
+    if(dlfree_lp) return true;
+    if(libdyn.open(libname))
+    {
+        //@write_library_functions_load();
+// generated-code:begin
+dllp_solve_version = (lp_solve_version_t) libdyn.dlsym("lp_solve_version");
+if(!dllp_solve_version) return false;
+dlmake_lp = (make_lp_t) libdyn.dlsym("make_lp");
+if(!dlmake_lp) return false;
+dlresize_lp = (resize_lp_t) libdyn.dlsym("resize_lp");
+if(!dlresize_lp) return false;
+dlread_LP = (read_LP_t) libdyn.dlsym("read_LP");
+if(!dlread_LP) return false;
+dlread_MPS = (read_MPS_t) libdyn.dlsym("read_MPS");
+if(!dlread_MPS) return false;
+dlread_freeMPS = (read_freeMPS_t) libdyn.dlsym("read_freeMPS");
+if(!dlread_freeMPS) return false;
+dldelete_lp = (delete_lp_t) libdyn.dlsym("delete_lp");
+if(!dldelete_lp) return false;
+dlfree_lp = (free_lp_t) libdyn.dlsym("free_lp");
+if(!dlfree_lp) return false;
+// generated-code:end
+
+        return true;
+    }
+    return false;
+}
+
+#endif // SQ_USE_LPSOLVE_STATIC
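+
+#if 0
+/* Reference sketch (not compiled, illustrative only): the plain
+ * dlopen/dlsym pattern that the generated loader above wraps through
+ * the DynamicLibrary helper. A single missing symbol fails the whole
+ * load, which is why every resolved pointer is checked. */
+#include <dlfcn.h>
+#include <stdio.h>
+
+typedef void (*lp_solve_version_fn)(int*, int*, int*, int*);
+
+static int demo_load(void)
+{
+    void *h = dlopen("liblpsolve55.so", RTLD_NOW);
+    if(!h) return 0;
+    lp_solve_version_fn fn = (lp_solve_version_fn)dlsym(h, "lp_solve_version");
+    if(!fn) { dlclose(h); return 0; }
+    int major, minor, release, build;
+    fn(&major, &minor, &release, &build);
+    printf("lp_solve %d.%d.%d.%d\n", major, minor, release, build);
+    return 1;
+}
+#endif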
+
+SQ_OPT_STRING_STRLEN();
+
+static const SQChar LPSOLVE_Tag[]   = _SC("SQLPSolve");
+#define GET_lpsolve_INSTANCE() SQ_GET_INSTANCE(v, 1, lprec, LPSOLVE_Tag) \
+	if(self == NULL) return sq_throwerror(v, _SC("sqlpsolve object already closed"));
+
+static SQRESULT sq_lpsolve_releasehook(SQUserPointer p, SQInteger /*size*/, void */*ep*/)
+{
+	lprec *self = ((lprec *)p);
+	if(self)
+    {
+        dldelete_lp(self);
+    }
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_constructor(HSQUIRRELVM v)
+{
+    if(!load_library(dynamicLibName)) return sq_throwerror(v, _SC("Failed to load liblpsolve!"));
+	SQ_FUNC_VARS_NO_TOP(v);
+	SQ_GET_INTEGER(v, 2, constraints);
+	SQ_GET_INTEGER(v, 3, variables);
+
+    lprec *self = dlmake_lp(constraints, variables);
+
+	sq_setinstanceup(v,1,self);
+	sq_setreleasehook(v,1,sq_lpsolve_releasehook);
+	return 0;
+}
+
+static SQRESULT sq_lpsolve_version(HSQUIRRELVM v)
+{
+    int majorversion, minorversion, release, build;
+    dllp_solve_version(&majorversion, &minorversion, &release, &build);
+	sq_pushfstring(v,_SC("lp_solve %d.%d.%d.%d"), majorversion, minorversion, release, build);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_loadlib(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_STRING(v, 2, libname);
+    sq_pushbool(v, load_library(libname));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_verbose(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, mode);
+	//dlset_verbose(self, mode);
+	self->set_verbose(self, mode);
+	return 0;
+}
+
+static SQRESULT sq_lpsolve_set_obj_fn(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+    const SQInteger ary_idx = 2;
+    if(sq_gettype(v, ary_idx) == OT_ARRAY)
+    {
+        SQInteger atype;
+        if(sq_arraygettype(v, ary_idx, &atype) != SQ_OK || atype != eat_SQFloat64Array) return sq_throwerror(v, _SC("SQFloat64Array expected"));
+        void *ary;
+        sq_arraygetrawdata(v, ary_idx, &ary);
+        sq_pushbool(v, self->set_obj_fn(self, (REAL*)ary));
+    }
+    else sq_pushbool(v, SQFalse);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_add_constraint(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+    const SQInteger ary_idx = 2;
+    if(sq_gettype(v, ary_idx) == OT_ARRAY)
+    {
+        SQInteger atype;
+        if(sq_arraygettype(v, ary_idx, &atype) != SQ_OK || atype != eat_SQFloat64Array) return sq_throwerror(v, _SC("SQFloat64Array expected"));
+        void *ary;
+        sq_arraygetrawdata(v, ary_idx, &ary);
+        SQ_GET_INTEGER(v, 3, constr_type);
+        SQ_GET_FLOAT(v, 4, rh);
+        //for(SQInteger i= 0, len =sq_getsize(v, ary_idx); i < len; ++i) printf("add_constraint %d %f\n", (int) i, ((REAL*)ary)[i]);
+        sq_pushbool(v, self->add_constraint(self, (REAL*)ary, constr_type, rh));
+    }
+    else sq_pushbool(v, SQFalse);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_lowbo(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, colnr);
+	SQ_GET_FLOAT(v, 3, val);
+	sq_pushbool(v, self->set_lowbo(self, colnr, val));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_upbo(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, rownr);
+	SQ_GET_FLOAT(v, 3, val);
+	sq_pushbool(v, self->set_upbo(self, rownr, val));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_col_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, colnr);
+	SQ_GET_STRING(v, 3, name);
+	sq_pushbool(v, self->set_col_name(self, colnr, (SQChar*)name));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_col_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, colnr);
+	sq_pushstring(v, self->get_col_name(self, colnr), -1);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_row_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, rownr);
+	SQ_GET_STRING(v, 3, name);
+	sq_pushbool(v, self->set_row_name(self, rownr, (SQChar*)name));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_row_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, rownr);
+	sq_pushstring(v, self->get_row_name(self, rownr), -1);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_lp_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_STRING(v, 2, name);
+	sq_pushbool(v, self->set_lp_name(self, (SQChar*)name));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_lp_name(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushstring(v, self->get_lp_name(self), -1);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_write_lp(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_STRING(v, 2, filename);
+	sq_pushbool(v, self->write_lp(self, (SQChar*)filename));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_read_problem(HSQUIRRELVM v, lprec *self)
+{
+	if(self)
+    {
+        sq_pushstring(v, LPSOLVE_Tag, -1);
+        if(sq_getonroottable(v) == SQ_OK){
+            if(sq_createinstance(v, -1) == SQ_OK){
+                sq_setinstanceup(v, -1, self);
+                sq_setreleasehook(v,-1,sq_lpsolve_releasehook);
+            }
+        }
+    }
+    else sq_pushnull(v);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_read_LP(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+	SQ_GET_STRING(v, 2, filename);
+	SQ_GET_INTEGER(v, 3, verbose);
+	SQ_GET_STRING(v, 4, lp_name);
+
+	lprec *self = dlread_LP((SQChar*)filename, verbose, (SQChar*)lp_name);
+	return sq_lpsolve_read_problem(v, self);
+}
+
+static SQRESULT sq_lpsolve_read_MPS(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+	SQ_GET_STRING(v, 2, filename);
+	SQ_GET_INTEGER(v, 3, options);
+
+	lprec *self = dlread_MPS((SQChar*)filename, options);
+	return sq_lpsolve_read_problem(v, self);
+}
+
+static SQRESULT sq_lpsolve_read_freeMPS(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+	SQ_GET_STRING(v, 2, filename);
+	SQ_GET_INTEGER(v, 3, options);
+
+	lprec *self = dlread_freeMPS((SQChar*)filename, options);
+	return sq_lpsolve_read_problem(v, self);
+}
+
+static SQRESULT sq_lpsolve_get_mat(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, rownr);
+	SQ_GET_INTEGER(v, 3, colnr);
+	sq_pushfloat(v, self->get_mat(self, rownr, colnr));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_mat(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, rownr);
+	SQ_GET_INTEGER(v, 3, colnr);
+	SQ_GET_FLOAT(v, 4, value);
+	sq_pushbool(v, self->set_mat(self, rownr, colnr, value));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_solve(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushinteger(v, self->solve(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_objective(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushfloat(v, self->get_objective(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_variables(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+    const SQInteger ary_idx = 2;
+    if(sq_gettype(v, ary_idx) == OT_ARRAY)
+    {
+        SQInteger atype;
+        if(sq_arraygettype(v, ary_idx, &atype) != SQ_OK || atype != eat_SQFloat64Array) return sq_throwerror(v, _SC("SQFloat64Array expected"));
+        int  ncols = self->get_Ncolumns(self);
+        if(sq_getsize(v, ary_idx) != ncols) return sq_throwerror(v, _SC("SQFloat64Array of size %d expected"), ncols);
+        void *ary;
+        sq_arraygetrawdata(v, ary_idx, &ary);
+        sq_pushbool(v, self->get_variables(self, (REAL*)ary));
+    }
+    else sq_pushbool(v, SQFalse);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_constraints(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+    const SQInteger ary_idx = 2;
+    if(sq_gettype(v, ary_idx) == OT_ARRAY)
+    {
+        SQInteger atype;
+        if(sq_arraygettype(v, ary_idx, &atype) != SQ_OK || atype != eat_SQFloat64Array) return sq_throwerror(v, _SC("SQFloat64Array expected"));
+        int  nrows = self->get_Nrows(self);
+        if(sq_getsize(v, ary_idx) != nrows) return sq_throwerror(v, _SC("SQFloat64Array of size %d expected"), nrows);
+        void *ary;
+        sq_arraygetrawdata(v, ary_idx, &ary);
+        sq_pushbool(v, self->get_constraints(self, (REAL*)ary));
+    }
+    else sq_pushbool(v, SQFalse);
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_nonzeros(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushinteger(v, self->get_nonzeros(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_Nrows(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushinteger(v, self->get_Nrows(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_Ncolumns(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushinteger(v, self->get_Ncolumns(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_get_timeout(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	sq_pushinteger(v, self->get_timeout(self));
+	return 1;
+}
+
+static SQRESULT sq_lpsolve_set_timeout(HSQUIRRELVM v)
+{
+	SQ_FUNC_VARS_NO_TOP(v);
+    GET_lpsolve_INSTANCE();
+	SQ_GET_INTEGER(v, 2, sectimeout);
+	self->set_timeout(self, sectimeout);
+	return 0;
+}
+
+#define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),sq_lpsolve_##name,nparams,tycheck}
+static SQRegFunction sq_lpsolve_methods[] =
+{
+	_DECL_FUNC(constructor,3,_SC(".ii")),
+    _DECL_FUNC(version,1,_SC(".")),
+    _DECL_FUNC(loadlib,2,_SC(".s")),
+    _DECL_FUNC(set_verbose,2,_SC("xi")),
+    _DECL_FUNC(set_obj_fn,2,_SC("xa")),
+    _DECL_FUNC(add_constraint,4,_SC("xain")),
+    _DECL_FUNC(set_lowbo,3,_SC("xin")),
+    _DECL_FUNC(set_upbo,3,_SC("xin")),
+    _DECL_FUNC(set_lp_name,2,_SC("xs")),
+    _DECL_FUNC(get_lp_name,1,_SC("x")),
+    _DECL_FUNC(set_col_name,3,_SC("xis")),
+    _DECL_FUNC(get_col_name,2,_SC("xi")),
+    _DECL_FUNC(set_row_name,3,_SC("xis")),
+    _DECL_FUNC(get_row_name,2,_SC("xi")),
+    _DECL_FUNC(write_lp,2,_SC("xs")),
+    _DECL_FUNC(read_LP,4,_SC(".sis")),
+    _DECL_FUNC(read_MPS,3,_SC(".si")),
+    _DECL_FUNC(read_freeMPS,3,_SC(".si")),
+    _DECL_FUNC(get_mat,3,_SC("xii")),
+    _DECL_FUNC(set_mat,4,_SC("xiin")),
+    _DECL_FUNC(solve,1,_SC("x")),
+    _DECL_FUNC(get_objective,1,_SC("x")),
+    _DECL_FUNC(get_variables,2,_SC("xa")),
+    _DECL_FUNC(get_constraints,2,_SC("xa")),
+    _DECL_FUNC(get_nonzeros,1,_SC("x")),
+    _DECL_FUNC(get_Nrows,1,_SC("x")),
+    _DECL_FUNC(get_Ncolumns,1,_SC("x")),
+    _DECL_FUNC(set_timeout,2,_SC("xi")),
+    _DECL_FUNC(get_timeout,1,_SC("x")),
+    {0,0}
+};
+#undef _DECL_FUNC
+
+typedef struct {
+  const SQChar *Str;
+  SQInteger Val;
+} KeyIntType, * KeyIntPtrType;
+
+static KeyIntType lpsolve_constants[] = {
+    #define MK_CONST(c) {_SC(#c), c}
+    MK_CONST(NEUTRAL),
+    MK_CONST(CRITICAL),
+    MK_CONST(SEVERE),
+    MK_CONST(IMPORTANT),
+    MK_CONST(NORMAL),
+    MK_CONST(DETAILED),
+    MK_CONST(FULL),
+
+    MK_CONST(FR),
+    MK_CONST(LE),
+    MK_CONST(GE),
+    MK_CONST(EQ),
+    MK_CONST(OF),
+    {0,0}
+};
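+
+#if 0
+/* Reference sketch (not compiled, illustrative only): the raw lp_solve
+ * calling convention the bindings above forward to. Input row arrays
+ * are 1-based (element 0 is ignored, so buffers need Ncolumns+1 slots),
+ * while get_variables fills a plain 0-based array of Ncolumns values.
+ * Maximize x + 2y subject to x + y <= 4. */
+static void lp_demo(void)
+{
+    lprec *lp = dlmake_lp(0, 2);        /* 0 rows, 2 columns */
+    REAL row[1 + 2];                    /* row[0] is unused */
+    row[1] = 1; row[2] = 2;
+    lp->set_obj_fn(lp, row);
+    lp->set_maxim(lp);                  /* default is minimization */
+    row[1] = 1; row[2] = 1;
+    lp->add_constraint(lp, row, LE, 4);
+    if(lp->solve(lp) == 0 /* OPTIMAL */) {
+        REAL vars[2];
+        lp->get_variables(lp, vars);
+        printf("obj=%f x=%f y=%f\n", lp->get_objective(lp), vars[0], vars[1]);
+    }
+    dldelete_lp(lp);
+}
+#endif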
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+SQRESULT sqext_register_lpsolve(HSQUIRRELVM v)
+{
+    sq_pushstring(v,LPSOLVE_Tag,-1);
+    sq_newclass(v,SQFalse);
+    sq_settypetag(v,-1,(SQUserPointer)LPSOLVE_Tag);
+    sq_insert_reg_funcs(v, sq_lpsolve_methods);
+
+    //add constants
+    KeyIntPtrType KeyIntPtr;
+    for (KeyIntPtr = lpsolve_constants; KeyIntPtr->Str; KeyIntPtr++) {
+        sq_pushstring(v, KeyIntPtr->Str, -1);    //first the key
+        sq_pushinteger(v, KeyIntPtr->Val);       //then the value
+        sq_newslot(v, -3, SQTrue);              //store them
+    }
+
+    sq_newslot(v,-3,SQTrue);
+
+    return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SQ_USE_LPSOLVE || SQ_USE_LPSOLVE_STATIC
+
+
+

+ 1474 - 0
SquiLu-ext/sq_nn.cpp

@@ -0,0 +1,1474 @@
+#include <stdio.h>
+#include "squirrel.h"
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdlib.h>
+#include <sys/time.h>
+//#include <pthread.h>
+
+SQ_OPT_STRING_STRLEN();
+
+extern "C" {
+#include "nn.h"
+
+void *ann_malloc(size_t sz)
+{
+    return sq_malloc(sz);
+}
+void ann_free(void *p)
+{
+    sq_free(p, 0);
+}
+}
+
+#define NR_FLAG_NONE 0
+#define NR_FLAG_TRAINING (1<<0)         /* NN is training in a thread. */
+#define NR_FLAG_REGRESSOR (1<<1)        /* NN will be used for regression. */
+#define NR_FLAG_CLASSIFIER (1<<2)       /* NN will be used for classification.*/
+#define NR_FLAG_NORMALIZE (1<<3)        /* Perform input/output normalization.*/
+#define NR_FLAG_AUTO_STOP (1<<4)        /* Auto stop on training. */
+#define NR_FLAG_OF_DETECTED (1<<5)      /* Auto stopped on overfitting. */
+#define NR_FLAG_BACKTRACK (1<<6)        /* Auto stop with backtracking. */
+
+/* Flags to persist when saving the NN. */
+#define NR_FLAG_TO_PERSIST (NR_FLAG_REGRESSOR| \
+                            NR_FLAG_CLASSIFIER| \
+                            NR_FLAG_NORMALIZE| \
+                            NR_FLAG_OF_DETECTED)
+
+/* Flags to transfer after training. */
+#define NR_FLAG_TO_TRANSFER (NR_FLAG_OF_DETECTED)
+
+#define NR_MAX_LAYERS 32
+#define NR_RDB_ENC_VER 2
+
+typedef struct {
+    uint32_t len, maxlen;
+    float *inputs, *outputs;
+} NRDataset;
+
+typedef struct {
+    uint64_t id;        /* Neural network unique ID. */
+    uint64_t training_total_steps; /* How many steps of training the network
+                                      received. A step is a single input/output
+                                      pattern presented to the net (counting
+                                      the same pattern multiple times) */
+    uint64_t training_total_ms;   /* Total milliseconds time of training. */
+    uint64_t training_max_cycles; /* Max cycles of a single training. */
+    uint64_t training_max_ms; /* Max time of a single training. */
+    uint32_t flags;     /* NR_FLAG_... */
+    uint32_t epochs;    /* Number of training epochs so far. */
+    AnnRprop *nn;     /* Neural network structure. */
+    NRDataset dataset;  /* Training dataset. */
+    NRDataset test;     /* Testing dataset. */
+    float dataset_error;   /* Average error in the training dataset. */
+    float test_error;      /* Average error in the test dataset. */
+    float test_class_error;    /* Percentage of wrong classifications in test
+                                   dataset. Only applicable to nets flagged with
+                                   NR_FLAG_CLASSIFIER. */
+    /* For normalized (NR_FLAG_NORMALIZE) networks. */
+    float *inorm;          /* Inputs normalization factors. */
+    float *onorm;          /* Outputs normalization factors. */
+} NRTypeObject;
+#if 0
+typedef struct {
+    //RedisModuleString *key; /* Key name of the NN we are training.
+    //                           Set to NULL for unused slots. */
+    int db_id;              /* DB ID where the key is. */
+    pthread_t tid;          /* Thread ID of the trainer. */
+    int in_progress;        /* 0 if training terminated. */
+    NRTypeObject *nr;       /* A copy of the NN we are training. */
+    float dataset_error;    /* Dataset error in the last cycle. */
+    float test_error;       /* Test error in the last cycle. */
+    float class_error;      /* Percentage of wrong classifications. */
+    int curcycle;           /* Current cycle. */
+} NRPendingTraining;
+#endif
+/* We keep an array with NNs currently training in other threads.
+ * Every time an NN command is called, we check whether there are
+ * finished trainings, in order to update the weights of the original
+ * NN stored at the key (we work on a copy in the other thread). */
+#define NR_PENDING_TRAINING_MAX_LEN 32
+
+#if 0
+#define REDISMODULE_ERR -1
+#define REDISMODULE_OK 0
+
+#define RedisModuleCtx void
+
+#define RedisModule_Log(ctx, log_level, msg)
+
+#define UNUSED(V) ((void) V)
+typedef SQString RedisModuleString;
+
+static uint64_t NRNextId = 1; /* Next neural network unique ID. */
+
+#define RedisModule_Alloc(x) sq_malloc(x)
+
+static void *RedisModule_Calloc(size_t nelm, size_t sz)
+{
+    size_t malloc_size = nelm * sz;
+    void *ptr = sq_malloc(malloc_size);
+    if(ptr) memset(ptr, 0, malloc_size);
+    return ptr;
+}
+
+static void *RedisModule_Realloc(void *oldPtr, size_t sz)
+{
+    void *ptr = sq_realloc(oldPtr, 0, sz);
+    return ptr;
+}
+#define RedisModule_Free(x) sq_free(x, 0)
+
+static pthread_mutex_t NRPendingTrainingMutex = PTHREAD_MUTEX_INITIALIZER;
+/* All of the following must be accessed after acquiring the mutex. */
+static NRPendingTraining NRTrainings[NR_PENDING_TRAINING_MAX_LEN];
+static int NRPendingTrainingCount = 0; /* Number of pending trainings. */
+
+/* ========================== Low level object API ========================== */
+
+long long NRMilliseconds(void) {
+    struct timeval tv;
+    long long ust;
+
+    gettimeofday(&tv, NULL);
+    ust = ((long long)tv.tv_sec)*1000000;
+    ust += tv.tv_usec;
+    return ust/1000;
+}
+
+/* Create a network with the specified parameters. Note that the layers
+ * must be specified from the output layer[0] to the input
+ * layer[N]. Each element of the integer array 'layers' specifies how many
+ * units there are in the corresponding layer. */
+static NRTypeObject *createNRTypeObject(int flags, int *layers, int numlayers, int dset_len, int test_len) {
+    NRTypeObject *o;
+    o = (NRTypeObject*)RedisModule_Calloc(1,sizeof(*o));
+    o->id = NRNextId++;
+    o->flags = flags;
+    o->nn = AnnCreateNet(numlayers,layers);
+    o->dataset.maxlen = dset_len;
+    o->test.maxlen = test_len;
+    int ilen = ANN_INPUT_UNITS(o->nn);
+    int olen = ANN_OUTPUT_UNITS(o->nn);
+    o->inorm = (float*)RedisModule_Calloc(1,sizeof(float)*ilen);
+    o->onorm = (float*)RedisModule_Calloc(1,sizeof(float)*olen);
+    for (int j = 0; j < ilen; j++) o->inorm[j] = 1;
+    for (int j = 0; j < olen; j++) o->onorm[j] = 1;
+    return o;
+}
+
+/* Insert data (observations needed to train and test the NN) into the
+ * NN object. While the learning and testing datasets are not yet full,
+ * the observed pattern is inserted evenly into one or the other in
+ * order to make sure the two datasets are populated evenly. When both
+ * are already full, a random element from one or the other (doing
+ * a random weighted choice depending on the length) is substituted with
+ * the new item. */
+#define NR_INSERT_NO_TARGET 0   /* Auto select where to insert. */
+#define NR_INSERT_TRAIN 1       /* Insert in training dataset. */
+#define NR_INSERT_TEST 2        /* Insert in testing dataset. */
+static void NRTypeInsertData(NRTypeObject *o, float *inputs, float *outputs,
+                      int target_ds) {
+    NRDataset *target = NULL;
+
+    /* Check if there is no dataset at all. This may be a valid setup
+     * with online learning, sample by sample. */
+    if (o->dataset.maxlen == 0 && o->test.maxlen == 0) return;
+
+    /* If the user specified a target, select it. */
+    if (target_ds == NR_INSERT_TRAIN) target = &o->dataset;
+    else if (target_ds == NR_INSERT_TEST) target = &o->test;
+
+    /* If no target is specified, but there is only one possible
+     * target, select it ASAP. */
+    if (o->dataset.maxlen == 0) {
+        target = &o->test;
+    } else if (o->test.maxlen == 0) {
+        target = &o->dataset;
+    }
+
+    /* Otherwise choose as the target to populate the one with less data
+     * relative to its size. */
+    if (target == NULL) {
+        /* If one of the two datasets is still not full, pick
+         * based on fill percentage. Otherwise pick a random
+         * target relative to their sizes. */
+        if (o->dataset.len != o->dataset.maxlen ||
+            o->test.len != o->test.maxlen)
+        {
+            float fill_a = (float)o->dataset.len / o->dataset.maxlen;
+            float fill_b = (float)o->test.len / o->test.maxlen;
+            target = (fill_a <= fill_b) ? &o->dataset : &o->test;
+        } else {
+            double r = (double)rand()/RAND_MAX;
+            double sumlen = o->dataset.maxlen + o->test.maxlen;
+            if (r < (double)o->dataset.maxlen/sumlen) {
+                target = &o->dataset;
+            } else {
+                target = &o->test;
+            }
+        }
+    }
+
+    /* Append if there is room or substitute with a random entry. */
+    size_t idx;
+    int j, numin = ANN_INPUT_UNITS(o->nn),
+           numout = ANN_OUTPUT_UNITS(o->nn);
+
+    if (target->maxlen == target->len) {
+        idx = rand() % target->maxlen;
+    } else {
+        idx = target->len;
+        target->len++;
+        target->inputs = (float*)RedisModule_Realloc(target->inputs,
+            sizeof(float)*numin*target->len);
+        target->outputs = (float*)RedisModule_Realloc(target->outputs,
+            sizeof(float)*numout*target->len);
+    }
+
+    /* Finally store the values at position. */
+    for (j = 0; j < numin; j++)
+        target->inputs[idx*numin+j] = inputs[j];
+    for (j = 0; j < numout; j++)
+        target->outputs[idx*numout+j] = outputs[j];
+}
+
+/* Free the specified dataset. */
+void NRDatasetFree(NRDataset *dset) {
+    RedisModule_Free(dset->inputs);
+    RedisModule_Free(dset->outputs);
+}
+
+/* Free a whole NN object. */
+void NRTypeReleaseObject(NRTypeObject *o) {
+    AnnFree(o->nn);
+    NRDatasetFree(&o->dataset);
+    NRDatasetFree(&o->test);
+    RedisModule_Free(o->inorm);
+    RedisModule_Free(o->onorm);
+    RedisModule_Free(o);
+}
+
+/* ================================ Training =============================== */
+
+/* Clone a neural network object, including the training and test dataset.
+ * We use cloning in order to train in a different thread, and later
+ * copy the weights back into the original NN.
+ *
+ * Note when 'newid' is 0, the copied object NN unique ID is the same as the
+ * original as normally this is what we want, in order to later match the
+ * trained network with the object stored at the specified key
+ * in the pending training structure.
+ *
+ * However if the copy is performed with other goals, 'newid' should
+ * be set to non-zero in order to create a net with a different ID. */
+NRTypeObject *NRClone(NRTypeObject *o, int newid) {
+    NRTypeObject *copy;
+    copy = (NRTypeObject*)RedisModule_Calloc(1,sizeof(*o));
+    *copy = *o;
+    if (newid) copy->id = NRNextId++;
+    copy->nn = AnnClone(o->nn);
+    copy->dataset = o->dataset;
+    copy->test = o->test;
+
+    int ilen = ANN_INPUT_UNITS(o->nn);
+    int olen = ANN_OUTPUT_UNITS(o->nn);
+    copy->dataset.inputs = (float*)RedisModule_Alloc(sizeof(float)*ilen*o->dataset.len);
+    copy->dataset.outputs = (float*)RedisModule_Alloc(sizeof(float)*olen*o->dataset.len);
+    copy->test.inputs = (float*)RedisModule_Alloc(sizeof(float)*ilen*o->test.len);
+    copy->test.outputs = (float*)RedisModule_Alloc(sizeof(float)*olen*o->test.len);
+    memcpy(copy->dataset.inputs,o->dataset.inputs,sizeof(float)*ilen*o->dataset.len);
+    memcpy(copy->dataset.outputs,o->dataset.outputs,sizeof(float)*olen*o->dataset.len);
+    memcpy(copy->test.inputs,o->test.inputs,sizeof(float)*ilen*o->test.len);
+    memcpy(copy->test.outputs,o->test.outputs,sizeof(float)*olen*o->test.len);
+
+    copy->inorm = (float*)RedisModule_Alloc(sizeof(float)*ilen);
+    copy->onorm = (float*)RedisModule_Alloc(sizeof(float)*olen);
+    memcpy(copy->inorm,o->inorm,sizeof(float)*ilen);
+    memcpy(copy->onorm,o->onorm,sizeof(float)*olen);
+    return copy;
+}
+
+/* Transfer the weights from the source to the destination NN.
+ * This is used after the learning process finishes in a different
+ * thread, in order to transfer the learning back to the original
+ * NN. */
+static void NRTransferWeights(RedisModuleCtx *ctx, NRTypeObject *dst, NRTypeObject *src) {
+    if (dst->id != src->id) {
+        RedisModule_Log(ctx,"warning",
+            "NSTransferWeight(): source and destination neural network IDs "
+            "don't match. This is unexpected, probably a bug inside the "
+            "module. Weights not transferred back to the origina NN.");
+        return;
+    }
+
+    /* It would be faster to memcpy just the weight array for each layer,
+     * however this way we access the NN in a more abstract way, and should
+     * be fast enough in most cases. We can always optimize it later. */
+    AnnFree(dst->nn);
+    dst->nn = AnnClone(src->nn);
+    dst->training_total_steps = src->training_total_steps;
+    dst->training_total_ms = src->training_total_ms;
+    dst->dataset_error = src->dataset_error;
+    dst->test_error = src->test_error;
+    dst->test_class_error = src->test_class_error;
+    dst->flags |= src->flags & NR_FLAG_TO_TRANSFER;
+
+    int ilen = ANN_INPUT_UNITS(src->nn);
+    int olen = ANN_OUTPUT_UNITS(src->nn);
+    memcpy(dst->inorm,src->inorm,sizeof(float)*ilen);
+    memcpy(dst->onorm,src->onorm,sizeof(float)*olen);
+}
+
+/* Threaded training entry point.
+ *
+ * To get some clue about overfitting algorithm behavior:
+ * #define NR_TRAINING_DEBUG 1
+ */
+void *NRTrainingThreadMain(void *arg) {
+    NRPendingTraining *pt = (NRPendingTraining*)arg;
+    NRTypeObject *nr = pt->nr;
+    int training_iterations = 1;
+    float train_error = 0;
+    float test_error = 0;
+    float class_error = 0;
+    float past_train_error = 1.0/0.0;
+    float past_test_error = 1.0/0.0;
+    int auto_stop = nr->flags & NR_FLAG_AUTO_STOP;
+    int backtrack = nr->flags & NR_FLAG_BACKTRACK;
+
+    uint64_t cycles = 0;
+    long long start = NRMilliseconds();
+    long long cycle_time;
+    int overfitting_count = 0;
+    int overfitting_limit = 5;
+    float best_test_error = 1.0/0.0;
+
+    nr->flags &= ~NR_FLAG_TO_TRANSFER;
+
+    /* If the network is auto normalized, we need to transform the inputs
+     * in a way that's acceptable for the NN. We just find the maximum
+     * absolute value, and divide by it, to get a -1,1 range. There
+     * are more advanced transformations, commonly used in practice,
+     * that could be implemented in the future.
+     *
+     * Note that we compute the normalization vectors for all the inputs
+     * and outputs, however if the network is a classifier, flagged with
+     * (NR_FLAG_CLASSIFIER), no output normalization will be done since
+     * the data is already in 0/1 format. */
+    if ((nr->flags & NR_FLAG_NORMALIZE) && nr->dataset.len) {
+        int ilen = ANN_INPUT_UNITS(nr->nn);
+        int olen = ANN_OUTPUT_UNITS(nr->nn);
+        float *imax = nr->inorm;
+        float *omax = nr->onorm;
+        float *inputs = nr->dataset.inputs;
+        float *outputs = nr->dataset.outputs;
+        for (int i = 0; i < ilen; i++) imax[i] = 1;
+        for (int i = 0; i < olen; i++) omax[i] = 1;
+
+        /* Compute the max values vectors. */
+        for (uint32_t j = 0; j < nr->dataset.len; j++) {
+            for (int i = 0; i < ilen; i++)
+                if (fabs(inputs[i]) > imax[i]) imax[i] = fabs(inputs[i]);
+            for (int i = 0; i < olen; i++)
+                if (fabs(outputs[i]) > omax[i]) omax[i] = fabs(outputs[i]);
+            inputs += ilen;
+            outputs += olen;
+        }
+
+        /* Likely we have not yet seen the true input/output
+         * maximum value, so we multiply the maximum values found by a constant.
+         * However if the max is exactly "1" we assume it's a classification
+         * input and don't alter it. */
+        for (int i = 0; i < ilen; i++) if (imax[i] != 1) imax[i] *= 1.2;
+        for (int i = 0; i < olen; i++) if (omax[i] != 1) omax[i] *= 1.2;
+
+        /* We can normalize the dataset directly: after the training it will
+         * be discarded anyway. */
+        inputs = nr->dataset.inputs;
+        outputs = nr->dataset.outputs;
+        for (uint32_t j = 0; j < nr->dataset.len; j++) {
+            for (int i = 0; i < ilen; i++) inputs[i] /= nr->inorm[i];
+            if (!(nr->flags & NR_FLAG_CLASSIFIER))
+                for (int i = 0; i < olen; i++) outputs[i] /= nr->onorm[i];
+            inputs += ilen;
+            outputs += olen;
+        }
+
+        inputs = nr->test.inputs;
+        outputs = nr->test.outputs;
+        for (uint32_t j = 0; j < nr->test.len; j++) {
+            for (int i = 0; i < ilen; i++) inputs[i] /= nr->inorm[i];
+            if (!(nr->flags & NR_FLAG_CLASSIFIER))
+                for (int i = 0; i < olen; i++) outputs[i] /= nr->onorm[i];
+            inputs += ilen;
+            outputs += olen;
+        }
+    }
+
+    AnnRprop *saved = NULL;  /* Saved to recover on overfitting. */
+    float saved_error;          /* The test error of the saved NN. */
+    float saved_train_error;    /* The training dataset error of the saved NN */
+    float saved_class_error;    /* The % of classification errors of saved NN */
+
+    while(1) {
+        long long cycle_start = NRMilliseconds();
+
+        train_error = AnnTrain(nr->nn,
+                               nr->dataset.inputs,
+                               nr->dataset.outputs,
+                               0,
+                               training_iterations,
+                               nr->dataset.len,
+                               ANN_ALGO_BPROP);
+        cycle_time = NRMilliseconds() - cycle_start;
+        nr->training_total_steps += nr->dataset.len*training_iterations;
+
+        /* Evaluate the error in the case of auto training, stop it
+         * once we see that the error in the training set is decreasing
+         * while the one in the test set is not. */
+        if (auto_stop) {
+            AnnTestError(nr->nn,
+                         nr->test.inputs,
+                         nr->test.outputs,
+                         nr->test.len, &test_error, &class_error);
+
+            if (train_error < past_train_error &&
+                test_error > past_test_error)
+            {
+                overfitting_count++;
+                #ifdef NR_TRAINING_DEBUG
+                printf("+YCLE %lld: [%d] %f VS %f\n", (long long)cycles,
+                    overfitting_count, train_error, test_error);
+                #endif
+                if (overfitting_count == overfitting_limit) {
+                    nr->flags |= NR_FLAG_OF_DETECTED;
+                    break;
+                }
+            } else if (overfitting_count > 0) {
+                #ifdef NR_TRAINING_DEBUG
+                printf("-YCLE %lld: [%d] %f VS %f\n", (long long)cycles,
+                    overfitting_count, train_error, test_error);
+                #endif
+                overfitting_count--;
+            }
+
+            /* Save all the networks with a score better than the currently
+             * saved network. This can be a bit costly, but is safe: one
+             * cycle of training more and overfitting can ruin it all. */
+            if (backtrack && (saved == NULL || test_error < saved_error)) {
+                #ifdef NR_TRAINING_DEBUG
+                printf("SAVED! %f < %f\n", test_error, saved_error);
+                #endif
+                saved_error = test_error;
+                saved_train_error = train_error;
+                saved_class_error = class_error;
+                if (saved) AnnFree(saved);
+                saved = AnnClone(nr->nn);
+            }
+
+            /* Best network found? Reset the overfitting hints counter. */
+            if (test_error < best_test_error) {
+                overfitting_count = 0;
+                best_test_error = test_error;
+                #ifdef NR_TRAINING_DEBUG
+                printf("BEST! %lld: <%d> %f VS %f\n", (long long)cycles,
+                    overfitting_limit,train_error, test_error);
+                #endif
+            }
+
+           /* Also stop if the loss is zero in both datasets. */
+            if (train_error < 0.000000000000001 &&
+                test_error  < 0.000000000000001) break;
+        }
+
+        cycles++;
+        long long total_time = NRMilliseconds()-start;
+
+        /* Cycles and milliseconds stop conditions. */
+        if (nr->training_max_cycles && cycles == nr->training_max_cycles)
+            break;
+        if (nr->training_max_ms && total_time > (long long)nr->training_max_ms)
+            break;
+
+        /* If this is a long training, doing just a single training iteration
+         * per cycle is not optimal: tune the number of iterations so that
+         * each cycle takes at least 100 milliseconds. */
+        if (total_time > 10000 && cycle_time < 100) training_iterations++;
+
+        past_train_error = train_error;
+        past_test_error = test_error;
+
+        /* Update stats for NR.THREADS to show progress. */
+        pthread_mutex_lock(&NRPendingTrainingMutex);
+        pt->dataset_error = train_error;
+        pt->test_error = test_error;
+        if (nr->flags & NR_FLAG_CLASSIFIER) pt->class_error = class_error;
+        pt->curcycle = cycles;
+        pthread_mutex_unlock(&NRPendingTrainingMutex);
+    }
+
+    /* If auto stop is disabled, we still need to compute the test error
+     * in order to return this information to the main thread. */
+    if (!auto_stop) {
+        AnnTestError(nr->nn,
+                     nr->test.inputs,
+                     nr->test.outputs,
+                     nr->test.len, &test_error, &class_error);
+    }
+
+    /* If both autostop and backtracking are enabled, we may have
+     * a better network saved! */
+    if (auto_stop && backtrack) {
+        if (saved && saved_error < test_error) {
+            #ifdef NR_TRAINING_DEBUG
+            printf("BACKTRACK: Saved network used!\n");
+            #endif
+            AnnFree(nr->nn);
+            nr->nn = saved;
+            test_error = saved_error;
+            train_error = saved_train_error;
+            class_error = saved_class_error;
+        } else if (saved) {
+            AnnFree(saved);
+        }
+    }
+
+    if (nr->flags & NR_FLAG_CLASSIFIER) nr->test_class_error = class_error;
+    nr->dataset_error = train_error;
+    nr->test_error = test_error;
+    nr->training_total_ms += NRMilliseconds()-start;
+
+    /* Signal that the training process has finished, it's up to the main
+     * thread to cleanup this training slot, copying the weights to the
+     * original neural network and reclaiming memory for the copy we
+     * used to work. */
+    pthread_mutex_lock(&NRPendingTrainingMutex);
+    pt->in_progress = 0;
+    pthread_mutex_unlock(&NRPendingTrainingMutex);
+    return NULL;
+}
+
+/* Start a background training in another thread. Return REDISMODULE_ERR if
+ * there is no free slot for training, as we already reached the maximum of
+ * networks we can train in parallel.
+ *
+ * The 'flags' argument specifies the additional NN flags to pass to the
+ * training routine:
+ *
+ *  NR_FLAG_AUTO_STOP -- Automatically stop training on overtraining.
+ *  NR_FLAG_BACKTRACK -- Save current NN state when overfitting is likely.
+ */
+int NRStartTraining(RedisModuleCtx *ctx, RedisModuleString *key, int dbid, NRTypeObject *nr) {
+    pthread_mutex_lock(&NRPendingTrainingMutex);
+    if (NRPendingTrainingCount == NR_PENDING_TRAINING_MAX_LEN) {
+        pthread_mutex_unlock(&NRPendingTrainingMutex);
+        return REDISMODULE_ERR;
+    }
+
+    /* Set up our training data. */
+    NRPendingTraining *pt = &NRTrainings[NRPendingTrainingCount];
+    //pt->key = RedisModule_CreateStringFromString(ctx,key);
+    //RedisModule_RetainString(ctx,pt->key);
+    pt->db_id = dbid;
+    pt->in_progress = 1;
+    pt->nr = NRClone(nr,0);
+    pt->dataset_error = 0;
+    pt->test_error = 0;
+    pt->class_error = 0;
+    pt->curcycle = 0;
+    if (pthread_create(&pt->tid,NULL,NRTrainingThreadMain,pt) != 0) {
+        RedisModule_Log(ctx,"warning","Unable to create a new pthread in NRStartTraining()");
+        //RedisModule_FreeString(ctx,pt->key);
+        pt->key = NULL;
+        NRTypeReleaseObject(pt->nr);
+        pthread_mutex_unlock(&NRPendingTrainingMutex);
+        return REDISMODULE_ERR;
+    }
+    NRPendingTrainingCount++;
+    nr->flags |= NR_FLAG_TRAINING;
+    nr->flags &= ~NR_FLAG_TO_TRANSFER;
+    pthread_mutex_unlock(&NRPendingTrainingMutex);
+    return REDISMODULE_OK;
+}
+
+/* Check if there are threads that terminated the NN training, and
+ * collect the info they computed (that is the new NN). */
+int NRCollectThreads(RedisModuleCtx *ctx) {
+    int collected = 0;
+    pthread_mutex_lock(&NRPendingTrainingMutex);
+    for (int j = 0; j < NRPendingTrainingCount; j++) {
+        NRPendingTraining *pt = &NRTrainings[j];
+        if (pt->in_progress == 0) {
+            /* Training terminated. Let's see if the key
+             * is still there and NN ID matches. */
+            int orig_id = RedisModule_GetSelectedDb(ctx);
+            if (orig_id != pt->db_id) RedisModule_SelectDb(ctx,pt->db_id);
+            RedisModuleKey *key = RedisModule_OpenKey(ctx,pt->key,
+                REDISMODULE_READ|REDISMODULE_WRITE);
+            if (RedisModule_ModuleTypeGetType(key) == NRType) {
+                NRTypeObject *nr = RedisModule_ModuleTypeGetValue(key);
+                if (nr->id == pt->nr->id) {
+                    NRTransferWeights(ctx,nr,pt->nr);
+                    nr->flags &= ~NR_FLAG_TRAINING;
+                }
+                RedisModule_FreeString(ctx,pt->key);
+                pt->key = NULL;
+                NRTypeReleaseObject(pt->nr);
+                NRPendingTrainingCount--;
+                memcpy(&NRTrainings[j],&NRTrainings[j+1],
+                    (NRPendingTrainingCount-j)*sizeof(NRTrainings[0]));
+            }
+            if (orig_id != pt->db_id) RedisModule_SelectDb(ctx,orig_id);
+            collected++;
+        }
+    }
+    pthread_mutex_unlock(&NRPendingTrainingMutex);
+    return collected;
+}
+#endif // 0
+
+#define RedisModule_Free(x) sq_free(x, 0)
+
+static void *RedisModule_Calloc(size_t nelm, size_t sz)
+{
+    size_t malloc_size = nelm * sz;
+    void *ptr = sq_malloc(malloc_size);
+    if(ptr) memset(ptr, 0, malloc_size);
+    return ptr;
+}
+
+static void *RedisModule_Realloc(void *oldPtr, size_t sz)
+{
+    void *ptr = sq_realloc(oldPtr, 0, sz);
+    return ptr;
+}
+
+static uint64_t NRNextId = 1; /* Next neural network unique ID. */
+
+long long NRMilliseconds(void) {
+    struct timeval tv;
+    long long ust;
+
+    gettimeofday(&tv, NULL);
+    ust = ((long long)tv.tv_sec)*1000000;
+    ust += tv.tv_usec;
+    return ust/1000;
+}
+
+/* Create a network with the specified parameters. Note that the layers
+ * must be specified from the output layer[0] to the input
+ * layer[N]. Each element of the integer array 'layers' specifies how many
+ * units there are in the corresponding layer. */
+static NRTypeObject *createNRTypeObject(int flags, int *layers, int numlayers, int dset_len, int test_len) {
+    NRTypeObject *o;
+    o = (NRTypeObject*)RedisModule_Calloc(1,sizeof(*o));
+    o->id = NRNextId++;
+    o->flags = flags;
+    o->nn = AnnCreateNet(numlayers,layers);
+    o->dataset.maxlen = dset_len;
+    o->test.maxlen = test_len;
+    int ilen = ANN_INPUT_UNITS(o->nn);
+    int olen = ANN_OUTPUT_UNITS(o->nn);
+    o->inorm = (float*)RedisModule_Calloc(1,sizeof(float)*ilen);
+    o->onorm = (float*)RedisModule_Calloc(1,sizeof(float)*olen);
+    for (int j = 0; j < ilen; j++) o->inorm[j] = 1;
+    for (int j = 0; j < olen; j++) o->onorm[j] = 1;
+    return o;
+}
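+
+#if 0
+/* Illustrative only: a classifier with 4 inputs, hidden layers of 8 and
+ * 6 units, and 2 outputs is described output-first, as noted above: */
+int layers[] = { 2, 6, 8, 4 };  /* layers[0]=outputs ... layers[3]=inputs */
+NRTypeObject *o = createNRTypeObject(NR_FLAG_CLASSIFIER, layers, 4, 1000, 250);
+#endif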
+
+/* Insert data (observations needed to train and test the NN) into the
+ * NN object. While the learning and testing datasets are not yet full,
+ * the observed pattern is inserted evenly into one or the other in
+ * order to make sure the two datasets are populated evenly. When both
+ * are already full, a random element from one or the other (doing
+ * a random weighted choice depending on the length) is substituted with
+ * the new item. */
+#define NR_INSERT_NO_TARGET 0   /* Auto select where to insert. */
+#define NR_INSERT_TRAIN 1       /* Insert in training dataset. */
+#define NR_INSERT_TEST 2        /* Insert in testing dataset. */
+static void NRTypeInsertData(NRTypeObject *o, float *inputs, float *outputs,
+                      int target_ds) {
+    NRDataset *target = NULL;
+
+    /* Check if there is no dataset at all. This may be a valid setup
+     * with online learning, sample by sample. */
+    if (o->dataset.maxlen == 0 && o->test.maxlen == 0) return;
+
+    /* If the user specified a target, select it. */
+    if (target_ds == NR_INSERT_TRAIN) target = &o->dataset;
+    else if (target_ds == NR_INSERT_TEST) target = &o->test;
+
+    /* If no target is specified, but there is only one possible
+     * target, select it ASAP. */
+    if (o->dataset.maxlen == 0) {
+        target = &o->test;
+    } else if (o->test.maxlen == 0) {
+        target = &o->dataset;
+    }
+
+    /* Otherwise choose as the target to populate the one with less data
+     * relative to its size. */
+    if (target == NULL) {
+        /* If one of the two datasets is still not full, pick
+         * based on fill percentage. Otherwise pick a random
+         * target relative to their sizes. */
+        if (o->dataset.len != o->dataset.maxlen ||
+            o->test.len != o->test.maxlen)
+        {
+            float fill_a = (float)o->dataset.len / o->dataset.maxlen;
+            float fill_b = (float)o->test.len / o->test.maxlen;
+            target = (fill_a <= fill_b) ? &o->dataset : &o->test;
+        } else {
+            double r = (double)rand()/RAND_MAX;
+            double sumlen = o->dataset.maxlen + o->test.maxlen;
+            if (r < (double)o->dataset.maxlen/sumlen) {
+                target = &o->dataset;
+            } else {
+                target = &o->test;
+            }
+        }
+    }
+
+    /* Append if there is room or substitute with a random entry. */
+    size_t idx;
+    int j, numin = ANN_INPUT_UNITS(o->nn),
+           numout = ANN_OUTPUT_UNITS(o->nn);
+
+    if (target->maxlen == target->len) {
+        idx = rand() % target->maxlen;
+    } else {
+        idx = target->len;
+        target->len++;
+        target->inputs = (float*)RedisModule_Realloc(target->inputs,
+            sizeof(float)*numin*target->len);
+        target->outputs = (float*)RedisModule_Realloc(target->outputs,
+            sizeof(float)*numout*target->len);
+    }
+
+    /* Finally store the values at position. */
+    for (j = 0; j < numin; j++)
+        target->inputs[idx*numin+j] = inputs[j];
+    for (j = 0; j < numout; j++)
+        target->outputs[idx*numout+j] = outputs[j];
+}
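+
+/* Worked example of the policy above: with dataset.maxlen=1000 and
+ * test.maxlen=250, once both sets are full a new observation replaces a
+ * random training row with probability 1000/1250 = 0.8 and a random test
+ * row with probability 0.2, so the 80/20 split stays stable over time. */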
+
+/* Free the specified dataset. */
+void NRDatasetFree(NRDataset *dset) {
+    RedisModule_Free(dset->inputs);
+    RedisModule_Free(dset->outputs);
+}
+
+/* Free a whole NN object. */
+void NRTypeReleaseObject(NRTypeObject *o) {
+    AnnFree(o->nn);
+    NRDatasetFree(&o->dataset);
+    NRDatasetFree(&o->test);
+    RedisModule_Free(o->inorm);
+    RedisModule_Free(o->onorm);
+    RedisModule_Free(o);
+}
+
+static const SQChar sq_nn_TAG[] = _SC("AnnRprop");
+
+static SQRESULT sq_nn_release_hook(SQUserPointer p, SQInteger size, void */*ep*/) {
+    NRTypeObject *self = (NRTypeObject *)p;
+    if(self) NRTypeReleaseObject(self);
+    return 0;
+}
+
+/*
+** Creates a new AnnRprop.
+*/
+static SQRESULT sq_nn_constructor (HSQUIRRELVM v) {
+    SQ_FUNC_VARS(v);
+    SQ_GET_INTEGER(v, 2, flags);
+    SQ_GET_INTEGER(v, 3, ninputs);
+    const SQInteger nhidden_pos = 4;
+    SQ_GET_INTEGER(v, 5, noutputs);
+    SQ_OPT_INTEGER(v, 6, ndata, 0);
+    SQ_OPT_INTEGER(v, 7, ntest, 0);
+
+    if(!(
+            ((flags & NR_FLAG_CLASSIFIER) && !(flags & NR_FLAG_REGRESSOR))
+            || (!(flags & NR_FLAG_CLASSIFIER) && (flags & NR_FLAG_REGRESSOR))
+         )
+       )
+        return sq_throwerror(v, _SC("invalid neural network type. Must be "
+                                    "CLASSIFIER or REGRESSOR"));
+
+    int layers[NR_MAX_LAYERS], num_layers=0;
+    layers[num_layers++] = noutputs;
+
+    /* Our NN library takes the definition of layers in the opposite
+     * order, swap the layers array. */
+    SQInteger asize = sq_getsize(v, nhidden_pos);
+    for(int i=asize-1; i >= 0; --i)
+    {
+        sq_pushinteger(v, i);
+        sq_get(v, nhidden_pos);
+        SQInteger nhidden;
+        SQRESULT rc = sq_getinteger(v, -1, &nhidden);
+        if(rc != SQ_OK) return sq_throwerror(v, _SC("only integers expected on hidden layers array"));
+        layers[num_layers++] = nhidden;
+        sq_poptop(v);
+    }
+
+    layers[num_layers++] = ninputs;
+    //for(int i=0; i < num_layers; ++i) printf("layers %d : %d\n", i, layers[i]);
+
+    NRTypeObject *self = createNRTypeObject(flags, layers, num_layers, ndata, ntest);
+
+    if(self){
+        self->flags = flags;
+        sq_setinstanceup(v, 1, self);
+        sq_setreleasehook(v, 1, sq_nn_release_hook);
+        return 1;
+    }
+    return sq_throwerror(v, _SC("failed to create AnnRprop"));
+}
+
+#define SQ_GET_NN_INSTANCE(v, at) SQ_GET_INSTANCE_VAR(v, at, NRTypeObject, self, sq_nn_TAG)
+
+static SQRESULT sq_nn_observe(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    SQ_OPT_INTEGER(v, 4, target, NR_INSERT_NO_TARGET);
+
+    SQInteger ilen = ANN_INPUT_UNITS(self->nn);
+    SQInteger olen = ANN_OUTPUT_UNITS(self->nn);
+    SQInteger oargs = (self->flags & NR_FLAG_CLASSIFIER) ? 1 : olen;
+
+    const SQInteger inputs_pos = 2;
+    const SQInteger outputs_pos = 3;
+
+    SQInteger asize_inputs = sq_getsize(v, inputs_pos);
+    SQInteger asize_outputs = sq_getsize(v, outputs_pos);
+
+    if((ilen != asize_inputs) || (oargs != asize_outputs))
+        return sq_throwerror(v, _SC( "number of arguments does not "
+            "match the number of " _PRINT_INT_FMT " inputs and " _PRINT_INT_FMT " outputs in the neural network"),
+                             ilen, oargs);
+
+    const SQInteger inputs_alloc_size = sizeof(float)*ilen;
+    const SQInteger outputs_alloc_size = sizeof(float)*olen;
+
+    float *inputs = (float*)sq_malloc(inputs_alloc_size);
+    for(SQInteger i=0; i < ilen; ++i)
+    {
+        sq_pushinteger(v, i);
+        sq_get(v, inputs_pos);
+        SQFloat fnum;
+        SQRESULT rc = sq_getfloat(v, -1, &fnum);
+        if(rc != SQ_OK)
+        {
+            sq_free(inputs, inputs_alloc_size);
+            return sq_throwerror(v, _SC("only numbers expected on input array"));
+        }
+        inputs[i] = fnum;
+        sq_poptop(v);
+    }
+
+    float *outputs = (float*)sq_malloc(outputs_alloc_size);
+    for(SQInteger i=0; i < oargs; ++i)
+    {
+        sq_pushinteger(v, i);
+        sq_get(v, outputs_pos);
+        SQFloat fnum;
+        SQRESULT rc = sq_getfloat(v, -1, &fnum);
+        if(rc != SQ_OK)
+        {
+            sq_free(inputs, inputs_alloc_size);
+            sq_free(outputs, outputs_alloc_size);
+            return sq_throwerror(v, _SC("only numbers expected on output array"));
+        }
+        if (self->flags & NR_FLAG_CLASSIFIER) {
+            int classid = fnum;
+            if (classid != fnum || fnum >= olen || fnum < 0) {
+                sq_free(inputs, inputs_alloc_size);
+                sq_free(outputs, outputs_alloc_size);
+                return sq_throwerror(v, _SC("classifier network output must be an integer "
+                    "in the range from 0 to outputs-1."));
+            }
+            memset(outputs,0, outputs_alloc_size);
+            outputs[classid] = 1;
+        } else {
+            outputs[i] = fnum;
+        }
+
+        sq_poptop(v);
+    }
+
+    NRTypeInsertData(self,inputs,outputs,target);
+    sq_free(inputs, inputs_alloc_size);
+    sq_free(outputs, outputs_alloc_size);
+
+	return 0;
+}
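+
+#if 0
+/* Illustrative only: for a classifier network the binding above takes a
+ * single class id and stores a one-hot target vector, e.g. class 2 of 4: */
+float outputs[4] = {0, 0, 0, 0};
+outputs[2] = 1;   /* what observe() records for class id 2 */
+#endif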
+
+static SQRESULT sq_nn_train(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    SQ_OPT_INTEGER(v, 2, opt_max_cycles, 0);
+    SQ_OPT_INTEGER(v, 3, opt_max_ms, 10000);
+    SQ_OPT_INTEGER(v, 4, opt_flags, 0);
+
+    NRTypeObject *nr = self;
+
+    nr->training_max_cycles = opt_max_cycles;
+    nr->training_max_ms = opt_max_ms;
+    if(opt_flags & NR_FLAG_AUTO_STOP) nr->flags |= NR_FLAG_AUTO_STOP;
+    if(opt_flags & NR_FLAG_BACKTRACK) nr->flags |= NR_FLAG_BACKTRACK;
+
+    /* Overfitting detection compares error rate in testing/training data,
+     * so does not work without entries in the testing dataset. */
+    if (nr->flags & NR_FLAG_AUTO_STOP && nr->test.len == 0) {
+        return sq_throwerror(v, _SC("Can't start training with AUTOSTOP option: "
+            "overfitting detection requires a non zero length testing dataset"));
+    }
+
+    int training_iterations = 1;
+    float train_error = 0;
+    float test_error = 0;
+    float class_error = 0;
+    float past_train_error = 1.0/0.0;
+    float past_test_error = 1.0/0.0;
+    int auto_stop = nr->flags & NR_FLAG_AUTO_STOP;
+    int backtrack = nr->flags & NR_FLAG_BACKTRACK;
+
+    uint64_t cycles = 0;
+    long long start = NRMilliseconds();
+    long long cycle_time;
+    int overfitting_count = 0;
+    int overfitting_limit = 5;
+    float best_test_error = 1.0/0.0;
+
+    nr->flags &= ~NR_FLAG_TO_TRANSFER;
+
+    /* If the network is auto normalized, we need to transform the inputs
+     * in a way that's acceptable for the NN. We just find the maximum
+     * absolute value, and divide by it, to get a -1,1 range. There
+     * are more advanced transformations, commonly used in practice,
+     * that could be implemented in the future.
+     *
+     * Note that we compute the normalization vectors for all the inputs
+     * and outputs, however if the network is a classifier, flagged with
+     * (NR_FLAG_CLASSIFIER), no output normalization will be done since
+     * the data is already in 0/1 format. */
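+    /* Worked example: if the largest |value| seen on an input column is
+     * 50, imax becomes 50*1.2 = 60 and every sample is divided by 60,
+     * keeping training inputs safely inside the -1..1 range. */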
+    if ((nr->flags & NR_FLAG_NORMALIZE) && nr->dataset.len) {
+        int ilen = ANN_INPUT_UNITS(nr->nn);
+        int olen = ANN_OUTPUT_UNITS(nr->nn);
+        float *imax = nr->inorm;
+        float *omax = nr->onorm;
+        float *inputs = nr->dataset.inputs;
+        float *outputs = nr->dataset.outputs;
+        for (int i = 0; i < ilen; i++) imax[i] = 1;
+        for (int i = 0; i < olen; i++) omax[i] = 1;
+
+        /* Compute the max values vectors. */
+        for (uint32_t j = 0; j < nr->dataset.len; j++) {
+            for (int i = 0; i < ilen; i++)
+                if (fabs(inputs[i]) > imax[i]) imax[i] = fabs(inputs[i]);
+            for (int i = 0; i < olen; i++)
+                if (fabs(outputs[i]) > omax[i]) omax[i] = fabs(outputs[i]);
+            inputs += ilen;
+            outputs += olen;
+        }
+
+        /* Likely we have not yet seen the true input/output
+         * maximum value, so we multiply the maximum values found by a constant.
+         * However if the max is exactly "1" we assume it's a classification
+         * input and don't alter it. */
+        for (int i = 0; i < ilen; i++) if (imax[i] != 1) imax[i] *= 1.2;
+        for (int i = 0; i < olen; i++) if (omax[i] != 1) omax[i] *= 1.2;
+
+        /* We can normalize the dataset directly: after the training it will
+         * be discarded anyway. */
+        inputs = nr->dataset.inputs;
+        outputs = nr->dataset.outputs;
+        for (uint32_t j = 0; j < nr->dataset.len; j++) {
+            for (int i = 0; i < ilen; i++) inputs[i] /= nr->inorm[i];
+            if (!(nr->flags & NR_FLAG_CLASSIFIER))
+                for (int i = 0; i < olen; i++) outputs[i] /= nr->onorm[i];
+            inputs += ilen;
+            outputs += olen;
+        }
+
+        inputs = nr->test.inputs;
+        outputs = nr->test.outputs;
+        for (uint32_t j = 0; j < nr->test.len; j++) {
+            for (int i = 0; i < ilen; i++) inputs[i] /= nr->inorm[i];
+            if (!(nr->flags & NR_FLAG_CLASSIFIER))
+                for (int i = 0; i < olen; i++) outputs[i] /= nr->onorm[i];
+            inputs += ilen;
+            outputs += olen;
+        }
+    }
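+
+    /* Worked example of the normalization above: if the largest absolute
+     * value seen for input 0 is 3.0, imax[0] becomes 3.0*1.2 = 3.6, so a
+     * sample value of -3.0 is stored as -3.0/3.6 = -0.833, safely inside
+     * the -1,1 range even if slightly larger values show up later. */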
+
+    AnnRprop *saved = NULL;  /* Saved to recover on overfitting. */
+    float saved_error;          /* The test error of the saved NN. */
+    float saved_train_error;    /* The training dataset error of the saved NN */
+    float saved_class_error;    /* The % of classification errors of saved NN */
+
+    while(1) {
+        long long cycle_start = NRMilliseconds();
+
+        train_error = AnnTrain(nr->nn,
+                               nr->dataset.inputs,
+                               nr->dataset.outputs,
+                               0,
+                               training_iterations,
+                               nr->dataset.len,
+                               ANN_ALGO_BPROP);
+        cycle_time = NRMilliseconds() - cycle_start;
+        nr->training_total_steps += nr->dataset.len*training_iterations;
+
+        /* In auto-stop mode, evaluate the error and stop the training
+         * once the error on the training set keeps decreasing while the
+         * one on the test set does not. */
+        if (auto_stop) {
+            AnnTestError(nr->nn,
+                         nr->test.inputs,
+                         nr->test.outputs,
+                         nr->test.len, &test_error, &class_error);
+
+            if (train_error < past_train_error &&
+                test_error > past_test_error)
+            {
+                overfitting_count++;
+                #ifdef NR_TRAINING_DEBUG
+                printf("+YCLE %lld: [%d] %f VS %f\n", (long long)cycles,
+                    overfitting_count, train_error, test_error);
+                #endif
+                if (overfitting_count == overfitting_limit) {
+                    nr->flags |= NR_FLAG_OF_DETECTED;
+                    break;
+                }
+            } else if (overfitting_count > 0) {
+                #ifdef NR_TRAINING_DEBUG
+                printf("-YCLE %lld: [%d] %f VS %f\n", (long long)cycles,
+                    overfitting_count, train_error, test_error);
+                #endif
+                overfitting_count--;
+            }
+
+            /* Save every network that scores better than the currently
+             * saved one. This can be a bit costly, but it is safe: one
+             * more training cycle and overfitting could ruin it all. */
+            if (backtrack && (saved == NULL || test_error < saved_error)) {
+                #ifdef NR_TRAINING_DEBUG
+                printf("SAVED! %f < %f\n", test_error, saved_error);
+                #endif
+                saved_error = test_error;
+                saved_train_error = train_error;
+                saved_class_error = class_error;
+                if (saved) AnnFree(saved);
+                saved = AnnClone(nr->nn);
+            }
+
+            /* Best network found? Reset the overfitting hints counter. */
+            if (test_error < best_test_error) {
+                overfitting_count = 0;
+                best_test_error = test_error;
+                #ifdef NR_TRAINING_DEBUG
+                printf("BEST! %lld: <%d> %f VS %f\n", (long long)cycles,
+                    overfitting_limit,train_error, test_error);
+                #endif
+            }
+
+            /* Also stop if the loss is zero in both datasets. */
+            if (train_error < 0.000000000000001 &&
+                test_error  < 0.000000000000001) break;
+        }
+
+        cycles++;
+        long long total_time = NRMilliseconds()-start;
+
+        /* Cycles and milliseconds stop conditions. */
+        if (nr->training_max_cycles && cycles == nr->training_max_cycles)
+            break;
+        if (nr->training_max_ms && total_time > (long long)nr->training_max_ms)
+            break;
+
+        /* For long trainings, doing a single training iteration per cycle
+         * is not optimal: raise the number of iterations until each cycle
+         * takes at least 100 milliseconds. */
+        if (total_time > 10000 && cycle_time < 100) training_iterations++;
+
+        past_train_error = train_error;
+        past_test_error = test_error;
+    }
+
+    /* If auto stop is disabled, we still need to compute the test error
+     * in order to report this information back to the caller. */
+    if (!auto_stop) {
+        AnnTestError(nr->nn,
+                     nr->test.inputs,
+                     nr->test.outputs,
+                     nr->test.len, &test_error, &class_error);
+    }
+
+    /* If both autostop and backtracking are enabled, we may have
+     * a better network saved! */
+    if (auto_stop && backtrack) {
+        if (saved && saved_error < test_error) {
+            #ifdef NR_TRAINING_DEBUG
+            printf("BACKTRACK: Saved network used!\n");
+            #endif
+            AnnFree(nr->nn);
+            nr->nn = saved;
+            test_error = saved_error;
+            train_error = saved_train_error;
+            class_error = saved_class_error;
+        } else if (saved) {
+            AnnFree(saved);
+        }
+    }
+
+    if (nr->flags & NR_FLAG_CLASSIFIER) nr->test_class_error = class_error;
+    nr->dataset_error = train_error;
+    nr->test_error = test_error;
+    nr->training_total_ms += NRMilliseconds()-start;
+
+	return 0;
+}
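+
+/* Illustrative sketch, excluded from compilation: the essence of the
+ * training loop above, reduced to the AnnTrain/AnnTestError contract it
+ * builds on. All names below are placeholders, not part of this module. */
+#if 0
+static void train_with_early_stop_sketch(AnnRprop *nn,
+    float *train_in, float *train_out, int train_len,
+    float *test_in, float *test_out, int test_len)
+{
+    float prev_test_error = 1.0/0.0;
+    for (int cycle = 0; cycle < 1000; cycle++) {
+        /* One pass over the training dataset... */
+        AnnTrain(nn, train_in, train_out, 0, 1, train_len, ANN_ALGO_BPROP);
+        /* ...then measure generalization on the held-out data. */
+        float test_error, class_error;
+        AnnTestError(nn, test_in, test_out, test_len,
+                     &test_error, &class_error);
+        /* Naive early stop; the real loop above is more tolerant, counting
+         * overfitting hints and backtracking to a saved clone. */
+        if (test_error > prev_test_error) break;
+        prev_test_error = test_error;
+    }
+}
+#endif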
+
+static SQRESULT sq_nn_run(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+
+    SQInteger asize_inputs = sq_getsize(v, 2);
+    SQInteger ilen = ANN_INPUT_UNITS(self->nn);
+    if(ilen != asize_inputs)
+        return sq_throwerror(v, _SC("wrong number of inputs " _PRINT_INT_FMT " for expected " _PRINT_INT_FMT), asize_inputs, ilen);
+
+    for(SQInteger i=0; i < ilen; ++i)
+    {
+        sq_pushinteger(v, i);
+        sq_get(v, 2);
+        SQFloat fnum;
+        SQRESULT rc = sq_getfloat(v, -1, &fnum);
+        if(rc != SQ_OK)
+        {
+            return sq_throwerror(v, _SC("only numbers expected on input array"));
+        }
+        if (self->flags & NR_FLAG_NORMALIZE) fnum /= self->inorm[i];
+        ANN_INPUT_NODE(self->nn,i) = fnum;
+        sq_poptop(v);
+    }
+    AnnSimulate(self->nn);
+
+    /* Return the raw network outputs as an array, scaling them back by
+     * the normalization vector when auto-normalization is enabled and the
+     * network is not a classifier. */
+    int olen = ANN_OUTPUT_UNITS(self->nn);
+    sq_newarray(v, olen);
+    for(int j = 0; j < olen; j++) {
+        float output = ANN_OUTPUT_NODE(self->nn,j);
+        if (!(self->flags & NR_FLAG_CLASSIFIER) &&
+             (self->flags & NR_FLAG_NORMALIZE))
+        {
+            output *= self->onorm[j];
+        }
+        sq_pushfloat(v, output);
+        sq_arrayset(v, -2, j);
+    }
+
+    return 1;
+}
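+
+/* Illustrative sketch, excluded from compilation: the forward pass that
+ * run() performs once the Squirrel stack plumbing is stripped away; the
+ * my_inputs array is a placeholder. */
+#if 0
+static void forward_pass_sketch(AnnRprop *nn, const float *my_inputs)
+{
+    for (int i = 0; i < ANN_INPUT_UNITS(nn); i++)
+        ANN_INPUT_NODE(nn, i) = my_inputs[i];  /* load the input layer */
+    AnnSimulate(nn);                           /* forward propagation */
+    for (int j = 0; j < ANN_OUTPUT_UNITS(nn); j++)
+        printf("output %d = %f\n", j, ANN_OUTPUT_NODE(nn, j));
+}
+#endif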
+
+static SQRESULT sq_nn_classify(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+
+    if (!(self->flags & NR_FLAG_CLASSIFIER))
+        return sq_throwerror(v, _SC("you can't call classify with a regressor network."));
+
+    SQInteger asize_inputs = sq_getsize(v, 2);
+    SQInteger ilen = ANN_INPUT_UNITS(self->nn);
+    if(ilen != asize_inputs)
+        return sq_throwerror(v, _SC("wrong number of inputs %d for expected %d"), (int)asize_inputs, (int)ilen);
+
+    for(SQInteger i=0; i < ilen; ++i)
+    {
+        sq_pushinteger(v, i);
+        sq_get(v, 2);
+        SQFloat fnum;
+        SQRESULT rc = sq_getfloat(v, -1, &fnum);
+        if(rc != SQ_OK)
+        {
+            return sq_throwerror(v, _SC("only numbers expected on input array"));
+        }
+        if (self->flags & NR_FLAG_NORMALIZE) fnum /= self->inorm[i];
+        ANN_INPUT_NODE(self->nn,i) = fnum;
+        sq_poptop(v);
+    }
+    AnnSimulate(self->nn);
+
+    /* The network is a classifier: return the ID of the class with the
+     * strongest activation (argmax over the output units). */
+    int olen = ANN_OUTPUT_UNITS(self->nn);
+    float fmax = ANN_OUTPUT_NODE(self->nn,0);
+    int max_class = 0;
+    for(int j = 1; j < olen; j++) {
+        float output = ANN_OUTPUT_NODE(self->nn,j);
+        if (output > fmax) {
+            fmax = output;
+            max_class = j;
+        }
+    }
+    sq_pushinteger(v, max_class);
+
+    return 1;
+}
+
+#define ADD_T_TABLE_STR(sk, sv) \
+    sq_pushstring(v, sk, -1); \
+    sq_pushstring(v, sv, -1); \
+    sq_rawset(v, -3);
+
+#define ADD_T_TABLE_INT(sk, sv) \
+    sq_pushstring(v, sk, -1); \
+    sq_pushinteger(v, sv); \
+    sq_rawset(v, -3);
+
+#define ADD_T_TABLE_FLOAT(sk, sv) \
+    sq_pushstring(v, sk, -1); \
+    sq_pushfloat(v, sv); \
+    sq_rawset(v, -3);
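+
+/* Each ADD_T_TABLE_* macro expects the destination table on top of the
+ * stack: it pushes the key and the value, then sq_rawset(v, -3) stores
+ * the pair and pops both, leaving the table on top again. */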
+
+static SQRESULT sq_nn_info(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+
+    sq_newtable(v);
+
+    ADD_T_TABLE_INT("id", self->id);
+    ADD_T_TABLE_STR("type", (self->flags & NR_FLAG_CLASSIFIER) ? "classifier" : "regressor");
+    ADD_T_TABLE_INT("auto-normalization", !!(self->flags & NR_FLAG_NORMALIZE));
+    ADD_T_TABLE_INT("training", !!(self->flags & NR_FLAG_TRAINING));
+
+    sq_pushliteral(v, _SC("layout"));
+    sq_newarray(v, ANN_LAYERS(self->nn));
+    for (int ai=0, i = ANN_LAYERS(self->nn)-1; i >= 0; i--, ++ai) {
+        int units = ANN_UNITS(self->nn,i);
+        if (i != 0) units--; /* Don't count the bias unit. */
+        sq_pushinteger(v, units);
+        sq_arrayset(v, -2, ai);
+    }
+    sq_rawset(v, -3);
+
+    ADD_T_TABLE_INT("training-dataset-maxlen", self->dataset.maxlen);
+    ADD_T_TABLE_INT("training-dataset-len", self->dataset.len);
+    ADD_T_TABLE_INT("test-dataset-maxlen", self->test.maxlen);
+    ADD_T_TABLE_INT("test-dataset-len", self->test.len);
+    ADD_T_TABLE_INT("training-total-steps", self->training_total_steps);
+    ADD_T_TABLE_INT("training-total-cycles", self->dataset.len ?
+            (self->training_total_steps / self->dataset.len) : 0);
+
+    float tms = (float)self->training_total_ms/1000;
+    ADD_T_TABLE_FLOAT("training-total-seconds", tms);
+    ADD_T_TABLE_FLOAT("dataset-error", self->dataset_error);
+    ADD_T_TABLE_FLOAT("test-error", self->test_error);
+
+    if (self->flags & NR_FLAG_CLASSIFIER) {
+        ADD_T_TABLE_FLOAT("classification-errors-perc", self->test_class_error);
+    }
+
+    ADD_T_TABLE_STR("overfitting-detected", (self->flags & NR_FLAG_OF_DETECTED) ? "yes" : "no");
+
+	return 1;
+}
+
+static SQRESULT sq_nn_clone(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    AnnRprop *clone = AnnClone(self->nn);
+    if(clone)
+    {
+        sq_pushstring(v, sq_nn_TAG, -1);
+        if(sq_getonregistrytable(v) == SQ_ERROR) return SQ_ERROR;
+        sq_createinstance(v, -1);
+        sq_setinstanceup(v, -1, clone);
+        sq_setreleasehook(v, -1, sq_nn_release_hook);
+    }
+    else sq_pushnull(v);
+	return 1;
+}
+
+#define SQ_NN_GET_SET_FLOAT(func_name) \
+static SQRESULT sq_nn_##func_name(HSQUIRRELVM v)\
+{\
+    SQ_FUNC_VARS(v);\
+    SQ_GET_NN_INSTANCE(v, 1);\
+    if(_top_ == 1)\
+    {\
+        sq_pushfloat(v, self->nn->func_name);\
+        return 1;\
+    }\
+    SQ_GET_FLOAT(v, 2, func_name);\
+    self->nn->func_name = func_name;\
+	return 0;\
+}
+
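+/* Each generated method is a combined accessor: with no argument it
+ * returns the current value; with a float argument it assigns it. */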
+SQ_NN_GET_SET_FLOAT(learn_rate);
+SQ_NN_GET_SET_FLOAT(rprop_nminus);
+SQ_NN_GET_SET_FLOAT(rprop_nplus);
+SQ_NN_GET_SET_FLOAT(rprop_maxupdate);
+SQ_NN_GET_SET_FLOAT(rprop_minupdate);
+
+static SQRESULT sq_nn_flags(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    if(_top_ == 1)
+    {
+        sq_pushinteger(v, self->nn->flags);
+        return 1;
+    }
+    SQ_GET_INTEGER(v, 2, flags);
+    self->nn->flags = flags;
+	return 0;
+}
+
+static SQRESULT sq_nn_weights(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    sq_pushinteger(v, AnnCountWeights(self->nn));
+    return 1;
+}
+
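+/* weight(layer, i, j) reads a single connection weight; a fourth float
+ * argument overwrites it instead. Indices are handed unchanged to the
+ * ANN_WEIGHT() accessor. */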
+static SQRESULT sq_nn_weight(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    SQ_GET_INTEGER(v, 2, layer);
+    SQ_GET_INTEGER(v, 3, i);
+    SQ_GET_INTEGER(v, 4, j);
+
+    if(layer < 0 && layer >= self->nn->layers) return sq_throwerror(v, _SC("layer out of range"));
+    //if(i < 0 && i >= self->layer[layer]) return sq_throwerror(v, _("layer out of range"));
+
+    float *weight = &ANN_WEIGHT(self->nn, layer, i, j);
+    if(_top_ == 4)
+    {
+        sq_pushfloat(v, *weight);
+        return 1;
+    }
+    SQ_GET_FLOAT(v, 5, new_weight);
+    *weight = new_weight;
+	return 0;
+}
+
+static SQRESULT sq_nn_Ann2Tcl(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    Ann2Tcl(self->nn);
+	return 0;
+}
+
+static SQRESULT sq_nn_Ann2Js(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    Ann2Js(self->nn);
+	return 0;
+}
+
+static SQRESULT sq_nn_AnnPrint(HSQUIRRELVM v)
+{
+    SQ_FUNC_VARS_NO_TOP(v);
+    SQ_GET_NN_INSTANCE(v, 1);
+    AnnPrint(self->nn);
+	return 0;
+}
+
+#define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),sq_nn_##name,nparams,tycheck}
+static SQRegFunction sq_nn_methods[] =
+{
+    _DECL_FUNC(constructor, -5,_SC("xiiaiii")),
+    _DECL_FUNC(clone, 1,_SC("x")),
+    _DECL_FUNC(Ann2Tcl, 1,_SC("x")),
+    _DECL_FUNC(Ann2Js, 1,_SC("x")),
+    _DECL_FUNC(AnnPrint, 1,_SC("x")),
+    _DECL_FUNC(flags, -1,_SC("xi")),
+    _DECL_FUNC(learn_rate, -1,_SC("xf")),
+    _DECL_FUNC(rprop_nminus, -1,_SC("xf")),
+    _DECL_FUNC(rprop_nplus, -1,_SC("xf")),
+    _DECL_FUNC(rprop_maxupdate, -1,_SC("xf")),
+    _DECL_FUNC(rprop_minupdate, -1,_SC("xf")),
+    _DECL_FUNC(weights, 1,_SC("x")),
+    _DECL_FUNC(weight, -4,_SC("xiiif")),
+    _DECL_FUNC(observe, -3,_SC("xaai")),
+    _DECL_FUNC(train, -1,_SC("xiii")),
+    _DECL_FUNC(run, 2,_SC("xa")),
+    _DECL_FUNC(classify, 2,_SC("xa")),
+    _DECL_FUNC(info, 1,_SC("x")),
+    {0,0}
+};
+#undef _DECL_FUNC
+
+typedef struct {
+  const SQChar *Str;
+  SQInteger Val;
+} KeyIntType, * KeyIntPtrType;
+
+static KeyIntType sq_nn_constants[] = {
+    #define MK_CONST(c) {_SC(#c), NR_##c}
+    #define MK_CONST_FLAG(c) {_SC(#c), NR_FLAG_##c}
+
+	MK_CONST_FLAG(NONE),
+	MK_CONST_FLAG(TRAINING),
+	MK_CONST_FLAG(REGRESSOR),
+	MK_CONST_FLAG(CLASSIFIER),
+	MK_CONST_FLAG(NORMALIZE),
+	MK_CONST_FLAG(AUTO_STOP),
+	MK_CONST_FLAG(OF_DETECTED),
+	MK_CONST_FLAG(BACKTRACK),
+	MK_CONST_FLAG(TO_PERSIST),
+	MK_CONST_FLAG(TO_TRANSFER),
+	MK_CONST(MAX_LAYERS),
+	MK_CONST(RDB_ENC_VER),
+	MK_CONST(INSERT_TRAIN),
+	MK_CONST(INSERT_TEST),
+    {0,0}
+};
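+
+/* The NR_FLAG_* values are bit flags: scripts can OR values such as
+ * AUTO_STOP and BACKTRACK together and pass the result as the flags
+ * argument of train(). */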
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point that registers the nn class and its constants with the VM. */
+SQRESULT sqext_register_nn (HSQUIRRELVM v) {
+	sq_pushstring(v,sq_nn_TAG,-1);
+	sq_newclass(v,SQFalse);
+	sq_settypetag(v,-1,(void*)sq_nn_TAG);
+    sq_insert_reg_funcs(v, sq_nn_methods);
+
+	//add constants
+	KeyIntPtrType KeyIntPtr;
+	for (KeyIntPtr = sq_nn_constants; KeyIntPtr->Str; KeyIntPtr++) {
+		sq_pushstring(v, KeyIntPtr->Str, -1);    //first the key
+		sq_pushinteger(v, KeyIntPtr->Val);       //then the value
+		sq_newslot(v, -3, SQFalse);              //store them
+	}
+
+	sq_newslot(v,-3,SQTrue);
+
+	return SQ_OK;
+}
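+
+/* Host-side usage sketch (the VM setup shown is illustrative, not part of
+ * this module):
+ *
+ *     HSQUIRRELVM v = sq_open(1024);
+ *     sqext_register_nn(v); // registers the class plus the NR_* constants
+ */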
+
+#ifdef __cplusplus
+}
+#endif
+
+

+ 1365 - 0
SquiLu-ext/sq_subprocess.cpp

@@ -0,0 +1,1365 @@
+/* Copyright (c) 2010 Joshua Phillips
+ * Ported on 2016 to SquiLu by Domingo Alvarez Duarte
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef OS_POSIX
+#define _POSIX_SOURCE
+#endif
+
+#if !defined(OS_WINDOWS) && !defined(OS_POSIX)
+#error None of these are defined: OS_WINDOWS, OS_POSIX
+#else
+
+#include "squirrel.h"
+#include "sqstdblobimpl.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+SQ_OPT_STRING_STRLEN();
+
+#if defined(OS_POSIX)
+#include <unistd.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+typedef int filedes_t;
+
+/* return 1 if the named directory exists and is a directory */
+static int direxists(const char *fname)
+{
+    struct stat statbuf;
+    if (stat(fname, &statbuf)){
+        return 0;
+    }
+    return !!S_ISDIR(statbuf.st_mode);
+}
+
+#elif defined(OS_WINDOWS)
+#include "windows.h"
+
+/* Some SDKs don't define this */
+#ifndef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD) -1)
+#endif
+
+typedef HANDLE filedes_t;
+
+/* return 1 if the named directory exists and is a directory */
+static int direxists(const char *fname)
+{
+    DWORD result;
+    result = GetFileAttributes(fname);
+    if (result == INVALID_FILE_ATTRIBUTES) return 0;
+    return !!(result & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+#endif /* defined(OS_WINDOWS) */
+
+/* Some systems don't define these, but we use them as indices for our arrays.
+   I probably oughtn't, in case a system doesn't use 0, 1 and 2 for these. */
+#ifndef STDIN_FILENO
+#define STDIN_FILENO 0
+#define STDOUT_FILENO 1
+#define STDERR_FILENO 2
+#endif
+
+/* This is the proc object, which is stored as Lua userdata */
+struct proc {
+#if defined(OS_POSIX)
+    pid_t pid;
+#elif defined(OS_WINDOWS)
+    DWORD pid;
+    HANDLE hProcess;
+#endif
+    int exitcode;
+    unsigned char done; /* set to 1 when child has finished and closed */
+};
+
+/* Lua registry key for proc metatable */
+#define SP_PROC_META "subprocess_proc*"
+
+/* Environment keys */
+/* This is an integer index into the environment of C functions in this module.
+   At this index is stored a table of [pid]=proc items. The items in this table
+   will all have their `done` fields set to false. This table is at present only
+   used for the `subprocess.wait` function.
+   On POSIX, it is used to get the proc object corresponding to a pid. On
+   Windows, it is used to assemble a HANDLE array for WaitForMultipleObjects. */
+#define SP_LIST "subprocess_pid_list"
+
+/* Check to see if object at the given index is a proc object.
+   Return pointer to proc object, or NULL if it isn't. */
+static struct proc *toproc(HSQUIRRELVM v, int index)
+{
+    int eq;
+    if (lua_type(L, index) != LUA_TUSERDATA) return NULL;
+    lua_getmetatable(L, index);
+    luaL_getmetatable(L, SP_PROC_META);
+    eq = lua_equal(L, -2, -1);
+    lua_pop(L, 2);
+    if (!eq) return NULL;
+    return lua_touserdata(L, index);
+}
+
+/* Same but raise an error instead of returning NULL */
+#define checkproc(L, index) ((struct proc *) luaL_checkudata((L), (index), SP_PROC_META))
+
+/* Create and return a new proc object */
+static struct proc *newproc(HSQUIRRELVM v)
+{
+    struct proc *proc = sq_newuserdata(v, sizeof *proc);
+    proc->done = 1;
+    proc->pid = 0;
+    luaL_getmetatable(L, SP_PROC_META);
+    lua_setmetatable(L, -2);
+    lua_newtable(L);
+    lua_setfenv(L, -2);
+    return proc;
+}
+
+/* Mark a process (at index) as done */
+static void doneproc(HSQUIRRELVM v, int index)
+{
+    struct proc *proc = toproc(L, index);
+    if (!proc){
+        fputs("subprocess.c: doneproc: not a proc\n", stderr);
+    } else {
+        proc->done = 1;
+        /* remove proc from SP_LIST */
+        sq_reservestack(v, 4);
+        sq_pushvalue(v, index);    /* stack: proc */
+        luaL_getmetatable(L, SP_LIST);
+        /* stack: proc list */
+        if (lua_isnil(L, -1)){
+            fputs("subprocess.c: XXX: SP_LIST IS NIL\n", stderr);
+        } else {
+            lua_pushinteger(L, proc->pid);      /* stack: proc list pid */
+            lua_pushvalue(L, -1);               /* stack: proc list pid pid */
+            lua_gettable(L, -3);                /* stack: proc list pid proc2 */
+            if (!lua_equal(L, -4, -1)){
+                /* lookup by pid didn't work */
+                fputs("subprocess.c: doneproc: XXX: pid lookup in SP_LIST failed\n", stderr);
+                lua_pop(L, 2);                  /* stack: proc list */
+            } else {
+                lua_pop(L, 1);                  /* stack: proc list pid */
+                lua_pushnil(L);                 /* stack: proc list pid nil */
+                lua_settable(L, -3);            /* stack: proc list */
+            }
+            /* stack: proc list */
+        }
+        sq_pop(v, 2);
+    }
+}
+
+/* Remove old SP_LIST entries by polling them.
+   Calling this every now and again can avoid leaking proc objects
+   that are not waited for. */
+static SQRESULT sq_subprocess_prune(HSQUIRRELVM v)
+{
+    int top = lua_gettop(L);
+    sq_reservestack(v, 5);
+    luaL_getmetatable(L, SP_LIST);
+    if (lua_isnil(L, -1)){
+        lua_pop(L, 1);
+        return 0;
+    }
+    lua_pushnil(L);
+    while (lua_next(L, -2)){
+        lua_getfield(L, -1, "poll");
+        lua_pushvalue(L, -2);
+        lua_call(L, 1, 0);
+        lua_pop(L, 1);
+    }
+    lua_settop(L, top);
+    return 0;
+}
+
+/* Special constants for popen arguments. Only their addresses matter:
+   they are exposed to scripts as lightuserdata and recognized by pointer
+   identity. */
+static char PIPE, STDOUT;
+
+/* Names of standard file handles. */
+static const char *fd_names[3] = {"stdin", "stdout", "stderr"};
+
+/* Information about what to do for a standard file handle.
+   This is constructed from popen arguments. */
+struct fdinfo {
+    enum {
+        FDMODE_INHERIT = 0,  /* fd is inherited from parent */
+        FDMODE_FILENAME,     /* open named file */
+        FDMODE_FILEDES,      /* use a file descriptor */
+        FDMODE_FILEOBJ,      /* use FILE* */
+        FDMODE_PIPE,         /* create and use pipe */
+        FDMODE_STDOUT        /* redirect to stdout (only for stderr) */
+    } mode;
+    union {
+        const char *filename;
+        filedes_t filedes;
+        FILE *fileobj;
+    } info;
+};
+
+/* Close multiple file descriptors */
+static void closefds(filedes_t *fds, int n)
+{
+    int i;
+    for (i=0; i<n; ++i){
+#if defined(OS_POSIX)
+        if (fds[i] != -1)
+            close(fds[i]);
+#elif defined(OS_WINDOWS)
+        if (fds[i] != INVALID_HANDLE_VALUE)
+            CloseHandle(fds[i]);
+#endif
+    }
+}
+
+/* Close multiple C files */
+static void closefiles(FILE **files, int n)
+{
+    int i;
+    for (i=0; i<n; ++i)
+        if (files[i] != NULL)
+            fclose(files[i]);
+}
+
+/* Free multiple strings */
+static void freestrings(char **strs, int n)
+{
+    int i;
+    for (i=0; i<n; ++i)
+        if (strs[i] != NULL)
+            sq_free(strs[i], -1);
+}
+
+#ifdef OS_WINDOWS
+/* Copy a Windows error into a buffer */
+static void copy_w32error(char errmsg_out[], size_t errmsg_len, DWORD error)
+{
+    if (FormatMessage(
+        FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0,
+        (void *) errmsg_out, errmsg_len, NULL) == 0)
+    {
+        strncpy(errmsg_out, "failed to get error message", errmsg_len + 1);
+    }
+}
+
+/* Push a Windows error onto a Lua stack */
+static void push_w32error(HSQUIRRELVM v, DWORD error)
+{
+    LPTSTR buf;
+    if (FormatMessage(
+        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
+        NULL, error, 0, (void *) &buf, 1, NULL) == 0)
+    {
+        lua_pushliteral(L, "failed to get error message");
+    } else {
+        lua_pushstring(L, buf);
+        LocalFree(buf);
+    }
+}
+
+/* n is 0, 1 or 2
+   return handle for standard input/output/error */
+static HANDLE getstdhandle(int n)
+{
+    DWORD n2;
+    switch (n){
+        case 0: n2 = STD_INPUT_HANDLE; break;
+        case 1: n2 = STD_OUTPUT_HANDLE; break;
+        case 2: n2 = STD_ERROR_HANDLE; break;
+        default: return INVALID_HANDLE_VALUE;
+    }
+    return GetStdHandle(n2);
+}
+
+struct str {
+    char *data;
+    size_t len;
+    size_t size; /* size allocated */
+};
+
+static void str_init(struct str *s)
+{
+    s->data = NULL;
+    s->len = 0;
+    s->size = 0;
+}
+
+/* Append n chars from s2 */
+static int str_appendlstr(struct str *s, char *s2, size_t n)
+{
+    void *newp;
+    if (s->size < s->len + n){
+        if (s->size < 16) s->size = 16;
+        while (s->size < s->len + n)
+            s->size = (s->size * 3) / 2;
+        newp = realloc(s->data, s->size + 1);
+        if (newp == NULL){
+            free(s->data);
+            return 0;
+        }
+        s->data = newp;
+    }
+    memcpy(s->data + s->len, s2, n);
+    s->len += n;
+    s->data[s->len] = '\0';
+    return 1;
+}
+
+static int str_appendc(struct str *s, char ch)
+{
+    return str_appendlstr(s, &ch, 1);
+}
+
+/* Compiles command line for CreateProcess. Returns malloc'd string. */
+static char *compile_cmdline(const char *const *args)
+{
+    /*  "      --> \"
+        \"     --> \\\"
+        \<NUL> --> \\    */
+    struct str str;
+    const char *arg;
+    str_init(&str);
+    while (*args != NULL){
+        arg = *args++;
+        if (!str_appendc(&str, '"')) return NULL;
+        while (arg[0]){
+            if (arg[0] == '"'){
+                if (!str_appendlstr(&str, "\\\"", 2)) return NULL;
+            } else if (arg[0] == '\\'){
+                if (arg[1] == '"' || arg[1] == '\0'){
+                    if (!str_appendlstr(&str, "\\\\", 2)) return NULL;
+                } else {
+                    if (!str_appendc(&str, '\\')) return NULL;
+                }
+            } else {
+                if (!str_appendc(&str, arg[0])) return NULL;
+            }
+            arg++;
+        }
+        if (!str_appendlstr(&str, "\" ", 2)) return NULL;
+    }
+    str.data[str.len - 1] = '\0';
+    return str.data;
+}
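+
+/* Example of the quoting rules above: the C argument vector
+ *     {"echo", "a\"b", "dir\\", NULL}
+ * compiles to the command line
+ *     "echo" "a\"b" "dir\\"
+ * where the trailing backslash of the last argument is doubled so that it
+ * cannot escape its closing quote. */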
+#endif
+
+/* Function for opening subprocesses. Returns 0 on success and -1 on failure.
+   On failure, errmsg_out shall contain a '\0'-terminated error message. */
+static int dopopen(const char *const *args,  /* program arguments with NULL sentinel */
+                   const char *executable,   /* actual executable */
+                   struct fdinfo fdinfo[3],  /* info for stdin/stdout/stderr */
+                   int close_fds,            /* 1 to close all fds */
+                   int binary,               /* 1 to use binary files */
+                   const char *cwd,          /* working directory for program */
+                   struct proc *proc,        /* populated on success! */
+                   FILE *pipe_ends_out[3],   /* pipe ends are put here */
+                   char errmsg_out[],        /* written to on failure */
+                   size_t errmsg_len         /* length of errmsg_out (EXCLUDING sentinel) */
+                  )
+#if defined(OS_POSIX)
+{
+    int fds[3];
+    int i;
+    struct fdinfo *fdi;
+    int piperw[2];
+    int errpipe[2]; /* pipe for returning error status */
+    int flags;
+    int en; /* saved errno */
+    int count;
+    pid_t pid;
+
+    errmsg_out[errmsg_len] = '\0';
+
+    for (i=0; i<3; ++i)
+        pipe_ends_out[i] = NULL;
+
+    /* Manage stdin/stdout/stderr */
+    for (i=0; i<3; ++i){
+        fdi = &fdinfo[i];
+        switch (fdi->mode){
+            case FDMODE_INHERIT:
+inherit:
+                fds[i] = dup(i);
+                if (fds[i] == -1){
+fd_failure:
+                    strncpy(errmsg_out, strerror(errno), errmsg_len + 1);
+                    closefds(fds, i);
+                    closefiles(pipe_ends_out, i);
+                    return -1;
+                }
+                break;
+            case FDMODE_FILENAME:
+                if (i == STDIN_FILENO){
+                    if ((fds[i] = open(fdi->info.filename, O_RDONLY)) == -1) goto fd_failure;
+                } else {
+                    if ((fds[i] = creat(fdi->info.filename, 0666)) == -1) goto fd_failure;
+                }
+                break;
+            case FDMODE_FILEDES:
+                if ((fds[i] = dup(fdi->info.filedes)) == -1) goto fd_failure;
+                break;
+            case FDMODE_FILEOBJ:
+                if ((fds[i] = dup(fileno(fdi->info.fileobj))) == -1) goto fd_failure;
+                break;
+            case FDMODE_PIPE:
+                if (pipe(piperw) == -1) goto fd_failure;
+                if (i == STDIN_FILENO){
+                    fds[i] = piperw[0]; /* give read end to process */
+                    if ((pipe_ends_out[i] = fdopen(piperw[1], "w")) == NULL) goto fd_failure;
+                } else {
+                    fds[i] = piperw[1]; /* give write end to process */
+                    if ((pipe_ends_out[i] = fdopen(piperw[0], "r")) == NULL) goto fd_failure;
+                }
+                break;
+            case FDMODE_STDOUT:
+                if (i == STDERR_FILENO){
+                    if ((fds[STDERR_FILENO] = dup(fds[STDOUT_FILENO])) == -1) goto fd_failure;
+                } else goto inherit;
+                break;
+        }
+    }
+
+    /* Find executable name */
+    if (!executable){
+        /* use first arg */
+        executable = args[0];
+    }
+    assert(executable != NULL);
+
+    /* Create a pipe for returning error status */
+    if (pipe(errpipe) == -1){
+        strncpy(errmsg_out, strerror(errno), errmsg_len + 1);
+        closefds(fds, 3);
+        closefiles(pipe_ends_out, 3);
+        return -1;
+    }
+    /* Make write end close on exec */
+    flags = fcntl(errpipe[1], F_GETFD);
+    if (flags == -1){
+pipe_failure:
+        strncpy(errmsg_out, strerror(errno), errmsg_len + 1);
+        closefds(errpipe, 2);
+        closefds(fds, 3);
+        closefiles(pipe_ends_out, 3);
+        return -1;
+    }
+    if (fcntl(errpipe[1], F_SETFD, flags | FD_CLOEXEC) == -1) goto pipe_failure;
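+
+    /* The close-on-exec flag is the key to error reporting: if execvp()
+     * succeeds, the write end is closed automatically and the parent's
+     * read() below sees end-of-file; if it fails, the child writes errno
+     * through the still-open descriptor. */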
+
+    /* Do the fork/exec (TODO: use vfork somehow?) */
+    pid = fork();
+    if (pid == -1) goto pipe_failure;
+    else if (pid == 0){
+        /* child */
+        close(errpipe[0]);
+
+        /* dup file descriptors */
+        for (i=0; i<3; ++i){
+            if (dup2(fds[i], i) == -1) goto child_failure;
+        }
+
+        /* close other fds */
+        if (close_fds){
+            for (i=3; i<sysconf(_SC_OPEN_MAX); ++i){
+                if (i != errpipe[1])
+                    close(i);
+            }
+        }
+
+        /* change directory */
+        if (cwd && chdir(cwd)) goto child_failure;
+
+        /* exec! Farewell, subprocess.c! */
+        execvp(executable, (char *const*) args); /* XXX: const cast */
+
+        /* Oh dear, we're still here. */
+child_failure:
+        en = errno;
+        write(errpipe[1], &en, sizeof en);
+        _exit(1);
+    }
+
+    /* parent */
+    /* close unneeded fds */
+    closefds(fds, 3);
+    close(errpipe[1]);
+
+    /* read errno from child */
+    while ((count = read(errpipe[0], &en, sizeof en)) == -1)
+        if (errno != EAGAIN && errno != EINTR) break;
+    if (count > 0){
+        /* exec failed */
+        close(errpipe[0]);
+        strncpy(errmsg_out, strerror(en), errmsg_len + 1);
+        return -1;
+    }
+    close(errpipe[0]);
+
+    /* Child is now running */
+    proc->done = 0;
+    proc->pid = pid;
+    return 0;
+}
+#elif defined(OS_WINDOWS)
+{
+    HANDLE hfiles[3], piper, pipew, hfile;
+    int i, fd;
+    struct fdinfo *fdi;
+    SECURITY_ATTRIBUTES secattr;
+    STARTUPINFO si;
+    PROCESS_INFORMATION pi;
+    char *cmdline;
+
+    errmsg_out[errmsg_len] = '\0';
+
+    /* Create a SECURITY_ATTRIBUTES for inheritable handles */
+    secattr.nLength = sizeof secattr;
+    secattr.lpSecurityDescriptor = NULL;
+    secattr.bInheritHandle = TRUE;
+
+    for (i=0; i<3; ++i)
+        pipe_ends_out[i] = NULL;
+
+    /* Manage stdin/stdout/stderr */
+    for (i=0; i<3; ++i){
+        fdi = &fdinfo[i];
+        switch (fdi->mode){
+            case FDMODE_INHERIT:
+inherit:
+                /* XXX: duplicated file handles share the
+                   same object (and thus file cursor, etc.).
+                   CreateFile might be a better idea. */
+                hfile = getstdhandle(i);
+                if (hfile == INVALID_HANDLE_VALUE){
+fd_failure:
+                    copy_w32error(errmsg_out, errmsg_len, GetLastError());
+                    closefds(hfiles, i);
+                    closefiles(pipe_ends_out, i);
+                    return -1;
+                }
+dup_hfile:
+                if (DuplicateHandle(GetCurrentProcess(), hfile,
+                    GetCurrentProcess(), &hfiles[i], 0, TRUE,
+                    DUPLICATE_SAME_ACCESS) == 0)
+                {
+                    goto fd_failure;
+                }
+                break;
+            case FDMODE_FILENAME:
+                if (i == STDIN_FILENO){
+                    hfiles[i] = CreateFile(
+                        fdi->info.filename,
+                        GENERIC_READ,
+                        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                        &secattr,
+                        OPEN_EXISTING,
+                        FILE_ATTRIBUTE_NORMAL,
+                        NULL);
+                } else {
+                    hfiles[i] = CreateFile(
+                        fdi->info.filename,
+                        GENERIC_WRITE,
+                        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                        &secattr,
+                        CREATE_ALWAYS,
+                        FILE_ATTRIBUTE_NORMAL,
+                        NULL);
+                }
+                if (hfiles[i] == INVALID_HANDLE_VALUE){
+                    goto fd_failure;
+                }
+                break;
+            case FDMODE_FILEDES:
+                if (DuplicateHandle(GetCurrentProcess(), fdi->info.filedes,
+                    GetCurrentProcess(), &hfiles[i], 0, TRUE,
+                    DUPLICATE_SAME_ACCESS) == 0)
+                {
+                    goto fd_failure;
+                }
+                break;
+            case FDMODE_FILEOBJ:
+                fd = _fileno(fdi->info.fileobj);
+                if (fd == -1){
+get_osf_failure:
+                    strncpy(errmsg_out, strerror(errno), errmsg_len + 1);
+failure:
+                    closefds(hfiles, i);
+                    closefiles(pipe_ends_out, i);
+                    return -1;
+                }
+                hfile = (HANDLE) _get_osfhandle(fd);
+                if (hfile == INVALID_HANDLE_VALUE) goto get_osf_failure;
+                goto dup_hfile;
+            case FDMODE_PIPE:
+                if (CreatePipe(&piper, &pipew, &secattr, 0) == 0)
+                    goto fd_failure;
+                if (i == STDIN_FILENO){
+                    hfiles[i] = piper;
+                    fd = _open_osfhandle((long) pipew, binary ? 0 : _O_TEXT);
+                    if (fd == -1){
+                        strncpy(errmsg_out, "_open_osfhandle failed", errmsg_len + 1);
+                        goto failure;
+                    }
+                    pipe_ends_out[i] = _fdopen(fd, "w");
+                    if (pipe_ends_out[i] == 0){
+                        strncpy(errmsg_out, "_fdopen failed", errmsg_len + 1);
+                        goto failure;
+                    }
+                } else {
+                    hfiles[i] = pipew;
+                    fd = _open_osfhandle((long) piper, _O_RDONLY | (binary ? 0 : _O_TEXT));
+                    if (fd == -1){
+                        strncpy(errmsg_out, "_open_osfhandle failed", errmsg_len + 1);
+                        goto failure;
+                    }
+                    pipe_ends_out[i] = _fdopen(fd, "r");
+                    if (pipe_ends_out[i] == 0){
+                        strncpy(errmsg_out, "_fdopen failed", errmsg_len + 1);
+                        goto failure;
+                    }
+                }
+                break;
+            case FDMODE_STDOUT:
+                if (i == STDERR_FILENO){
+                    hfile = hfiles[STDOUT_FILENO];
+                    goto dup_hfile;
+                } else goto inherit;
+        }
+    }
+
+    /* Find executable name */
+    if (executable == NULL){
+        /* use first arg */
+        /*executable = args[0];*/
+    }
+
+    /* Compile command line into string. Yuck. */
+    cmdline = compile_cmdline(args);
+    if (!cmdline){
+        strncpy(errmsg_out, "memory full", errmsg_len + 1);
+        closefds(hfiles, 3);
+        closefiles(pipe_ends_out, 3);
+        return -1;
+    }
+
+
+    si.cb = sizeof si;
+    si.lpReserved = NULL;
+    si.lpDesktop = NULL;
+    si.lpTitle = NULL;
+    si.dwFlags = STARTF_USESTDHANDLES;
+    si.cbReserved2 = 0;
+    si.lpReserved2 = NULL;
+    si.hStdInput = hfiles[0];
+    si.hStdOutput = hfiles[1];
+    si.hStdError = hfiles[2];
+
+    if (CreateProcess(
+        executable, /* lpApplicationName */
+        cmdline,    /* lpCommandLine */
+        NULL,       /* lpProcessAttributes */
+        NULL,       /* lpThreadAttributes */
+        TRUE,       /* bInheritHandles */
+        0,          /* dwCreationFlags */
+        NULL,       /* lpEnvironment */
+        cwd,        /* lpCurrentDirectory */
+        &si,        /* lpStartupInfo */
+        &pi)        /* lpProcessInformation */
+    == 0){
+        copy_w32error(errmsg_out, errmsg_len, GetLastError());
+        free(cmdline);
+        closefds(hfiles, 3);
+        closefiles(pipe_ends_out, 3);
+        return -1;
+    }
+    CloseHandle(pi.hThread); /* Don't want this handle */
+    free(cmdline);
+    closefds(hfiles, 3); /* XXX: is this correct? */
+    proc->done = 0;
+    proc->pid = pi.dwProcessId;
+    proc->hProcess = pi.hProcess;
+    return 0;
+}
+#endif
+
+/* popen {arg0, arg1, arg2, ..., [executable=..., cwd=..., close_fds=...,
+   binary=..., stdin=..., stdout=..., stderr=...]} */
+static SQRESULT sq_subprocess_popen(HSQUIRRELVM v)
+{
+    struct proc *proc = NULL;
+
+    /* List of arguments (malloc'd NULL-terminated array of C strings.
+       The C strings are owned by Lua) */
+    int nargs = 0;
+    const char **args = NULL;
+    /* Command to run (owned by Lua) */
+    const char *executable = NULL;
+    /* Directory to run it in (owned by Lua) */
+    const char *cwd = NULL;
+    /* File options */
+    struct fdinfo fdinfo[3];
+    /* Close fds? */
+    int close_fds = 0;
+    /* Use binary mode for files? */
+    int binary = 0;
+
+    FILE *pipe_ends[3] = {NULL, NULL, NULL};
+    int i, result;
+    FILE *f;
+    const char *s;
+
+    char errmsg_buf[256];
+
+    sq_subprocess_prune(v);
+
+    luaL_checktype(L, 1, LUA_TTABLE);
+    lua_settop(L, 1);
+
+    proc = newproc(v);
+
+    /* Stack: kwargs proc <strings etc....>
+       Lua strings are left on the stack while they are needed,
+       and Lua can garbage-collect them later. */
+
+    /* get arguments */
+    nargs = lua_objlen(L, 1);
+    if (nargs == 0) return luaL_error(L, "no arguments specified");
+    args = lua_newuserdata(L, (nargs + 1) * sizeof *args); /*alloc((nargs + 1) * sizeof *args);*/
+    if (!args) return luaL_error(L, "memory full");
+    for (i=0; i<=nargs; ++i) args[i] = NULL;
+    luaL_checkstack(L, nargs, "cannot grow stack");
+    for (i=1; i<=nargs; ++i){
+        lua_rawgeti(L, 1, i);
+        s = lua_tostring(L, -1);
+        if (!s){
+            /*freestrings(args, nargs);
+            free(args);*/
+            return luaL_error(L, "popen argument %d not a string", (int) i);
+
+        }
+        args[i-1] = s; /*strdup(s);*/
+        /*if (args[i-1] == NULL){
+strings_failure:
+            freestrings(args, nargs);
+            free(args);
+            return luaL_error(L, "memory full");
+        } */
+        /*lua_pop(L, 1);*/
+    }
+
+    luaL_checkstack(L, 12, "cannot grow stack");
+
+    /* get executable string */
+    lua_getfield(L, 1, "executable");
+    s = lua_tostring(L, -1);
+    if (s){
+        executable = s; /*strdup(s);*/
+        /*if (executable == NULL) goto strings_failure;*/
+    } else lua_pop(L, 1);
+    /*lua_pop(L, 1); */ /* to match lua_getfield */
+
+    /* get directory name */
+    lua_getfield(L, 1, "cwd");
+    cwd = lua_tostring(L, -1);
+    if (cwd == NULL) lua_pop(L, 1);
+    else {
+    /*if (lua_isstring(L, -1)){
+        cwd = lua_tostring(L, -1);*/ /*strdup(lua_tostring(L, -1));
+        if (!cwd){
+            free(executable);
+            freestrings(args, nargs);
+            free(args);
+            return luaL_error(L, "memory full");
+        }                            */
+        /* make sure the cwd exists */
+        if (!direxists(cwd)){
+            /*free(executable);
+            freestrings(args, nargs);*/
+            /*free(args);*/
+            return luaL_error(L, "directory `%s' does not exist", cwd);
+        }
+    }
+    /*lua_pop(L, 1);*/
+
+    /* close_fds */
+    lua_getfield(L, 1, "close_fds");
+    close_fds = lua_toboolean(L, -1);
+    lua_pop(L, 1);
+
+    /* binary */
+    lua_getfield(L, 1, "binary");
+    binary = lua_toboolean(L, -1);
+    lua_pop(L, 1);
+
+    /* handle stdin/stdout/stderr */
+    for (i=0; i<3; ++i){
+        lua_getfield(L, 1, fd_names[i]);
+        if (lua_isnil(L, -1)){
+            fdinfo[i].mode = FDMODE_INHERIT;
+            lua_pop(L, 1);
+        } else if (lua_touserdata(L, -1) == &PIPE){
+            fdinfo[i].mode = FDMODE_PIPE;
+            lua_pop(L, 1);
+        } else if (lua_touserdata(L, -1) == &STDOUT){
+            if (i == STDERR_FILENO /*&& fdinfo[STDOUT_FILENO].mode == FDMODE_PIPE*/){
+                fdinfo[i].mode = FDMODE_STDOUT;
+            } else {
+                lua_pushliteral(L, "STDOUT must be used only for stderr"/* when stdout is set to PIPE"*/);
+files_failure:
+                /*for (j=0; j<i; ++j){
+                    if (fdinfo[j].mode == FDMODE_FILENAME)
+                        free(fdinfo[j].info.filename);
+                }
+                free(executable);
+                freestrings(args, nargs);
+                free(args);*/
+                return lua_error(L);
+            }
+            lua_pop(L, 1);
+        } else if (lua_isstring(L, -1)){
+            /* open a file */
+            fdinfo[i].mode = FDMODE_FILENAME;
+            /*if ((fdinfo[i].info.filename = strdup(lua_tostring(L, -1))) == NULL){
+                lua_pushliteral(L, "out of memory");
+                goto files_failure;
+            } */
+            fdinfo[i].info.filename = lua_tostring(L, -1);
+            /* do not pop */
+        } else if (lua_isnumber(L, -1)){
+            /* use this fd */
+            fdinfo[i].mode = FDMODE_FILEDES;
+            fdinfo[i].info.filedes = (filedes_t) lua_tointeger(L, -1);
+            lua_pop(L, 1);
+        } else {
+            f = liolib_copy_tofile(L, -1);
+            if (f){
+                fdinfo[i].mode = FDMODE_FILEOBJ;
+                fdinfo[i].info.fileobj = f;
+            } else {
+                /* huh? */
+                lua_pushfstring(L, "unexpected value for %s", fd_names[i]);
+                goto files_failure;
+            }
+            lua_pop(L, 1);
+        }
+    }
+
+    result = dopopen(args, executable, fdinfo, close_fds, binary, cwd, proc, pipe_ends, errmsg_buf, 255);
+    /*for (i=0; i<3; ++i)
+        if (fdinfo[i].mode == FDMODE_FILENAME)
+            free(fdinfo[i].info.filename);
+    free(executable);
+    freestrings(args, nargs);
+    free(args);*/
+    if (result == -1){
+        /* failed */
+        return luaL_error(L, "popen failed: %s", errmsg_buf);
+    }
+
+    /* Put pipe objects in proc userdata's environment */
+    lua_getfenv(L, 2);
+    for (i=0; i<3; ++i){
+        if (pipe_ends[i]){
+            *liolib_copy_newfile(L) = pipe_ends[i];
+            lua_setfield(L, -2, fd_names[i]);
+        }
+    }
+    lua_pop(L, 1);
+
+    /* Put proc object in SP_LIST table */
+    luaL_getmetatable(L, SP_LIST);
+    if (lua_isnil(L, -1)){
+        fputs("subprocess.c: XXX: SP_LIST IS NIL\n", stderr);
+    } else {
+        lua_pushinteger(L, proc->pid); /* stack: list pid */
+        lua_pushvalue(L, 2);           /* stack: list pid proc */
+        lua_settable(L, -3);           /* stack: list */
+    }
+    lua_pop(L, 1);
+
+    /* Return the proc */
+    lua_settop(L, 2);
+    return 1;
+}
+
+/* __gc */
+static int proc_gc(HSQUIRRELVM v)
+{
+    struct proc *proc = checkproc(v, 1);
+    if (!proc->done){
+#if defined(OS_POSIX)
+        /* Try to wait for process to avoid leaving zombie.
+           If the process hasn't finished yet, we'll end up leaving a zombie. */
+        int stat;
+        waitpid(proc->pid, &stat, WNOHANG);
+#elif defined(OS_WINDOWS)
+        CloseHandle(proc->hProcess);
+#endif
+        doneproc(v, 1);
+    }
+    return 0;
+}
+
+/* __index */
+static int proc_index(HSQUIRRELVM v)
+{
+    struct proc *proc;
+    const char *s;
+    lua_settop(L, 2);
+    proc = checkproc(v, 1);
+    /* first check environment table */
+    lua_getfenv(L, 1);
+    lua_pushvalue(L, 2);
+    lua_gettable(L, 3);
+    if (!lua_isnil(L, 4)) return 1;
+    lua_pop(L, 2);
+    /* next check metatable */
+    lua_getmetatable(L, 1);
+    lua_pushvalue(L, 2);
+    lua_gettable(L, 3);
+    if (!lua_isnil(L, 4)) return 1;
+    lua_pop(L, 2);
+    /* lastly, fixed fields */
+    s = lua_tostring(L, 2);
+    if (!strcmp(s, "pid")){
+        lua_pushinteger(L, proc->pid);
+        return 1;
+    } else if (!strcmp(s, "exitcode") && proc->done){
+        lua_pushinteger(L, proc->exitcode);
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* Push string representation of process on stack */
+static int proc_tostring(HSQUIRRELVM v)
+{
+    struct proc *proc = checkproc(L, 1);
+    if (proc->done)
+        lua_pushliteral(L, "(finished process)");
+    else
+        lua_pushfstring(L, "process (%d)", (int) proc->pid);
+    return 1;
+}
+
+#if defined(OS_POSIX)
+/* Get exitcode from wait's 'stat' value: a plain exit status is returned
+   as-is, while a terminating or stopping signal is returned negated. */
+static int getexitcode(int stat)
+{
+    if (WIFEXITED(stat))
+        return WEXITSTATUS(stat);
+    else if (WIFSIGNALED(stat))
+        return -WTERMSIG(stat);
+    else if (WIFSTOPPED(stat))
+        return -WSTOPSIG(stat);
+    else {
+        fputs("child disappeared into black hole\n", stderr);
+        return -1;
+    }
+}
+#endif
+
+/* Wait for, or poll, a process */
+static int do_waitpid(HSQUIRRELVM v, struct proc *proc, int wait)
+#if defined(OS_POSIX)
+{
+    int stat, options;
+
+    if (proc->done){
+        lua_pushinteger(L, proc->exitcode);
+        return 1;
+    }
+
+    if (wait) options = 0;
+    else options = WNOHANG;
+    switch (waitpid(proc->pid, &stat, options)){
+        case -1:
+            return luaL_error(L, strerror(errno));
+        case 0:
+            /* child still running */
+            lua_pushnil(L);
+            return 1;
+        default:
+            proc->exitcode = getexitcode(stat);
+            doneproc(L, 1);
+            lua_pushinteger(L, proc->exitcode);
+            return 1;
+    }
+}
+#elif defined(OS_WINDOWS)
+{
+    DWORD dwMilliseconds, retval, exitcode;
+
+    if (proc->done){
+        lua_pushinteger(L, proc->exitcode);
+        return 1;
+    }
+    if (wait) dwMilliseconds = INFINITE;
+    else dwMilliseconds = 0;
+    retval = WaitForSingleObject(proc->hProcess, dwMilliseconds);
+    switch (retval){
+        case WAIT_FAILED:
+failure:
+            push_w32error(L, GetLastError());
+            return lua_error(L);
+        case WAIT_OBJECT_0:
+            /* child finished */
+            if (GetExitCodeProcess(proc->hProcess, &exitcode) == 0){
+                goto failure;
+            }
+            CloseHandle(proc->hProcess);
+            proc->exitcode = exitcode;
+            doneproc(L, 1);
+            lua_pushinteger(L, proc->exitcode);
+            return 1;
+        case WAIT_TIMEOUT:
+        default:
+            /* child still running */
+            lua_pushnil(L);
+            return 1;
+    }
+}
+#endif
+
+static SQRESULT sq_process_poll(HSQUIRRELVM v)
+{
+    return do_waitpid(v, checkproc(v, 1), 0);
+}
+
+static SQRESULT sq_process_wait(HSQUIRRELVM v)
+{
+    return do_waitpid(v, checkproc(v, 1), 1);
+}
+
+#if defined(OS_POSIX)
+static SQRESULT sq_process_send_signal(HSQUIRRELVM v)
+{
+    struct proc *proc = checkproc(L, 1);
+    int sig = luaL_checkinteger(L, 2);
+    if (!proc->done){
+        if (kill(proc->pid, sig)){
+            return luaL_error(L, "kill: %s", strerror(errno));
+        }
+        proc->exitcode = -sig;
+        doneproc(L, 1);
+    }
+    return 0;
+}
+
+static SQRESULT sq_process_terminate(HSQUIRRELVM v)
+{
+    lua_settop(L, 1);
+    lua_pushinteger(L, SIGTERM);
+    return sq_process_send_signal(v);
+}
+
+static SQRESULT sq_process_kill(HSQUIRRELVM v)
+{
+    lua_settop(L, 1);
+    lua_pushinteger(L, SIGKILL);
+    return sq_process_send_signal(v);
+}
+#elif defined(OS_WINDOWS)
+static SQRESULT sq_process_terminate(HSQUIRRELVM v)
+{
+    struct proc *proc = checkproc(L, 1);
+    if (!proc->done){
+        if (TerminateProcess(proc->hProcess, -9) == 0){
+            push_w32error(L, GetLastError());
+            return lua_error(L);
+        }
+        CloseHandle(proc->hProcess);
+        proc->exitcode = -9;
+        doneproc(L, 1);
+    }
+    return 0;
+}
+#endif
+
+static const luaL_Reg proc_meta[] = {
+    {"__tostring", proc_tostring},
+    {"__gc", proc_gc},
+    {"__index", proc_index},
+    {"poll", sq_process_poll},
+    {"wait", sq_process_wait},
+#if defined(OS_POSIX)
+    {"send_signal", sq_process_send_signal},
+    {"terminate", sq_process_terminate},
+    {"kill", sq_process_kill},
+#elif defined(OS_WINDOWS)
+    {"terminate", sq_process_terminate},
+    {"kill", sq_process_terminate},
+#endif
+    {NULL, NULL}
+};
+
+#define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),  sq_process_##name,nparams,tycheck}
+static SQRegFunction sq_process_methods[] =
+{
+    _DECL_FUNC(constructor,  -2, _SC("xsi")),
+    _DECL_FUNC(poll,  1, _SC("x")),
+    _DECL_FUNC(wait,  1, _SC("x")),
+#if defined(OS_POSIX)
+    _DECL_FUNC(send_signal,  1, _SC("x")),
+    _DECL_FUNC(terminate,  1, _SC("x")),
+    _DECL_FUNC(kill,  1, _SC("x")),
+#elif defined(OS_WINDOWS)
+    _DECL_FUNC(terminate,  1, _SC("x")),
+    _DECL_FUNC(kill,  1, _SC("x")),
+#endif
+
+    {0,0}
+};
+#undef _DECL_FUNC
+
+/* convenience functions */
+static SQRESULT sq_subprocess_call(HSQUIRRELVM v)
+{
+    int r = sq_subprocess_popen(v);
+    if (r != 1){
+        return r;
+    }
+    lua_replace(L, 1);
+    lua_settop(L, 1);
+    return sq_process_wait(v);
+}
+
+static SQRESULT sq_subprocess_call_capture(HSQUIRRELVM v)
+{
+    int r;
+    lua_settop(L, 1);
+    luaL_checktype(L, 1, LUA_TTABLE);
+    lua_getfield(L, 1, "stdout");
+    lua_pushlightuserdata(L, &PIPE);
+    lua_setfield(L, 1, "stdout");
+    r = sq_subprocess_popen(v);
+    if (r != 1) return r;
+    /* stack: args oldstdout sp */
+    /* restore old stdout value in table */
+    lua_pushvalue(L, 2);
+    lua_setfield(L, 1, "stdout");
+    lua_replace(L, 1);
+    lua_settop(L, 1);
+    /* stack: sp */
+    lua_getfield(L, 1, "stdout");
+    lua_getfield(L, 2, "read");
+    lua_pushvalue(L, 2);
+    lua_pushliteral(L, "*a");
+    lua_call(L, 2, 2);
+    /* stack: sp stdout a b */
+    /* close stdout, rather than relying on GC */
+    lua_getfield(L, 2, "close");
+    lua_pushvalue(L, 2);
+    lua_call(L, 1, 0);
+    /* wait for child (to avoid leaving a zombie) */
+    lua_getfield(L, 1, "wait");
+    lua_pushvalue(L, 1);
+    lua_call(L, 1, 1);
+    /* return exitcode, content */
+    lua_pushvalue(L, 3);
+    return 2;
+}
+
+/* Miscellaneous */
+
+static SQRESULT sq_subprocess_wait(HSQUIRRELVM v)
+{
+    struct proc *proc;
+#if defined(OS_POSIX)
+    int stat;
+    pid_t pid;
+    int exitcode;
+#elif defined(OS_WINDOWS)
+    HANDLE *handles = NULL, hProcess;
+    int i, nprocs;
+    DWORD retval;
+    DWORD exitcode;
+#endif
+
+    luaL_getmetatable(L, SP_LIST);
+    if (lua_isnil(L, -1))
+        return luaL_error(L, "SP_LIST is nil");
+#if defined(OS_POSIX)
+    pid = wait(&stat);
+    if (pid == -1){
+        lua_pushnil(L);
+        lua_pushstring(L, strerror(errno));
+        return 2;
+    }
+    exitcode = getexitcode(stat);
+    /* find proc object corresponding to pid */
+    lua_pushinteger(L, pid);
+    lua_pushvalue(L, -1);    /* stack: list pid pid */
+    lua_gettable(L, -3);     /* stack: list pid proc */
+    if (lua_isnil(L, -1)){
+        fprintf(stderr, "subprocess.c: XXX: cannot find proc object for pid %d\n", (int) pid);
+    }
+    lua_replace(L, -3);     /* stack: proc pid */
+    lua_pop(L, 1);          /* stack: proc */
+    /* update proc object */
+    proc = toproc(L, -1);
+    if (!proc){
+        fputs("subprocess.c: XXX: proc list entry is wrong type\n", stderr);
+    } else {
+        proc->exitcode = exitcode;
+        doneproc(L, -1);
+    }
+    lua_pushinteger(L, exitcode);
+    lua_pushinteger(L, pid);
+    /* stack: proc exitcode pid */
+    return 3;
+#elif defined(OS_WINDOWS)
+    /* count how many procs there are */
+    nprocs = sq_getsize(v, -1);
+    /* stack: list */
+    if (nprocs > 0){
+        handles = malloc(nprocs * sizeof *handles);
+        if (!handles)
+            return luaL_error(L, "memory full");
+        i = 0;
+        lua_pushnil(L);
+        while (lua_next(L, -2)){
+            proc = toproc(L, -1);
+            if (proc && !proc->done && i < nprocs){
+                handles[i++] = proc->hProcess;
+            } else if (proc && !proc->done){
+                fputs("subprocess.c: XXX: handles array allocated too small\n", stderr);
+            } else if (!proc){
+                fputs("foreign object in SP_LIST\n", stderr);
+            }
+            lua_pop(L, 1);
+        }
+    } else i = 0;
+    if (i > 0){
+        if (i > MAXIMUM_WAIT_OBJECTS){
+            free(handles);
+            return luaL_error(L, "too many wait objects: %d", i);
+        }
+        retval = WaitForMultipleObjects(i, handles, FALSE, INFINITE);
+        if (retval >= WAIT_OBJECT_0 && retval < (DWORD)(WAIT_OBJECT_0 + i)){
+            hProcess = handles[retval - WAIT_OBJECT_0];
+            free(handles);
+            /* find this process again in the table */
+            lua_pushnil(L);
+            while (lua_next(L, -2)){
+                proc = toproc(L, -1);
+                if (proc && !proc->done && proc->hProcess == hProcess){
+                    /* it's this one */
+                    if (GetExitCodeProcess(proc->hProcess, &exitcode) == 0){
+                        {
+                            char buf[256];
+                            copy_w32error(buf, 255, GetLastError());
+                            fprintf(stderr, "GetExitCodeProcess failed: %s\n", buf);
+                        }
+                        proc->exitcode = -1; /*  :-\  */
+                    } else {
+                        proc->exitcode = exitcode;
+                    }
+                    CloseHandle(proc->hProcess);
+                    doneproc(L, -1);
+                    lua_pushinteger(L, exitcode);
+                    lua_pushinteger(L, proc->pid); /* stack: list key proc exitcode pid */
+                    return 3;
+                }
+                lua_pop(L, 1);
+            }
+            fputs("proc has mysteriously disappeared from table!\n", stderr);
+            return 0;
+        } else if (retval == WAIT_FAILED){
+            free(handles);
+            push_w32error(L, GetLastError());
+            return lua_error(L);
+        } else {
+            free(handles);
+            return luaL_error(L, "WaitForMultipleObjects failed unexpectedly");
+        }
+    } else {
+        free(handles);
+        lua_pushnil(L);
+        lua_pushliteral(L, "no processes to wait for");
+        return 2;
+    }
+#endif
+}
+
+static const luaL_Reg subprocess[] = {
+    /* {"pipe", superpipe}, */
+    {"popen", sq_subprocess_popen},
+    {"call", sq_subprocess_call},
+    {"call_capture", sq_subprocess_call_capture},
+    {"wait", sq_subprocess_wait},
+    {"prune", sq_subprocess_prune},
+    {NULL, NULL}
+};
+
+#define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),  sq_subprocess_##name,nparams,tycheck}
+static SQRegFunction sq_subprocess_methods[] =
+{
+    _DECL_FUNC(constructor,  -2, _SC("xsi")),
+    _DECL_FUNC(popen,  1, _SC("x")),
+    _DECL_FUNC(call,  1, _SC("x")),
+    _DECL_FUNC(call_capture,  1, _SC("x")),
+    _DECL_FUNC(prune,  1, _SC("x")),
+    _DECL_FUNC(wait,  1, _SC("x")),
+
+    {0,0}
+};
+#undef _DECL_FUNC
+
+SQRESULT sq_register_subprocess(HSQUIRRELVM v)
+{
+    /* create environment table for C functions */
+    lua_newtable(L);
+    lua_pushvalue(L, -1);
+    lua_setfield(L, LUA_REGISTRYINDEX, SP_LIST);
+    lua_pop(L, 1);
+
+#if LUA_VERSION_NUM >= 502
+    lua_createtable(L, 0, sizeof subprocess / sizeof *subprocess - 1);
+    luaL_setfuncs(L, subprocess, 0);
+#else
+    luaL_register(L, "subprocess", subprocess);
+#endif
+
+    /* export PIPE and STDOUT constants */
+    lua_pushlightuserdata(L, &PIPE);
+    lua_setfield(L, -2, "PIPE");
+    lua_pushlightuserdata(L, &STDOUT);
+    lua_setfield(L, -2, "STDOUT");
+
+    /* create metatable for proc objects */
+    luaL_newmetatable(L, SP_PROC_META);
+#if LUA_VERSION_NUM >= 502
+    luaL_setfuncs(L, proc_meta, 0);
+#else
+    luaL_register(L, NULL, proc_meta);
+#endif
+    lua_pushboolean(L, 0);
+    lua_setfield(L, -2, "__metatable");
+    lua_pop(L, 1);
+
+    return 1;
+}
+
+#endif