/* nn.c */
  1. /* RPROP Neural Networks implementation
  2. * See: http://deeplearning.cs.cmu.edu/pdfs/Rprop.pdf
  3. *
  4. * Copyright (c) 2003-2016, Salvatore Sanfilippo <antirez at gmail dot com>
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright notice,
  11. * this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * * Neither the name of Disque nor the names of its contributors may be used
  16. * to endorse or promote products derived from this software without
  17. * specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. * POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. #include <stdio.h>
  32. #include <stdlib.h>
  33. #include <math.h>
  34. #include <time.h>
  35. #include <string.h>
  36. #include "nn.h"
  37. #if defined(USE_AVX512)
  38. #define USING_SIMD
  39. #include <immintrin.h>
  40. typedef __m512 simdf_t;
  41. #define SIMDF_SIZE 16
  42. #define simdf_zero() _mm512_setzero_ps()
  43. #define simdf_set1f(x) _mm512_set1_ps(x)
  44. #define simdf_loadu(x) _mm512_loadu_ps(x)
  45. #define simdf_load(x) _mm512_load_ps(x)
  46. #define simdf_mul(a,b) _mm512_mul_ps(a,b)
  47. #define simdf_add(a,b) _mm512_add_ps(a,b)
  48. #define simdf_storeu(a,b) _mm512_storeu_ps(a,b)
  49. #define simdf_store(a,b) _mm512_store_ps(a,b)
  50. //let the compiler optmize this
  51. #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + \
  52. x[8] + x[9] + x[10] + x[11] + x[12] + x[13] + x[14] + x[15])
  53. #define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n", \
  54. __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], \
  55. x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]);
  56. #endif
  57. #if defined(USE_AVX)
  58. #define USING_SIMD
  59. #include <immintrin.h>
  60. typedef __m256 simdf_t;
  61. #define SIMDF_SIZE 8
  62. #define simdf_zero() _mm256_setzero_ps()
  63. #define simdf_set1f(x) _mm256_set1_ps(x)
  64. #define simdf_loadu(x) _mm256_loadu_ps(x)
  65. #define simdf_load(x) _mm256_load_ps(x)
  66. #define simdf_mul(a,b) _mm256_mul_ps(a,b)
  67. #define simdf_add(a,b) _mm256_add_ps(a,b)
  68. #define simdf_storeu(a,b) _mm256_storeu_ps(a,b)
  69. #define simdf_store(a,b) _mm256_store_ps(a,b)
  70. //let the compiler optmize this
  71. #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7])
  72. #define simdf_show(x) printf("%d : %f, %f, %f, %f, %f, %f, %f, %f\n", \
  73. __LINE__, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7]);
  74. #endif
  75. #if defined(USE_SSE)
  76. #define USING_SIMD
  77. #include <xmmintrin.h>
  78. #include <pmmintrin.h>
  79. typedef __m128 simdf_t;
  80. #define SIMDF_SIZE 4
  81. #define simdf_zero() _mm_setzero_ps()
  82. #define simdf_set1f(x) _mm_set1_ps(x)
  83. #define simdf_loadu(x) _mm_loadu_ps(x)
  84. #define simdf_load(x) _mm_load_ps(x)
  85. #define simdf_mul(a,b) _mm_mul_ps(a,b)
  86. #define simdf_add(a,b) _mm_add_ps(a,b)
  87. #define simdf_storeu(a,b) _mm_storeu_ps(a,b)
  88. #define simdf_store(a,b) _mm_store_ps(a,b)
  89. //let the compiler optmize this
  90. #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3])
  91. #define simdf_show(x) printf("%d : %f, %f, %f, %f\n", __LINE__, x[0], x[1], x[2], x[3]);
  92. #endif
  93. #if defined(USE_NEON)
  94. #define USING_SIMD
  95. #include <arm_neon.h>
  96. typedef ann_float_t32x4_t simdf_t;
  97. #define SIMDF_SIZE 4
  98. #define simdf_zero() vdupq_n_f32(0.0f)
  99. #define simdf_set1f(x) vdupq_n_f32(x);
  100. #define simdf_loadu(x) vld1q_f32(x)
  101. #define simdf_load(x) vld1q_f32(x)
  102. #define simdf_mul(a,b) vmulq_f32(a,b)
  103. #define simdf_add(a,b) vaddq_f32(a,b)
  104. #define simdf_storeu(a,b) vst1q_f32((ann_float_t32_t*)a,b)
  105. #define simdf_store(a,b) vst1q_f32((ann_float_t32_t*)a,b)
  106. //let the compiler optmize this
  107. #define simdf_sum(x) (x[0] + x[1] + x[2] + x[3])
  108. #define simdf_show(x) printf("%d : %f, %f, %f, %f\n", __LINE__, x[0], x[1], x[2], x[3]);
  109. #endif
  110. #ifndef SIMDF_SIZE
  111. #define SIMDF_SIZE 1
  112. #endif // SIMDF_SIZE
  113. #define ANN_SIZEOF_ann_float_t sizeof(ann_float_t)
  114. #define ANN_ALIGN_BASE (SIMDF_SIZE * ANN_SIZEOF_ann_float_t)
  115. #define ANN_ALIGN_ROUND(x) ((x%ANN_ALIGN_BASE) ? (((x/ANN_ALIGN_BASE)+1)*ANN_ALIGN_BASE) : (size_t)x)
  116. #ifndef HAS_ANN_MALLOC
  117. #define ann_malloc(x) malloc(x)
  118. #define ann_free(x) free(x)
  119. #else
  120. extern void *ann_malloc(size_t sz);
  121. extern void ann_free(void *ptr);
  122. #endif
  123. /*
  124. void *nnpmalloc(int line, size_t sz) {
  125. printf("%d : %zu : %zu\n", line, sz, ANN_ALIGN_ROUND(sz));
  126. return malloc(sz);
  127. }
  128. #define ann_malloc(x) nnpmalloc(__LINE__, x)
  129. */
  130. /* Node Transfer Function */
  131. ann_float_t AnnTransferFunctionSigmoid(ann_float_t x) {
  132. //if(x < -15) return 0;
  133. //else if(x > 15) return 1;
  134. return ((ann_float_t)1.0)/(1.0+exp(-x));
  135. }
  136. ann_float_t AnnTransferFunctionRelu(ann_float_t x) {
  137. return (x > 0.0) ? x : 0.0;
  138. }
  139. ann_float_t AnnTransferFunctionTanh(ann_float_t x) {
  140. return tanh(x);
  141. }
  142. /*
  143. ann_float_t AnnDerivativeIdentity(ann_float_t x) {
  144. return 1;
  145. }
  146. */
  147. ann_float_t AnnDerivativeSigmoid(ann_float_t x) {
  148. return x*(1-x);
  149. }
  150. ann_float_t AnnDerivativeTanh(ann_float_t x) {
  151. return (1-x)*(1+x);
  152. }
  153. ann_float_t AnnDerivativeRelu(ann_float_t x) {
  154. return (x > 0) ? 1 : 0;
  155. }
  156. /* Reset layer data to zero-units */
  157. void AnnResetLayer(AnnLayer *layer) {
  158. layer->units = 0;
  159. layer->units_aligned = 0;
  160. layer->output = NULL;
  161. layer->error = NULL;
  162. layer->weight = NULL;
  163. layer->gradient = NULL;
  164. layer->pgradient = NULL;
  165. layer->delta = NULL;
  166. layer->sgradient = NULL;
  167. }
  168. /* Allocate and return an initialized N-layers network */
  169. AnnRprop *AnnAlloc(int layers) {
  170. AnnRprop *net;
  171. int i;
  172. /* Alloc the net structure */
  173. if ((net = ann_malloc(sizeof(*net))) == NULL)
  174. return NULL;
  175. /* Alloc layers */
  176. if ((net->layer = ann_malloc(sizeof(AnnLayer)*layers)) == NULL) {
  177. ann_free(net);
  178. return NULL;
  179. }
  180. net->layers = layers;
  181. net->flags = 0;
  182. net->rprop_nminus = ANN_DEFAULT_RPROP_NMINUS;
  183. net->rprop_nplus = ANN_DEFAULT_RPROP_NPLUS;
  184. net->rprop_maxupdate = ANN_DEFAULT_RPROP_MAXUPDATE;
  185. net->rprop_minupdate = ANN_DEFAULT_RPROP_MINUPDATE;
  186. net->node_transf_func = AnnTransferFunctionSigmoid;
  187. net->derivative_func = AnnDerivativeSigmoid;
  188. /* Init layers */
  189. for (i = 0; i < layers; i++)
  190. AnnResetLayer(&net->layer[i]);
  191. return net;
  192. }
  193. /* Free a single layer */
  194. void AnnFreeLayer(AnnLayer *layer)
  195. {
  196. ann_free(layer->output);
  197. ann_free(layer->error);
  198. ann_free(layer->weight);
  199. ann_free(layer->gradient);
  200. ann_free(layer->pgradient);
  201. ann_free(layer->delta);
  202. ann_free(layer->sgradient);
  203. AnnResetLayer(layer);
  204. }
  205. /* Free the target net */
  206. void AnnFree(AnnRprop *net)
  207. {
  208. int i;
  209. /* Free layer data */
  210. for (i = 0; i < net->layers; i++) AnnFreeLayer(&net->layer[i]);
  211. /* Free allocated layers structures */
  212. ann_free(net->layer);
  213. /* And the main structure itself */
  214. ann_free(net);
  215. }
/* Init layer 'i' of the net with the specified number of units.
 *
 * If 'bias' is non-zero one extra unit is appended and its output is
 * pinned to 1, so the bias acts like an ordinary weight.
 *
 * Per-unit buffers (output, error) are sized to an aligned unit count so
 * the SIMD paths can process whole registers. Weight-shaped buffers
 * (weight, gradient, pgradient, delta, sgradient) exist only for i > 0
 * and are sized aligned_units_of_layer_i * units_of_layer_(i-1): layers
 * are stored output-first, so layer i-1 is the next layer toward the
 * output and each of its units owns one units_aligned-wide row.
 *
 * Return non-zero on out of memory (the layer is freed and reset). */
int AnnInitLayer(AnnRprop *net, int i, int units, int bias) {
    if (bias) units++; /* Take count of the bias unit */
    /* Bytes for one per-unit vector, rounded up to the SIMD granularity. */
    size_t ann_float_t_units = ANN_ALIGN_ROUND(units*ANN_SIZEOF_ann_float_t);
    size_t units_aligned = ann_float_t_units/ANN_SIZEOF_ann_float_t;
    size_t ann_float_t_units_units = 0;
    AnnLayer *layer = &ANN_LAYER(net, i);
    layer->units = units;
    layer->units_aligned = units_aligned;
    layer->output = ann_malloc(ann_float_t_units);
    layer->error = ann_malloc(ann_float_t_units);
    if (i) { /* not for output layer */
        /* One aligned row per unit of the next-toward-output layer. */
        ann_float_t_units_units = ann_float_t_units*ANN_LAYER(net, i-1).units;
        layer->weight = ann_malloc(ann_float_t_units_units);
        layer->gradient = ann_malloc(ann_float_t_units_units);
        layer->pgradient = ann_malloc(ann_float_t_units_units);
        layer->delta = ann_malloc(ann_float_t_units_units);
        layer->sgradient = ann_malloc(ann_float_t_units_units);
    }
    /* Check for out of memory conditions. Any NULL pointer aborts the
     * whole layer; AnnFreeLayer tolerates the remaining NULLs. */
    if (layer->output == NULL ||
        layer->error == NULL ||
        (i && layer->weight == NULL) ||
        (i && layer->gradient == NULL) ||
        (i && layer->pgradient == NULL) ||
        (i && layer->sgradient == NULL) ||
        (i && layer->delta == NULL))
    {
        AnnFreeLayer(layer);
        AnnResetLayer(layer); /* redundant (AnnFreeLayer resets too) but harmless */
        return 1;
    }
    /* Set all the values to zero, padding included. */
    memset(layer->output, 0, ann_float_t_units);
    memset(layer->error, 0, ann_float_t_units);
    if (i) {
        memset(layer->weight, 0, ann_float_t_units_units);
        memset(layer->gradient, 0, ann_float_t_units_units);
        memset(layer->pgradient, 0, ann_float_t_units_units);
        memset(layer->delta, 0, ann_float_t_units_units);
        memset(layer->sgradient, 0, ann_float_t_units_units);
    }
    /* Set the bias unit output to 1 (it is the last unit of the layer). */
    if (bias) layer->output[units-1] = 1;
    return 0;
}
/* Clone a network: same topology, weights, learning state and function
 * pointers. On out of memory NULL is returned.
 *
 * NOTE(review): the byte counts below are computed in 'int'; for very
 * large layers this could overflow before the memcpy -- verify against
 * the maximum supported layer size. Only the first 'units' (resp.
 * ANN_WEIGHTS) elements are copied; alignment padding in the clone stays
 * zeroed by AnnInitLayer. */
AnnRprop *AnnClone(const AnnRprop* net) {
    AnnRprop* copy;
    int j;
    if ((copy = AnnAlloc(ANN_LAYERS(net))) == NULL) return NULL;
    for (j = 0; j < ANN_LAYERS(net); j++) {
        AnnLayer *ldst;
        const AnnLayer *lsrc;
        int units = ANN_UNITS(net,j);
        int bias = j > 0;
        /* AnnInitLayer re-adds the bias unit, so pass units-bias. */
        if (AnnInitLayer(copy, j, units-bias, bias)) {
            AnnFree(copy);
            return NULL;
        }
        int ann_float_t_units = units*ANN_SIZEOF_ann_float_t;
        lsrc = &net->layer[j];
        ldst = &copy->layer[j];
        if (lsrc->output)
            memcpy(ldst->output, lsrc->output, ann_float_t_units);
        if (lsrc->error)
            memcpy(ldst->error, lsrc->error, ann_float_t_units);
        if (j) {
            /* Weight-shaped buffers exist only for j > 0. */
            int weights = ANN_WEIGHTS(net,j);
            ann_float_t_units = weights*ANN_SIZEOF_ann_float_t;
            if (lsrc->weight)
                memcpy(ldst->weight, lsrc->weight, ann_float_t_units);
            if (lsrc->gradient)
                memcpy(ldst->gradient, lsrc->gradient, ann_float_t_units);
            if (lsrc->pgradient)
                memcpy(ldst->pgradient, lsrc->pgradient, ann_float_t_units);
            if (lsrc->delta)
                memcpy(ldst->delta, lsrc->delta, ann_float_t_units);
            if (lsrc->sgradient)
                memcpy(ldst->sgradient, lsrc->sgradient, ann_float_t_units);
        }
    }
    /* Copy scalar state and the activation/derivative callbacks. */
    copy->rprop_nminus = net->rprop_nminus;
    copy->rprop_nplus = net->rprop_nplus;
    copy->rprop_maxupdate = net->rprop_maxupdate;
    copy->rprop_minupdate = net->rprop_minupdate;
    copy->flags = net->flags;
    copy->node_transf_func = net->node_transf_func;
    copy->derivative_func = net->derivative_func;
    return copy;
}
  308. /* Create a N-layer input/hidden/output net.
  309. * The units array should specify the number of
  310. * units in every layer from the output to the input layer. */
  311. AnnRprop *AnnCreateNet(int layers, int *units) {
  312. AnnRprop *net;
  313. int i;
  314. if ((net = AnnAlloc(layers)) == NULL) return NULL;
  315. for (i = 0; i < layers; i++) {
  316. if (AnnInitLayer(net, i, units[i], i > 0)) {
  317. AnnFree(net);
  318. return NULL;
  319. }
  320. }
  321. AnnSetRandomWeights(net);
  322. AnnSetDeltas(net, ANN_RPROP_INITIAL_DELTA);
  323. ANN_LEARN_RATE(net) = ANN_DEFAULT_LEARN_RATE;
  324. return net;
  325. }
  326. /* Return the total number of weights this NN has. */
  327. size_t AnnCountWeights(AnnRprop *net) {
  328. size_t weights = 0;
  329. for (int i = ANN_LAYERS(net)-1; i > 0; i--) {
  330. int nextunits = ANN_UNITS(net, i-1);
  331. int units = ANN_UNITS(net, i);
  332. if (i > 1) nextunits--; /* we don't output on bias units */
  333. weights += units*nextunits;
  334. }
  335. return weights;
  336. }
  337. /* Create a 4-layer input/hidden/output net */
  338. AnnRprop *AnnCreateNet4(int iunits, int hunits, int hunits2, int ounits) {
  339. int units[4];
  340. units[0] = ounits;
  341. units[1] = hunits2;
  342. units[2] = hunits;
  343. units[3] = iunits;
  344. return AnnCreateNet(4, units);
  345. }
  346. /* Create a 3-layer input/hidden/output net */
  347. AnnRprop *AnnCreateNet3(int iunits, int hunits, int ounits) {
  348. int units[3];
  349. units[0] = ounits;
  350. units[1] = hunits;
  351. units[2] = iunits;
  352. return AnnCreateNet(3, units);
  353. }
  354. /* Create a 2-layer "linear" network. */
  355. AnnRprop *AnnCreateNet2(int iunits, int ounits) {
  356. int units[2];
  357. units[0] = ounits;
  358. units[1] = iunits;
  359. return AnnCreateNet(2, units);
  360. }
/* Feed-forward pass: compute every layer's outputs starting from the
 * current input-layer outputs. Layers are stored output-first (layer 0
 * is the output layer, layer layers-1 the input layer), so the loop
 * walks from the input layer down to layer 1, each iteration writing
 * the outputs of layer i-1. */
void AnnSimulate(AnnRprop *net) {
    int i, j, k;
    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
        AnnLayer *layer = &ANN_LAYER(net, i);
        int nextunits = ANN_UNITS(net, i-1);
        int units_aligned = layer->units_aligned;
        int units = layer->units;
        if (i > 1) nextunits--; /* dont output on bias units */
#ifdef USING_SIMD
        int xps, psteps = units/SIMDF_SIZE;
#endif // USING_SIMD
        for (j = 0; j < nextunits; j++) {
            ann_float_t A = 0; /* Activation final value. */
            /* Row j of the weight matrix feeds target unit j of layer
             * i-1; rows are units_aligned entries wide. */
            ann_float_t *w = layer->weight + j*units_aligned;
            ann_float_t *o = layer->output;
            k = 0;
#ifdef USING_SIMD
            if(psteps)
            {
                /* Vectorized dot product, SIMDF_SIZE lanes per step.
                 * NOTE(review): simdf_load is an ALIGNED load; this
                 * assumes ann_malloc returns buffers aligned for the
                 * chosen SIMD width -- verify for AVX/AVX-512 builds
                 * where plain malloc alignment may be insufficient. */
                simdf_t sumA = simdf_zero();
                for (xps = 0; xps < psteps; xps++) {
                    simdf_t weights = simdf_load(w);
                    simdf_t outputs = simdf_load(o);
                    simdf_t prod = simdf_mul(weights,outputs);
                    sumA = simdf_add(sumA, prod);
                    w += SIMDF_SIZE;
                    o += SIMDF_SIZE;
                }
                A += simdf_sum(sumA);
                k += psteps*SIMDF_SIZE;
            }
#endif
            /* Handle final piece shorter than SIMDF_SIZE . */
            for (; k < units; k++) {
                A += (*w++) * (*o++);
            }
            /* Apply the configured activation function (sigmoid by default). */
            ANN_OUTPUT(net, i-1, j) = (*net->node_transf_func)(A); //sigmoid(A);
        }
    }
}
/* Emit on stdout a Tcl procedure that simulates the neural network:
 * the generated proc takes a flat input list and returns the output
 * list, with all weights baked in as literals. Note: always generates
 * sigmoid activations regardless of net->node_transf_func. */
void Ann2Tcl(const AnnRprop *net) {
    int i, j, k;
    printf("proc ann input {\n");
    printf(" set output {");
    for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) {
        printf("0 ");
    }
    printf("}\n");
    printf(" proc sigmoid x {return [expr {1/(1+exp(-$x))}]}\n");
    /* Unpack the input list into input_N scalar variables. */
    for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) {
        printf(" set input_%d [lindex $input %d]\n", i, i);
    }
    /* Walk from the input layer toward the output layer, emitting one
     * weighted-sum expression per target unit. */
    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
        int nextunits = ANN_UNITS(net, i-1);
        int units = ANN_UNITS(net, i);
        //if (i > 1) nextunits--; /* dont output on bias units */
        for (j = 0; j < nextunits; j++) {
            ann_float_t W;
            if (i == 1) {
                /* Final layer: write directly into the output list. */
                printf(" lset output %d ", j);
            } else {
                /* Hidden layer: intermediate O_<layer>_<unit> variable. */
                printf(" set O_%d_%d", i-1, j);
            }
            printf(" [sigmoid [expr { \\\n");
            for (k = 0; k < units; k++) {
                W = ANN_WEIGHT(net, i, k, j);
                if (i > 1 && k == units-1) {
                    /* Bias unit: constant term, no source variable. */
                    printf(" (%.9f)", W);
                } else if (i == ANN_LAYERS(net)-1) {
                    printf(" (%.9f*$input_%d)", W, k);
                } else {
                    printf(" (%.9f*$O_%d_%d)", W, i, k);
                }
                if ((k+1) < units) printf("+ \\\n");
            }
            printf("}]]\n");
        }
    }
    printf(" return $output\n");
    printf("}\n");
}
/* Emit on stdout a JavaScript function that simulates the neural
 * network: takes an input array and returns the output array, with all
 * weights baked in as literals. Like Ann2Tcl, it always generates
 * sigmoid activations regardless of net->node_transf_func. */
void Ann2Js(const AnnRprop *net) {
    int i, j, k;
    printf("function ann( input ) {\n");
    printf(" var output = [");
    for (i = 0; i < ANN_OUTPUT_UNITS(net); i++) {
        if(i) printf(", ");
        printf("0");
    }
    printf("];\n");
    printf(" var sigmoid = function(x) {return 1.0/(1.0+Math.exp(-x));};\n");
    /* Unpack the input array into input_N scalar variables. */
    for(i=0, k=ANN_INPUT_UNITS(net); i < k; ++i) {
        printf(" var input_%d = input[%d];\n", i, i);
    }
    /* Walk from the input layer toward the output layer, emitting one
     * weighted-sum expression per target unit. */
    for (i = ANN_LAYERS(net)-1; i > 0; i--) {
        int nextunits = ANN_UNITS(net, i-1);
        int units = ANN_UNITS(net, i);
        //if (i > 1) nextunits--; /* dont output on bias units */
        for (j = 0; j < nextunits; j++) {
            ann_float_t W;
            if (i == 1) {
                /* Final layer: assign straight into the output array. */
                printf(" output[%d]", j);
            } else {
                /* Hidden layer: intermediate O_<layer>_<unit> variable. */
                printf(" var O_%d_%d", i-1, j);
            }
            printf(" = sigmoid(\n");
            for (k = 0; k < units; k++) {
                W = ANN_WEIGHT(net, i, k, j);
                if (i > 1 && k == units-1) {
                    /* Bias unit: constant term, no source variable. */
                    printf(" (%.9f)", W);
                } else if (i == ANN_LAYERS(net)-1) {
                    printf(" (%.9f*input_%d)", W, k);
                } else {
                    printf(" (%.9f*O_%d_%d)", W, i, k);
                }
                if ((k+1) < units) printf("+\n");
            }
            printf(");\n");
        }
    }
    printf(" return output;\n");
    printf("}\n");
}
/* Print a human-readable dump of the whole network on stdout: for every
 * layer its type and unit count, then (for non-output layers) the weight
 * matrix W, gradients g, set-wise gradients G, previous gradients P and
 * RPROP deltas D, and finally the output and error vectors. */
void AnnPrint(const AnnRprop *net) {
    int i, j, k;
    for (i = 0; i < ANN_LAYERS(net); i++) {
        char *layertype = "Hidden";
        if (i == 0) layertype = "Output";
        if (i == ANN_LAYERS(net)-1) layertype = "Input";
        printf("%s layer %d, units %d\n", layertype, i, ANN_UNITS(net,i));
        if (i) {
            /* Don't compute the bias unit as a target. */
            int targets = ANN_UNITS(net,i-1) - (i-1>0);
            /* Weights: one (..) group per source unit of this layer. */
            printf("\tW");
            for (j = 0; j < ANN_UNITS(net, i); j++) {
                printf("(");
                for (k = 0; k < targets; k++) {
                    printf("%f", ANN_WEIGHT(net,i,j,k));
                    if (k != targets-1) printf(" ");
                }
                printf(") ");
            }
            printf("\n");
            /* Gradients */
            printf("\tg");
            for (j = 0; j < ANN_UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < targets; k++) {
                    printf("%f", ANN_GRADIENT(net,i,j,k));
                    if (k != targets-1) printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* SGradients (set-wise sums used by RPROP) */
            printf("\tG");
            for (j = 0; j < ANN_UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < targets; k++) {
                    printf("%f", ANN_SGRADIENT(net,i,j,k));
                    if (k != targets-1) printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* Gradients at t-1 */
            printf("\tP");
            for (j = 0; j < ANN_UNITS(net, i); j++) {
                printf("[");
                for (k = 0; k < targets; k++) {
                    printf("%f", ANN_PGRADIENT(net,i,j,k));
                    if (k != targets-1) printf(" ");
                }
                printf("] ");
            }
            printf("\n");
            /* Delta (per-weight RPROP update steps) */
            printf("\tD");
            for (j = 0; j < ANN_UNITS(net, i); j++) {
                printf("|");
                for (k = 0; k < targets; k++) {
                    printf("%f", ANN_DELTA(net,i,j,k));
                    if (k != targets-1) printf(" ");
                }
                printf("| ");
            }
            printf("\n");
        }
        /* Per-unit outputs and errors, printed for every layer. */
        for (j = 0; j < ANN_UNITS(net,i); j++) {
            printf("\tO: %f ", ANN_OUTPUT(net,i,j));
        }
        printf("\n");
        printf("\tE /");
        for (j = 0; j < ANN_UNITS(net,i); j++) {
            printf("%f ", ANN_ERROR(net,i,j));
        }
        printf("/\n");
    }
}
  564. /* Calcuate the global error of the net. This is just the
  565. * Root Mean Square (RMS) error, which is half the sum of the squared
  566. * errors. */
  567. ann_float_t AnnGlobalError(AnnRprop *net, ann_float_t *desired) {
  568. ann_float_t e, t;
  569. int i, outputs = ANN_OUTPUT_UNITS(net);
  570. e = 0;
  571. for (i = 0; i < outputs; i++) {
  572. t = desired[i] - ANN_OUTPUT_NODE(net,i);
  573. e += t*t; /* No need for fabs(t), t*t will always be positive. */
  574. }
  575. return .5*e;
  576. }
  577. /* Set the network input */
  578. void AnnSetInput(AnnRprop *net, ann_float_t *input)
  579. {
  580. int i, inputs = ANN_INPUT_UNITS(net);
  581. for (i = 0; i < inputs; i++) ANN_INPUT_NODE(net,i) = input[i];
  582. }
  583. /* Simulate the net, and return the global error */
  584. ann_float_t AnnSimulateError(AnnRprop *net, ann_float_t *input, ann_float_t *desired) {
  585. AnnSetInput(net, input);
  586. AnnSimulate(net);
  587. return AnnGlobalError(net, desired);
  588. }
  589. /* Compute the error vector y-t in the output unit. This error depends
  590. * on the loss function we use. */
  591. void AnnCalculateOutputError(AnnRprop *net, ann_float_t *desired) {
  592. int units = ANN_OUTPUT_UNITS(net);
  593. ann_float_t factor = (ann_float_t)2/units;
  594. AnnLayer *layer = &ANN_LAYER(net, 0);
  595. for (int j = 0; j < units; j++) {
  596. layer->error[j] = factor * (layer->output[j] - desired[j]);
  597. }
  598. }
/* Calculate gradients with a trivial and slow algorithm, this
 * is useful to check that the real implementation is working
 * well, comparing the results.
 *
 * The algorithm used is finite differences: compute the error function
 * at two points (E1 with the real weight, and E2 with the weight
 * W = W + GTRIVIAL_DELTA), then approximate the gradient as
 * G = (E2-E1)/GTRIVIAL_DELTA. (An older comment said 0.1; the actual
 * step used below is GTRIVIAL_DELTA = 0.001.) */
#define GTRIVIAL_DELTA 0.001
void AnnCalculateGradientsTrivial(AnnRprop *net, ann_float_t *desired) {
    int j, i, layers = ANN_LAYERS(net);
    for (j = 1; j < layers; j++) {
        int weights = ANN_WEIGHTS(net,j);
        for (i = 0; i < weights; i++) {
            ann_float_t t, e1, e2;
            AnnLayer *layer = &ANN_LAYER(net,j);
            /* Calculate the value of the error function
             * in this point. */
            AnnSimulate(net);
            e1 = AnnGlobalError(net, desired);
            t = layer->weight[i];
            /* Calculate the error a bit on the right */
            layer->weight[i] += GTRIVIAL_DELTA;
            AnnSimulate(net);
            e2 = AnnGlobalError(net, desired);
            /* Restore the original weight */
            layer->weight[i] = t;
            /* Calculate the gradient */
            layer->gradient[i] = (e2-e1)/GTRIVIAL_DELTA;
        }
    }
}
/* Calculate gradients using the back propagation algorithm.
 * Walks from the output layer (index 0) toward the input layer; for
 * every layer it derives each unit's error signal and (1) writes the
 * per-weight gradients into the previous (toward-input) layer's
 * gradient matrix, (2) accumulates the back-propagated error into that
 * layer's error vector. */
void AnnCalculateGradients(AnnRprop *net, ann_float_t *desired) {
    int j, layers = ANN_LAYERS(net)-1;
    /* Populate the error vector net->layer[0]->error according
     * to the loss function. */
    AnnCalculateOutputError(net,desired);
    /* Back-propagate the error and compute the gradient
     * for every weight in the net. */
    for (j = 0; j < layers; j++) {
        AnnLayer *layer = &ANN_LAYER(net, j);
        AnnLayer *prev_layer = &ANN_LAYER(net, j+1); /* next toward input */
        int i, units = layer->units;
        int prevunits = prev_layer->units;
        int prevunits_aligned = prev_layer->units_aligned;
#ifdef USING_SIMD
        int xps, psteps = prevunits/SIMDF_SIZE;
        simdf_t es;
#endif // USING_SIMD
        /* Skip bias units, they have no connections with the previous
         * layers. */
        if (j > 1) units--;
        /* Reset the next layer errors array */
        //for (i = 0; i < prevunits; i++) prev_layer->error[i] = 0;
        memset(prev_layer->error, 0, ANN_SIZEOF_ann_float_t*prevunits);
        /* For every node in this layer ... */
        for (i = 0; i < units; i++) {
            ann_float_t error_signal, ei, oi, derivative;
            int k;
            /* Compute gradient. */
            ei = layer->error[i];
            oi = layer->output[i];
            /* Common derivatives:
             *
             * identity: 1
             * sigmoid: oi*(1-oi)
             * softmax: oi*(1-oi)
             * tanh: (1-oi)*(1+oi), that's 1-(oi*oi)
             * relu: (oi > 0) ? 1 : 0
             */
            //derivative = oi*(1-oi);
            derivative = (*net->derivative_func)(oi);
            error_signal = ei*derivative;
            /* For every weight between this node and
             * the previous layer's nodes: row i of the previous layer's
             * weight/gradient matrices, prevunits_aligned entries wide. */
            ann_float_t *g = prev_layer->gradient + i*prevunits_aligned;
            ann_float_t *w = prev_layer->weight + i*prevunits_aligned;
            ann_float_t *o = prev_layer->output;
            ann_float_t *e = prev_layer->error;
            /* 1. Calculate the gradient: g = error_signal * o. */
            k = 0;
#ifdef USING_SIMD
            if(psteps)
            {
                /* NOTE(review): simdf_load/simdf_store are ALIGNED
                 * accesses; this relies on ann_malloc's alignment being
                 * sufficient for the chosen SIMD width. */
                es = simdf_set1f(error_signal);
                //printf("%d : %ld\n", __LINE__, ((long)o & 15));
                for (xps = 0; xps < psteps; xps++) {
                    simdf_t outputs = simdf_load(o);
                    simdf_t gradients = simdf_mul(es,outputs);
                    simdf_store(g, gradients);
                    o += SIMDF_SIZE;
                    g += SIMDF_SIZE;
                }
                k += psteps*SIMDF_SIZE;
            }
#endif
            /* Handle final piece shorter than SIMDF_SIZE . */
            for (; k < prevunits; k++) *g++ = error_signal*(*o++);
            /* 2. And back-propagate the error to the previous layer:
             * e += error_signal * w, accumulated over this layer's units. */
            k = 0;
#ifdef USING_SIMD
            if(psteps)
            {
                //printf("%d : %ld\n", __LINE__, ((long)w & 15));
                for (xps = 0; xps < psteps; xps++) {
                    simdf_t weights = simdf_load(w);
                    simdf_t errors = simdf_load(e);
                    simdf_t prod = simdf_mul(es, weights);
                    simdf_store(e, simdf_add(prod , errors));
                    e += SIMDF_SIZE;
                    w += SIMDF_SIZE;
                }
                k += psteps*SIMDF_SIZE;
            }
#endif
            /* Handle final piece shorter than SIMDF_SIZE . */
            for (; k < prevunits; k++) {
                (*e++) += error_signal * (*w++);
            }
        }
    }
}
  721. /* Set the delta values of the net to a given value */
  722. void AnnSetDeltas(AnnRprop *net, ann_float_t val) {
  723. int j, layers = ANN_LAYERS(net);
  724. for (j = 1; j < layers; j++) {
  725. int weights = ANN_WEIGHTS(net,j);
  726. int i;
  727. AnnLayer *layer = &ANN_LAYER(net, j);
  728. for (i = 0; i < weights; i++) layer->delta[i] = val;
  729. }
  730. }
  731. /* Set the sgradient values to zero */
  732. void AnnResetSgradient(AnnRprop *net) {
  733. int j, layers = ANN_LAYERS(net);
  734. for (j = 1; j < layers; j++) {
  735. int weights = ANN_WEIGHTS(net, j);
  736. memset(ANN_LAYER(net, j).sgradient, 0, ANN_SIZEOF_ann_float_t*weights);
  737. }
  738. }
  739. /* Set random weights in the range -0.05,+0.05 */
  740. void AnnSetRandomWeights(AnnRprop *net) {
  741. int i, j, k;
  742. for (i = 1; i < ANN_LAYERS(net); i++) {
  743. for (k = 0; k < ANN_UNITS(net, i-1); k++) {
  744. for (j = 0; j < ANN_UNITS(net, i); j++) {
  745. ANN_WEIGHT(net,i,j,k) = -0.05+.1*(rand()/(RAND_MAX+1.0));
  746. }
  747. }
  748. }
  749. }
  750. /* Scale the net weights of the given factor */
  751. void AnnScaleWeights(AnnRprop *net, ann_float_t factor) {
  752. int j, layers = ANN_LAYERS(net);
  753. for (j = 1; j < layers; j++) {
  754. int weights = ANN_WEIGHTS(net,j);
  755. int i;
  756. AnnLayer *layer = &ANN_LAYER(net, j);
  757. for (i = 0; i < weights; i++)
  758. layer->weight[i] *= factor;
  759. }
  760. }
/* Update the sgradient, that's the sum of the weight's gradient for every
 * element of the training set. This is used for the RPROP algorithm
 * that works with the sign of the derivative for the whole set.
 * Computes sgradient += gradient element-wise for every non-output
 * layer, vectorized where a SIMD backend is enabled. */
void AnnUpdateSgradient(AnnRprop *net) {
    int j, i, layers = ANN_LAYERS(net);
    for (j = 1; j < layers; j++) {
        int weights = ANN_WEIGHTS(net,j);
        ann_float_t *sg = net->layer[j].sgradient;
        ann_float_t *g = net->layer[j].gradient;
        i = 0;
#ifdef USING_SIMD
        int psteps = weights/SIMDF_SIZE;
        if(psteps)
        {
            /* NOTE(review): aligned loads/stores -- relies on ann_malloc
             * alignment being sufficient for the SIMD width in use. */
            int xps;
            for (xps = 0; xps < psteps; xps++) {
                simdf_t sgradient = simdf_load(sg);
                simdf_t gradient = simdf_load(g);
                simdf_store(sg, simdf_add( sgradient, gradient));
                sg += SIMDF_SIZE;
                g += SIMDF_SIZE;
            }
            i += psteps*SIMDF_SIZE;
        }
#endif
        /* Handle final piece shorter than SIMDF_SIZE . */
        for (; i < weights; i++)
            (*sg++) += (*g++);
    }
}
  791. /* Helper function for RPROP, returns -1 if n < 0, +1 if n > 0, 0 if n == 0 */
  792. static inline ann_float_t sign(ann_float_t n) {
  793. if (n > 0) return +1.0;
  794. if (n < 0) return -1.0;
  795. return 0.0;
  796. }
/* The core of the RPROP algorithm.
 *
 * Note that:
 * sgradient is the set-wise gradient.
 * delta is the per-weight update value.
 *
 * For each weight, compares the sign of the current set-wise gradient
 * with the previous one (pgradient) and adapts the step size 'delta'
 * accordingly, then moves the weight opposite to the gradient sign. */
void AnnAdjustWeightsResilientBP(AnnRprop *net) {
int j, i, layers = ANN_LAYERS(net);
for (j = 1; j < layers; j++) {
/* NOTE(review): one weight fewer is processed when j > 1 — presumably
 * to skip a bias weight in the hidden layers; confirm against the
 * layer/weight layout used by ANN_WEIGHTS(). */
int weights = ANN_WEIGHTS(net,j) - (j-1>0);
AnnLayer *layer = &ANN_LAYER(net, j);
for (i = 0; i < weights; i++) {
ann_float_t sgradient = layer->sgradient[i];
/* t > 0: gradient kept its sign; t < 0: sign flipped. */
ann_float_t t = layer->pgradient[i] * sgradient;
ann_float_t delta = layer->delta[i];
if (t > 0) {
/* Same sign: accelerate — grow the step (capped at MAXUPDATE)
 * and move the weight against the gradient. */
delta = ANN_MIN(delta*ANN_RPROP_NPLUS(net),ANN_RPROP_MAXUPDATE(net));
ann_float_t wdelta = -sign(sgradient) * delta;
layer->weight[i] += wdelta;
layer->delta[i] = delta;
layer->pgradient[i] = sgradient;
} else if (t < 0) {
/* Sign flip: we overshot a minimum. Undo the previous weight
 * step, shrink delta (floored at MINUPDATE), and zero pgradient
 * so the next pass takes the neutral t == 0 branch. */
ann_float_t past_wdelta = -sign(layer->pgradient[i]) * delta;
delta = ANN_MAX(delta*ANN_RPROP_NMINUS(net),ANN_RPROP_MINUPDATE(net));
layer->weight[i] -= past_wdelta;
layer->delta[i] = delta;
layer->pgradient[i] = 0;
} else { /* t == 0 */
/* No sign information (first pass or after a reversal):
 * step with the current delta, leave delta unchanged. */
ann_float_t wdelta = -sign(sgradient) * delta;
layer->weight[i] += wdelta;
layer->pgradient[i] = sgradient;
}
}
}
}
  831. /* Resilient Backpropagation Epoch */
  832. ann_float_t AnnResilientBPEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen) {
  833. ann_float_t error = 0;
  834. int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
  835. AnnResetSgradient(net);
  836. for (j = 0; j < setlen; j++) {
  837. error += AnnSimulateError(net, input, desired);
  838. AnnCalculateGradients(net, desired);
  839. AnnUpdateSgradient(net);
  840. input += inputs;
  841. desired += outputs;
  842. }
  843. AnnAdjustWeightsResilientBP(net);
  844. return error / setlen;
  845. }
  846. /* Update the deltas using the gradient descend algorithm.
  847. * Gradients should be already computed with AnnCalculateGraidents(). */
  848. void AnnUpdateDeltasGD(AnnRprop *net) {
  849. int j, i, layers = ANN_LAYERS(net);
  850. for (j = 1; j < layers; j++) {
  851. int weights = ANN_WEIGHTS(net,j);
  852. AnnLayer *layer = &ANN_LAYER(net, j);
  853. for (i = 0; i < weights; i++)
  854. layer->delta[i] += layer->gradient[i];
  855. }
  856. }
  857. /* Adjust net weights using the (already) calculated deltas. */
  858. void AnnAdjustWeights(AnnRprop *net, int setlen) {
  859. int j, i, layers = ANN_LAYERS(net);
  860. for (j = 1; j < layers; j++) {
  861. int weights = ANN_WEIGHTS(net,j);
  862. AnnLayer *layer = &ANN_LAYER(net, j);
  863. for (i = 0; i < weights; i++) {
  864. layer->weight[i] -= ANN_LEARN_RATE(net)/setlen*layer->delta[i];
  865. }
  866. }
  867. }
  868. /* Gradient Descend training */
  869. ann_float_t AnnGDEpoch(AnnRprop *net, ann_float_t *input, ann_float_t *desidered, int setlen) {
  870. ann_float_t error = 0;
  871. int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
  872. for (j = 0; j < setlen; j++) {
  873. AnnSetDeltas(net, 0);
  874. error += AnnSimulateError(net, input, desidered);
  875. AnnCalculateGradients(net, desidered);
  876. AnnUpdateDeltasGD(net);
  877. input += inputs;
  878. desidered += outputs;
  879. AnnAdjustWeights(net,setlen);
  880. }
  881. return error / setlen;
  882. }
  883. /* This function, called after AnnSimulate(), will return 1 if there is
  884. * an error in the detected class (compared to the desired output),
  885. * othewise 0 is returned. */
  886. int AnnTestClassError(AnnRprop *net, ann_float_t *desired) {
  887. int i, outputs = ANN_OUTPUT_UNITS(net);
  888. int classid, outid;
  889. ann_float_t max = 0;
  890. /* Get the class ID from the test dataset output. */
  891. classid = 0;
  892. for (i = 0; i < outputs; i++)
  893. if (desired[i] == 1) break;
  894. classid = i;
  895. /* Get the network classification. */
  896. max = ANN_OUTPUT_NODE(net,0);
  897. outid = 0;
  898. for (i = 1; i < outputs; i++) {
  899. ann_float_t o = ANN_OUTPUT_NODE(net,i);
  900. if (o > max) {
  901. outid = i;
  902. max = o;
  903. }
  904. }
  905. return outid != classid;
  906. }
  907. /* Simulate the entire test dataset with the neural network and returns the
  908. * average error of all the entries tested. */
  909. void AnnTestError(AnnRprop *net, ann_float_t *input, ann_float_t *desired, int setlen, ann_float_t *avgerr, ann_float_t *classerr) {
  910. ann_float_t error = 0;
  911. int j, inputs = ANN_INPUT_UNITS(net), outputs = ANN_OUTPUT_UNITS(net);
  912. int class_errors = 0;
  913. for (j = 0; j < setlen; j++) {
  914. error += AnnSimulateError(net, input, desired);
  915. if (classerr)
  916. class_errors += AnnTestClassError(net, desired);
  917. input += inputs;
  918. desired += outputs;
  919. }
  920. if (avgerr) *avgerr = error/setlen;
  921. if (classerr) *classerr = (ann_float_t)class_errors*100/setlen;
  922. }
  923. /* Train the net */
  924. ann_float_t AnnTrainWithAlgoFunc(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr,
  925. int maxepochs, int setlen, AnnTrainAlgoFunc algo_func) {
  926. int i = 0;
  927. ann_float_t e = maxerr+1;
  928. while (i++ < maxepochs && e >= maxerr) {
  929. e = (*algo_func)(net, input, desired, setlen);
  930. }
  931. return e;
  932. }
  933. ann_float_t AnnTrain(AnnRprop *net, ann_float_t *input, ann_float_t *desired, ann_float_t maxerr, int maxepochs,
  934. int setlen, int algo) {
  935. AnnTrainAlgoFunc algo_func;
  936. if(algo == ANN_ALGO_BPROP) algo_func = AnnResilientBPEpoch;
  937. else if(algo == ANN_ALGO_GD) algo_func = AnnGDEpoch;
  938. else return -1;
  939. return AnnTrainWithAlgoFunc(net, input, desired, maxerr, maxepochs, setlen, algo_func);
  940. }