expression_language.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. #include "core/error/error.h"
  2. #include "core/math/math.h"
  3. #include "core/strings/string.inl"
  4. #include "resource/expression_language.h"
  5. #include <alloca.h>
  6. #include <limits.h> // UINT_MAX
  7. #include <stdlib.h> // strtof
  8. #include <string.h> // memmove
  9. namespace crown
  10. {
  11. namespace skinny
  12. {
  13. namespace expression_language
  14. {
  15. /// Byte code constants.
  16. ///
  17. /// If the upper 12 bits of the byte code do not match one of these values, the operation is
  18. /// BC_PUSH_FLOAT and the byte code specify the 32 bit float to push. If the upper 12 bits
  19. /// match one of these values (which are all NaNs, so they should never appear as regular
  20. /// floats), the operation will instead be the one matching.
  21. ///
  22. /// The remaining 20 bits of the byte code are used for the id of functions and variables.
  23. enum ByteCode {BC_FUNCTION = 0x7f800000, BC_PUSH_VAR = 0x7f900000, BC_END = 0x7fa00000};
  24. /// Returns the byte code operation part of the byte code word.
  25. static inline unsigned bc_mask(unsigned i) {return i & 0xfff00000;}
  26. /// Returns the id part of the byte code word.
  27. static inline unsigned id_mask(unsigned i) {return i & 0x000fffff;}
  28. /// Opcodes for functions
  29. enum OpCode
  30. {
  31. OP_ADD,
  32. OP_SUB,
  33. OP_MUL,
  34. OP_DIV,
  35. OP_UNARY_MINUS,
  36. OP_NOP,
  37. OP_SIN,
  38. OP_COS,
  39. OP_ABS,
  40. OP_MATCH,
  41. OP_MATCH_2D
  42. };
  43. static inline float pop(Stack &stack) {CE_ASSERT(stack.size > 0, "Stack underflow"); return stack.data[--stack.size];}
  44. static inline void push(Stack &stack, float f) {CE_ASSERT(stack.size < stack.capacity, "Stack overflow"); stack.data[stack.size++] = f;}
  45. inline float fmax(float a, float b)
  46. {
  47. return a > b ? a : b;
  48. }
  49. inline float length(float a, float b)
  50. {
  51. return fsqrt((b - a) * (b - a));
  52. }
  53. inline float match(float a, float b)
  54. {
  55. return fmax(1.0f-length(a, b), 0.0f);
  56. }
  57. inline float match2d(float a, float b, float c, float d)
  58. {
  59. return match(a, b) * match(c, d);
  60. }
  61. /// Computes the function specified by @a op_code on the @a stack.
  62. static inline void compute_function(OpCode op_code, Stack &stack)
  63. {
  64. #define POP() pop(stack)
  65. #define PUSH(f) push(stack, f)
  66. float a,b,c,d;
  67. switch(op_code) {
  68. case OP_ADD: b=POP(); a=POP(); PUSH(a+b); break;
  69. case OP_SUB: b=POP(); a=POP(); PUSH(a-b); break;
  70. case OP_MUL: b=POP(); a=POP(); PUSH(a*b); break;
  71. case OP_DIV: b=POP(); a=POP(); PUSH(a/b); break;
  72. case OP_UNARY_MINUS: PUSH(-POP()); break;
  73. case OP_SIN: PUSH(fsin(POP())); break;
  74. case OP_COS: PUSH(fcos(POP())); break;
  75. case OP_ABS: a = POP(); PUSH(fabs(a)); break;
  76. case OP_MATCH: b=POP(); a=POP(); PUSH(match(a, b)); break;
  77. case OP_MATCH_2D: d=POP(); c=POP(); b=POP(); a=POP(); PUSH(match2d(a,b,c,d)); break;
  78. case OP_NOP: break;
  79. default:
  80. CE_FATAL("Unknown opcode");
  81. }
  82. #undef POP
  83. #undef PUSH
  84. }
  85. /// Union to cast through to convert between float and unsigned.
  86. union FloatAndUnsigned
  87. {
  88. float f;
  89. unsigned u;
  90. };
  91. static inline float unsigned_to_float(unsigned u) {FloatAndUnsigned fu; fu.u = u; return fu.f;}
  92. bool run(const unsigned *byte_code, const float *variables, Stack &stack)
  93. {
  94. const unsigned *p = byte_code;
  95. while (true) {
  96. unsigned bc = *p++;
  97. unsigned op = bc_mask(bc);
  98. unsigned id = id_mask(bc);
  99. switch (op) {
  100. case BC_PUSH_VAR:
  101. if (stack.size == stack.capacity) return false;
  102. stack.data[stack.size++] = variables[id];
  103. break;
  104. case BC_FUNCTION:
  105. compute_function((OpCode)id, stack);
  106. break;
  107. case BC_END:
  108. return true;
  109. default: // BC_PUSH_FLOAT
  110. if (stack.size == stack.capacity) return false;
  111. stack.data[stack.size++] = unsigned_to_float(bc);
  112. break;
  113. }
  114. }
  115. }
  116. } // namespace expression_language
  117. #if CROWN_CAN_COMPILE
  118. namespace expression_language
  119. {
  120. static inline unsigned float_to_unsigned(float f) {FloatAndUnsigned fu; fu.f = f; return fu.u;}
  121. #ifdef WIN32
  122. float strtof(const char *nptr, char **endptr) {return (float)strtod(nptr, endptr);}
  123. #endif
  124. /// Represents a token in the expression language. The tokens are used
  125. /// both during the tokenization phase and as a representation of the
  126. /// program. (A list of tokens in reverse polish notation form.)
  127. struct Token
  128. {
  129. enum TokenType {EMPTY, NUMBER, FUNCTION, VARIABLE, LEFT_PARENTHESIS, RIGHT_PARENTHESIS};
  130. Token() : type(EMPTY), id(0) {}
  131. explicit Token(TokenType type) : type(type) {}
  132. Token(TokenType type, unsigned id) : type(type), id(id) {}
  133. Token(TokenType type, float value) : type(type), value(value) {}
  134. TokenType type; ///< Identifies the type of the token
  135. union {
  136. unsigned id; ///< Id for FUNCTION and VARIABLE tokens
  137. float value; ///< Numeric value for NUMBER tokens
  138. };
  139. };
  140. /// Describes a function.
  141. struct Function
  142. {
  143. Function() {}
  144. Function(OpCode op_code, unsigned precedence, unsigned arity) : op_code(op_code), precedence(precedence), arity(arity) {}
  145. OpCode op_code; ///< The opcode of the function
  146. unsigned precedence; ///< The precedence of the function operator.
  147. unsigned arity; ///< The number of arguments that the function takes.
  148. };
  149. /// Represents the environment in which we are compiling -- the available variables,
  150. /// constants and functions.
  151. struct CompileEnvironment
  152. {
  153. unsigned num_variables;
  154. const char **variable_names;
  155. unsigned num_constants;
  156. const char **constant_names;
  157. const float *constant_values;
  158. unsigned num_functions;
  159. const char **function_names;
  160. const Function *function_values;
  161. /// Finds a string in @a strings matching @a s of length @a len and returns its index.
  162. /// Returns UINT_MAX if no such string is found.
  163. static unsigned find_string(const char *s, unsigned len, unsigned num_strings, const char **strings)
  164. {
  165. for (unsigned i=0; i<num_strings; ++i)
  166. if (strncmp(s, strings[i], len) == 0 && strlen32(strings[i]) == len)
  167. return i;
  168. return UINT_MAX;
  169. }
  170. /// Finds a token representing the identifier in the environment.
  171. Token token_for_identifier(const char *identifier, unsigned len) const
  172. {
  173. unsigned i;
  174. if ( (i=find_string(identifier, len, num_variables, variable_names)) != UINT_MAX)
  175. return Token(Token::VARIABLE, i);
  176. else if ( (i=find_string(identifier, len, num_constants, constant_names)) != UINT_MAX)
  177. return Token(Token::NUMBER, constant_values[i]);
  178. else if ( (i=find_string(identifier, len, num_functions, function_names)) != UINT_MAX)
  179. return Token(Token::FUNCTION, i);
  180. else {
  181. CE_FATAL("Unknown identifier: %s", identifier);
  182. return Token();
  183. }
  184. }
  185. /// Finds a token representing the identifier in the environment.
  186. Token token_for_identifier(const char *identifier) const
  187. {
  188. return token_for_identifier(identifier, strlen32(identifier));
  189. }
  190. /// True if there is a function matching the specified identifier.
  191. bool has_function(char * identifier) const
  192. {
  193. return find_string(identifier, strlen32(identifier), num_functions, function_names) != UINT_MAX;
  194. }
  195. };
  196. /// Tokenizes the source code @a p into a sequence of tokens. The environment @a env
  197. /// is used for looking up source code identifiers.
  198. /// Returns the total number of tokens. If the returned number is greater than the @a capcity, only
  199. /// the first @a capacity items will be converted.
  200. static unsigned tokenize(const char *p, const CompileEnvironment &env, Token *tokens, unsigned capacity)
  201. {
  202. // Determines if the next + or - is a binary or unary operator.
  203. bool binary = false;
  204. unsigned num_tokens = 0;
  205. unsigned overflow_tokens = 0;
  206. while (*p != 0) {
  207. Token token(Token::EMPTY);
  208. // Numbers
  209. if (*p >= '0' && *p <= '9') {
  210. char *out;
  211. token = Token(Token::NUMBER, strtof(p, &out));
  212. p = out;
  213. binary = true;
  214. // Identifiers
  215. } else if ( (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p == '_') ) {
  216. const char *identifier = p;
  217. while ( (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p == '_') || (*p >= '0' && *p <= '9'))
  218. p++;
  219. token = env.token_for_identifier(identifier, u32(p-identifier));
  220. binary = true;
  221. // Operators
  222. } else {
  223. switch (*p) {
  224. case '(': token = Token(Token::LEFT_PARENTHESIS); binary = false; break;
  225. case ')': token = Token(Token::RIGHT_PARENTHESIS); binary = true; break;
  226. case ' ': case '\t': case '\n': case '\r': break;
  227. case '-': token = env.token_for_identifier(binary ? "-" : "u-"); binary = false; break;
  228. case '+': token = env.token_for_identifier(binary ? "+" : "u+"); binary = false; break;
  229. default: {
  230. char s1[2] = {*p,0};
  231. char s2[3] = {*p, *(p+1), 0};
  232. if (s2[1] && env.has_function(s2)) {
  233. token = env.token_for_identifier(s2);
  234. ++p;
  235. } else
  236. token = env.token_for_identifier(s1);
  237. binary = false;
  238. break;
  239. }
  240. }
  241. ++p;
  242. }
  243. if (token.type != Token::EMPTY) {
  244. if (num_tokens == capacity)
  245. ++overflow_tokens;
  246. else
  247. tokens[num_tokens++] = token;
  248. }
  249. }
  250. return num_tokens + overflow_tokens;
  251. }
  252. /// Performs constant folding on the program represented by @a rpl which is a
  253. /// sequence of tokens in reverse polish notation. Any function found in the
  254. /// token stream which only takes constant arguments is replaced by the
  255. /// result of evaluating the function over the constant arguments.
  256. static void fold_constants(Token *rpl, unsigned &num_tokens, CompileEnvironment &env)
  257. {
  258. static const int MAX_ARITY = 4;
  259. float stack_data[MAX_ARITY];
  260. for (unsigned i=0; i<num_tokens; ++i) {
  261. if (rpl[i].type != Token::FUNCTION)
  262. continue;
  263. Stack stack(stack_data, MAX_ARITY);
  264. bool constant_arguments = true;
  265. Function f = env.function_values[rpl[i].id];
  266. unsigned arity = f.arity;
  267. CE_ASSERT(arity <= MAX_ARITY, "MAX_ARITY too small");
  268. CE_ASSERT(i >= arity, "Too few arguments to function");
  269. unsigned arg_start = i - arity;
  270. for (unsigned j=0; j<arity && constant_arguments; ++j) {
  271. constant_arguments = constant_arguments && rpl[i-j-1].type == Token::NUMBER;
  272. stack.data[j] = rpl[arg_start+j].value;
  273. }
  274. if (!constant_arguments)
  275. continue;
  276. stack.size = arity;
  277. compute_function(f.op_code, stack);
  278. unsigned results = stack.size;
  279. int to_remove = int(arity + 1) - int(results);
  280. if (to_remove > 0) {
  281. memmove(&rpl[arg_start], &rpl[arg_start+to_remove], sizeof(Token)*(num_tokens-arg_start-to_remove));
  282. num_tokens -= to_remove;
  283. }
  284. for (unsigned res = 0; res<stack.size; ++res)
  285. rpl[arg_start + res] = Token(Token::NUMBER, stack.data[res]);
  286. i = arg_start - 1;
  287. }
  288. }
  289. /// Generates bytecode from a program in RPL token stream form.
  290. /// Returns the number of byte_code tokens generated. If the returned number is > capacity, only the first
  291. /// capacity items are generated.
  292. static unsigned generate_bytecode(Token *rpl, unsigned num_tokens, const CompileEnvironment &env, unsigned *byte_code, unsigned capacity)
  293. {
  294. unsigned size = 0;
  295. unsigned overflow = 0;
  296. for (unsigned i=0; i<num_tokens; ++i) {
  297. Function f;
  298. Token t = rpl[i];
  299. unsigned op;
  300. switch (t.type) {
  301. case Token::NUMBER:
  302. op = float_to_unsigned(t.value);
  303. break;
  304. case Token::VARIABLE:
  305. op = BC_PUSH_VAR + t.id;
  306. break;
  307. case Token::FUNCTION:
  308. f = env.function_values[t.id];
  309. op = BC_FUNCTION + f.op_code;
  310. break;
  311. default:
  312. op = 0xdeadbeef;
  313. CE_FATAL("Unknown token");
  314. break;
  315. }
  316. if (size < capacity)
  317. byte_code[size++] = op;
  318. else
  319. ++overflow;
  320. }
  321. unsigned op = BC_END;
  322. if (size < capacity)
  323. byte_code[size++] = op;
  324. else
  325. ++overflow;
  326. return size + overflow;
  327. }
  328. /// Represents an item on the function call stack.
  329. /// This object is comparable so that functions that have higher precedence are executed
  330. /// before others.
  331. struct FunctionStackItem
  332. {
  333. FunctionStackItem() {}
  334. FunctionStackItem(Token t, int p, int pl) : token(t), precedence(p), par_level(pl) {}
  335. inline int cmp(const FunctionStackItem &f) const {
  336. if (par_level != f.par_level) return par_level - f.par_level;
  337. return precedence - f.precedence;
  338. }
  339. inline bool operator<(const FunctionStackItem &other) const {return cmp(other) < 0;}
  340. inline bool operator<=(const FunctionStackItem &other) const {return cmp(other) <= 0;}
  341. inline bool operator==(const FunctionStackItem &other) const {return cmp(other) == 0;}
  342. inline bool operator>=(const FunctionStackItem &other) const {return cmp(other) >= 0;}
  343. inline bool operator>(const FunctionStackItem &other) const {return cmp(other) > 0;}
  344. Token token;
  345. int precedence;
  346. int par_level;
  347. };
  348. const int NUM_DEFAULT_FUNCTIONS = 12;
  349. /// Sets up the functions that should be usable in the language.
  350. static unsigned setup_functions(const char **names, Function *functions, unsigned capacity)
  351. {
  352. CE_ASSERT(capacity >= NUM_DEFAULT_FUNCTIONS, "Not enough space for default functions");
  353. CE_UNUSED(capacity);
  354. names[0] = ","; functions[0] = Function(OP_NOP, 1, 0);
  355. names[1] = "+"; functions[1] = Function(OP_ADD, 12, 2);
  356. names[2] = "-"; functions[2] = Function(OP_SUB, 12, 2);
  357. names[3] = "*"; functions[3] = Function(OP_MUL, 13, 2);
  358. names[4] = "/"; functions[4] = Function(OP_DIV, 13, 2);
  359. names[5] = "u-"; functions[5] = Function(OP_UNARY_MINUS, 16, 1);
  360. names[6] = "u+"; functions[6] = Function(OP_NOP, 16, 0);
  361. names[7] = "sin"; functions[7] = Function(OP_SIN, 17, 1);
  362. names[8] = "cos"; functions[8] = Function(OP_COS, 17, 1);
  363. names[9] = "abs"; functions[9] = Function(OP_ABS, 17, 1);
  364. names[10] = "match"; functions[10] = Function(OP_MATCH, 17, 2);
  365. names[11] = "match_2d"; functions[11] = Function(OP_MATCH_2D, 17, 4);
  366. return NUM_DEFAULT_FUNCTIONS;
  367. }
  368. unsigned compile(const char *source, unsigned num_variables, const char **variables,
  369. unsigned num_constants, const char **constant_names, const float *constant_values,
  370. unsigned *byte_code, unsigned capacity)
  371. {
  372. const char *function_names[NUM_DEFAULT_FUNCTIONS];
  373. Function functions[NUM_DEFAULT_FUNCTIONS];
  374. unsigned num_functions = setup_functions(function_names, functions, NUM_DEFAULT_FUNCTIONS);
  375. CompileEnvironment env;
  376. env.num_variables = num_variables;
  377. env.variable_names = variables;
  378. env.num_constants = num_constants;
  379. env.constant_names = constant_names;
  380. env.constant_values = constant_values;
  381. env.num_functions = num_functions;
  382. env.function_names = function_names;
  383. env.function_values = functions;
  384. unsigned num_tokens = tokenize(source, env, NULL, 0);
  385. // Change alloca to some other temp memory allocator if you want to
  386. Token *tokens = (Token *)alloca(sizeof(Token) * num_tokens);
  387. tokenize(source, env, tokens, num_tokens);
  388. Token *rpl = (Token *)alloca(sizeof(Token) * num_tokens);
  389. unsigned num_rpl = 0;
  390. FunctionStackItem *function_stack = (FunctionStackItem *)alloca(sizeof(FunctionStackItem)*num_tokens);
  391. unsigned num_function_stack = 0;
  392. int par_level = 0;
  393. for (unsigned i=0; i<num_tokens; ++i) {
  394. Token &token = tokens[i];
  395. switch (token.type) {
  396. case Token::NUMBER:
  397. case Token::VARIABLE:
  398. rpl[num_rpl++] = token;
  399. break;
  400. case Token::LEFT_PARENTHESIS:
  401. ++par_level;
  402. break;
  403. case Token::RIGHT_PARENTHESIS:
  404. --par_level;
  405. break;
  406. case Token::FUNCTION: {
  407. FunctionStackItem f(token, env.function_values[token.id].precedence, par_level);
  408. while (num_function_stack>0 && function_stack[num_function_stack-1] >= f)
  409. rpl[num_rpl++] = function_stack[--num_function_stack].token;
  410. function_stack[num_function_stack++] = f;
  411. break;
  412. }
  413. default:
  414. CE_FATAL("Unknown token");
  415. break;
  416. }
  417. }
  418. while (num_function_stack>0)
  419. rpl[num_rpl++] = function_stack[--num_function_stack].token;
  420. fold_constants(rpl, num_rpl, env);
  421. return generate_bytecode(rpl, num_rpl, env, byte_code, capacity);
  422. }
  423. } // namespace expression_language
  424. #endif // CROWN_CAN_COMPILE
  425. } // namespace skinny
  426. } // namespace crown