expr.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /* expr.c - evaluate expression
  2. *
  3. * Copyright 2016 Google Inc.
  4. * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
  5. *
  6. * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
  7. *
  8. * The web standard is incomplete (precedence grouping missing), see:
  9. * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
  10. *
  11. * eval_expr() uses the recursive "Precedence Climbing" algorithm:
  12. *
  13. * Clarke, Keith. "The top-down parsing of expressions." University of London.
  14. * Queen Mary College. Department of Computer Science and Statistics, 1986.
  15. *
  16. * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
  17. *
  18. * Nice explanation and Python implementation:
  19. * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
  20. USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
  21. config EXPR
  22. bool "expr"
  23. default n
  24. help
  25. usage: expr ARG1 OPERATOR ARG2...
  26. Evaluate expression and print result. For example, "expr 1 + 2".
  27. The supported operators are (grouped from highest to lowest priority):
  28. ( ) : * / % + - != <= < >= > = & |
  29. Each constant and operator must be a separate command line argument.
  30. All operators are infix, meaning they expect a constant (or expression
  31. that resolves to a constant) on each side of the operator. Operators of
  32. the same priority (within each group above) are evaluated left to right.
  33. Parentheses may be used (as separate arguments) to elevate the priority
  34. of expressions.
  35. Calling expr from a command shell requires a lot of \( or '*' escaping
  36. to avoid interpreting shell control characters.
  37. The & and | operators are logical (not bitwise) and may operate on
  38. strings (a blank string is "false"). Comparison operators may also
  39. operate on strings (alphabetical sort).
  40. Constants may be strings or integers. Comparison, logical, and regex
  41. operators may operate on strings (a blank string is "false"), other
  42. operators require integers.
  43. */
  44. // TODO: int overflow checking
  45. #define FOR_expr
  46. #include "toys.h"
  47. GLOBALS(
  48. char **tok; // current token, not on the stack since recursive calls mutate it
  49. char *refree;
  50. )
  51. // Scalar value. If s != NULL, it's a string, otherwise it's an int.
  52. struct value {
  53. char *s;
  54. long long i;
  55. };
  56. // Get the value as a string.
  57. char *get_str(struct value *v)
  58. {
  59. if (v->s) return v->s;
  60. else return xmprintf("%lld", v->i);
  61. }
  62. // Get the value as an integer and return 1, or return 0 on error.
  63. int get_int(struct value *v, long long *ret)
  64. {
  65. if (v->s) {
  66. char *endp;
  67. *ret = strtoll(v->s, &endp, 10);
  68. if (*endp) return 0; // If endp points to NUL, all chars were converted
  69. } else *ret = v->i;
  70. return 1;
  71. }
  72. // Preserve the invariant that v.s is NULL when the value is an integer.
  73. void assign_int(struct value *v, long long i)
  74. {
  75. v->i = i;
  76. v->s = NULL;
  77. }
  78. // Check if v is 0 or the empty string.
  79. static int is_false(struct value *v)
  80. {
  81. return get_int(v, &v->i) && !v->i;
  82. }
  83. // 'ret' is filled with a string capture or int match position.
  84. static void re(char *target, char *pattern, struct value *ret)
  85. {
  86. regex_t pat;
  87. regmatch_t m[2];
  88. xregcomp(&pat, pattern, 0);
  89. // must match at pos 0
  90. if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
  91. // Return first parenthesized subexpression as string, or length of match
  92. if (pat.re_nsub>0) {
  93. ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
  94. target+m[1].rm_so);
  95. if (TT.refree) free(TT.refree);
  96. TT.refree = ret->s;
  97. } else assign_int(ret, m[0].rm_eo);
  98. } else {
  99. if (pat.re_nsub>0) ret->s = "";
  100. else assign_int(ret, 0);
  101. }
  102. regfree(&pat);
  103. }
  104. // 4 different signatures of operators. S = string, I = int, SI = string or
  105. // int.
  106. enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
  107. enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
  108. // operators grouped by precedence
  109. static struct op_def {
  110. char *tok;
  111. char prec, sig, op; // precedence, signature for type coercion, operator ID
  112. } OPS[] = {
  113. // logical ops, precedence 1 and 2, signature SI_TO_SI
  114. {"|", 1, SI_TO_SI, OR },
  115. {"&", 2, SI_TO_SI, AND },
  116. // comparison ops, precedence 3, signature SI_TO_I
  117. {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
  118. {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
  119. {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
  120. // arithmetic ops, precedence 4 and 5, signature I_TO_I
  121. {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
  122. {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
  123. // regex match, precedence 6, signature S_TO_SI
  124. {":", 6, S_TO_SI, RE },
  125. {NULL, 0, 0, 0}, // sentinel
  126. };
  127. void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
  128. {
  129. long long a, b, x = 0; // x = a OP b for ints.
  130. char *s, *t; // string operands
  131. int cmp;
  132. switch (o->sig) {
  133. case SI_TO_SI:
  134. switch (o->op) {
  135. case OR: if (is_false(ret)) *ret = *rhs; break;
  136. case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
  137. }
  138. break;
  139. case SI_TO_I:
  140. if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
  141. cmp = a - b;
  142. } else { // otherwise compare both as strings
  143. cmp = strcmp(s = get_str(ret), t = get_str(rhs));
  144. if (ret->s != s) free(s);
  145. if (rhs->s != t) free(t);
  146. }
  147. switch (o->op) {
  148. case EQ: x = cmp == 0; break;
  149. case NE: x = cmp != 0; break;
  150. case GT: x = cmp > 0; break;
  151. case GTE: x = cmp >= 0; break;
  152. case LT: x = cmp < 0; break;
  153. case LTE: x = cmp <= 0; break;
  154. }
  155. assign_int(ret, x);
  156. break;
  157. case I_TO_I:
  158. if (!get_int(ret, &a) || !get_int(rhs, &b))
  159. error_exit("non-integer argument");
  160. switch (o->op) {
  161. case ADD: x = a + b; break;
  162. case SUB: x = a - b; break;
  163. case MUL: x = a * b; break;
  164. case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
  165. case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
  166. }
  167. assign_int(ret, x);
  168. break;
  169. case S_TO_SI: // op == RE
  170. s = get_str(ret);
  171. cmp = ret->s!=s; // ret overwritten by re so check now
  172. re(s, t = get_str(rhs), ret);
  173. if (cmp) free(s);
  174. if (rhs->s!=t) free(t);
  175. break;
  176. }
  177. }
  178. // Evalute a compound expression using recursive "Precedence Climbing"
  179. // algorithm, setting 'ret'.
  180. static void eval_expr(struct value *ret, int min_prec)
  181. {
  182. if (!*TT.tok) error_exit("Unexpected end of input");
  183. // Evaluate LHS atom, setting 'ret'.
  184. if (!strcmp(*TT.tok, "(")) { // parenthesized expression
  185. TT.tok++; // consume (
  186. eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
  187. if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
  188. if (!*TT.tok) error_exit("Expected )");
  189. if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
  190. } else ret->s = *TT.tok; // simple literal, all values start as strings
  191. TT.tok++;
  192. // Evaluate RHS and apply operator until precedence is too low.
  193. struct value rhs;
  194. while (*TT.tok) {
  195. struct op_def *o = OPS;
  196. while (o->tok) { // Look up operator
  197. if (!strcmp(*TT.tok, o->tok)) break;
  198. o++;
  199. }
  200. if (!o->tok) break; // Not an operator (extra input will fail later)
  201. if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
  202. TT.tok++;
  203. eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
  204. eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
  205. }
  206. }
  207. void expr_main(void)
  208. {
  209. struct value ret = {0};
  210. toys.exitval = 2; // if exiting early, indicate error
  211. TT.tok = toys.optargs; // initialize global token
  212. eval_expr(&ret, 1);
  213. if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
  214. if (ret.s) printf("%s\n", ret.s);
  215. else printf("%lld\n", ret.i);
  216. toys.exitval = is_false(&ret);
  217. if (TT.refree) free(TT.refree);
  218. }