tr.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /* tr.c - translate or delete characters
  2. *
  3. * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
  4. *
  5. * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
  6. USE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
  7. config TR
  8. bool "tr"
  9. default n
  10. help
  11. usage: tr [-cds] SET1 [SET2]
  12. Translate, squeeze, or delete characters from stdin, writing to stdout
  13. -c/-C Take complement of SET1
  14. -d Delete input characters coded SET1
  15. -s Squeeze multiple output characters of SET2 into one character
  16. */
  17. #define FOR_tr
  18. #include "toys.h"
  19. GLOBALS(
  20. short map[256]; //map of chars
  21. int len1, len2;
  22. )
  23. enum {
  24. class_alpha, class_alnum, class_digit,
  25. class_lower,class_upper,class_space,class_blank,
  26. class_punct,class_cntrl,class_xdigit,class_invalid
  27. };
  28. static void map_translation(char *set1 , char *set2)
  29. {
  30. int i = TT.len1, k = 0;
  31. if (toys.optflags & FLAG_d)
  32. for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
  33. if (toys.optflags & FLAG_s) {
  34. for (i = TT.len1, k = 0; i; i--, k++)
  35. TT.map[set1[k]] = TT.map[set1[k]]|0x200;
  36. for (i = TT.len2, k = 0; i; i--, k++)
  37. TT.map[set2[k]] = TT.map[set2[k]]|0x200;
  38. }
  39. i = k = 0;
  40. while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present
  41. TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
  42. if (set2[k + 1]) k++;
  43. i++;
  44. }
  45. }
  46. static int handle_escape_char(char **esc_val) //taken from printf
  47. {
  48. char *ptr = *esc_val;
  49. int esc_length = 0;
  50. unsigned base = 0, num = 0, result = 0, count = 0;
  51. if (*ptr == 'x') {
  52. ptr++;
  53. esc_length++;
  54. base = 16;
  55. } else if (isdigit(*ptr)) base = 8;
  56. while (esc_length < 3 && base) {
  57. num = tolower(*ptr) - '0';
  58. if (num > 10) num += ('0' - 'a' + 10);
  59. if (num >= base) {
  60. if (base == 16) {
  61. esc_length--;
  62. if (!esc_length) {// Invalid hex value eg. /xvd, print as it is /xvd
  63. result = '\\';
  64. ptr--;
  65. }
  66. }
  67. break;
  68. }
  69. esc_length++;
  70. count = result = (count * base) + num;
  71. ptr++;
  72. }
  73. if (base) {
  74. ptr--;
  75. *esc_val = ptr;
  76. return (char)result;
  77. } else {
  78. switch (*ptr) {
  79. case 'n': result = '\n'; break;
  80. case 't': result = '\t'; break;
  81. case 'e': result = (char)27; break;
  82. case 'b': result = '\b'; break;
  83. case 'a': result = '\a'; break;
  84. case 'f': result = '\f'; break;
  85. case 'v': result = '\v'; break;
  86. case 'r': result = '\r'; break;
  87. case '\\': result = '\\'; break;
  88. default :
  89. result = '\\';
  90. ptr--; // Let pointer pointing to / we will increment after returning.
  91. break;
  92. }
  93. }
  94. *esc_val = ptr;
  95. return (char)result;
  96. }
  97. static int find_class(char *class_name)
  98. {
  99. int i;
  100. static char *class[] = {
  101. "[:alpha:]","[:alnum:]","[:digit:]",
  102. "[:lower:]","[:upper:]","[:space:]",
  103. "[:blank:]","[:punct:]","[:cntrl:]",
  104. "[:xdigit:]","NULL"
  105. };
  106. for (i = 0; i != class_invalid; i++) {
  107. if (!memcmp(class_name, class[i], (class_name[0] == 'x')?10:9)) break;
  108. }
  109. return i;
  110. }
  111. static char *expand_set(char *arg, int *len)
  112. {
  113. int i = 0, j, k, size = 256;
  114. char *set = xzalloc(size*sizeof(char));
  115. while (*arg) {
  116. if (i >= size) {
  117. size += 256;
  118. set = xrealloc(set, size);
  119. }
  120. if (*arg == '\\') {
  121. arg++;
  122. set[i++] = (int)handle_escape_char(&arg);
  123. arg++;
  124. continue;
  125. }
  126. if (arg[1] == '-') {
  127. if (arg[2] == '\0') goto save;
  128. j = arg[0];
  129. k = arg[2];
  130. if (j > k) perror_exit("reverse colating order");
  131. while (j <= k) set[i++] = j++;
  132. arg += 3;
  133. continue;
  134. }
  135. if (arg[0] == '[' && arg[1] == ':') {
  136. if ((j = find_class(arg)) == class_invalid) goto save;
  137. if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) {
  138. for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
  139. }
  140. if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) {
  141. for (k = 'a'; k <= 'z'; k++) set[i++] = k;
  142. }
  143. if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) {
  144. for (k = '0'; k <= '9'; k++) set[i++] = k;
  145. }
  146. if (j == class_space || j == class_blank) {
  147. set[i++] = '\t';
  148. if (j == class_space) {
  149. set[i++] = '\n';
  150. set[i++] = '\f';
  151. set[i++] = '\r';
  152. set[i++] = '\v';
  153. }
  154. set[i++] = ' ';
  155. }
  156. if (j == class_punct) {
  157. for (k = 0; k <= 255; k++)
  158. if (ispunct(k)) set[i++] = k;
  159. }
  160. if (j == class_cntrl) {
  161. for (k = 0; k <= 255; k++)
  162. if (iscntrl(k)) set[i++] = k;
  163. }
  164. if (j == class_xdigit) {
  165. for (k = 'A'; k <= 'F'; k++) {
  166. set[i + 6] = k | 0x20;
  167. set[i++] = k;
  168. }
  169. i += 6;
  170. arg += 10;
  171. continue;
  172. }
  173. arg += 9; //never here for class_xdigit.
  174. continue;
  175. }
  176. if (arg[0] == '[' && arg[1] == '=') { //[=char=] only
  177. arg += 2;
  178. if (*arg) set[i++] = *arg;
  179. if (!arg[1] || arg[1] != '=' || arg[2] != ']')
  180. error_exit("bad equiv class");
  181. continue;
  182. }
  183. save:
  184. set[i++] = *arg++;
  185. }
  186. *len = i;
  187. return set;
  188. }
  189. static void print_map(char *set1, char *set2)
  190. {
  191. int n, src, dst, prev = -1;
  192. while ((n = read(0, toybuf, sizeof(toybuf)))) {
  193. if (!FLAG(d) && !FLAG(s)) {
  194. for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]];
  195. } else {
  196. for (src = dst = 0; src < n; src++) {
  197. int ch = TT.map[toybuf[src]];
  198. if (FLAG(d) && (ch & 0x100)) continue;
  199. if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue;
  200. toybuf[dst++] = prev = ch;
  201. }
  202. }
  203. xwrite(1, toybuf, dst);
  204. }
  205. }
  206. static void do_complement(char **set)
  207. {
  208. int i, j;
  209. char *comp = xmalloc(256);
  210. for (i = 0, j = 0;i < 256; i++) {
  211. if (memchr(*set, i, TT.len1)) continue;
  212. else comp[j++] = (char)i;
  213. }
  214. free(*set);
  215. TT.len1 = j;
  216. *set = comp;
  217. }
  218. void tr_main(void)
  219. {
  220. char *set1, *set2 = NULL;
  221. int i;
  222. for (i = 0; i < 256; i++) TT.map[i] = i; //init map
  223. set1 = expand_set(toys.optargs[0], &TT.len1);
  224. if (toys.optflags & FLAG_c) do_complement(&set1);
  225. if (toys.optargs[1]) {
  226. if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string");
  227. set2 = expand_set(toys.optargs[1], &TT.len2);
  228. }
  229. map_translation(set1, set2);
  230. print_map(set1, set2);
  231. free(set1);
  232. free(set2);
  233. }