sort.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /* sort.c - put input lines into order
  2. *
  3. * Copyright 2004, 2008 Rob Landley <rob@landley.net>
  4. *
  5. * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
  6. *
  7. * Deviations from POSIX: Lots.
  8. * We invented -x
  9. USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  10. config SORT
  11. bool "sort"
  12. default y
  13. help
  14. usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
  15. Sort all lines of text from input files (or stdin) to stdout.
  16. -r Reverse
  17. -u Unique lines only
  18. -n Numeric order (instead of alphabetical)
  19. -b Ignore leading blanks (or trailing blanks in second part of key)
  20. -c Check whether input is sorted
  21. -d Dictionary order (use alphanumeric and whitespace chars only)
  22. -f Force uppercase (case insensitive sort)
  23. -i Ignore nonprinting characters
  24. -M Month sort (jan, feb, etc)
  25. -x Hexadecimal numerical sort
  26. -s Skip fallback sort (only sort with keys)
  27. -z Zero (null) terminated lines
  28. -k Sort by "key" (see below)
  29. -t Use a key separator other than whitespace
  30. -o Output to FILE instead of stdout
  31. -V Version numbers (name-1.234-rc6.5b.tgz)
  32. Sorting by key looks at a subset of the words on each line. -k2 uses the
  33. second word to the end of the line, -k2,2 looks at only the second word,
  34. -k2,4 looks from the start of the second to the end of the fourth word.
  35. -k2.4,5 starts from the fourth character of the second word, to the end
  36. of the fifth word. Specifying multiple keys uses the later keys as tie
  37. breakers, in order. A type specifier appended to a sort key (such as -2,2n)
  38. applies only to sorting that key.
  39. config SORT_FLOAT
  40. bool
  41. default y
  42. depends on TOYBOX_FLOAT
  43. help
  44. usage: sort [-g]
  45. -g General numeric sort (double precision with nan and inf)
  46. */
  47. #define FOR_sort
  48. #include "toys.h"
  49. GLOBALS(
  50. char *t;
  51. struct arg_list *k;
  52. char *o, *T, S;
  53. void *key_list;
  54. unsigned linecount;
  55. char **lines, *name;
  56. )
  57. // The sort types are n, g, and M.
  58. // u, c, s, and z apply to top level only, not to keys.
  59. // b at top level implies bb.
  60. // The remaining options can be applied to search keys.
  61. #define FLAG_bb (1<<31) // Ignore trailing blanks
  62. struct sort_key {
  63. struct sort_key *next_key; // linked list
  64. unsigned range[4]; // start word, start char, end word, end char
  65. int flags;
  66. };
  67. // Copy of the part of this string corresponding to a key/flags.
  68. static char *get_key_data(char *str, struct sort_key *key, int flags)
  69. {
  70. int start = 0, end, len, i, j;
  71. // Special case whole string, so we don't have to make a copy
  72. if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
  73. && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
  74. // Find start of key on first pass, end on second pass
  75. len = strlen(str);
  76. for (j=0; j<2; j++) {
  77. if (!key->range[2*j]) end=len;
  78. // Loop through fields
  79. else {
  80. end = 0;
  81. for (i = 1; i < key->range[2*j]+j; i++) {
  82. // Skip leading blanks
  83. if (str[end] && !TT.t) while (isspace(str[end])) end++;
  84. // Skip body of key
  85. for (; str[end]; end++) {
  86. if (TT.t) {
  87. if (str[end]==*TT.t) {
  88. end++;
  89. break;
  90. }
  91. } else if (isspace(str[end])) break;
  92. }
  93. }
  94. }
  95. if (!j) start = end;
  96. }
  97. // Key with explicit separator starts after the separator
  98. if (TT.t && str[start]==*TT.t) start++;
  99. // Strip leading and trailing whitespace if necessary
  100. if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
  101. while (isspace(str[start])) start++;
  102. if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
  103. // Handle offsets on start and end
  104. if (key->range[3]) {
  105. end += key->range[3]-1;
  106. if (end>len) end=len;
  107. }
  108. if (key->range[1]) {
  109. start += key->range[1]-1;
  110. if (start>len) start=len;
  111. }
  112. // Make the copy
  113. if (end<start) end = start;
  114. str = xstrndup(str+start, end-start);
  115. // Handle -d
  116. if (flags&FLAG_d) {
  117. for (start = end = 0; str[end]; end++)
  118. if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
  119. str[start] = 0;
  120. }
  121. // Handle -i
  122. if (flags&FLAG_i) {
  123. for (start = end = 0; str[end]; end++)
  124. if (isprint(str[end])) str[start++] = str[end];
  125. str[start] = 0;
  126. }
  127. return str;
  128. }
  129. // append a sort_key to key_list.
  130. static struct sort_key *add_key(void)
  131. {
  132. void **stupid_compiler = &TT.key_list;
  133. struct sort_key **pkey = (struct sort_key **)stupid_compiler;
  134. while (*pkey) pkey = &((*pkey)->next_key);
  135. return *pkey = xzalloc(sizeof(struct sort_key));
  136. }
  137. // Perform actual comparison
  138. static int compare_values(int flags, char *x, char *y)
  139. {
  140. if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
  141. char *xx,*yy;
  142. double dx = strtod(x,&xx), dy = strtod(y,&yy);
  143. int xinf, yinf;
  144. // not numbers < NaN < -infinity < numbers < +infinity
  145. if (x==xx) return y==yy ? 0 : -1;
  146. if (y==yy) return 1;
  147. // Check for isnan
  148. if (dx!=dx) return (dy!=dy) ? 0 : -1;
  149. if (dy!=dy) return 1;
  150. // Check for infinity. (Could underflow, but avoids needing libm.)
  151. xinf = (1.0/dx == 0.0);
  152. yinf = (1.0/dy == 0.0);
  153. if (xinf) {
  154. if(dx<0) return (yinf && dy<0) ? 0 : -1;
  155. return (yinf && dy>0) ? 0 : 1;
  156. }
  157. if (yinf) return dy<0 ? 1 : -1;
  158. return dx<dy ? -1 : dx>dy;
  159. } else if (flags & FLAG_M) {
  160. struct tm thyme;
  161. int dx;
  162. char *xx,*yy;
  163. xx = strptime(x,"%b",&thyme);
  164. dx = thyme.tm_mon;
  165. yy = strptime(y,"%b",&thyme);
  166. if (!xx) return !yy ? 0 : -1;
  167. else if (!yy) return 1;
  168. else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
  169. } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
  170. else if (flags & FLAG_V) {
  171. while (*x && *y) {
  172. while (*x && *x == *y) x++, y++;
  173. if (isdigit(*x) && isdigit(*y)) {
  174. long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
  175. if (xx<yy) return -1;
  176. if (xx>yy) return 1;
  177. } else {
  178. char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
  179. // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
  180. if (xx != yy) {
  181. if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
  182. else return 1;
  183. }
  184. }
  185. }
  186. return *x ? !!*y : -1;
  187. // This is actually an integer sort with decimals sorted by string fallback.
  188. } else if (flags & FLAG_n) {
  189. long long dx = atoll(x), dy = atoll(y);
  190. return dx<dy ? -1 : dx>dy;
  191. // Ascii sort
  192. } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
  193. }
  194. // Callback from qsort(): Iterate through key_list and perform comparisons.
  195. static int compare_keys(const void *xarg, const void *yarg)
  196. {
  197. int flags = toys.optflags, retval = 0;
  198. char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
  199. struct sort_key *key;
  200. for (key=(void *)TT.key_list; !retval && key; key = key->next_key) {
  201. flags = key->flags ? : toys.optflags;
  202. // Chop out and modify key chunks, handling -dfib
  203. x = get_key_data(xx, key, flags);
  204. y = get_key_data(yy, key, flags);
  205. retval = compare_values(flags, x, y);
  206. // Free the copies get_key_data() made.
  207. if (x != xx) free(x);
  208. if (y != yy) free(y);
  209. if (retval) break;
  210. }
  211. // Perform fallback sort if necessary (always case insensitive, no -f,
  212. // the point is to get a stable order even for -f sorts)
  213. if (!retval && !FLAG(s)) {
  214. flags = toys.optflags;
  215. retval = strcmp(xx, yy);
  216. }
  217. return retval * ((flags&FLAG_r) ? -1 : 1);
  218. }
  219. // Read each line from file, appending to a big array.
  220. static void sort_lines(char **pline, long len)
  221. {
  222. char *line;
  223. if (!pline) return;
  224. line = *pline;
  225. if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
  226. *pline = 0;
  227. // handle -c here so we don't allocate more memory than necessary.
  228. if (FLAG(c)) {
  229. if (TT.lines && compare_keys((void *)&TT.lines, &line)>-!!FLAG(u))
  230. error_exit("%s: Check line %u\n", TT.name, TT.linecount);
  231. free(TT.lines);
  232. TT.lines = (void *)line;
  233. } else {
  234. if (!(TT.linecount&63))
  235. TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
  236. TT.lines[TT.linecount] = line;
  237. }
  238. TT.linecount++;
  239. }
  240. // Callback from loopfiles to handle input files.
  241. static void sort_read(int fd, char *name)
  242. {
  243. TT.name = name;
  244. do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines);
  245. }
  246. void sort_main(void)
  247. {
  248. int idx, jdx, fd = 1;
  249. if (FLAG(u)) toys.optflags |= FLAG_s;
  250. // Parse -k sort keys.
  251. if (TT.k) {
  252. struct arg_list *arg;
  253. for (arg = TT.k; arg; arg = arg->next) {
  254. struct sort_key *key = add_key();
  255. char *temp, *temp2, *optlist;
  256. int flag;
  257. idx = 0;
  258. temp = arg->arg;
  259. while (*temp) {
  260. // Start of range
  261. key->range[2*idx] = strtol(temp, &temp, 10);
  262. if (*temp=='.') key->range[(2*idx)+1] = strtol(temp+1, &temp, 10);
  263. // Handle flags appended to a key type.
  264. for (;*temp;temp++) {
  265. // Second comma becomes an "Unknown key" error.
  266. if (*temp==',' && !idx++) {
  267. temp++;
  268. break;
  269. }
  270. // Which flag is this?
  271. optlist = toys.which->options;
  272. temp2 = strchr(optlist, *temp);
  273. flag = 1<<(optlist-temp2+strlen(optlist)-1);
  274. // Was it a flag that can apply to a key?
  275. if (!temp2 || flag>FLAG_x || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) {
  276. toys.exitval = 2;
  277. error_exit("Unknown key option.");
  278. }
  279. // b after , means strip _trailing_ space, not leading.
  280. if (idx && flag==FLAG_b) flag = FLAG_bb;
  281. key->flags |= flag;
  282. }
  283. }
  284. }
  285. }
  286. // global b flag strips both leading and trailing spaces
  287. if (FLAG(b)) toys.optflags |= FLAG_bb;
  288. // If no keys, perform alphabetic sort over the whole line.
  289. if (!TT.key_list) add_key()->range[0] = 1;
  290. // Open input files and read data, populating TT.lines[TT.linecount]
  291. loopfiles(toys.optargs, sort_read);
  292. // The compare (-c) logic was handled in sort_read(),
  293. // so if we got here, we're done.
  294. if (FLAG(c)) goto exit_now;
  295. // Perform the actual sort
  296. qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
  297. // handle unique (-u)
  298. if (FLAG(u)) {
  299. for (jdx=0, idx=1; idx<TT.linecount; idx++) {
  300. if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) free(TT.lines[idx]);
  301. else TT.lines[++jdx] = TT.lines[idx];
  302. }
  303. if (TT.linecount) TT.linecount = jdx+1;
  304. }
  305. // Open output file if necessary. We can't do this until we've finished
  306. // reading in case the output file is one of the input files.
  307. if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
  308. // Output result
  309. for (idx = 0; idx<TT.linecount; idx++) {
  310. char *s = TT.lines[idx];
  311. unsigned i = strlen(s);
  312. if (!FLAG(z)) s[i] = '\n';
  313. xwrite(fd, s, i+1);
  314. if (CFG_TOYBOX_FREE) free(s);
  315. }
  316. exit_now:
  317. if (CFG_TOYBOX_FREE) {
  318. if (fd != 1) close(fd);
  319. free(TT.lines);
  320. }
  321. }