grep.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. /* grep.c - show lines matching regular expressions
  2. *
  3. * Copyright 2013 CE Strake <strake888 at gmail.com>
  4. *
  5. * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
  6. *
  7. * Posix doesn't even specify -r, documenting deviations from it is silly.
  8. * echo hello | grep -w ''
  9. * echo '' | grep -w ''
  10. * echo hello | grep -f </dev/null
  11. *
  12. USE_GREP(NEWTOY(grep, "(line-buffered)(color):;(exclude-dir)*S(exclude)*M(include)*ZzEFHIab(byte-offset)h(no-filename)ino(only-matching)rRsvwcL(files-without-match)l(files-with-matches)q(quiet)(silent)e*f*C#B#A#m#x[!wx][!EF]", TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)|TOYFLAG_LINEBUF))
  13. USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)|TOYFLAG_LINEBUF))
  14. USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)|TOYFLAG_LINEBUF))
  15. config GREP
  16. bool "grep"
  17. default y
  18. help
  19. usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
  20. Show lines matching regular expressions. If no -e, first argument is
  21. regular expression to match. With no files (or "-" filename) read stdin.
  22. Returns 0 if matched, 1 if no match found, 2 for command errors.
  23. -e Regex to match. (May be repeated.)
  24. -f File listing regular expressions to match.
  25. file search:
  26. -r Recurse into subdirectories (defaults FILE to ".")
  27. -R Recurse into subdirectories and symlinks to directories
  28. -M Match filename pattern (--include)
  29. -S Skip filename pattern (--exclude)
  30. --exclude-dir=PATTERN Skip directory pattern
  31. -I Ignore binary files
  32. match type:
  33. -A Show NUM lines after -B Show NUM lines before match
  34. -C NUM lines context (A+B) -E extended regex syntax
  35. -F fixed (literal match) -a always text (not binary)
  36. -i case insensitive -m match MAX many lines
  37. -v invert match -w whole word (implies -E)
  38. -x whole line -z input NUL terminated
  39. display modes: (default: matched line)
  40. -L filenames with no match -Z output is NUL terminated
  41. -c count of matching lines -l filenames with a match
  42. -o only matching part -q quiet (errors only)
  43. -s silent (no error msg)
  44. output prefix (default: filename if checking more than 1 file)
  45. -H force filename -b byte offset of match
  46. -h hide filename -n line number of match
  47. config EGREP
  48. bool
  49. default y
  50. depends on GREP
  51. config FGREP
  52. bool
  53. default y
  54. depends on GREP
  55. */
  56. #define FOR_grep
  57. #include "toys.h"
  58. GLOBALS(
  59. long m, A, B, C;
  60. struct arg_list *f, *e, *M, *S, *exclude_dir;
  61. char *color;
  62. char *purple, *cyan, *red, *green, *grey;
  63. struct double_list *reg;
  64. char indelim, outdelim;
  65. int found, tried;
  66. )
  // One compiled pattern on the TT.reg list. Code in this file casts between
  // struct reg * and struct double_list *, so the link pointers come first
  // (presumably mirroring struct double_list -- confirm against its
  // declaration before changing the layout).
  struct reg {
    struct reg *next;  // following pattern; walked until NULL
    struct reg *prev;  // preceding pattern
    int rc;            // regexec0() status for the current line, 0 = matched
    regex_t r;         // compiled expression
    regmatch_t m;      // span of the latest match, relative to the search start
  };
  73. static void numdash(long num, char dash)
  74. {
  75. printf("%s%ld%s%c", TT.green, num, TT.cyan, dash);
  76. }
  77. // Emit line with various potential prefixes and delimiter
  78. static void outline(char *line, char dash, char *name, long lcount, long bcount,
  79. unsigned trim)
  80. {
  81. if (!trim && FLAG(o)) return;
  82. if (name && FLAG(H)) printf("%s%s%s%c", TT.purple, name, TT.cyan, dash);
  83. if (FLAG(c)) {
  84. printf("%s%ld", TT.grey, lcount);
  85. xputc(TT.outdelim);
  86. } else if (lcount && FLAG(n)) numdash(lcount, dash);
  87. if (bcount && FLAG(b)) numdash(bcount-1, dash);
  88. if (line) {
  89. if (FLAG(color)) xputsn(FLAG(o) ? TT.red : TT.grey);
  90. // support embedded NUL bytes in output
  91. xputsl(line, trim);
  92. xputc(TT.outdelim);
  93. }
  94. }
  95. // Show matches in one file
  96. static void do_grep(int fd, char *name)
  97. {
  98. long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
  99. struct double_list *dlb = 0;
  100. char *bars = 0;
  101. FILE *file;
  102. int bin = 0;
  103. if (!FLAG(r)) TT.tried++;
  104. if (!fd) name = "(standard input)";
  105. // Only run binary file check on lseekable files.
  106. if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
  107. char buf[256];
  108. int len, i = 0;
  109. unsigned wc;
  110. // If the first 256 bytes don't parse as utf8, call it binary.
  111. if (0<(len = read(fd, buf, 256))) {
  112. lseek(fd, -len, SEEK_CUR);
  113. while (i<len) {
  114. bin = utf8towc(&wc, buf+i, len-i);
  115. if (bin == -2) i = len;
  116. if (bin<1) break;
  117. i += bin;
  118. }
  119. bin = i!=len;
  120. }
  121. if (bin && FLAG(I)) return;
  122. }
  123. if (!(file = fdopen(fd, "r"))) return perror_msg("%s", name);
  124. // Loop through lines of input
  125. for (;;) {
  126. char *line = 0, *start;
  127. struct reg *shoe;
  128. size_t ulen;
  129. long len;
  130. int matched = 0, rc = 1;
  131. // get next line, check and trim delimiter
  132. lcount++;
  133. errno = 0;
  134. ulen = len = getdelim(&line, &ulen, TT.indelim, file);
  135. if (len == -1 && errno) perror_msg("%s", name);
  136. if (len<1) break;
  137. if (line[ulen-1] == TT.indelim) line[--ulen] = 0;
  138. // Prepare for next line
  139. start = line;
  140. if (TT.reg) for (shoe = (void *)TT.reg; shoe; shoe = shoe->next)
  141. shoe->rc = 0;
  142. // Loop to handle multiple matches in same line
  143. do {
  144. regmatch_t *mm = (void *)toybuf;
  145. // Handle "fixed" (literal) matches
  146. if (FLAG(F)) {
  147. struct arg_list *seek, fseek;
  148. char *s = 0;
  149. for (seek = TT.e; seek; seek = seek->next) {
  150. if (FLAG(x)) {
  151. if (!(FLAG(i) ? strcasecmp : strcmp)(seek->arg, line)) s = line;
  152. } else if (!*seek->arg) {
  153. // No need to set fseek.next because this will match every line.
  154. seek = &fseek;
  155. fseek.arg = s = line;
  156. } else if (FLAG(i)) s = strcasestr(start, seek->arg);
  157. else s = strstr(start, seek->arg);
  158. if (s) break;
  159. }
  160. if (s) {
  161. rc = 0;
  162. mm->rm_so = (s-start);
  163. mm->rm_eo = (s-start)+strlen(seek->arg);
  164. } else rc = 1;
  165. // Handle regex matches
  166. } else {
  167. int baseline = mm->rm_eo;
  168. mm->rm_so = mm->rm_eo = INT_MAX;
  169. rc = 1;
  170. for (shoe = (void *)TT.reg; shoe; shoe = shoe->next) {
  171. // Do we need to re-check this regex?
  172. if (!shoe->rc) {
  173. shoe->m.rm_so -= baseline;
  174. shoe->m.rm_eo -= baseline;
  175. if (!matched || shoe->m.rm_so<0)
  176. shoe->rc = regexec0(&shoe->r, start, ulen-(start-line), 1,
  177. &shoe->m, start==line ? 0 : REG_NOTBOL);
  178. }
  179. // If we got a match, is it a _better_ match?
  180. if (!shoe->rc && (shoe->m.rm_so < mm->rm_so ||
  181. (shoe->m.rm_so == mm->rm_so && shoe->m.rm_eo >= mm->rm_eo)))
  182. {
  183. mm = &shoe->m;
  184. rc = 0;
  185. }
  186. }
  187. }
  188. if (!rc && FLAG(o) && !mm->rm_eo && ulen>start-line) {
  189. start++;
  190. continue;
  191. }
  192. if (!rc && FLAG(x) && (mm->rm_so || ulen-(start-line)!=mm->rm_eo)) rc = 1;
  193. if (!rc && FLAG(w)) {
  194. char c = 0;
  195. if ((start+mm->rm_so)!=line) {
  196. c = start[mm->rm_so-1];
  197. if (!isalnum(c) && c != '_') c = 0;
  198. }
  199. if (!c) {
  200. c = start[mm->rm_eo];
  201. if (!isalnum(c) && c != '_') c = 0;
  202. }
  203. if (c) {
  204. start += mm->rm_so+1;
  205. continue;
  206. }
  207. }
  208. if (FLAG(v)) {
  209. if (FLAG(o)) {
  210. if (rc) mm->rm_eo = ulen-(start-line);
  211. else if (!mm->rm_so) {
  212. start += mm->rm_eo;
  213. continue;
  214. } else mm->rm_eo = mm->rm_so;
  215. } else {
  216. if (!rc) break;
  217. mm->rm_eo = ulen-(start-line);
  218. }
  219. mm->rm_so = 0;
  220. } else if (rc) break;
  221. // At least one line we didn't print since match while -ABC active
  222. if (bars) {
  223. xputs(bars);
  224. bars = 0;
  225. }
  226. matched++;
  227. TT.found = 1;
  228. // Are we NOT showing the matching text?
  229. if (FLAG(q)) {
  230. toys.exitval = 0;
  231. xexit();
  232. }
  233. if (FLAG(L) || FLAG(l)) {
  234. if (FLAG(l)) xprintf("%s%c", name, TT.outdelim);
  235. free(line);
  236. fclose(file);
  237. return;
  238. }
  239. if (!FLAG(c)) {
  240. long bcount = 1 + offset + (start-line) + (FLAG(o) ? mm->rm_so : 0);
  241. if (bin) printf("Binary file %s matches\n", name);
  242. else if (FLAG(o))
  243. outline(start+mm->rm_so, ':', name, lcount, bcount,
  244. mm->rm_eo-mm->rm_so);
  245. else {
  246. while (dlb) {
  247. struct double_list *dl = dlist_pop(&dlb);
  248. unsigned *uu = (void *)(dl->data+(strlen(dl->data)|3)+1);
  249. outline(dl->data, '-', name, lcount-before, uu[0]+1, uu[1]);
  250. free(dl->data);
  251. free(dl);
  252. before--;
  253. }
  254. if (matched==1)
  255. outline(FLAG(color) ? 0 : line, ':', name, lcount, bcount, ulen);
  256. if (FLAG(color)) {
  257. xputsn(TT.grey);
  258. if (mm->rm_so) xputsl(start, mm->rm_so);
  259. xputsn(TT.red);
  260. xputsl(start+mm->rm_so, mm->rm_eo-mm->rm_so);
  261. }
  262. if (TT.A) after = TT.A+1;
  263. }
  264. }
  265. start += mm->rm_eo;
  266. if (mm->rm_so == mm->rm_eo) break;
  267. } while (*start);
  268. offset += len;
  269. if (matched) {
  270. // Finish off pending line color fragment.
  271. if (FLAG(color) && !FLAG(o)) {
  272. xputsn(TT.grey);
  273. if (ulen > start-line) xputsl(start, ulen-(start-line));
  274. xputc(TT.outdelim);
  275. }
  276. mcount++;
  277. } else {
  278. int discard = (after || TT.B);
  279. if (after && --after) {
  280. outline(line, '-', name, lcount, 0, ulen);
  281. discard = 0;
  282. }
  283. if (discard && TT.B) {
  284. unsigned *uu, ul = (ulen|3)+1;
  285. line = xrealloc(line, ul+8);
  286. uu = (void *)(line+ul);
  287. uu[0] = offset-len;
  288. uu[1] = ulen;
  289. dlist_add(&dlb, line);
  290. line = 0;
  291. if (++before>TT.B) {
  292. struct double_list *dl;
  293. dl = dlist_pop(&dlb);
  294. free(dl->data);
  295. free(dl);
  296. before--;
  297. } else discard = 0;
  298. }
  299. // If we discarded a line while displaying context, show bars before next
  300. // line (but don't show them now in case that was last match in file)
  301. if (discard && mcount) bars = "--";
  302. }
  303. free(line);
  304. if (FLAG(m) && mcount >= TT.m) break;
  305. }
  306. if (FLAG(L)) xprintf("%s%c", name, TT.outdelim);
  307. else if (FLAG(c)) outline(0, ':', name, mcount, 0, 1);
  308. // loopfiles will also close the fd, but this frees an (opaque) struct.
  309. fclose(file);
  310. while (dlb) {
  311. struct double_list *dl = dlist_pop(&dlb);
  312. free(dl->data);
  313. free(dl);
  314. }
  315. }
  316. static void parse_regex(void)
  317. {
  318. struct arg_list *al, *new, *list = NULL;
  319. char *s, *ss;
  320. // Add all -f lines to -e list. (Yes, this is leaking allocation context for
  321. // exit to free. Not supporting nofork for this command any time soon.)
  322. al = TT.f ? TT.f : TT.e;
  323. while (al) {
  324. if (TT.f) {
  325. if (!*(s = ss = xreadfile(al->arg, 0, 0))) {
  326. free(ss);
  327. s = 0;
  328. }
  329. } else s = ss = al->arg;
  330. // Advance, when we run out of -f switch to -e.
  331. al = al->next;
  332. if (!al && TT.f) {
  333. TT.f = 0;
  334. al = TT.e;
  335. }
  336. if (!s) continue;
  337. // Split lines at \n, add individual lines to new list.
  338. do {
  339. ss = FLAG(z) ? 0 : strchr(s, '\n');
  340. if (ss) *(ss++) = 0;
  341. new = xmalloc(sizeof(struct arg_list));
  342. new->next = list;
  343. new->arg = s;
  344. list = new;
  345. s = ss;
  346. } while (ss && *s);
  347. }
  348. TT.e = list;
  349. if (!FLAG(F)) {
  350. // Convert regex list
  351. for (al = TT.e; al; al = al->next) {
  352. struct reg *shoe;
  353. if (FLAG(o) && !*al->arg) continue;
  354. dlist_add_nomalloc(&TT.reg, (void *)(shoe = xmalloc(sizeof(struct reg))));
  355. xregcomp(&shoe->r, al->arg,
  356. (REG_EXTENDED*!!FLAG(E))|(REG_ICASE*!!FLAG(i)));
  357. }
  358. dlist_terminate(TT.reg);
  359. }
  360. }
  361. static int do_grep_r(struct dirtree *new)
  362. {
  363. struct arg_list *al;
  364. char *name;
  365. if (!new->parent) TT.tried++;
  366. if (!dirtree_notdotdot(new)) return 0;
  367. if (S_ISDIR(new->st.st_mode)) {
  368. for (al = TT.exclude_dir; al; al = al->next)
  369. if (!fnmatch(al->arg, new->name, 0)) return 0;
  370. return DIRTREE_RECURSE|(FLAG(R)?DIRTREE_SYMFOLLOW:0);
  371. }
  372. if (TT.S || TT.M) {
  373. for (al = TT.S; al; al = al->next)
  374. if (!fnmatch(al->arg, new->name, 0)) return 0;
  375. if (TT.M) {
  376. for (al = TT.M; al; al = al->next)
  377. if (!fnmatch(al->arg, new->name, 0)) break;
  378. if (!al) return 0;
  379. }
  380. }
  381. // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
  382. if (new->parent && !FLAG(h)) toys.optflags |= FLAG_H;
  383. name = dirtree_path(new, 0);
  384. do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
  385. free(name);
  386. return 0;
  387. }
  388. void grep_main(void)
  389. {
  390. char **ss = toys.optargs;
  391. if (FLAG(color) && (!TT.color || !strcmp(TT.color, "auto")) && !isatty(1))
  392. toys.optflags &= ~FLAG_color;
  393. if (FLAG(color)) {
  394. TT.purple = "\e[35m";
  395. TT.cyan = "\e[36m";
  396. TT.red = "\e[1;31m";
  397. TT.green = "\e[32m";
  398. TT.grey = "\e[m";
  399. } else TT.purple = TT.cyan = TT.red = TT.green = TT.grey = "";
  400. if (FLAG(R)) toys.optflags |= FLAG_r;
  401. // Grep exits with 2 for errors
  402. toys.exitval = 2;
  403. if (!TT.A) TT.A = TT.C;
  404. if (!TT.B) TT.B = TT.C;
  405. TT.indelim = '\n' * !FLAG(z);
  406. TT.outdelim = '\n' * !FLAG(Z);
  407. // Handle egrep and fgrep
  408. if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
  409. if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
  410. if (!TT.e && !TT.f) {
  411. if (!*ss) error_exit("no REGEX");
  412. TT.e = xzalloc(sizeof(struct arg_list));
  413. TT.e->arg = *(ss++);
  414. toys.optc--;
  415. }
  416. parse_regex();
  417. if (!FLAG(h) && toys.optc>1) toys.optflags |= FLAG_H;
  418. if (FLAG(s)) {
  419. close(2);
  420. xopen_stdio("/dev/null", O_RDWR);
  421. }
  422. if (FLAG(r)) {
  423. // Iterate through -r arguments. Use "." as default if none provided.
  424. for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
  425. if (!strcmp(*ss, "-")) do_grep(0, *ss);
  426. else dirtree_read(*ss, do_grep_r);
  427. }
  428. } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
  429. if (TT.tried >= toys.optc || (FLAG(q)&&TT.found)) toys.exitval = !TT.found;
  430. }