diff.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914
  1. /* diff.c - compare files line by line
  2. *
  3. * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
  4. * Copyright 2014 Ashwini Kumar <ak.ashwini1981@gmail.com>
  5. *
  6. * See: http://cm.bell-labs.com/cm/cs/cstr/41.pdf
  7. USE_DIFF(NEWTOY(diff, "<2>2(color)(strip-trailing-cr)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)L(label)*S(starting-file):N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
  8. config DIFF
  9. bool "diff"
  10. default n
  11. help
  12. usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2
  13. -a Treat all files as text
  14. -b Ignore changes in the amount of whitespace
  15. -B Ignore changes whose lines are all blank
  16. -d Try hard to find a smaller set of changes
  17. -i Ignore case differences
  18. -L Use LABEL instead of the filename in the unified header
  19. -N Treat absent files as empty
  20. -q Output only whether files differ
  21. -r Recurse
  22. -S Start with FILE when comparing directories
  23. -T Make tabs line up by prefixing a tab when necessary
  24. -s Report when two files are the same
  25. -t Expand tabs to spaces in output
  26. -u Unified diff
  27. -U Output LINES lines of context
  28. -w Ignore all whitespace
  29. --color Colored output
  30. --strip-trailing-cr Strip trailing '\r's from input lines
  31. */
  32. #define FOR_diff
  33. #include "toys.h"
  34. GLOBALS(
  35. long ct;
  36. char *start;
  37. struct arg_list *L_list;
  38. int dir_num, size, is_binary, status, change, len[2];
  39. int *offset[2];
  40. struct stat st[2];
  41. )
  42. #define MIN(x,y) ((x) < (y) ? (x) : (y))
  43. #define MAX(x,y) ((x) > (y) ? (x) : (y))
  44. #define IS_STDIN(s) ((s)[0] == '-' && !(s)[1])
  45. struct v_vector {
  46. unsigned serial:31;
  47. unsigned last:1;
  48. union {
  49. unsigned hash;
  50. unsigned p;
  51. };
  52. };
  53. struct diff {
  54. long a, b, c, d, prev, suff;
  55. };
  56. static struct dir_t {
  57. char **list;
  58. int nr_elm;
  59. } dir[2];
  60. struct candidate {
  61. int a, b;
  62. struct candidate *prev, *next;
  63. };
  64. static struct file_t {
  65. FILE *fp;
  66. int len;
  67. } file[2];
  68. enum {
  69. SAME,
  70. DIFFER,
  71. };
  72. enum {
  73. empty = 1 << 9,
  74. eol = 1 << 10,
  75. eof = 1 << 11,
  76. space = 1 << 12
  77. };
  78. static int comp(const void *a, const void* b)
  79. {
  80. int i = ((struct v_vector *)a)->hash -
  81. ((struct v_vector *)b)->hash;
  82. if (!i) i = ((struct v_vector *)a)->serial -
  83. ((struct v_vector *)b)->serial;
  84. return i;
  85. }
  86. static int search (struct candidate **K, int r, int k, int j)
  87. {
  88. int low = r, upper = k, mid;
  89. mid = (low + upper) / 2;
  90. while (low <= mid) {
  91. if (((struct candidate*)(K[mid]))->b < j &&
  92. ((struct candidate*)(K[mid + 1]))->b > j)
  93. return mid;
  94. if (((struct candidate*)(K[mid]))->b < j) low = mid + 1;
  95. else if (((struct candidate*)(K[mid]))->b > j) upper = mid - 1;
  96. else return -1;
  97. mid = (low + upper) / 2;
  98. }
  99. return -1;
  100. }
  101. static struct candidate * new_candidate (int i, int j, struct candidate* prev)
  102. {
  103. struct candidate *c = xzalloc(sizeof(struct candidate));
  104. c->a = i;
  105. c->b = j;
  106. c->prev = prev;
  107. return c;
  108. }
  109. static void free_candidates(struct candidate *c)
  110. {
  111. struct candidate *t = c;
  112. while ((t = c)) {
  113. c = c->next;
  114. free(t);
  115. }
  116. }
  117. /*
  118. * 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
  119. * 2. if found do
  120. * 2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
  121. * 2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
  122. * 3. if E[p].last true break i.e we have reached at the end of an equiv class
  123. * else p = p + 1 //keep traversing the equiv class.
  124. * 4. K[r] = c //Save the sucessfully filled k-candidate.
  125. */
  126. static void do_merge(struct candidate **K, int *k, int i,
  127. struct v_vector *E, int p)
  128. {
  129. int r = 0, s, j;
  130. struct candidate *pr = 0, *c = K[0];
  131. while (1) {
  132. j = E[p].serial;
  133. s = search(K, r, *k, j);
  134. if (s >= 0 && (((struct candidate*)(K[s]))->b < j &&
  135. ((struct candidate*)(K[s + 1]))->b > j)) {
  136. if (((struct candidate*)(K[s + 1]))->b > j) {
  137. pr = K[s];
  138. if (r && K[r]) c->next = K[r];
  139. K[r] = c;
  140. r = s + 1;
  141. c = new_candidate(i , j, pr);
  142. }
  143. if (s == *k) {
  144. K[*k + 2] = K[*k + 1];
  145. *k = *k + 1;
  146. break;
  147. }
  148. }
  149. if (E[p].last) break;
  150. else p = p + 1;
  151. }
  152. K[r] = c;
  153. }
  154. static FILE* read_stdin()
  155. {
  156. char *tmp_name;
  157. int tmpfd = xtempfile("stdin", &tmp_name);
  158. unlink(tmp_name);
  159. free(tmp_name);
  160. xsendfile(0, tmpfd);
  161. return fdopen(tmpfd, "r");
  162. }
  163. static int read_tok(FILE *fp, off_t *off, int tok)
  164. {
  165. int t = 0, is_space;
  166. tok |= empty;
  167. while (!(tok & eol)) {
  168. t = fgetc(fp);
  169. if (FLAG(strip_trailing_cr) && t == '\r') {
  170. int t2 = fgetc(fp);
  171. if (t2 == '\n') {
  172. t = t2;
  173. if (off) (*off)++;
  174. } else {
  175. ungetc(t2, fp);
  176. }
  177. }
  178. if (off && t != EOF) *off += 1;
  179. is_space = isspace(t) || (t == EOF);
  180. tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
  181. if (t == '\n') tok |= eol;
  182. if (toys.optflags & FLAG_i)
  183. if (t >= 'A' && t <= 'Z') t = tolower(t);
  184. if (toys.optflags & FLAG_w && is_space) continue;
  185. if (toys.optflags & FLAG_b) {
  186. if (tok & space) {
  187. if (is_space) continue;
  188. tok &= ~space;
  189. } else if (is_space) t = space + ' ';
  190. }
  191. tok &= ~(empty + 0xff); //remove empty and char too.
  192. tok |= t; //add most recent char
  193. break;
  194. }
  195. return tok;
  196. }
  197. int bcomp(const void *a, const void *b)
  198. {
  199. struct v_vector *l = (struct v_vector*)a,
  200. *r = (struct v_vector*)b;
  201. int ret = l->hash - r->hash;
  202. if (!ret) {
  203. if ((r -1)->last) return 0;
  204. else return -1;
  205. }
  206. return ret;
  207. }
  208. /* file[0] corresponds file 1 and file[1] correspond file 2.
  209. * 1. calc hashes for both the files and store them in vector(v[0], v[1])
  210. * 2. sort file[1] with hash as primary and serial as sec. key
  211. * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
  212. * classes of lines in file[1], with e.last = true on the last element of each class.
  213. * The elements are ordered by serial within classes.
  214. * 4. Form the p vector stored in p_vector. p_vector[i], if non-zero, now points in e vector
  215. * to the beginning of the equiv class of lines in file[1] equivalent to line
  216. * i in file[0].
  217. * 5. Form the k-candidates as discribed in do_merge.
  218. * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
  219. * file[1], i.e J comprises LCS
  220. */
  221. static int * create_j_vector()
  222. {
  223. int tok, i, j, size = 100, k;
  224. off_t off;
  225. long hash;
  226. int *p_vector, *J;
  227. struct v_vector *v[2], *e;
  228. struct candidate **kcand, *pr;
  229. for (i = 0; i < 2; i++) {
  230. tok = off = 0;
  231. hash = 5831;
  232. v[i] = xzalloc(size * sizeof(struct v_vector));
  233. TT.offset[i] = xzalloc(size * sizeof(int));
  234. file[i].len = 0;
  235. fseek(file[i].fp, 0, SEEK_SET);
  236. while (1) {
  237. tok = read_tok(file[i].fp, &off, tok);
  238. if (!(tok & empty)) {
  239. hash = ((hash << 5) + hash) + (tok & 0xff);
  240. continue;
  241. }
  242. if (size == ++file[i].len) {
  243. size = size * 11 / 10;
  244. v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
  245. TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
  246. }
  247. v[i][file[i].len].hash = hash & INT_MAX;
  248. TT.offset[i][file[i].len] = off;
  249. if ((tok & eof)) {
  250. TT.offset[i][file[i].len] = ++off;
  251. break;
  252. }
  253. hash = 5831; //next line
  254. tok = 0;
  255. }
  256. if (TT.offset[i][file[i].len] - TT.offset[i][file[i].len - 1] == 1)
  257. file[i].len--;
  258. }
  259. for (i = 0; i <= file[1].len; i++) v[1][i].serial = i;
  260. qsort(v[1] + 1, file[1].len, sizeof(struct v_vector), comp);
  261. e = v[1];
  262. e[0].serial = 0;
  263. e[0].last = 1;
  264. for ( i = 1; i <= file[1].len; i++) {
  265. if ((i == file[1].len) || (v[1][i].hash != v[1][i+1].hash)) e[i].last = 1;
  266. else e[i].last = 0;
  267. }
  268. p_vector = xzalloc((file[0].len + 2) * sizeof(int));
  269. for (i = 1; i <= file[0].len; i++) {
  270. void *r = bsearch(&v[0][i], (e + 1), file[1].len, sizeof(e[0]), bcomp);
  271. if (r) p_vector[i] = (struct v_vector*)r - e;
  272. }
  273. for (i = 1; i <= file[0].len; i++)
  274. e[i].p = p_vector[i];
  275. free(p_vector);
  276. size = 100;
  277. kcand = xzalloc(size * sizeof(struct candidate*));
  278. kcand[0] = new_candidate(0 , 0, NULL);
  279. kcand[1] = new_candidate(file[0].len+1, file[1].len+1, NULL); //the fence
  280. k = 0; //last successfully filled k candidate.
  281. for (i = 1; i <= file[0].len; i++) {
  282. if (!e[i].p) continue;
  283. if ((size - 2) == k) {
  284. size = size * 11 / 10;
  285. kcand = xrealloc(kcand, (size * sizeof(struct candidate*)));
  286. }
  287. do_merge(kcand, &k, i, e, e[i].p);
  288. }
  289. free(v[0]); //no need for v_vector now.
  290. free(v[1]);
  291. J = xzalloc((file[0].len + 2) * sizeof(int));
  292. for (pr = kcand[k]; pr; pr = pr->prev)
  293. J[pr->a] = pr->b;
  294. J[file[0].len + 1] = file[1].len+1; //mark boundary
  295. for (i = k + 1; i >= 0; i--) free_candidates(kcand[i]);
  296. free(kcand);
  297. for (i = 1; i <= file[0].len; i++) { // jackpot?
  298. if (!J[i]) continue;
  299. fseek(file[0].fp, TT.offset[0][i - 1], SEEK_SET);
  300. fseek(file[1].fp, TT.offset[1][J[i] - 1], SEEK_SET);
  301. for (j = J[i]; i <= file[0].len && J[i] == j; i++, j++) {
  302. int tok0 = 0, tok1 = 0;
  303. do {
  304. tok0 = read_tok(file[0].fp, NULL, tok0);
  305. tok1 = read_tok(file[1].fp, NULL, tok1);
  306. if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
  307. J[i] = 0;
  308. } while (!(tok0 & tok1 & empty));
  309. }
  310. }
  311. return J;
  312. }
  313. static int *diff(char **files)
  314. {
  315. size_t i ,j;
  316. int s, t;
  317. char *bufi, *bufj;
  318. TT.is_binary = 0; //loop calls to diff
  319. TT.status = SAME;
  320. for (i = 0; i < 2; i++) {
  321. if (IS_STDIN(files[i])) file[i].fp = read_stdin();
  322. else file[i].fp = fopen(files[i], "r");
  323. if (!file[i].fp){
  324. perror_msg("%s",files[i]);
  325. TT.status = 2;
  326. return NULL; //return SAME
  327. }
  328. }
  329. s = sizeof(toybuf)/2;
  330. bufi = toybuf;
  331. bufj = (toybuf + s);
  332. fseek(file[0].fp, 0, SEEK_SET);
  333. fseek(file[1].fp, 0, SEEK_SET);
  334. if (toys.optflags & FLAG_a) return create_j_vector();
  335. while (1) {
  336. i = fread(bufi, 1, s, file[0].fp);
  337. j = fread(bufj, 1, s, file[1].fp);
  338. if (i != j) TT.status = DIFFER;
  339. for (t = 0; t < i && !TT.is_binary; t++)
  340. if (!bufi[t]) TT.is_binary = 1;
  341. for (t = 0; t < j && !TT.is_binary; t++)
  342. if (!bufj[t]) TT.is_binary = 1;
  343. i = MIN(i, j);
  344. for (t = 0; t < i; t++)
  345. if (bufi[t] != bufj[t]) TT.status = DIFFER;
  346. if (!i || !j) break;
  347. }
  348. if (TT.is_binary || (TT.status == SAME)) return NULL;
  349. return create_j_vector();
  350. }
  351. static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
  352. {
  353. int i, j, cc, cl;
  354. char *reset = NULL;
  355. if (c != ' ' && (toys.optflags & FLAG_color)) {
  356. printf("\e[%dm", c == '+' ? 32 : 31);
  357. reset = "\e[0m";
  358. }
  359. for (i = a; i <= b; i++) {
  360. fseek(fp, off_set[i - 1], SEEK_SET);
  361. putchar(c);
  362. if (toys.optflags & FLAG_T) putchar('\t');
  363. for (j = 0, cl = 0; j < (off_set[i] - off_set[i - 1]); j++) {
  364. cc = fgetc(fp);
  365. if (cc == EOF) {
  366. printf("%s\n\\ No newline at end of file\n", reset ? reset : "");
  367. return;
  368. }
  369. if ((cc == '\t') && (toys.optflags & FLAG_t))
  370. do putchar(' '); while (++cl & 7);
  371. else {
  372. putchar(cc); //xputc has calls to fflush, it hurts performance badly.
  373. cl++;
  374. }
  375. }
  376. }
  377. if (reset) printf("%s", reset);
  378. }
  379. static char *concat_file_path(char *path, char *default_path)
  380. {
  381. char *final_path;
  382. if ('/' == path[strlen(path) - 1]) {
  383. while (*default_path == '/') ++default_path;
  384. final_path = xmprintf("%s%s", path, default_path);
  385. }
  386. else if (*default_path != '/')
  387. final_path = xmprintf("%s/%s", path, default_path);
  388. else final_path = xmprintf("%s%s", path, default_path);
  389. return final_path;
  390. }
  391. static int skip(struct dirtree *node)
  392. {
  393. int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
  394. char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
  395. struct stat st;
  396. ptr = f_path;
  397. ptr += len;
  398. if (ptr[0]) {
  399. tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
  400. if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
  401. else ret = 1; //not present on other side.
  402. }
  403. free(f_path);
  404. if (tmp) free(tmp);
  405. return ret; //add otherwise
  406. }
  407. static void add_to_list(struct dirtree *node)
  408. {
  409. char *full_path;
  410. dir[TT.dir_num].list = xrealloc(dir[TT.dir_num].list,
  411. (TT.size + 1)*sizeof(char*));
  412. TT.size++;
  413. full_path = dirtree_path(node, NULL);
  414. dir[TT.dir_num].list[TT.size - 1] = full_path;
  415. }
  416. static int list_dir (struct dirtree *node)
  417. {
  418. int ret = 0;
  419. if (!dirtree_notdotdot(node)) return 0;
  420. if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
  421. add_to_list(node);
  422. return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
  423. }
  424. if (S_ISDIR(node->st.st_mode) && (toys.optflags & FLAG_r)) {
  425. if (!(toys.optflags & FLAG_N)) ret = skip(node);
  426. if (!ret) return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
  427. else {
  428. add_to_list(node); //only at one side.
  429. return 0;
  430. }
  431. } else {
  432. add_to_list(node);
  433. return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
  434. }
  435. }
  436. static int cmp(const void *p1, const void *p2)
  437. {
  438. return strcmp(* (char * const *)p1, * (char * const *)p2);
  439. }
  440. // quote and escape filenames that have awkward characters
  441. char *quote_filename(char *filename)
  442. {
  443. char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\";
  444. char *result, *s, *t;
  445. size_t len = 0;
  446. int quote = 0;
  447. // calculate memory usage and presence of quotes
  448. for (s = filename; *s; s++) {
  449. if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v'
  450. || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\')
  451. {
  452. quote = 1;
  453. len += 2;
  454. } else if (*s == ' ') {
  455. quote = 1;
  456. len++;
  457. } else if (*s < 0x20 || *s >= 0x80) {
  458. quote = 1;
  459. len += 4;
  460. } else {
  461. len++;
  462. }
  463. }
  464. // construct the new string
  465. result = xmalloc(len + (quote ? 2 : 0) + 1);
  466. t = result;
  467. if (quote) *t++ = '"';
  468. for (s = filename; *s; s++) {
  469. if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v'
  470. || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\')
  471. {
  472. *t = '\\';
  473. t[1] = to[strchr(from, *s) - from];
  474. t += 2;
  475. } else if (*s < 0x20 || *s >= 0x80) {
  476. sprintf(t, "\\%.3o", *s);
  477. t += 4;
  478. } else {
  479. *t++ = *s;
  480. }
  481. }
  482. if (quote) *t++ = '"';
  483. *t = 0;
  484. return result;
  485. }
  486. static void show_label(char *prefix, char *filename, struct stat *sb)
  487. {
  488. char date[36];
  489. char *quoted_file;
  490. quoted_file = quote_filename(filename);
  491. printf("%s %s\t%s\n", prefix, quoted_file,
  492. format_iso_time(date, sizeof(date), &sb->st_mtim));
  493. free(quoted_file);
  494. }
  495. static void do_diff(char **files)
  496. {
  497. long i = 1, size = 1, x = 0, change = 0, ignore_white,
  498. start1, end1, start2, end2;
  499. struct diff *d;
  500. struct arg_list *llist = TT.L_list;
  501. int *J;
  502. TT.offset[0] = TT.offset[1] = NULL;
  503. J = diff(files);
  504. if (!J) return; //No need to compare, have to status only
  505. d = xzalloc(size *sizeof(struct diff));
  506. do {
  507. ignore_white = 0;
  508. for (d[x].a = i; d[x].a <= file[0].len; d[x].a++) {
  509. if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
  510. else continue;
  511. }
  512. d[x].c = (J[d[x].a - 1] + 1);
  513. for (d[x].b = (d[x].a - 1); d[x].b <= file[0].len; d[x].b++) {
  514. if (J[d[x].b + 1]) break;
  515. else continue;
  516. }
  517. d[x].d = (J[d[x].b + 1] - 1);
  518. if ((toys.optflags & FLAG_B)) {
  519. if (d[x].a <= d[x].b) {
  520. if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
  521. == (d[x].b - d[x].a + 1))
  522. ignore_white = 1;
  523. } else if (d[x].c <= d[x].d){
  524. if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
  525. == (d[x].d - d[x].c + 1))
  526. ignore_white = 1;
  527. }
  528. }
  529. if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white)
  530. change = 1; //is we have diff ?
  531. if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
  532. i = d[x].b + 1;
  533. if (i > file[0].len) break;
  534. J[d[x].b] = d[x].d;
  535. if (!ignore_white) x++;
  536. } while (i <= file[0].len);
  537. i = x+1;
  538. TT.status = change; //update status, may change bcoz of -w etc.
  539. if (!(toys.optflags & FLAG_q) && change) { //start of !FLAG_q
  540. if (toys.optflags & FLAG_color) printf("\e[1m");
  541. if (toys.optflags & FLAG_L) printf("--- %s\n", llist->arg);
  542. else show_label("---", files[0], &(TT).st[0]);
  543. if (((toys.optflags & FLAG_L) && !llist->next) || !(toys.optflags & FLAG_L))
  544. show_label("+++", files[1], &(TT).st[1]);
  545. else {
  546. while (llist->next) llist = llist->next;
  547. printf("+++ %s\n", llist->arg);
  548. }
  549. if (toys.optflags & FLAG_color) printf("\e[0m");
  550. struct diff *t, *ptr1 = d, *ptr2 = d;
  551. while (i) {
  552. long a,b;
  553. if (TT.ct > file[0].len) TT.ct = file[0].len; //trim context to file len.
  554. if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
  555. i--;
  556. continue;
  557. }
  558. //Handle the context stuff
  559. a = ptr1->a;
  560. b = ptr1->b;
  561. b = MIN(file[0].len, b);
  562. if (i == x + 1) ptr1->suff = MAX(1,a - TT.ct);
  563. else {
  564. if ((ptr1 - 1)->prev >= (ptr1->a - TT.ct))
  565. ptr1->suff = (ptr1 - 1)->prev + 1;
  566. else ptr1->suff = ptr1->a - TT.ct;
  567. }
  568. calc_ct:
  569. if (i > 1) {
  570. if ((ptr2->b + TT.ct) >= (ptr2 + 1)->a) {
  571. ptr2++;
  572. i--;
  573. goto calc_ct;
  574. } else ptr2->prev = ptr2->b + TT.ct;
  575. } else ptr2->prev = ptr2->b;
  576. start1 = (ptr2->prev - ptr1->suff + 1);
  577. end1 = (start1 == 1) ? -1 : start1;
  578. start2 = MAX(1, ptr1->c - (ptr1->a - ptr1->suff));
  579. end2 = ptr2->prev - ptr2->b + ptr2->d;
  580. if (toys.optflags & FLAG_color) printf("\e[36m");
  581. printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
  582. if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
  583. else putchar(' ');
  584. printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
  585. if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
  586. else putchar(' ');
  587. printf("@@");
  588. if (toys.optflags & FLAG_color) printf("\e[0m");
  589. putchar('\n');
  590. for (t = ptr1; t <= ptr2; t++) {
  591. if (t== ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], file[0].fp);
  592. print_diff(t->a, t->b, '-', TT.offset[0], file[0].fp);
  593. print_diff(t->c, t->d, '+', TT.offset[1], file[1].fp);
  594. if (t == ptr2)
  595. print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], file[0].fp);
  596. else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], file[0].fp);
  597. }
  598. ptr2++;
  599. ptr1 = ptr2;
  600. i--;
  601. } //end of while
  602. } //End of !FLAG_q
  603. free(d);
  604. free(J);
  605. free(TT.offset[0]);
  606. free(TT.offset[1]);
  607. }
  608. static void show_status(char **files)
  609. {
  610. switch (TT.status) {
  611. case SAME:
  612. if (toys.optflags & FLAG_s)
  613. printf("Files %s and %s are identical\n",files[0], files[1]);
  614. break;
  615. case DIFFER:
  616. if ((toys.optflags & FLAG_q) || TT.is_binary)
  617. printf("Files %s and %s differ\n",files[0], files[1]);
  618. break;
  619. }
  620. }
  621. static void create_empty_entry(int l , int r, int j)
  622. {
  623. struct stat st[2];
  624. char *f[2], *path[2];
  625. int i;
  626. if (j > 0 && (toys.optflags & FLAG_N)) {
  627. path[0] = concat_file_path(dir[0].list[0], dir[1].list[r] + TT.len[1]);
  628. f[0] = "/dev/null";
  629. path[1] = f[1] = dir[1].list[r];
  630. stat(f[1], &st[0]);
  631. st[1] = st[0];
  632. }
  633. else if (j < 0 && (toys.optflags & FLAG_N)) {
  634. path[1] = concat_file_path(dir[1].list[0], dir[0].list[l] + TT.len[0]);
  635. f[1] = "/dev/null";
  636. path[0] = f[0] = dir[0].list[l];
  637. stat(f[0], &st[0]);
  638. st[1] = st[0];
  639. }
  640. if (!j) {
  641. for (i = 0; i < 2; i++) {
  642. path[i] = f[i] = dir[i].list[!i ? l: r];
  643. stat(f[i], &st[i]);
  644. }
  645. }
  646. if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode))
  647. printf("Common subdirectories: %s and %s\n", path[0], path[1]);
  648. else if (!S_ISREG(st[0].st_mode) && !S_ISDIR(st[0].st_mode))
  649. printf("File %s is not a regular file or directory "
  650. "and was skipped\n", path[0]);
  651. else if (!S_ISREG(st[1].st_mode) && !S_ISDIR(st[1].st_mode))
  652. printf("File %s is not a regular file or directory "
  653. "and was skipped\n", path[1]);
  654. else if (S_ISDIR(st[0].st_mode) != S_ISDIR(st[1].st_mode)) {
  655. if (S_ISDIR(st[0].st_mode))
  656. printf("File %s is a %s while file %s is a"
  657. " %s\n", path[0], "directory", path[1], "regular file");
  658. else
  659. printf("File %s is a %s while file %s is a"
  660. " %s\n", path[0], "regular file", path[1], "directory");
  661. } else {
  662. do_diff(f);
  663. show_status(path);
  664. if (file[0].fp) fclose(file[0].fp);
  665. if (file[1].fp) fclose(file[1].fp);
  666. }
  667. if ((toys.optflags & FLAG_N) && j) {
  668. if (j > 0) free(path[0]);
  669. else free(path[1]);
  670. }
  671. }
  672. static void diff_dir(int *start)
  673. {
  674. int l, r, j = 0;
  675. l = start[0]; //left side file start
  676. r = start[1]; //right side file start
  677. while (l < dir[0].nr_elm && r < dir[1].nr_elm) {
  678. if ((j = strcmp ((dir[0].list[l] + TT.len[0]),
  679. (dir[1].list[r] + TT.len[1]))) && !(toys.optflags & FLAG_N)) {
  680. if (j > 0) {
  681. printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
  682. free(dir[1].list[r]);
  683. r++;
  684. } else {
  685. printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
  686. free(dir[0].list[l]);
  687. l++;
  688. }
  689. TT.status = DIFFER;
  690. } else {
  691. create_empty_entry(l, r, j); //create non empty dirs/files if -N.
  692. if (j > 0) {
  693. free(dir[1].list[r]);
  694. r++;
  695. } else if (j < 0) {
  696. free(dir[0].list[l]);
  697. l++;
  698. } else {
  699. free(dir[1].list[r]);
  700. free(dir[0].list[l]);
  701. l++;
  702. r++;
  703. }
  704. }
  705. }
  706. if (l == dir[0].nr_elm) {
  707. while (r < dir[1].nr_elm) {
  708. if (!(toys.optflags & FLAG_N)) {
  709. printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
  710. TT.status = DIFFER;
  711. } else create_empty_entry(l, r, 1);
  712. free(dir[1].list[r]);
  713. r++;
  714. }
  715. } else if (r == dir[1].nr_elm) {
  716. while (l < dir[0].nr_elm) {
  717. if (!(toys.optflags & FLAG_N)) {
  718. printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
  719. TT.status = DIFFER;
  720. } else create_empty_entry(l, r, -1);
  721. free(dir[0].list[l]);
  722. l++;
  723. }
  724. }
  725. free(dir[0].list[0]); //we are done, free root nodes too
  726. free(dir[1].list[0]);
  727. }
  728. void diff_main(void)
  729. {
  730. int j = 0, k = 1, start[2] = {1, 1};
  731. char *files[2];
  732. toys.exitval = 2;
  733. if ((toys.optflags & FLAG_color) && !isatty(1)) toys.optflags ^= FLAG_color;
  734. for (j = 0; j < 2; j++) {
  735. files[j] = toys.optargs[j];
  736. if (IS_STDIN(files[j])) {
  737. if (fstat(0, &TT.st[j]) == -1)
  738. perror_exit("can't fstat %s", files[j]);
  739. } else {
  740. xstat(files[j], &TT.st[j]);
  741. }
  742. }
  743. if ((IS_STDIN(files[0]) || IS_STDIN(files[1]))
  744. && (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)))
  745. error_exit("can't compare stdin to directory");
  746. if ((TT.st[0].st_ino == TT.st[1].st_ino) //physicaly same device
  747. && (TT.st[0].st_dev == TT.st[1].st_dev)) {
  748. toys.exitval = 0;
  749. return show_status(files);
  750. }
  751. if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
  752. for (j = 0; j < 2; j++) {
  753. memset(&dir[j], 0, sizeof(struct dir_t));
  754. dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
  755. dir[j].nr_elm = TT.size; //size updated in list_dir
  756. qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp);
  757. TT.len[j] = strlen(dir[j].list[0]); //calc root node len
  758. TT.len[j] += (dir[j].list[0][TT.len[j] -1] != '/');
  759. if (toys.optflags & FLAG_S) {
  760. while (k < TT.size && strcmp(dir[j].list[k] +
  761. TT.len[j], TT.start) < 0) {
  762. start[j] += 1;
  763. k++;
  764. }
  765. }
  766. TT.dir_num++;
  767. TT.size = 0;
  768. k = 1;
  769. }
  770. diff_dir(start);
  771. free(dir[0].list); //free array
  772. free(dir[1].list);
  773. } else {
  774. if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
  775. int d = S_ISDIR(TT.st[0].st_mode);
  776. char *slash = strrchr(files[d], '/');
  777. files[1 - d] = concat_file_path(files[1 - d], slash ? slash + 1 : files[d]);
  778. if ((stat(files[1 - d], &TT.st[1 - d])) == -1)
  779. perror_exit("%s", files[1 - d]);
  780. }
  781. do_diff(files);
  782. show_status(files);
  783. if (file[0].fp) fclose(file[0].fp);
  784. if (file[1].fp) fclose(file[1].fp);
  785. }
  786. toys.exitval = TT.status; //exit status will be the status
  787. }