Contents of /trunk/mkinitrd-magellan/busybox/coreutils/diff.c
Parent Directory | Revision Log
Revision 532 -
(show annotations)
(download)
Sat Sep 1 22:45:15 2007 UTC (17 years ago) by niro
File MIME type: text/plain
File size: 29636 byte(s)
Sat Sep 1 22:45:15 2007 UTC (17 years ago) by niro
File MIME type: text/plain
File size: 29636 byte(s)
-import if magellan mkinitrd; it is a fork of redhats mkinitrd-5.0.8 with all magellan patches and features; deprecates magellan-src/mkinitrd
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * Mini diff implementation for busybox, adapted from OpenBSD diff. |
4 | * |
5 | * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> |
6 | * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> |
7 | * |
8 | * Sponsored in part by the Defense Advanced Research Projects |
9 | * Agency (DARPA) and Air Force Research Laboratory, Air Force |
10 | * Materiel Command, USAF, under agreement number F39502-99-1-0512. |
11 | * |
12 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
13 | */ |
14 | |
15 | #include "busybox.h" |
16 | |
/* Cap on the file size used by prepare() to guess the initial line-table size */
#define FSIZE_MAX 32768

/*
 * Output flags (the "flags" argument of diffreg() and files_differ())
 */
#define D_HEADER (1 << 0)	/* Print a header/footer between files */
#define D_EMPTY1 (1 << 1)	/* Treat first file as empty (/dev/null) */
#define D_EMPTY2 (1 << 2)	/* Treat second file as empty (/dev/null) */

/*
 * Status values accepted by print_status() and returned by diffreg():
 *	D_SAME		files are the same
 *	D_DIFFER	files differ
 *	D_BINARY	binary files differ
 *	D_COMMON	subdirectory common to both dirs
 *	D_ONLY		file only exists in one dir
 *	D_MISMATCH1	path1 a dir, path2 a file
 *	D_MISMATCH2	path1 a file, path2 a dir
 *	D_ERROR		error occurred
 *	D_SKIPPED1	skipped path1 as it is a special file
 *	D_SKIPPED2	skipped path2 as it is a special file
 */
#define D_SAME      0
#define D_DIFFER    (1 << 0)
#define D_BINARY    (1 << 1)
#define D_COMMON    (1 << 2)
#define D_ONLY      (1 << 3)
#define D_MISMATCH1 (1 << 4)
#define D_MISMATCH2 (1 << 5)
#define D_ERROR     (1 << 6)
#define D_SKIPPED1  (1 << 7)
#define D_SKIPPED2  (1 << 8)

/* Command line option bits, tested against option_mask32 */
#define FLAG_a (1 << 0)
#define FLAG_b (1 << 1)
#define FLAG_d (1 << 2)
#define FLAG_i (1 << 3)
#define FLAG_L (1 << 4)
#define FLAG_N (1 << 5)
#define FLAG_q (1 << 6)
#define FLAG_r (1 << 7)
#define FLAG_s (1 << 8)
#define FLAG_S (1 << 9)
#define FLAG_t (1 << 10)
#define FLAG_T (1 << 11)
#define FLAG_U (1 << 12)
#define FLAG_w (1 << 13)
67 | |
68 | /* XXX: FIXME: the following variables should be static, but gcc currently |
69 | * creates a much bigger object if we do this. [which version of gcc? --vda] */ |
70 | /* 4.x, IIRC also 3.x --bernhard */ |
71 | /* This is the default number of lines of context. */ |
72 | int context = 3; |
73 | int status; |
74 | char *start; |
75 | const char *label1; |
76 | const char *label2; |
77 | struct stat stb1, stb2; |
78 | char **dl; |
79 | USE_FEATURE_DIFF_DIR(int dl_count;) |
80 | |
81 | struct cand { |
82 | int x; |
83 | int y; |
84 | int pred; |
85 | }; |
86 | |
87 | struct line { |
88 | int serial; |
89 | int value; |
90 | } *file[2]; |
91 | |
92 | /* |
93 | * The following struct is used to record change information |
94 | * doing a "context" or "unified" diff. (see routine "change" to |
95 | * understand the highly mnemonic field names) |
96 | */ |
97 | struct context_vec { |
98 | int a; /* start line in old file */ |
99 | int b; /* end line in old file */ |
100 | int c; /* start line in new file */ |
101 | int d; /* end line in new file */ |
102 | }; |
103 | |
104 | static int *J; /* will be overlaid on class */ |
105 | static int *class; /* will be overlaid on file[0] */ |
106 | static int *klist; /* will be overlaid on file[0] after class */ |
107 | static int *member; /* will be overlaid on file[1] */ |
108 | static int clen; |
109 | static int len[2]; |
110 | static int pref, suff; /* length of prefix and suffix */ |
111 | static int slen[2]; |
112 | static smallint anychange; |
113 | static long *ixnew; /* will be overlaid on file[1] */ |
114 | static long *ixold; /* will be overlaid on klist */ |
115 | static struct cand *clist; /* merely a free storage pot for candidates */ |
116 | static int clistlen; /* the length of clist */ |
117 | static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ |
118 | static struct context_vec *context_vec_start; |
119 | static struct context_vec *context_vec_end; |
120 | static struct context_vec *context_vec_ptr; |
121 | |
122 | |
/*
 * Print an "Only in <dir>: <entry>" line.
 * At most dirlen bytes of path are shown; when dirlen is greater than 1,
 * one byte fewer is shown.
 * NOTE(review): presumably this lets callers pass a length that counts a
 * trailing '/' -- confirm against the callers.
 */
static void print_only(const char *path, size_t dirlen, const char *entry)
{
	int shown = (int) (dirlen > 1 ? dirlen - 1 : dirlen);

	printf("Only in %.*s: %s\n", shown, path, entry);
}
129 | |
130 | |
131 | static void print_status(int val, char *path1, char *path2, char *entry) |
132 | { |
133 | const char * const _entry = entry ? entry : ""; |
134 | char * const _path1 = entry ? concat_path_file(path1, _entry) : path1; |
135 | char * const _path2 = entry ? concat_path_file(path2, _entry) : path2; |
136 | |
137 | switch (val) { |
138 | case D_ONLY: |
139 | print_only(path1, strlen(path1), entry); |
140 | break; |
141 | case D_COMMON: |
142 | printf("Common subdirectories: %s and %s\n", _path1, _path2); |
143 | break; |
144 | case D_BINARY: |
145 | printf("Binary files %s and %s differ\n", _path1, _path2); |
146 | break; |
147 | case D_DIFFER: |
148 | if (option_mask32 & FLAG_q) |
149 | printf("Files %s and %s differ\n", _path1, _path2); |
150 | break; |
151 | case D_SAME: |
152 | if (option_mask32 & FLAG_s) |
153 | printf("Files %s and %s are identical\n", _path1, _path2); |
154 | break; |
155 | case D_MISMATCH1: |
156 | printf("File %s is a %s while file %s is a %s\n", |
157 | _path1, "directory", _path2, "regular file"); |
158 | break; |
159 | case D_MISMATCH2: |
160 | printf("File %s is a %s while file %s is a %s\n", |
161 | _path1, "regular file", _path2, "directory"); |
162 | break; |
163 | case D_SKIPPED1: |
164 | printf("File %s is not a regular file or directory and was skipped\n", |
165 | _path1); |
166 | break; |
167 | case D_SKIPPED2: |
168 | printf("File %s is not a regular file or directory and was skipped\n", |
169 | _path2); |
170 | break; |
171 | } |
172 | if (entry) { |
173 | free(_path1); |
174 | free(_path2); |
175 | } |
176 | } |
/*
 * Fold one character t into the running line hash *sum
 * (sum = sum * 127 + t).  The arithmetic is done in unsigned so that
 * wrap-around on long lines is well defined instead of signed overflow.
 */
static void fiddle_sum(int *sum, int t)
{
	*sum = (int) ((unsigned) *sum * 127u + (unsigned) t);
}
181 | /* |
182 | * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
183 | */ |
184 | static int readhash(FILE * f) |
185 | { |
186 | int i, t, space; |
187 | int sum; |
188 | |
189 | sum = 1; |
190 | space = 0; |
191 | if (!(option_mask32 & FLAG_b) && !(option_mask32 & FLAG_w)) { |
192 | for (i = 0; (t = getc(f)) != '\n'; i++) { |
193 | if (t == EOF) { |
194 | if (i == 0) |
195 | return 0; |
196 | break; |
197 | } |
198 | fiddle_sum(&sum, t); |
199 | } |
200 | } else { |
201 | for (i = 0;;) { |
202 | switch (t = getc(f)) { |
203 | case '\t': |
204 | case '\r': |
205 | case '\v': |
206 | case '\f': |
207 | case ' ': |
208 | space++; |
209 | continue; |
210 | default: |
211 | if (space && !(option_mask32 & FLAG_w)) { |
212 | i++; |
213 | space = 0; |
214 | } |
215 | fiddle_sum(&sum, t); |
216 | i++; |
217 | continue; |
218 | case EOF: |
219 | if (i == 0) |
220 | return 0; |
221 | /* FALLTHROUGH */ |
222 | case '\n': |
223 | break; |
224 | } |
225 | break; |
226 | } |
227 | } |
228 | /* |
229 | * There is a remote possibility that we end up with a zero sum. |
230 | * Zero is used as an EOF marker, so return 1 instead. |
231 | */ |
232 | return (sum == 0 ? 1 : sum); |
233 | } |
234 | |
235 | |
236 | /* |
237 | * Check to see if the given files differ. |
238 | * Returns 0 if they are the same, 1 if different, and -1 on error. |
239 | */ |
240 | static int files_differ(FILE * f1, FILE * f2, int flags) |
241 | { |
242 | size_t i, j; |
243 | |
244 | if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size || |
245 | (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) |
246 | return 1; |
247 | while (1) { |
248 | i = fread(bb_common_bufsiz1, 1, BUFSIZ/2, f1); |
249 | j = fread(bb_common_bufsiz1 + BUFSIZ/2, 1, BUFSIZ/2, f2); |
250 | if (i != j) |
251 | return 1; |
252 | if (i == 0 && j == 0) { |
253 | if (ferror(f1) || ferror(f2)) |
254 | return 1; |
255 | return 0; |
256 | } |
257 | if (memcmp(bb_common_bufsiz1, |
258 | bb_common_bufsiz1 + BUFSIZ/2, i) != 0) |
259 | return 1; |
260 | } |
261 | } |
262 | |
263 | |
264 | static void prepare(int i, FILE * fd, off_t filesize) |
265 | { |
266 | struct line *p; |
267 | int h; |
268 | size_t j, sz; |
269 | |
270 | rewind(fd); |
271 | |
272 | sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25; |
273 | if (sz < 100) |
274 | sz = 100; |
275 | |
276 | p = xmalloc((sz + 3) * sizeof(struct line)); |
277 | j = 0; |
278 | while ((h = readhash(fd))) { |
279 | if (j == sz) { |
280 | sz = sz * 3 / 2; |
281 | p = xrealloc(p, (sz + 3) * sizeof(struct line)); |
282 | } |
283 | p[++j].value = h; |
284 | } |
285 | len[i] = j; |
286 | file[i] = p; |
287 | } |
288 | |
289 | |
290 | static void prune(void) |
291 | { |
292 | int i, j; |
293 | |
294 | for (pref = 0; pref < len[0] && pref < len[1] && |
295 | file[0][pref + 1].value == file[1][pref + 1].value; pref++) |
296 | ; |
297 | for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && |
298 | file[0][len[0] - suff].value == file[1][len[1] - suff].value; |
299 | suff++) |
300 | ; |
301 | for (j = 0; j < 2; j++) { |
302 | sfile[j] = file[j] + pref; |
303 | slen[j] = len[j] - pref - suff; |
304 | for (i = 0; i <= slen[j]; i++) |
305 | sfile[j][i].serial = i; |
306 | } |
307 | } |
308 | |
309 | |
310 | static void equiv(struct line *a, int n, struct line *b, int m, int *c) |
311 | { |
312 | int i, j; |
313 | |
314 | i = j = 1; |
315 | while (i <= n && j <= m) { |
316 | if (a[i].value < b[j].value) |
317 | a[i++].value = 0; |
318 | else if (a[i].value == b[j].value) |
319 | a[i++].value = j; |
320 | else |
321 | j++; |
322 | } |
323 | while (i <= n) |
324 | a[i++].value = 0; |
325 | b[m + 1].value = 0; |
326 | j = 0; |
327 | while (++j <= m) { |
328 | c[j] = -b[j].serial; |
329 | while (b[j + 1].value == b[j].value) { |
330 | j++; |
331 | c[j] = b[j].serial; |
332 | } |
333 | } |
334 | c[j] = -1; |
335 | } |
336 | |
337 | |
/*
 * Approximate integer square root of n by Newton iteration, starting
 * from 1 and stopping as soon as two successive guesses differ by at
 * most 1.  Returns 0 for n == 0.
 */
static int isqrt(int n)
{
	int guess = 1;
	int prev;
	int delta;

	if (n == 0)
		return 0;

	for (;;) {
		prev = guess;
		guess = (n / prev + prev) / 2;
		delta = guess - prev;
		if (delta >= -1 && delta <= 1)
			break;
	}
	return guess;
}
354 | |
355 | |
356 | static int newcand(int x, int y, int pred) |
357 | { |
358 | struct cand *q; |
359 | |
360 | if (clen == clistlen) { |
361 | clistlen = clistlen * 11 / 10; |
362 | clist = xrealloc(clist, clistlen * sizeof(struct cand)); |
363 | } |
364 | q = clist + clen; |
365 | q->x = x; |
366 | q->y = y; |
367 | q->pred = pred; |
368 | return clen++; |
369 | } |
370 | |
371 | |
372 | static int search(int *c, int k, int y) |
373 | { |
374 | int i, j, l, t; |
375 | |
376 | if (clist[c[k]].y < y) /* quick look for typical case */ |
377 | return k + 1; |
378 | i = 0; |
379 | j = k + 1; |
380 | while (1) { |
381 | l = i + j; |
382 | if ((l >>= 1) <= i) |
383 | break; |
384 | t = clist[c[l]].y; |
385 | if (t > y) |
386 | j = l; |
387 | else if (t < y) |
388 | i = l; |
389 | else |
390 | return l; |
391 | } |
392 | return l + 1; |
393 | } |
394 | |
395 | |
396 | static int stone(int *a, int n, int *b, int *c) |
397 | { |
398 | int i, k, y, j, l; |
399 | int oldc, tc, oldl; |
400 | unsigned int numtries; |
401 | |
402 | #if ENABLE_FEATURE_DIFF_MINIMAL |
403 | const unsigned int bound = |
404 | (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n)); |
405 | #else |
406 | const unsigned int bound = MAX(256, isqrt(n)); |
407 | #endif |
408 | k = 0; |
409 | c[0] = newcand(0, 0, 0); |
410 | for (i = 1; i <= n; i++) { |
411 | j = a[i]; |
412 | if (j == 0) |
413 | continue; |
414 | y = -b[j]; |
415 | oldl = 0; |
416 | oldc = c[0]; |
417 | numtries = 0; |
418 | do { |
419 | if (y <= clist[oldc].y) |
420 | continue; |
421 | l = search(c, k, y); |
422 | if (l != oldl + 1) |
423 | oldc = c[l - 1]; |
424 | if (l <= k) { |
425 | if (clist[c[l]].y <= y) |
426 | continue; |
427 | tc = c[l]; |
428 | c[l] = newcand(i, y, oldc); |
429 | oldc = tc; |
430 | oldl = l; |
431 | numtries++; |
432 | } else { |
433 | c[l] = newcand(i, y, oldc); |
434 | k++; |
435 | break; |
436 | } |
437 | } while ((y = b[++j]) > 0 && numtries < bound); |
438 | } |
439 | return k; |
440 | } |
441 | |
442 | |
443 | static void unravel(int p) |
444 | { |
445 | struct cand *q; |
446 | int i; |
447 | |
448 | for (i = 0; i <= len[0]; i++) |
449 | J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0; |
450 | for (q = clist + p; q->y != 0; q = clist + q->pred) |
451 | J[q->x + pref] = q->y + pref; |
452 | } |
453 | |
454 | |
455 | static void unsort(struct line *f, int l, int *b) |
456 | { |
457 | int *a, i; |
458 | |
459 | a = xmalloc((l + 1) * sizeof(int)); |
460 | for (i = 1; i <= l; i++) |
461 | a[f[i].serial] = f[i].value; |
462 | for (i = 1; i <= l; i++) |
463 | b[i] = a[i]; |
464 | free(a); |
465 | } |
466 | |
467 | |
/*
 * Consume the rest of the current line of f, including the newline.
 * Returns the number of getc() calls made, i.e. the characters skipped
 * plus one for the terminating newline or EOF.
 */
static int skipline(FILE * f)
{
	int count = 1;
	int ch;

	for (;;) {
		ch = getc(f);
		if (ch == '\n' || ch == EOF)
			break;
		count++;
	}
	return count;
}
476 | |
477 | |
478 | /* |
479 | * Check does double duty: |
480 | * 1. ferret out any fortuitous correspondences due |
481 | * to confounding by hashing (which result in "jackpot") |
482 | * 2. collect random access indexes to the two files |
483 | */ |
484 | static void check(FILE * f1, FILE * f2) |
485 | { |
486 | int i, j, jackpot, c, d; |
487 | long ctold, ctnew; |
488 | |
489 | rewind(f1); |
490 | rewind(f2); |
491 | j = 1; |
492 | ixold[0] = ixnew[0] = 0; |
493 | jackpot = 0; |
494 | ctold = ctnew = 0; |
495 | for (i = 1; i <= len[0]; i++) { |
496 | if (J[i] == 0) { |
497 | ixold[i] = ctold += skipline(f1); |
498 | continue; |
499 | } |
500 | while (j < J[i]) { |
501 | ixnew[j] = ctnew += skipline(f2); |
502 | j++; |
503 | } |
504 | if ((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w) |
505 | || (option_mask32 & FLAG_i)) { |
506 | while (1) { |
507 | c = getc(f1); |
508 | d = getc(f2); |
509 | /* |
510 | * GNU diff ignores a missing newline |
511 | * in one file if bflag || wflag. |
512 | */ |
513 | if (((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)) && |
514 | ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) { |
515 | break; |
516 | } |
517 | ctold++; |
518 | ctnew++; |
519 | if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) { |
520 | do { |
521 | if (c == '\n') |
522 | break; |
523 | ctold++; |
524 | } while (isspace(c = getc(f1))); |
525 | do { |
526 | if (d == '\n') |
527 | break; |
528 | ctnew++; |
529 | } while (isspace(d = getc(f2))); |
530 | } else if (option_mask32 & FLAG_w) { |
531 | while (isspace(c) && c != '\n') { |
532 | c = getc(f1); |
533 | ctold++; |
534 | } |
535 | while (isspace(d) && d != '\n') { |
536 | d = getc(f2); |
537 | ctnew++; |
538 | } |
539 | } |
540 | if (c != d) { |
541 | jackpot++; |
542 | J[i] = 0; |
543 | if (c != '\n' && c != EOF) |
544 | ctold += skipline(f1); |
545 | if (d != '\n' && c != EOF) |
546 | ctnew += skipline(f2); |
547 | break; |
548 | } |
549 | if (c == '\n' || c == EOF) |
550 | break; |
551 | } |
552 | } else { |
553 | while (1) { |
554 | ctold++; |
555 | ctnew++; |
556 | if ((c = getc(f1)) != (d = getc(f2))) { |
557 | J[i] = 0; |
558 | if (c != '\n' && c != EOF) |
559 | ctold += skipline(f1); |
560 | if (d != '\n' && c != EOF) |
561 | ctnew += skipline(f2); |
562 | break; |
563 | } |
564 | if (c == '\n' || c == EOF) |
565 | break; |
566 | } |
567 | } |
568 | ixold[i] = ctold; |
569 | ixnew[j] = ctnew; |
570 | j++; |
571 | } |
572 | for (; j <= len[1]; j++) |
573 | ixnew[j] = ctnew += skipline(f2); |
574 | } |
575 | |
576 | |
577 | /* shellsort CACM #201 */ |
578 | static void sort(struct line *a, int n) |
579 | { |
580 | struct line *ai, *aim, w; |
581 | int j, m = 0, k; |
582 | |
583 | if (n == 0) |
584 | return; |
585 | for (j = 1; j <= n; j *= 2) |
586 | m = 2 * j - 1; |
587 | for (m /= 2; m != 0; m /= 2) { |
588 | k = n - m; |
589 | for (j = 1; j <= k; j++) { |
590 | for (ai = &a[j]; ai > a; ai -= m) { |
591 | aim = &ai[m]; |
592 | if (aim < ai) |
593 | break; /* wraparound */ |
594 | if (aim->value > ai[0].value || |
595 | (aim->value == ai[0].value && aim->serial > ai[0].serial)) |
596 | break; |
597 | w.value = ai[0].value; |
598 | ai[0].value = aim->value; |
599 | aim->value = w.value; |
600 | w.serial = ai[0].serial; |
601 | ai[0].serial = aim->serial; |
602 | aim->serial = w.serial; |
603 | } |
604 | } |
605 | } |
606 | } |
607 | |
608 | |
/*
 * Print the line range a..b the way a unified diff hunk header wants it:
 * "start,count" for several lines, a single number for one line, and
 * "b,0" for an empty range (a > b).
 */
static void uni_range(int a, int b)
{
	if (a == b) {
		printf("%d", b);
		return;
	}
	if (a < b) {
		printf("%d,%d", a, b - a + 1);
		return;
	}
	printf("%d,0", b);
}
618 | |
619 | |
620 | static void fetch(long *f, int a, int b, FILE * lb, int ch) |
621 | { |
622 | int i, j, c, lastc, col, nc; |
623 | |
624 | if (a > b) |
625 | return; |
626 | for (i = a; i <= b; i++) { |
627 | fseek(lb, f[i - 1], SEEK_SET); |
628 | nc = f[i] - f[i - 1]; |
629 | if (ch != '\0') { |
630 | putchar(ch); |
631 | if (option_mask32 & FLAG_T) |
632 | putchar('\t'); |
633 | } |
634 | col = 0; |
635 | for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { |
636 | if ((c = getc(lb)) == EOF) { |
637 | printf("\n\\ No newline at end of file\n"); |
638 | return; |
639 | } |
640 | if (c == '\t' && (option_mask32 & FLAG_t)) { |
641 | do { |
642 | putchar(' '); |
643 | } while (++col & 7); |
644 | } else { |
645 | putchar(c); |
646 | col++; |
647 | } |
648 | } |
649 | } |
650 | return; |
651 | } |
652 | |
653 | |
654 | static int asciifile(FILE * f) |
655 | { |
656 | #if ENABLE_FEATURE_DIFF_BINARY |
657 | int i, cnt; |
658 | #endif |
659 | |
660 | if ((option_mask32 & FLAG_a) || f == NULL) |
661 | return 1; |
662 | |
663 | #if ENABLE_FEATURE_DIFF_BINARY |
664 | rewind(f); |
665 | cnt = fread(bb_common_bufsiz1, 1, BUFSIZ, f); |
666 | for (i = 0; i < cnt; i++) { |
667 | if (!isprint(bb_common_bufsiz1[i]) |
668 | && !isspace(bb_common_bufsiz1[i])) { |
669 | return 0; |
670 | } |
671 | } |
672 | #endif |
673 | return 1; |
674 | } |
675 | |
676 | |
677 | /* dump accumulated "unified" diff changes */ |
678 | static void dump_unified_vec(FILE * f1, FILE * f2) |
679 | { |
680 | struct context_vec *cvp = context_vec_start; |
681 | int lowa, upb, lowc, upd; |
682 | int a, b, c, d; |
683 | char ch; |
684 | |
685 | if (context_vec_start > context_vec_ptr) |
686 | return; |
687 | |
688 | b = d = 0; /* gcc */ |
689 | lowa = MAX(1, cvp->a - context); |
690 | upb = MIN(len[0], context_vec_ptr->b + context); |
691 | lowc = MAX(1, cvp->c - context); |
692 | upd = MIN(len[1], context_vec_ptr->d + context); |
693 | |
694 | printf("@@ -"); |
695 | uni_range(lowa, upb); |
696 | printf(" +"); |
697 | uni_range(lowc, upd); |
698 | printf(" @@\n"); |
699 | |
700 | /* |
701 | * Output changes in "unified" diff format--the old and new lines |
702 | * are printed together. |
703 | */ |
704 | for (; cvp <= context_vec_ptr; cvp++) { |
705 | a = cvp->a; |
706 | b = cvp->b; |
707 | c = cvp->c; |
708 | d = cvp->d; |
709 | |
710 | /* |
711 | * c: both new and old changes |
712 | * d: only changes in the old file |
713 | * a: only changes in the new file |
714 | */ |
715 | if (a <= b && c <= d) |
716 | ch = 'c'; |
717 | else |
718 | ch = (a <= b) ? 'd' : 'a'; |
719 | if (ch == 'c' || ch == 'd') { |
720 | fetch(ixold, lowa, a - 1, f1, ' '); |
721 | fetch(ixold, a, b, f1, '-'); |
722 | } |
723 | if (ch == 'a') |
724 | fetch(ixnew, lowc, c - 1, f2, ' '); |
725 | if (ch == 'c' || ch == 'a') |
726 | fetch(ixnew, c, d, f2, '+'); |
727 | lowa = b + 1; |
728 | lowc = d + 1; |
729 | } |
730 | fetch(ixnew, d + 1, upd, f2, ' '); |
731 | |
732 | context_vec_ptr = context_vec_start - 1; |
733 | } |
734 | |
735 | |
736 | static void print_header(const char *file1, const char *file2) |
737 | { |
738 | if (label1) |
739 | printf("--- %s\n", label1); |
740 | else |
741 | printf("--- %s\t%s", file1, ctime(&stb1.st_mtime)); |
742 | if (label2) |
743 | printf("+++ %s\n", label2); |
744 | else |
745 | printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime)); |
746 | } |
747 | |
748 | |
749 | /* |
750 | * Indicate that there is a difference between lines a and b of the from file |
751 | * to get to lines c to d of the to file. If a is greater than b then there |
752 | * are no lines in the from file involved and this means that there were |
753 | * lines appended (beginning at b). If c is greater than d then there are |
754 | * lines missing from the to file. |
755 | */ |
756 | static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a, |
757 | int b, int c, int d) |
758 | { |
759 | static size_t max_context = 64; |
760 | |
761 | if ((a > b && c > d) || (option_mask32 & FLAG_q)) { |
762 | anychange = 1; |
763 | return; |
764 | } |
765 | |
766 | /* |
767 | * Allocate change records as needed. |
768 | */ |
769 | if (context_vec_ptr == context_vec_end - 1) { |
770 | ptrdiff_t offset = context_vec_ptr - context_vec_start; |
771 | |
772 | max_context <<= 1; |
773 | context_vec_start = xrealloc(context_vec_start, |
774 | max_context * sizeof(struct context_vec)); |
775 | context_vec_end = context_vec_start + max_context; |
776 | context_vec_ptr = context_vec_start + offset; |
777 | } |
778 | if (anychange == 0) { |
779 | /* |
780 | * Print the context/unidiff header first time through. |
781 | */ |
782 | print_header(file1, file2); |
783 | } else if (a > context_vec_ptr->b + (2 * context) + 1 && |
784 | c > context_vec_ptr->d + (2 * context) + 1) { |
785 | /* |
786 | * If this change is more than 'context' lines from the |
787 | * previous change, dump the record and reset it. |
788 | */ |
789 | dump_unified_vec(f1, f2); |
790 | } |
791 | context_vec_ptr++; |
792 | context_vec_ptr->a = a; |
793 | context_vec_ptr->b = b; |
794 | context_vec_ptr->c = c; |
795 | context_vec_ptr->d = d; |
796 | anychange = 1; |
797 | } |
798 | |
799 | |
800 | static void output(char *file1, FILE * f1, char *file2, FILE * f2) |
801 | { |
802 | /* Note that j0 and j1 can't be used as they are defined in math.h. |
803 | * This also allows the rather amusing variable 'j00'... */ |
804 | int m, i0, i1, j00, j01; |
805 | |
806 | rewind(f1); |
807 | rewind(f2); |
808 | m = len[0]; |
809 | J[0] = 0; |
810 | J[m + 1] = len[1] + 1; |
811 | for (i0 = 1; i0 <= m; i0 = i1 + 1) { |
812 | while (i0 <= m && J[i0] == J[i0 - 1] + 1) |
813 | i0++; |
814 | j00 = J[i0 - 1] + 1; |
815 | i1 = i0 - 1; |
816 | while (i1 < m && J[i1 + 1] == 0) |
817 | i1++; |
818 | j01 = J[i1 + 1] - 1; |
819 | J[i1] = j01; |
820 | change(file1, f1, file2, f2, i0, i1, j00, j01); |
821 | } |
822 | if (m == 0) { |
823 | change(file1, f1, file2, f2, 1, 0, 1, len[1]); |
824 | } |
825 | if (anychange != 0 && !(option_mask32 & FLAG_q)) { |
826 | dump_unified_vec(f1, f2); |
827 | } |
828 | } |
829 | |
830 | /* |
831 | * The following code uses an algorithm due to Harold Stone, |
832 | * which finds a pair of longest identical subsequences in |
833 | * the two files. |
834 | * |
835 | * The major goal is to generate the match vector J. |
836 | * J[i] is the index of the line in file1 corresponding |
837 | * to line i file0. J[i] = 0 if there is no |
838 | * such line in file1. |
839 | * |
840 | * Lines are hashed so as to work in core. All potential |
841 | * matches are located by sorting the lines of each file |
842 | * on the hash (called ``value''). In particular, this |
843 | * collects the equivalence classes in file1 together. |
844 | * Subroutine equiv replaces the value of each line in |
845 | * file0 by the index of the first element of its |
846 | * matching equivalence in (the reordered) file1. |
847 | * To save space equiv squeezes file1 into a single |
848 | * array member in which the equivalence classes |
849 | * are simply concatenated, except that their first |
850 | * members are flagged by changing sign. |
851 | * |
852 | * Next the indices that point into member are unsorted into |
853 | * array class according to the original order of file0. |
854 | * |
855 | * The cleverness lies in routine stone. This marches |
856 | * through the lines of file0, developing a vector klist |
857 | * of "k-candidates". At step i a k-candidate is a matched |
858 | * pair of lines x,y (x in file0 y in file1) such that |
859 | * there is a common subsequence of length k |
860 | * between the first i lines of file0 and the first y |
861 | * lines of file1, but there is no such subsequence for |
862 | * any smaller y. x is the earliest possible mate to y |
863 | * that occurs in such a subsequence. |
864 | * |
865 | * Whenever any of the members of the equivalence class of |
866 | * lines in file1 matable to a line in file0 has serial number |
867 | * less than the y of some k-candidate, that k-candidate |
868 | * with the smallest such y is replaced. The new |
869 | * k-candidate is chained (via pred) to the current |
870 | * k-1 candidate so that the actual subsequence can |
871 | * be recovered. When a member has serial number greater |
872 | * than the y of all k-candidates, the klist is extended. |
873 | * At the end, the longest subsequence is pulled out |
874 | * and placed in the array J by unravel |
875 | * |
876 | * With J in hand, the matches there recorded are |
877 | * checked against reality to assure that no spurious |
878 | * matches have crept in due to hashing. If they have, |
879 | * they are broken, and "jackpot" is recorded--a harmless |
880 | * matter except that a true match for a spuriously |
881 | * mated line may now be unnecessarily reported as a change. |
882 | * |
883 | * Much of the complexity of the program comes simply |
884 | * from trying to minimize core utilization and |
885 | * maximize the range of doable problems by dynamically |
886 | * allocating what is needed and reusing what is not. |
887 | * The core requirements for problems larger than somewhat |
888 | * are (in words) 2*length(file0) + length(file1) + |
889 | * 3*(number of k-candidates installed), typically about |
890 | * 6n words for files of length n. |
891 | */ |
892 | static unsigned diffreg(char * ofile1, char * ofile2, int flags) |
893 | { |
894 | char *file1 = ofile1; |
895 | char *file2 = ofile2; |
896 | FILE *f1 = stdin, *f2 = stdin; |
897 | unsigned rval; |
898 | int i; |
899 | |
900 | anychange = 0; |
901 | context_vec_ptr = context_vec_start - 1; |
902 | |
903 | if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) |
904 | return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); |
905 | |
906 | rval = D_SAME; |
907 | |
908 | if (LONE_DASH(file1) && LONE_DASH(file2)) |
909 | goto closem; |
910 | |
911 | if (flags & D_EMPTY1) |
912 | f1 = xfopen(bb_dev_null, "r"); |
913 | else if (NOT_LONE_DASH(file1)) |
914 | f1 = xfopen(file1, "r"); |
915 | if (flags & D_EMPTY2) |
916 | f2 = xfopen(bb_dev_null, "r"); |
917 | else if (NOT_LONE_DASH(file2)) |
918 | f2 = xfopen(file2, "r"); |
919 | |
920 | /* We can't diff non-seekable stream - we use rewind(), fseek(). |
921 | * This can be fixed (volunteers?). |
922 | * Meanwhile we should check it here by stat'ing input fds, |
923 | * but I am lazy and check that in main() instead. |
924 | * Check in main won't catch "diffing fifos buried in subdirectories" |
925 | * failure scenario - not very likely in real life... */ |
926 | |
927 | i = files_differ(f1, f2, flags); |
928 | if (i == 0) |
929 | goto closem; |
930 | else if (i != 1) { /* 1 == ok */ |
931 | /* error */ |
932 | status |= 2; |
933 | goto closem; |
934 | } |
935 | |
936 | if (!asciifile(f1) || !asciifile(f2)) { |
937 | rval = D_BINARY; |
938 | status |= 1; |
939 | goto closem; |
940 | } |
941 | |
942 | prepare(0, f1, stb1.st_size); |
943 | prepare(1, f2, stb2.st_size); |
944 | prune(); |
945 | sort(sfile[0], slen[0]); |
946 | sort(sfile[1], slen[1]); |
947 | |
948 | member = (int *) file[1]; |
949 | equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
950 | member = xrealloc(member, (slen[1] + 2) * sizeof(int)); |
951 | |
952 | class = (int *) file[0]; |
953 | unsort(sfile[0], slen[0], class); |
954 | class = xrealloc(class, (slen[0] + 2) * sizeof(int)); |
955 | |
956 | klist = xmalloc((slen[0] + 2) * sizeof(int)); |
957 | clen = 0; |
958 | clistlen = 100; |
959 | clist = xmalloc(clistlen * sizeof(struct cand)); |
960 | i = stone(class, slen[0], member, klist); |
961 | free(member); |
962 | free(class); |
963 | |
964 | J = xrealloc(J, (len[0] + 2) * sizeof(int)); |
965 | unravel(klist[i]); |
966 | free(clist); |
967 | free(klist); |
968 | |
969 | ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); |
970 | ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); |
971 | check(f1, f2); |
972 | output(file1, f1, file2, f2); |
973 | |
974 | closem: |
975 | if (anychange) { |
976 | status |= 1; |
977 | if (rval == D_SAME) |
978 | rval = D_DIFFER; |
979 | } |
980 | fclose_if_not_stdin(f1); |
981 | fclose_if_not_stdin(f2); |
982 | if (file1 != ofile1) |
983 | free(file1); |
984 | if (file2 != ofile2) |
985 | free(file2); |
986 | return rval; |
987 | } |
988 | |
989 | |
#if ENABLE_FEATURE_DIFF_DIR
/*
 * Compare dir1/path1 with dir2/path2 and print the result.
 * A side that cannot be stat()ed is treated as empty (D_EMPTY1/D_EMPTY2)
 * and takes its file type from the other side.  Two directories are only
 * reported as common subdirectories; special files are skipped.
 * The two path strings built here are released before returning when
 * FEATURE_CLEAN_UP is enabled.
 */
static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
{
	int flags = D_HEADER;
	int val;

	char *fullpath1 = concat_path_file(dir1, path1);
	char *fullpath2 = concat_path_file(dir2, path2);

	if (stat(fullpath1, &stb1) != 0) {
		flags |= D_EMPTY1;
		memset(&stb1, 0, sizeof(stb1));
		if (ENABLE_FEATURE_CLEAN_UP)
			free(fullpath1);
		fullpath1 = concat_path_file(dir1, path2);
	}
	if (stat(fullpath2, &stb2) != 0) {
		flags |= D_EMPTY2;
		memset(&stb2, 0, sizeof(stb2));
		stb2.st_mode = stb1.st_mode;
		if (ENABLE_FEATURE_CLEAN_UP)
			free(fullpath2);
		fullpath2 = concat_path_file(dir2, path1);
	}

	if (stb1.st_mode == 0)
		stb1.st_mode = stb2.st_mode;

	if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
		printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
		goto cleanup;
	}

	if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
		val = D_SKIPPED1;
	else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
		val = D_SKIPPED2;
	else
		val = diffreg(fullpath1, fullpath2, flags);

	print_status(val, fullpath1, fullpath2, NULL);

 cleanup:
	/* both exits used to leave the two path strings allocated */
	if (ENABLE_FEATURE_CLEAN_UP) {
		free(fullpath1);
		free(fullpath2);
	}
}
#endif
1033 | |
1034 | |
1035 | #if ENABLE_FEATURE_DIFF_DIR |
/* qsort() comparison callback: p1 and p2 point to char pointers, which
 * are ordered with strcmp(). */
static int dir_strcmp(const void *p1, const void *p2)
{
	char *const *lhs = p1;
	char *const *rhs = p2;

	return strcmp(*lhs, *rhs);
}
1040 | |
1041 | |
1042 | /* This function adds a filename to dl, the directory listing. */ |
1043 | static int add_to_dirlist(const char *filename, |
1044 | struct stat ATTRIBUTE_UNUSED * sb, void *userdata, |
1045 | int depth ATTRIBUTE_UNUSED) |
1046 | { |
1047 | dl_count++; |
1048 | dl = xrealloc(dl, dl_count * sizeof(char *)); |
1049 | dl[dl_count - 1] = xstrdup(filename); |
1050 | if (option_mask32 & FLAG_r) { |
1051 | int *pp = (int *) userdata; |
1052 | int path_len = *pp + 1; |
1053 | |
1054 | dl[dl_count - 1] = &(dl[dl_count - 1])[path_len]; |
1055 | } |
1056 | return TRUE; |
1057 | } |
1058 | |
1059 | |
1060 | /* This returns a sorted directory listing. */ |
1061 | static char **get_dir(char *path) |
1062 | { |
1063 | int i; |
1064 | char **retval; |
1065 | |
1066 | /* If -r has been set, then the recursive_action function will be |
1067 | * used. Unfortunately, this outputs the root directory along with |
1068 | * the recursed paths, so use void *userdata to specify the string |
1069 | * length of the root directory. It can then be removed in |
1070 | * add_to_dirlist. */ |
1071 | |
1072 | int path_len = strlen(path); |
1073 | void *userdata = &path_len; |
1074 | |
1075 | /* Reset dl_count - there's no need to free dl as xrealloc does |
1076 | * the job nicely. */ |
1077 | dl_count = 0; |
1078 | |
1079 | /* Now fill dl with a listing. */ |
1080 | if (option_mask32 & FLAG_r) |
1081 | recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, |
1082 | userdata, 0); |
1083 | else { |
1084 | DIR *dp; |
1085 | struct dirent *ep; |
1086 | |
1087 | dp = warn_opendir(path); |
1088 | while ((ep = readdir(dp))) { |
1089 | if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.')) |
1090 | continue; |
1091 | add_to_dirlist(ep->d_name, NULL, NULL, 0); |
1092 | } |
1093 | closedir(dp); |
1094 | } |
1095 | |
1096 | /* Sort dl alphabetically. */ |
1097 | qsort(dl, dl_count, sizeof(char *), dir_strcmp); |
1098 | |
1099 | /* Copy dl so that we can return it. */ |
1100 | retval = xmalloc(dl_count * sizeof(char *)); |
1101 | for (i = 0; i < dl_count; i++) |
1102 | retval[i] = xstrdup(dl[i]); |
1103 | |
1104 | return retval; |
1105 | } |
1106 | |
1107 | |
1108 | static void diffdir(char *p1, char *p2) |
1109 | { |
1110 | char **dirlist1, **dirlist2; |
1111 | char *dp1, *dp2; |
1112 | int dirlist1_count, dirlist2_count; |
1113 | int pos; |
1114 | |
1115 | /* Check for trailing slashes. */ |
1116 | |
1117 | dp1 = last_char_is(p1, '/'); |
1118 | if (dp1 != NULL) |
1119 | *dp1 = '\0'; |
1120 | dp2 = last_char_is(p2, '/'); |
1121 | if (dp2 != NULL) |
1122 | *dp2 = '\0'; |
1123 | |
1124 | /* Get directory listings for p1 and p2. */ |
1125 | |
1126 | dirlist1 = get_dir(p1); |
1127 | dirlist1_count = dl_count; |
1128 | dirlist1[dirlist1_count] = NULL; |
1129 | dirlist2 = get_dir(p2); |
1130 | dirlist2_count = dl_count; |
1131 | dirlist2[dirlist2_count] = NULL; |
1132 | |
1133 | /* If -S was set, find the starting point. */ |
1134 | if (start) { |
1135 | while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) |
1136 | dirlist1++; |
1137 | while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) |
1138 | dirlist2++; |
1139 | if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) |
1140 | bb_error_msg(bb_msg_invalid_arg, "NULL", "-S"); |
1141 | } |
1142 | |
1143 | /* Now that both dirlist1 and dirlist2 contain sorted directory |
1144 | * listings, we can start to go through dirlist1. If both listings |
1145 | * contain the same file, then do a normal diff. Otherwise, behaviour |
1146 | * is determined by whether the -N flag is set. */ |
1147 | while (*dirlist1 != NULL || *dirlist2 != NULL) { |
1148 | dp1 = *dirlist1; |
1149 | dp2 = *dirlist2; |
1150 | pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); |
1151 | if (pos == 0) { |
1152 | do_diff(p1, dp1, p2, dp2); |
1153 | dirlist1++; |
1154 | dirlist2++; |
1155 | } else if (pos < 0) { |
1156 | if (option_mask32 & FLAG_N) |
1157 | do_diff(p1, dp1, p2, NULL); |
1158 | else |
1159 | print_only(p1, strlen(p1) + 1, dp1); |
1160 | dirlist1++; |
1161 | } else { |
1162 | if (option_mask32 & FLAG_N) |
1163 | do_diff(p1, NULL, p2, dp2); |
1164 | else |
1165 | print_only(p2, strlen(p2) + 1, dp2); |
1166 | dirlist2++; |
1167 | } |
1168 | } |
1169 | } |
1170 | #endif |
1171 | |
1172 | |
1173 | int diff_main(int argc, char **argv) |
1174 | { |
1175 | smallint gotstdin = 0; |
1176 | char *U_opt; |
1177 | char *f1, *f2; |
1178 | llist_t *L_arg = NULL; |
1179 | |
1180 | /* exactly 2 params; collect multiple -L <label> */ |
1181 | opt_complementary = "=2:L::"; |
1182 | getopt32(argc, argv, "abdiL:NqrsS:tTU:wu" |
1183 | "p" /* ignored (for compatibility) */, |
1184 | &L_arg, &start, &U_opt); |
1185 | /*argc -= optind;*/ |
1186 | argv += optind; |
1187 | while (L_arg) { |
1188 | if (label1 && label2) |
1189 | bb_show_usage(); |
1190 | if (!label1) |
1191 | label1 = L_arg->data; |
1192 | else { /* then label2 is NULL */ |
1193 | label2 = label1; |
1194 | label1 = L_arg->data; |
1195 | } |
1196 | /* we leak L_arg here... */ |
1197 | L_arg = L_arg->link; |
1198 | } |
1199 | if (option_mask32 & FLAG_U) |
1200 | context = xatou_range(U_opt, 1, INT_MAX); |
1201 | |
1202 | /* |
1203 | * Do sanity checks, fill in stb1 and stb2 and call the appropriate |
1204 | * driver routine. Both drivers use the contents of stb1 and stb2. |
1205 | */ |
1206 | |
1207 | f1 = argv[0]; |
1208 | f2 = argv[1]; |
1209 | if (LONE_DASH(f1)) { |
1210 | fstat(STDIN_FILENO, &stb1); |
1211 | gotstdin = 1; |
1212 | } else |
1213 | xstat(f1, &stb1); |
1214 | if (LONE_DASH(f2)) { |
1215 | fstat(STDIN_FILENO, &stb2); |
1216 | gotstdin = 1; |
1217 | } else |
1218 | xstat(f2, &stb2); |
1219 | if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) |
1220 | bb_error_msg_and_die("can't compare - to a directory"); |
1221 | if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { |
1222 | #if ENABLE_FEATURE_DIFF_DIR |
1223 | diffdir(f1, f2); |
1224 | #else |
1225 | bb_error_msg_and_die("directory comparison not supported"); |
1226 | #endif |
1227 | } else { |
1228 | if (S_ISDIR(stb1.st_mode)) { |
1229 | f1 = concat_path_file(f1, f2); |
1230 | xstat(f1, &stb1); |
1231 | } |
1232 | if (S_ISDIR(stb2.st_mode)) { |
1233 | f2 = concat_path_file(f2, f1); |
1234 | xstat(f2, &stb2); |
1235 | } |
1236 | /* XXX: FIXME: */ |
1237 | /* We can't diff e.g. stdin supplied by a pipe - we use rewind(), fseek(). |
1238 | * This can be fixed (volunteers?) */ |
1239 | if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode)) |
1240 | bb_error_msg_and_die("can't diff non-seekable stream"); |
1241 | print_status(diffreg(f1, f2, 0), f1, f2, NULL); |
1242 | } |
1243 | return status; |
1244 | } |