2 |
/* |
/* |
3 |
* Mini diff implementation for busybox, adapted from OpenBSD diff. |
* Mini diff implementation for busybox, adapted from OpenBSD diff. |
4 |
* |
* |
5 |
|
* Copyright (C) 2010 by Matheus Izvekov <mizvekov@gmail.com> |
6 |
* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> |
* Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> |
7 |
* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> |
* Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> |
8 |
* |
* |
13 |
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
14 |
*/ |
*/ |
15 |
|
|
|
#include "libbb.h" |
|
|
|
|
|
// #define FSIZE_MAX 32768 |
|
|
|
|
|
/* NOINLINEs added to prevent gcc from merging too much into diffreg() |
|
|
* (it bites more than it can (efficiently) chew). */ |
|
|
|
|
|
/* |
|
|
* Output flags |
|
|
*/ |
|
|
enum { |
|
|
/* Print a header/footer between files */ |
|
|
/* D_HEADER = 1, - unused */ |
|
|
/* Treat file as empty (/dev/null) */ |
|
|
D_EMPTY1 = 2 * ENABLE_FEATURE_DIFF_DIR, |
|
|
D_EMPTY2 = 4 * ENABLE_FEATURE_DIFF_DIR, |
|
|
}; |
|
|
|
|
16 |
/* |
/* |
17 |
* Status values for print_status() and diffreg() return values |
* The following code uses an algorithm due to Harold Stone, |
18 |
* Guide: |
* which finds a pair of longest identical subsequences in |
19 |
* D_SAME - files are the same |
* the two files. |
20 |
* D_DIFFER - files differ |
* |
21 |
* D_BINARY - binary files differ |
* The major goal is to generate the match vector J. |
22 |
* D_COMMON - subdirectory common to both dirs |
* J[i] is the index of the line in file1 corresponding |
23 |
* D_ONLY - file only exists in one dir |
* to line i in file0. J[i] = 0 if there is no |
24 |
* D_ISDIR1 - path1 a dir, path2 a file |
* such line in file1. |
25 |
* D_ISDIR2 - path1 a file, path2 a dir |
* |
26 |
* D_ERROR - error occurred |
* Lines are hashed so as to work in core. All potential |
27 |
* D_SKIPPED1 - skipped path1 as it is a special file |
* matches are located by sorting the lines of each file |
28 |
* D_SKIPPED2 - skipped path2 as it is a special file |
* on the hash (called "value"). In particular, this |
29 |
|
* collects the equivalence classes in file1 together. |
30 |
|
* Subroutine equiv replaces the value of each line in |
31 |
|
* file0 by the index of the first element of its |
32 |
|
* matching equivalence in (the reordered) file1. |
33 |
|
* To save space equiv squeezes file1 into a single |
34 |
|
* array member in which the equivalence classes |
35 |
|
* are simply concatenated, except that their first |
36 |
|
* members are flagged by changing sign. |
37 |
|
* |
38 |
|
* Next the indices that point into member are unsorted into |
39 |
|
* array class according to the original order of file0. |
40 |
|
* |
41 |
|
* The cleverness lies in routine stone. This marches |
42 |
|
* through the lines of file0, developing a vector klist |
43 |
|
* of "k-candidates". At step i a k-candidate is a matched |
44 |
|
* pair of lines x,y (x in file0, y in file1) such that |
45 |
|
* there is a common subsequence of length k |
46 |
|
* between the first i lines of file0 and the first y |
47 |
|
* lines of file1, but there is no such subsequence for |
48 |
|
* any smaller y. x is the earliest possible mate to y |
49 |
|
* that occurs in such a subsequence. |
50 |
|
* |
51 |
|
* Whenever any of the members of the equivalence class of |
52 |
|
* lines in file1 matable to a line in file0 has serial number |
53 |
|
* less than the y of some k-candidate, that k-candidate |
54 |
|
* with the smallest such y is replaced. The new |
55 |
|
* k-candidate is chained (via pred) to the current |
56 |
|
* k-1 candidate so that the actual subsequence can |
57 |
|
* be recovered. When a member has serial number greater |
58 |
|
* that the y of all k-candidates, the klist is extended. |
59 |
|
* At the end, the longest subsequence is pulled out |
60 |
|
* and placed in the array J by unravel |
61 |
|
* |
62 |
|
* With J in hand, the matches there recorded are |
63 |
|
* checked against reality to assure that no spurious |
64 |
|
* matches have crept in due to hashing. If they have, |
65 |
|
* they are broken, and "jackpot" is recorded--a harmless |
66 |
|
* matter except that a true match for a spuriously |
67 |
|
* mated line may now be unnecessarily reported as a change. |
68 |
|
* |
69 |
|
* Much of the complexity of the program comes simply |
70 |
|
* from trying to minimize core utilization and |
71 |
|
* maximize the range of doable problems by dynamically |
72 |
|
* allocating what is needed and reusing what is not. |
73 |
|
* The core requirements for problems larger than somewhat |
74 |
|
* are (in words) 2*length(file0) + length(file1) + |
75 |
|
* 3*(number of k-candidates installed), typically about |
76 |
|
* 6n words for files of length n. |
77 |
*/ |
*/ |
|
#define D_SAME 0 |
|
|
#define D_DIFFER (1 << 0) |
|
|
#define D_BINARY (1 << 1) |
|
|
#define D_COMMON (1 << 2) |
|
|
/*#define D_ONLY (1 << 3) - unused */ |
|
|
#define D_ISDIR1 (1 << 4) |
|
|
#define D_ISDIR2 (1 << 5) |
|
|
#define D_ERROR (1 << 6) |
|
|
#define D_SKIPPED1 (1 << 7) |
|
|
#define D_SKIPPED2 (1 << 8) |
|
|
|
|
|
/* Command line options */ |
|
|
#define FLAG_a (1 << 0) |
|
|
#define FLAG_b (1 << 1) |
|
|
#define FLAG_d (1 << 2) |
|
|
#define FLAG_i (1 << 3) |
|
|
#define FLAG_L (1 << 4) |
|
|
#define FLAG_N (1 << 5) |
|
|
#define FLAG_q (1 << 6) |
|
|
#define FLAG_r (1 << 7) |
|
|
#define FLAG_s (1 << 8) |
|
|
#define FLAG_S (1 << 9) |
|
|
#define FLAG_t (1 << 10) |
|
|
#define FLAG_T (1 << 11) |
|
|
#define FLAG_U (1 << 12) |
|
|
#define FLAG_w (1 << 13) |
|
78 |
|
|
79 |
|
#include "libbb.h" |
80 |
|
|
81 |
struct cand { |
#if 0 |
82 |
int x; |
//#define dbg_error_msg(...) bb_error_msg(__VA_ARGS__) |
83 |
int y; |
#else |
84 |
int pred; |
#define dbg_error_msg(...) ((void)0) |
85 |
}; |
#endif |
86 |
|
|
87 |
struct line { |
enum { /* print_status() and diffreg() return values */ |
88 |
int serial; |
STATUS_SAME, /* files are the same */ |
89 |
int value; |
STATUS_DIFFER, /* files differ */ |
90 |
|
STATUS_BINARY, /* binary files differ */ |
91 |
}; |
}; |
92 |
|
|
93 |
/* |
enum { /* Commandline flags */ |
94 |
* The following struct is used to record change information |
FLAG_a, |
95 |
* doing a "context" or "unified" diff. (see routine "change" to |
FLAG_b, |
96 |
* understand the highly mnemonic field names) |
FLAG_d, |
97 |
*/ |
FLAG_i, |
98 |
struct context_vec { |
FLAG_L, /* never used, handled by getopt32 */ |
99 |
int a; /* start line in old file */ |
FLAG_N, |
100 |
int b; /* end line in old file */ |
FLAG_q, |
101 |
int c; /* start line in new file */ |
FLAG_r, |
102 |
int d; /* end line in new file */ |
FLAG_s, |
103 |
|
FLAG_S, /* never used, handled by getopt32 */ |
104 |
|
FLAG_t, |
105 |
|
FLAG_T, |
106 |
|
FLAG_U, /* never used, handled by getopt32 */ |
107 |
|
FLAG_w, |
108 |
|
FLAG_u, /* ignored, this is the default */ |
109 |
|
FLAG_p, /* not implemented */ |
110 |
|
FLAG_B, |
111 |
|
FLAG_E, /* not implemented */ |
112 |
}; |
}; |
113 |
|
#define FLAG(x) (1 << FLAG_##x) |
114 |
|
|
115 |
|
/* We cache file position to avoid excessive seeking */ |
116 |
#define g_read_buf bb_common_bufsiz1 |
typedef struct FILE_and_pos_t { |
117 |
|
FILE *ft_fp; |
118 |
|
off_t ft_pos; |
119 |
|
} FILE_and_pos_t; |
120 |
|
|
121 |
struct globals { |
struct globals { |
|
bool anychange; |
|
122 |
smallint exit_status; |
smallint exit_status; |
123 |
int opt_U_context; |
int opt_U_context; |
124 |
size_t max_context; /* size of context_vec_start */ |
char *label[2]; |
125 |
USE_FEATURE_DIFF_DIR(int dl_count;) |
struct stat stb[2]; |
|
USE_FEATURE_DIFF_DIR(char **dl;) |
|
|
char *opt_S_start; |
|
|
const char *label1; |
|
|
const char *label2; |
|
|
int *J; /* will be overlaid on class */ |
|
|
int clen; |
|
|
int pref, suff; /* length of prefix and suffix */ |
|
|
int nlen[2]; |
|
|
int slen[2]; |
|
|
int clistlen; /* the length of clist */ |
|
|
struct cand *clist; /* merely a free storage pot for candidates */ |
|
|
long *ixnew; /* will be overlaid on nfile[1] */ |
|
|
long *ixold; /* will be overlaid on klist */ |
|
|
struct line *nfile[2]; |
|
|
struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ |
|
|
struct context_vec *context_vec_start; |
|
|
struct context_vec *context_vec_end; |
|
|
struct context_vec *context_vec_ptr; |
|
|
char *tempname1, *tempname2; |
|
|
struct stat stb1, stb2; |
|
126 |
}; |
}; |
127 |
#define G (*ptr_to_globals) |
#define G (*ptr_to_globals) |
|
#define anychange (G.anychange ) |
|
128 |
#define exit_status (G.exit_status ) |
#define exit_status (G.exit_status ) |
129 |
#define opt_U_context (G.opt_U_context ) |
#define opt_U_context (G.opt_U_context ) |
130 |
#define max_context (G.max_context ) |
#define label (G.label ) |
131 |
#define dl_count (G.dl_count ) |
#define stb (G.stb ) |
|
#define dl (G.dl ) |
|
|
#define opt_S_start (G.opt_S_start ) |
|
|
#define label1 (G.label1 ) |
|
|
#define label2 (G.label2 ) |
|
|
#define J (G.J ) |
|
|
#define clen (G.clen ) |
|
|
#define pref (G.pref ) |
|
|
#define suff (G.suff ) |
|
|
#define nlen (G.nlen ) |
|
|
#define slen (G.slen ) |
|
|
#define clistlen (G.clistlen ) |
|
|
#define clist (G.clist ) |
|
|
#define ixnew (G.ixnew ) |
|
|
#define ixold (G.ixold ) |
|
|
#define nfile (G.nfile ) |
|
|
#define sfile (G.sfile ) |
|
|
#define context_vec_start (G.context_vec_start ) |
|
|
#define context_vec_end (G.context_vec_end ) |
|
|
#define context_vec_ptr (G.context_vec_ptr ) |
|
|
#define stb1 (G.stb1 ) |
|
|
#define stb2 (G.stb2 ) |
|
|
#define tempname1 (G.tempname1 ) |
|
|
#define tempname2 (G.tempname2 ) |
|
132 |
#define INIT_G() do { \ |
#define INIT_G() do { \ |
133 |
SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ |
SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ |
134 |
opt_U_context = 3; \ |
opt_U_context = 3; \ |
|
max_context = 64; \ |
|
135 |
} while (0) |
} while (0) |
136 |
|
|
137 |
|
typedef int token_t; |
138 |
|
|
139 |
#if ENABLE_FEATURE_DIFF_DIR |
enum { |
140 |
static void print_only(const char *path, const char *entry) |
/* Public */ |
141 |
{ |
TOK_EMPTY = 1 << 9, /* Line fully processed, you can proceed to the next */ |
142 |
printf("Only in %s: %s\n", path, entry); |
TOK_EOF = 1 << 10, /* File ended */ |
143 |
} |
/* Private (Only to be used by read_token() */ |
144 |
#endif |
TOK_EOL = 1 << 11, /* we saw EOL (sticky) */ |
145 |
|
TOK_SPACE = 1 << 12, /* used -b code, means we are skipping spaces */ |
146 |
|
SHIFT_EOF = (sizeof(token_t)*8 - 8) - 1, |
147 |
|
CHAR_MASK = 0x1ff, /* 8th bit is used to distinguish EOF from 0xff */ |
148 |
|
}; |
149 |
|
|
150 |
|
/* Restores full EOF from one 8th bit: */ |
151 |
|
//#define TOK2CHAR(t) (((t) << SHIFT_EOF) >> SHIFT_EOF) |
152 |
|
/* We don't really need the above, we only need to have EOF != any_real_char: */ |
153 |
|
#define TOK2CHAR(t) ((t) & CHAR_MASK) |
154 |
|
|
155 |
static void print_status(int val, char *_path1, char *_path2) |
static void seek_ft(FILE_and_pos_t *ft, off_t pos) |
156 |
{ |
{ |
157 |
/*const char *const _entry = entry ? entry : "";*/ |
if (ft->ft_pos != pos) { |
158 |
/*char *const _path1 = entry ? concat_path_file(path1, _entry) : path1;*/ |
ft->ft_pos = pos; |
159 |
/*char *const _path2 = entry ? concat_path_file(path2, _entry) : path2;*/ |
fseeko(ft->ft_fp, pos, SEEK_SET); |
|
|
|
|
switch (val) { |
|
|
/* case D_ONLY: |
|
|
print_only(path1, entry); |
|
|
break; |
|
|
*/ |
|
|
case D_COMMON: |
|
|
printf("Common subdirectories: %s and %s\n", _path1, _path2); |
|
|
break; |
|
|
case D_BINARY: |
|
|
printf("Binary files %s and %s differ\n", _path1, _path2); |
|
|
break; |
|
|
case D_DIFFER: |
|
|
if (option_mask32 & FLAG_q) |
|
|
printf("Files %s and %s differ\n", _path1, _path2); |
|
|
break; |
|
|
case D_SAME: |
|
|
if (option_mask32 & FLAG_s) |
|
|
printf("Files %s and %s are identical\n", _path1, _path2); |
|
|
break; |
|
|
case D_ISDIR1: |
|
|
printf("File %s is a %s while file %s is a %s\n", |
|
|
_path1, "directory", _path2, "regular file"); |
|
|
break; |
|
|
case D_ISDIR2: |
|
|
printf("File %s is a %s while file %s is a %s\n", |
|
|
_path1, "regular file", _path2, "directory"); |
|
|
break; |
|
|
case D_SKIPPED1: |
|
|
printf("File %s is not a regular file or directory and was skipped\n", |
|
|
_path1); |
|
|
break; |
|
|
case D_SKIPPED2: |
|
|
printf("File %s is not a regular file or directory and was skipped\n", |
|
|
_path2); |
|
|
break; |
|
|
} |
|
|
/* |
|
|
if (entry) { |
|
|
free(_path1); |
|
|
free(_path2); |
|
160 |
} |
} |
|
*/ |
|
161 |
} |
} |
162 |
|
|
163 |
|
/* Reads tokens from given fp, handling -b and -w flags |
164 |
/* Read line, return its nonzero hash. Return 0 if EOF. |
* The user must reset tok every line start |
|
* |
|
|
* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
|
165 |
*/ |
*/ |
166 |
static ALWAYS_INLINE int fiddle_sum(int sum, int t) |
static int read_token(FILE_and_pos_t *ft, token_t tok) |
|
{ |
|
|
return sum * 127 + t; |
|
|
} |
|
|
static int readhash(FILE *fp) |
|
167 |
{ |
{ |
168 |
int i, t, space; |
tok |= TOK_EMPTY; |
169 |
int sum; |
while (!(tok & TOK_EOL)) { |
170 |
|
bool is_space; |
171 |
|
int t; |
172 |
|
|
173 |
|
t = fgetc(ft->ft_fp); |
174 |
|
if (t != EOF) |
175 |
|
ft->ft_pos++; |
176 |
|
is_space = (t == EOF || isspace(t)); |
177 |
|
|
178 |
|
/* If t == EOF (-1), set both TOK_EOF and TOK_EOL */ |
179 |
|
tok |= (t & (TOK_EOF + TOK_EOL)); |
180 |
|
/* Only EOL? */ |
181 |
|
if (t == '\n') |
182 |
|
tok |= TOK_EOL; |
183 |
|
|
184 |
sum = 1; |
if (option_mask32 & FLAG(i)) /* Handcoded tolower() */ |
185 |
space = 0; |
t = (t >= 'A' && t <= 'Z') ? t - ('A' - 'a') : t; |
|
i = 0; |
|
|
if (!(option_mask32 & (FLAG_b | FLAG_w))) { |
|
|
while ((t = getc(fp)) != '\n') { |
|
|
if (t == EOF) { |
|
|
if (i == 0) |
|
|
return 0; |
|
|
break; |
|
|
} |
|
|
sum = fiddle_sum(sum, t); |
|
|
i = 1; |
|
|
} |
|
|
} else { |
|
|
while (1) { |
|
|
switch (t = getc(fp)) { |
|
|
case '\t': |
|
|
case '\r': |
|
|
case '\v': |
|
|
case '\f': |
|
|
case ' ': |
|
|
space = 1; |
|
|
continue; |
|
|
default: |
|
|
if (space && !(option_mask32 & FLAG_w)) { |
|
|
i = 1; |
|
|
space = 0; |
|
|
} |
|
|
sum = fiddle_sum(sum, t); |
|
|
i = 1; |
|
|
continue; |
|
|
case EOF: |
|
|
if (i == 0) |
|
|
return 0; |
|
|
/* FALLTHROUGH */ |
|
|
case '\n': |
|
|
break; |
|
|
} |
|
|
break; |
|
|
} |
|
|
} |
|
|
/* |
|
|
* There is a remote possibility that we end up with a zero sum. |
|
|
* Zero is used as an EOF marker, so return 1 instead. |
|
|
*/ |
|
|
return (sum == 0 ? 1 : sum); |
|
|
} |
|
186 |
|
|
187 |
|
if ((option_mask32 & FLAG(w)) && is_space) |
188 |
|
continue; |
189 |
|
|
190 |
/* Our diff implementation is using seek. |
/* Trim char value to low 9 bits */ |
191 |
* When we meet non-seekable file, we must make a temp copy. |
t &= CHAR_MASK; |
|
*/ |
|
|
static char *make_temp(FILE *f, struct stat *sb) |
|
|
{ |
|
|
char *name; |
|
|
int fd; |
|
192 |
|
|
193 |
if (S_ISREG(sb->st_mode) || S_ISBLK(sb->st_mode)) |
if (option_mask32 & FLAG(b)) { |
194 |
return NULL; |
/* Was prev char whitespace? */ |
195 |
name = xstrdup("/tmp/difXXXXXX"); |
if (tok & TOK_SPACE) { /* yes */ |
196 |
fd = mkstemp(name); |
if (is_space) /* this one too, ignore it */ |
197 |
if (fd < 0) |
continue; |
198 |
bb_perror_msg_and_die("mkstemp"); |
tok &= ~TOK_SPACE; |
199 |
if (bb_copyfd_eof(fileno(f), fd) < 0) { |
} else if (is_space) { |
200 |
clean_up: |
/* 1st whitespace char. |
201 |
unlink(name); |
* Set TOK_SPACE and replace char by ' ' */ |
202 |
xfunc_die(); /* error message is printed by bb_copyfd_eof */ |
t = TOK_SPACE + ' '; |
203 |
} |
} |
204 |
fstat(fd, sb); |
} |
205 |
close(fd); |
/* Clear EMPTY */ |
206 |
if (freopen(name, "r+", f) == NULL) { |
tok &= ~(TOK_EMPTY + CHAR_MASK); |
207 |
bb_perror_msg("freopen"); |
/* Assign char value (low 9 bits) and maybe set TOK_SPACE */ |
208 |
goto clean_up; |
tok |= t; |
209 |
|
break; |
210 |
} |
} |
211 |
return name; |
#if 0 |
212 |
|
bb_error_msg("fp:%p tok:%x '%c'%s%s%s%s", fp, tok, tok & 0xff |
213 |
|
, tok & TOK_EOF ? " EOF" : "" |
214 |
|
, tok & TOK_EOL ? " EOL" : "" |
215 |
|
, tok & TOK_EMPTY ? " EMPTY" : "" |
216 |
|
, tok & TOK_SPACE ? " SPACE" : "" |
217 |
|
); |
218 |
|
#endif |
219 |
|
return tok; |
220 |
} |
} |
221 |
|
|
222 |
|
struct cand { |
223 |
|
int x; |
224 |
|
int y; |
225 |
|
int pred; |
226 |
|
}; |
227 |
|
|
228 |
/* |
static int search(const int *c, int k, int y, const struct cand *list) |
|
* Check to see if the given files differ. |
|
|
* Returns 0 if they are the same, 1 if different, and -1 on error. |
|
|
*/ |
|
|
static NOINLINE int files_differ(FILE *f1, FILE *f2) |
|
229 |
{ |
{ |
230 |
size_t i, j; |
int i, j; |
231 |
|
|
232 |
/* Prevent making copies for "/dev/null" (too common) */ |
if (list[c[k]].y < y) /* quick look for typical case */ |
233 |
/* Deal with input from pipes etc */ |
return k + 1; |
234 |
tempname1 = make_temp(f1, &stb1); |
|
235 |
tempname2 = make_temp(f2, &stb2); |
for (i = 0, j = k + 1;;) { |
236 |
if (stb1.st_size != stb2.st_size) { |
const int l = (i + j) >> 1; |
237 |
return 1; |
if (l > i) { |
238 |
} |
const int t = list[c[l]].y; |
239 |
while (1) { |
if (t > y) |
240 |
i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1); |
j = l; |
241 |
j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2); |
else if (t < y) |
242 |
if (i != j) |
i = l; |
243 |
return 1; |
else |
244 |
if (i == 0) |
return l; |
245 |
return (ferror(f1) || ferror(f2)) ? -1 : 0; |
} else |
246 |
if (memcmp(g_read_buf, |
return l + 1; |
|
g_read_buf + COMMON_BUFSIZE/2, i) != 0) |
|
|
return 1; |
|
247 |
} |
} |
248 |
} |
} |
249 |
|
|
250 |
|
static unsigned isqrt(unsigned n) |
|
static void prepare(int i, FILE *fp /*, off_t filesize*/) |
|
251 |
{ |
{ |
252 |
struct line *p; |
unsigned x = 1; |
253 |
int h; |
while (1) { |
254 |
size_t j, sz; |
const unsigned y = x; |
255 |
|
x = ((n / x) + x) >> 1; |
256 |
rewind(fp); |
if (x <= (y + 1) && x >= (y - 1)) |
257 |
|
return x; |
|
/*sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;*/ |
|
|
/*if (sz < 100)*/ |
|
|
sz = 100; |
|
|
|
|
|
p = xmalloc((sz + 3) * sizeof(p[0])); |
|
|
j = 0; |
|
|
while ((h = readhash(fp)) != 0) { /* while not EOF */ |
|
|
if (j == sz) { |
|
|
sz = sz * 3 / 2; |
|
|
p = xrealloc(p, (sz + 3) * sizeof(p[0])); |
|
|
} |
|
|
p[++j].value = h; |
|
258 |
} |
} |
|
nlen[i] = j; |
|
|
nfile[i] = p; |
|
259 |
} |
} |
260 |
|
|
261 |
|
static void stone(const int *a, int n, const int *b, int *J, int pref) |
262 |
static void prune(void) |
{ |
263 |
{ |
const unsigned isq = isqrt(n); |
264 |
int i, j; |
const unsigned bound = |
265 |
|
(option_mask32 & FLAG(d)) ? UINT_MAX : MAX(256, isq); |
266 |
for (pref = 0; pref < nlen[0] && pref < nlen[1] && |
int clen = 1; |
267 |
nfile[0][pref + 1].value == nfile[1][pref + 1].value; pref++) |
int clistlen = 100; |
268 |
continue; |
int k = 0; |
269 |
for (suff = 0; suff < nlen[0] - pref && suff < nlen[1] - pref && |
struct cand *clist = xzalloc(clistlen * sizeof(clist[0])); |
270 |
nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value; |
struct cand cand; |
271 |
suff++) |
struct cand *q; |
272 |
continue; |
int *klist = xzalloc((n + 2) * sizeof(klist[0])); |
273 |
for (j = 0; j < 2; j++) { |
/*clist[0] = (struct cand){0}; - xzalloc did it */ |
274 |
sfile[j] = nfile[j] + pref; |
/*klist[0] = 0; */ |
275 |
slen[j] = nlen[j] - pref - suff; |
|
276 |
for (i = 0; i <= slen[j]; i++) |
for (cand.x = 1; cand.x <= n; cand.x++) { |
277 |
sfile[j][i].serial = i; |
int j = a[cand.x], oldl = 0; |
278 |
|
unsigned numtries = 0; |
279 |
|
if (j == 0) |
280 |
|
continue; |
281 |
|
cand.y = -b[j]; |
282 |
|
cand.pred = klist[0]; |
283 |
|
do { |
284 |
|
int l, tc; |
285 |
|
if (cand.y <= clist[cand.pred].y) |
286 |
|
continue; |
287 |
|
l = search(klist, k, cand.y, clist); |
288 |
|
if (l != oldl + 1) |
289 |
|
cand.pred = klist[l - 1]; |
290 |
|
if (l <= k && clist[klist[l]].y <= cand.y) |
291 |
|
continue; |
292 |
|
if (clen == clistlen) { |
293 |
|
clistlen = clistlen * 11 / 10; |
294 |
|
clist = xrealloc(clist, clistlen * sizeof(clist[0])); |
295 |
|
} |
296 |
|
clist[clen] = cand; |
297 |
|
tc = klist[l]; |
298 |
|
klist[l] = clen++; |
299 |
|
if (l <= k) { |
300 |
|
cand.pred = tc; |
301 |
|
oldl = l; |
302 |
|
numtries++; |
303 |
|
} else { |
304 |
|
k++; |
305 |
|
break; |
306 |
|
} |
307 |
|
} while ((cand.y = b[++j]) > 0 && numtries < bound); |
308 |
} |
} |
309 |
|
/* Unravel */ |
310 |
|
for (q = clist + klist[k]; q->y; q = clist + q->pred) |
311 |
|
J[q->x + pref] = q->y + pref; |
312 |
|
free(klist); |
313 |
|
free(clist); |
314 |
} |
} |
315 |
|
|
316 |
|
struct line { |
317 |
|
/* 'serial' is not used in the begining, so we reuse it |
318 |
|
* to store line offsets, thus reducing memory pressure |
319 |
|
*/ |
320 |
|
union { |
321 |
|
unsigned serial; |
322 |
|
off_t offset; |
323 |
|
}; |
324 |
|
unsigned value; |
325 |
|
}; |
326 |
|
|
327 |
static void equiv(struct line *a, int n, struct line *b, int m, int *c) |
static void equiv(struct line *a, int n, struct line *b, int m, int *c) |
328 |
{ |
{ |
329 |
int i, j; |
int i = 1, j = 1; |
330 |
|
|
|
i = j = 1; |
|
331 |
while (i <= n && j <= m) { |
while (i <= n && j <= m) { |
332 |
if (a[i].value < b[j].value) |
if (a[i].value < b[j].value) |
333 |
a[i++].value = 0; |
a[i++].value = 0; |
350 |
c[j] = -1; |
c[j] = -1; |
351 |
} |
} |
352 |
|
|
353 |
|
static void unsort(const struct line *f, int l, int *b) |
|
static int isqrt(int n) |
|
|
{ |
|
|
int y, x; |
|
|
|
|
|
if (n == 0) |
|
|
return 0; |
|
|
x = 1; |
|
|
do { |
|
|
y = x; |
|
|
x = n / x; |
|
|
x += y; |
|
|
x /= 2; |
|
|
} while ((x - y) > 1 || (x - y) < -1); |
|
|
|
|
|
return x; |
|
|
} |
|
|
|
|
|
|
|
|
static int newcand(int x, int y, int pred) |
|
|
{ |
|
|
struct cand *q; |
|
|
|
|
|
if (clen == clistlen) { |
|
|
clistlen = clistlen * 11 / 10; |
|
|
clist = xrealloc(clist, clistlen * sizeof(struct cand)); |
|
|
} |
|
|
q = clist + clen; |
|
|
q->x = x; |
|
|
q->y = y; |
|
|
q->pred = pred; |
|
|
return clen++; |
|
|
} |
|
|
|
|
|
|
|
|
static int search(int *c, int k, int y) |
|
354 |
{ |
{ |
|
int i, j, l, t; |
|
|
|
|
|
if (clist[c[k]].y < y) /* quick look for typical case */ |
|
|
return k + 1; |
|
|
i = 0; |
|
|
j = k + 1; |
|
|
while (1) { |
|
|
l = i + j; |
|
|
if ((l >>= 1) <= i) |
|
|
break; |
|
|
t = clist[c[l]].y; |
|
|
if (t > y) |
|
|
j = l; |
|
|
else if (t < y) |
|
|
i = l; |
|
|
else |
|
|
return l; |
|
|
} |
|
|
return l + 1; |
|
|
} |
|
|
|
|
|
|
|
|
static int stone(int *a, int n, int *b, int *c) |
|
|
{ |
|
|
int i, k, y, j, l; |
|
|
int oldc, tc, oldl; |
|
|
unsigned int numtries; |
|
|
#if ENABLE_FEATURE_DIFF_MINIMAL |
|
|
const unsigned int bound = |
|
|
(option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n)); |
|
|
#else |
|
|
const unsigned int bound = MAX(256, isqrt(n)); |
|
|
#endif |
|
|
|
|
|
k = 0; |
|
|
c[0] = newcand(0, 0, 0); |
|
|
for (i = 1; i <= n; i++) { |
|
|
j = a[i]; |
|
|
if (j == 0) |
|
|
continue; |
|
|
y = -b[j]; |
|
|
oldl = 0; |
|
|
oldc = c[0]; |
|
|
numtries = 0; |
|
|
do { |
|
|
if (y <= clist[oldc].y) |
|
|
continue; |
|
|
l = search(c, k, y); |
|
|
if (l != oldl + 1) |
|
|
oldc = c[l - 1]; |
|
|
if (l <= k) { |
|
|
if (clist[c[l]].y <= y) |
|
|
continue; |
|
|
tc = c[l]; |
|
|
c[l] = newcand(i, y, oldc); |
|
|
oldc = tc; |
|
|
oldl = l; |
|
|
numtries++; |
|
|
} else { |
|
|
c[l] = newcand(i, y, oldc); |
|
|
k++; |
|
|
break; |
|
|
} |
|
|
} while ((y = b[++j]) > 0 && numtries < bound); |
|
|
} |
|
|
return k; |
|
|
} |
|
|
|
|
|
|
|
|
static void unravel(int p) |
|
|
{ |
|
|
struct cand *q; |
|
355 |
int i; |
int i; |
356 |
|
int *a = xmalloc((l + 1) * sizeof(a[0])); |
|
for (i = 0; i <= nlen[0]; i++) |
|
|
J[i] = i <= pref ? i : i > nlen[0] - suff ? i + nlen[1] - nlen[0] : 0; |
|
|
for (q = clist + p; q->y != 0; q = clist + q->pred) |
|
|
J[q->x + pref] = q->y + pref; |
|
|
} |
|
|
|
|
|
|
|
|
static void unsort(struct line *f, int l, int *b) |
|
|
{ |
|
|
int *a, i; |
|
|
|
|
|
a = xmalloc((l + 1) * sizeof(int)); |
|
357 |
for (i = 1; i <= l; i++) |
for (i = 1; i <= l; i++) |
358 |
a[f[i].serial] = f[i].value; |
a[f[i].serial] = f[i].value; |
359 |
for (i = 1; i <= l; i++) |
for (i = 1; i <= l; i++) |
361 |
free(a); |
free(a); |
362 |
} |
} |
363 |
|
|
364 |
|
static int line_compar(const void *a, const void *b) |
|
static int skipline(FILE *f) |
|
365 |
{ |
{ |
366 |
int i, c; |
#define l0 ((const struct line*)a) |
367 |
|
#define l1 ((const struct line*)b) |
368 |
for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) |
int r = l0->value - l1->value; |
369 |
continue; |
if (r) |
370 |
return i; |
return r; |
371 |
|
return l0->serial - l1->serial; |
372 |
|
#undef l0 |
373 |
|
#undef l1 |
374 |
} |
} |
375 |
|
|
376 |
|
static void fetch(FILE_and_pos_t *ft, const off_t *ix, int a, int b, int ch) |
|
/* |
|
|
* Check does double duty: |
|
|
* 1. ferret out any fortuitous correspondences due |
|
|
* to confounding by hashing (which result in "jackpot") |
|
|
* 2. collect random access indexes to the two files |
|
|
*/ |
|
|
static NOINLINE void check(FILE *f1, FILE *f2) |
|
377 |
{ |
{ |
378 |
int i, j, jackpot, c, d; |
int i, j, col; |
|
long ctold, ctnew; |
|
|
|
|
|
rewind(f1); |
|
|
rewind(f2); |
|
|
j = 1; |
|
|
ixold[0] = ixnew[0] = 0; |
|
|
jackpot = 0; |
|
|
ctold = ctnew = 0; |
|
|
for (i = 1; i <= nlen[0]; i++) { |
|
|
if (J[i] == 0) { |
|
|
ixold[i] = ctold += skipline(f1); |
|
|
continue; |
|
|
} |
|
|
while (j < J[i]) { |
|
|
ixnew[j] = ctnew += skipline(f2); |
|
|
j++; |
|
|
} |
|
|
if (option_mask32 & (FLAG_b | FLAG_w | FLAG_i)) { |
|
|
while (1) { |
|
|
c = getc(f1); |
|
|
d = getc(f2); |
|
|
/* |
|
|
* GNU diff ignores a missing newline |
|
|
* in one file if bflag || wflag. |
|
|
*/ |
|
|
if ((option_mask32 & (FLAG_b | FLAG_w)) |
|
|
&& ((c == EOF && d == '\n') || (c == '\n' && d == EOF)) |
|
|
) { |
|
|
break; |
|
|
} |
|
|
ctold++; |
|
|
ctnew++; |
|
|
if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) { |
|
|
do { |
|
|
if (c == '\n') |
|
|
break; |
|
|
ctold++; |
|
|
c = getc(f1); |
|
|
} while (isspace(c)); |
|
|
do { |
|
|
if (d == '\n') |
|
|
break; |
|
|
ctnew++; |
|
|
d = getc(f2); |
|
|
} while (isspace(d)); |
|
|
} else if (option_mask32 & FLAG_w) { |
|
|
while (isspace(c) && c != '\n') { |
|
|
c = getc(f1); |
|
|
ctold++; |
|
|
} |
|
|
while (isspace(d) && d != '\n') { |
|
|
d = getc(f2); |
|
|
ctnew++; |
|
|
} |
|
|
} |
|
|
if (c != d) { |
|
|
jackpot++; |
|
|
J[i] = 0; |
|
|
if (c != '\n' && c != EOF) |
|
|
ctold += skipline(f1); |
|
|
if (d != '\n' && c != EOF) |
|
|
ctnew += skipline(f2); |
|
|
break; |
|
|
} |
|
|
if (c == '\n' || c == EOF) |
|
|
break; |
|
|
} |
|
|
} else { |
|
|
while (1) { |
|
|
ctold++; |
|
|
ctnew++; |
|
|
c = getc(f1); |
|
|
d = getc(f2); |
|
|
if (c != d) { |
|
|
J[i] = 0; |
|
|
if (c != '\n' && c != EOF) |
|
|
ctold += skipline(f1); |
|
|
/* was buggy? "if (d != '\n' && c != EOF)" */ |
|
|
if (d != '\n' && d != EOF) |
|
|
ctnew += skipline(f2); |
|
|
break; |
|
|
} |
|
|
if (c == '\n' || c == EOF) |
|
|
break; |
|
|
} |
|
|
} |
|
|
ixold[i] = ctold; |
|
|
ixnew[j] = ctnew; |
|
|
j++; |
|
|
} |
|
|
for (; j <= nlen[1]; j++) |
|
|
ixnew[j] = ctnew += skipline(f2); |
|
|
} |
|
|
|
|
|
|
|
|
/* shellsort CACM #201 */ |
|
|
static void sort(struct line *a, int n) |
|
|
{ |
|
|
struct line *ai, *aim, w; |
|
|
int j, m = 0, k; |
|
|
|
|
|
if (n == 0) |
|
|
return; |
|
|
for (j = 1; j <= n; j *= 2) |
|
|
m = 2 * j - 1; |
|
|
for (m /= 2; m != 0; m /= 2) { |
|
|
k = n - m; |
|
|
for (j = 1; j <= k; j++) { |
|
|
for (ai = &a[j]; ai > a; ai -= m) { |
|
|
aim = &ai[m]; |
|
|
if (aim < ai) |
|
|
break; /* wraparound */ |
|
|
if (aim->value > ai[0].value |
|
|
|| (aim->value == ai[0].value && aim->serial > ai[0].serial) |
|
|
) { |
|
|
break; |
|
|
} |
|
|
w.value = ai[0].value; |
|
|
ai[0].value = aim->value; |
|
|
aim->value = w.value; |
|
|
w.serial = ai[0].serial; |
|
|
ai[0].serial = aim->serial; |
|
|
aim->serial = w.serial; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static void uni_range(int a, int b) |
|
|
{ |
|
|
if (a < b) |
|
|
printf("%d,%d", a, b - a + 1); |
|
|
else if (a == b) |
|
|
printf("%d", b); |
|
|
else |
|
|
printf("%d,0", b); |
|
|
} |
|
|
|
|
|
|
|
|
static void fetch(long *f, int a, int b, FILE *lb, int ch) |
|
|
{ |
|
|
int i, j, c, lastc, col, nc; |
|
|
|
|
|
if (a > b) |
|
|
return; |
|
379 |
for (i = a; i <= b; i++) { |
for (i = a; i <= b; i++) { |
380 |
fseek(lb, f[i - 1], SEEK_SET); |
seek_ft(ft, ix[i - 1]); |
381 |
nc = f[i] - f[i - 1]; |
putchar(ch); |
382 |
if (ch != '\0') { |
if (option_mask32 & FLAG(T)) |
383 |
putchar(ch); |
putchar('\t'); |
384 |
if (option_mask32 & FLAG_T) |
for (j = 0, col = 0; j < ix[i] - ix[i - 1]; j++) { |
385 |
putchar('\t'); |
int c = fgetc(ft->ft_fp); |
|
} |
|
|
col = 0; |
|
|
for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { |
|
|
c = getc(lb); |
|
386 |
if (c == EOF) { |
if (c == EOF) { |
387 |
printf("\n\\ No newline at end of file\n"); |
printf("\n\\ No newline at end of file\n"); |
388 |
return; |
return; |
389 |
} |
} |
390 |
if (c == '\t' && (option_mask32 & FLAG_t)) { |
ft->ft_pos++; |
391 |
do { |
if (c == '\t' && (option_mask32 & FLAG(t))) |
392 |
putchar(' '); |
do putchar(' '); while (++col & 7); |
393 |
} while (++col & 7); |
else { |
|
} else { |
|
394 |
putchar(c); |
putchar(c); |
395 |
col++; |
col++; |
396 |
} |
} |
398 |
} |
} |
399 |
} |
} |
400 |
|
|
401 |
|
/* Creates the match vector J, where J[i] is the index |
402 |
#if ENABLE_FEATURE_DIFF_BINARY |
* of the line in the new file corresponding to the line i |
403 |
static int asciifile(FILE *f) |
* in the old file. Lines start at 1 instead of 0, that value |
404 |
|
* being used instead to denote no corresponding line. |
405 |
|
* This vector is dynamically allocated and must be freed by the caller. |
406 |
|
* |
407 |
|
* * fp is an input parameter, where fp[0] and fp[1] are the open |
408 |
|
* old file and new file respectively. |
409 |
|
* * nlen is an output variable, where nlen[0] and nlen[1] |
410 |
|
* gets the number of lines in the old and new file respectively. |
411 |
|
* * ix is an output variable, where ix[0] and ix[1] gets |
412 |
|
* assigned dynamically allocated vectors of the offsets of the lines |
413 |
|
* of the old and new file respectively. These must be freed by the caller. |
414 |
|
*/ |
415 |
|
static NOINLINE int *create_J(FILE_and_pos_t ft[2], int nlen[2], off_t *ix[2]) |
416 |
{ |
{ |
417 |
int i, cnt; |
int *J, slen[2], *class, *member; |
418 |
|
struct line *nfile[2], *sfile[2]; |
419 |
if (option_mask32 & FLAG_a) |
int pref = 0, suff = 0, i, j, delta; |
420 |
return 1; |
|
421 |
rewind(f); |
/* Lines of both files are hashed, and in the process |
422 |
cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f); |
* their offsets are stored in the array ix[fileno] |
423 |
for (i = 0; i < cnt; i++) { |
* where fileno == 0 points to the old file, and |
424 |
if (!isprint(g_read_buf[i]) |
* fileno == 1 points to the new one. |
425 |
&& !isspace(g_read_buf[i]) |
*/ |
426 |
) { |
for (i = 0; i < 2; i++) { |
427 |
return 0; |
unsigned hash; |
428 |
|
token_t tok; |
429 |
|
size_t sz = 100; |
430 |
|
nfile[i] = xmalloc((sz + 3) * sizeof(nfile[i][0])); |
431 |
|
/* ft gets here without the correct position, cant use seek_ft */ |
432 |
|
ft[i].ft_pos = 0; |
433 |
|
fseeko(ft[i].ft_fp, 0, SEEK_SET); |
434 |
|
|
435 |
|
nlen[i] = 0; |
436 |
|
/* We could zalloc nfile, but then zalloc starts showing in gprof at ~1% */ |
437 |
|
nfile[i][0].offset = 0; |
438 |
|
goto start; /* saves code */ |
439 |
|
while (1) { |
440 |
|
tok = read_token(&ft[i], tok); |
441 |
|
if (!(tok & TOK_EMPTY)) { |
442 |
|
/* Hash algorithm taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */ |
443 |
|
/*hash = hash * 128 - hash + TOK2CHAR(tok); |
444 |
|
* gcc insists on optimizing above to "hash * 127 + ...", thus... */ |
445 |
|
unsigned o = hash - TOK2CHAR(tok); |
446 |
|
hash = hash * 128 - o; /* we want SPEED here */ |
447 |
|
continue; |
448 |
|
} |
449 |
|
if (nlen[i]++ == sz) { |
450 |
|
sz = sz * 3 / 2; |
451 |
|
nfile[i] = xrealloc(nfile[i], (sz + 3) * sizeof(nfile[i][0])); |
452 |
|
} |
453 |
|
/* line_compar needs hashes fit into positive int */ |
454 |
|
nfile[i][nlen[i]].value = hash & INT_MAX; |
455 |
|
/* like ftello(ft[i].ft_fp) but faster (avoids lseek syscall) */ |
456 |
|
nfile[i][nlen[i]].offset = ft[i].ft_pos; |
457 |
|
if (tok & TOK_EOF) { |
458 |
|
/* EOF counts as a token, so we have to adjust it here */ |
459 |
|
nfile[i][nlen[i]].offset++; |
460 |
|
break; |
461 |
|
} |
462 |
|
start: |
463 |
|
hash = tok = 0; |
464 |
} |
} |
465 |
|
/* Exclude lone EOF line from the end of the file, to make fetch()'s job easier */ |
466 |
|
if (nfile[i][nlen[i]].offset - nfile[i][nlen[i] - 1].offset == 1) |
467 |
|
nlen[i]--; |
468 |
|
/* Now we copy the line offsets into ix */ |
469 |
|
ix[i] = xmalloc((nlen[i] + 2) * sizeof(ix[i][0])); |
470 |
|
for (j = 0; j < nlen[i] + 1; j++) |
471 |
|
ix[i][j] = nfile[i][j].offset; |
472 |
|
} |
473 |
|
|
474 |
|
/* length of prefix and suffix is calculated */ |
475 |
|
for (; pref < nlen[0] && pref < nlen[1] && |
476 |
|
nfile[0][pref + 1].value == nfile[1][pref + 1].value; |
477 |
|
pref++); |
478 |
|
for (; suff < nlen[0] - pref && suff < nlen[1] - pref && |
479 |
|
nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value; |
480 |
|
suff++); |
481 |
|
/* Arrays are pruned by the suffix and prefix lenght, |
482 |
|
* the result being sorted and stored in sfile[fileno], |
483 |
|
* and their sizes are stored in slen[fileno] |
484 |
|
*/ |
485 |
|
for (j = 0; j < 2; j++) { |
486 |
|
sfile[j] = nfile[j] + pref; |
487 |
|
slen[j] = nlen[j] - pref - suff; |
488 |
|
for (i = 0; i <= slen[j]; i++) |
489 |
|
sfile[j][i].serial = i; |
490 |
|
qsort(sfile[j] + 1, slen[j], sizeof(*sfile[j]), line_compar); |
491 |
} |
} |
492 |
return 1; |
/* nfile arrays are reused to reduce memory pressure |
493 |
} |
* The #if zeroed out section performs the same task as the |
494 |
#else |
* one in the #else section. |
495 |
#define asciifile(f) 1 |
* Peak memory usage is higher, but one array copy is avoided |
496 |
#endif |
* by not using unsort() |
|
|
|
|
|
|
|
/* dump accumulated "unified" diff changes */ |
|
|
static void dump_unified_vec(FILE *f1, FILE *f2) |
|
|
{ |
|
|
struct context_vec *cvp = context_vec_start; |
|
|
int lowa, upb, lowc, upd; |
|
|
int a, b, c, d; |
|
|
char ch; |
|
|
|
|
|
if (context_vec_start > context_vec_ptr) |
|
|
return; |
|
|
|
|
|
b = d = 0; /* gcc */ |
|
|
lowa = MAX(1, cvp->a - opt_U_context); |
|
|
upb = MIN(nlen[0], context_vec_ptr->b + opt_U_context); |
|
|
lowc = MAX(1, cvp->c - opt_U_context); |
|
|
upd = MIN(nlen[1], context_vec_ptr->d + opt_U_context); |
|
|
|
|
|
printf("@@ -"); |
|
|
uni_range(lowa, upb); |
|
|
printf(" +"); |
|
|
uni_range(lowc, upd); |
|
|
printf(" @@\n"); |
|
|
|
|
|
/* |
|
|
* Output changes in "unified" diff format--the old and new lines |
|
|
* are printed together. |
|
497 |
*/ |
*/ |
|
for (; cvp <= context_vec_ptr; cvp++) { |
|
|
a = cvp->a; |
|
|
b = cvp->b; |
|
|
c = cvp->c; |
|
|
d = cvp->d; |
|
|
|
|
|
/* |
|
|
* c: both new and old changes |
|
|
* d: only changes in the old file |
|
|
* a: only changes in the new file |
|
|
*/ |
|
|
if (a <= b && c <= d) |
|
|
ch = 'c'; |
|
|
else |
|
|
ch = (a <= b) ? 'd' : 'a'; |
|
498 |
#if 0 |
#if 0 |
499 |
switch (ch) { |
member = xmalloc((slen[1] + 2) * sizeof(member[0])); |
500 |
case 'c': |
equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
501 |
// fetch() seeks! |
free(nfile[1]); |
502 |
fetch(ixold, lowa, a - 1, f1, ' '); |
|
503 |
fetch(ixold, a, b, f1, '-'); |
class = xmalloc((slen[0] + 1) * sizeof(class[0])); |
504 |
fetch(ixnew, c, d, f2, '+'); |
for (i = 1; i <= slen[0]; i++) /* Unsorting */ |
505 |
break; |
class[sfile[0][i].serial] = sfile[0][i].value; |
506 |
case 'd': |
free(nfile[0]); |
|
fetch(ixold, lowa, a - 1, f1, ' '); |
|
|
fetch(ixold, a, b, f1, '-'); |
|
|
break; |
|
|
case 'a': |
|
|
fetch(ixnew, lowc, c - 1, f2, ' '); |
|
|
fetch(ixnew, c, d, f2, '+'); |
|
|
break; |
|
|
} |
|
507 |
#else |
#else |
508 |
if (ch == 'c' || ch == 'd') { |
member = (int *)nfile[1]; |
509 |
fetch(ixold, lowa, a - 1, f1, ' '); |
equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
510 |
fetch(ixold, a, b, f1, '-'); |
member = xrealloc(member, (slen[1] + 2) * sizeof(member[0])); |
511 |
} |
|
512 |
if (ch == 'a') |
class = (int *)nfile[0]; |
513 |
fetch(ixnew, lowc, c - 1, f2, ' '); |
unsort(sfile[0], slen[0], (int *)nfile[0]); |
514 |
if (ch == 'c' || ch == 'a') |
class = xrealloc(class, (slen[0] + 2) * sizeof(class[0])); |
|
fetch(ixnew, c, d, f2, '+'); |
|
515 |
#endif |
#endif |
516 |
lowa = b + 1; |
J = xmalloc((nlen[0] + 2) * sizeof(J[0])); |
517 |
lowc = d + 1; |
/* The elements of J which fall inside the prefix and suffix regions |
518 |
} |
* are marked as unchanged, while the ones which fall outside |
519 |
fetch(ixnew, d + 1, upd, f2, ' '); |
* are initialized with 0 (no matches), so that function stone can |
520 |
|
* then assign them their right values |
521 |
|
*/ |
522 |
|
for (i = 0, delta = nlen[1] - nlen[0]; i <= nlen[0]; i++) |
523 |
|
J[i] = i <= pref ? i : |
524 |
|
i > (nlen[0] - suff) ? (i + delta) : 0; |
525 |
|
/* Here the magic is performed */ |
526 |
|
stone(class, slen[0], member, J, pref); |
527 |
|
J[nlen[0] + 1] = nlen[1] + 1; |
528 |
|
|
529 |
context_vec_ptr = context_vec_start - 1; |
free(class); |
530 |
} |
free(member); |
531 |
|
|
532 |
|
/* Both files are rescanned, in an effort to find any lines |
533 |
|
* which, due to limitations intrinsic to any hashing algorithm, |
534 |
|
* are different but ended up confounded as the same |
535 |
|
*/ |
536 |
|
for (i = 1; i <= nlen[0]; i++) { |
537 |
|
if (!J[i]) |
538 |
|
continue; |
539 |
|
|
540 |
static void print_header(const char *file1, const char *file2) |
seek_ft(&ft[0], ix[0][i - 1]); |
541 |
{ |
seek_ft(&ft[1], ix[1][J[i] - 1]); |
|
if (label1) |
|
|
printf("--- %s\n", label1); |
|
|
else |
|
|
printf("--- %s\t%s", file1, ctime(&stb1.st_mtime)); |
|
|
if (label2) |
|
|
printf("+++ %s\n", label2); |
|
|
else |
|
|
printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime)); |
|
|
} |
|
542 |
|
|
543 |
|
for (j = J[i]; i <= nlen[0] && J[i] == j; i++, j++) { |
544 |
|
token_t tok0 = 0, tok1 = 0; |
545 |
|
do { |
546 |
|
tok0 = read_token(&ft[0], tok0); |
547 |
|
tok1 = read_token(&ft[1], tok1); |
548 |
|
|
549 |
/* |
if (((tok0 ^ tok1) & TOK_EMPTY) != 0 /* one is empty (not both) */ |
550 |
* Indicate that there is a difference between lines a and b of the from file |
|| (!(tok0 & TOK_EMPTY) && TOK2CHAR(tok0) != TOK2CHAR(tok1)) |
551 |
* to get to lines c to d of the to file. If a is greater than b then there |
) { |
552 |
* are no lines in the from file involved and this means that there were |
J[i] = 0; /* Break the correspondence */ |
553 |
* lines appended (beginning at b). If c is greater than d then there are |
} |
554 |
* lines missing from the to file. |
} while (!(tok0 & tok1 & TOK_EMPTY)); |
555 |
*/ |
} |
|
static void change(char *file1, FILE *f1, char *file2, FILE *f2, |
|
|
int a, int b, int c, int d) |
|
|
{ |
|
|
if ((a > b && c > d) || (option_mask32 & FLAG_q)) { |
|
|
anychange = 1; |
|
|
return; |
|
556 |
} |
} |
557 |
|
|
558 |
/* |
return J; |
|
* Allocate change records as needed. |
|
|
*/ |
|
|
if (context_vec_ptr == context_vec_end - 1) { |
|
|
ptrdiff_t offset = context_vec_ptr - context_vec_start; |
|
|
|
|
|
max_context <<= 1; |
|
|
context_vec_start = xrealloc(context_vec_start, |
|
|
max_context * sizeof(struct context_vec)); |
|
|
context_vec_end = context_vec_start + max_context; |
|
|
context_vec_ptr = context_vec_start + offset; |
|
|
} |
|
|
if (anychange == 0) { |
|
|
/* |
|
|
* Print the context/unidiff header first time through. |
|
|
*/ |
|
|
print_header(file1, file2); |
|
|
} else if (a > context_vec_ptr->b + (2 * opt_U_context) + 1 |
|
|
&& c > context_vec_ptr->d + (2 * opt_U_context) + 1 |
|
|
) { |
|
|
/* |
|
|
* If this change is more than 'context' lines from the |
|
|
* previous change, dump the record and reset it. |
|
|
*/ |
|
|
// dump_unified_vec() seeks! |
|
|
dump_unified_vec(f1, f2); |
|
|
} |
|
|
context_vec_ptr++; |
|
|
context_vec_ptr->a = a; |
|
|
context_vec_ptr->b = b; |
|
|
context_vec_ptr->c = c; |
|
|
context_vec_ptr->d = d; |
|
|
anychange = 1; |
|
559 |
} |
} |
560 |
|
|
561 |
|
static bool diff(FILE* fp[2], char *file[2]) |
|
static void output(char *file1, FILE *f1, char *file2, FILE *f2) |
|
562 |
{ |
{ |
563 |
/* Note that j0 and j1 can't be used as they are defined in math.h. |
int nlen[2]; |
564 |
* This also allows the rather amusing variable 'j00'... */ |
off_t *ix[2]; |
565 |
int m, i0, i1, j00, j01; |
FILE_and_pos_t ft[2]; |
566 |
|
typedef struct { int a, b; } vec_t[2]; |
567 |
rewind(f1); |
vec_t *vec = NULL; |
568 |
rewind(f2); |
int i = 1, j, k, idx = -1; |
569 |
m = nlen[0]; |
bool anychange = false; |
570 |
J[0] = 0; |
int *J; |
571 |
J[m + 1] = nlen[1] + 1; |
|
572 |
for (i0 = 1; i0 <= m; i0 = i1 + 1) { |
ft[0].ft_fp = fp[0]; |
573 |
while (i0 <= m && J[i0] == J[i0 - 1] + 1) |
ft[1].ft_fp = fp[1]; |
574 |
i0++; |
/* note that ft[i].ft_pos is unintitalized, create_J() |
575 |
j00 = J[i0 - 1] + 1; |
* must not assume otherwise */ |
576 |
i1 = i0 - 1; |
J = create_J(ft, nlen, ix); |
|
while (i1 < m && J[i1 + 1] == 0) |
|
|
i1++; |
|
|
j01 = J[i1 + 1] - 1; |
|
|
J[i1] = j01; |
|
|
// change() seeks! |
|
|
change(file1, f1, file2, f2, i0, i1, j00, j01); |
|
|
} |
|
|
if (m == 0) { |
|
|
// change() seeks! |
|
|
change(file1, f1, file2, f2, 1, 0, 1, nlen[1]); |
|
|
} |
|
|
if (anychange != 0 && !(option_mask32 & FLAG_q)) { |
|
|
// dump_unified_vec() seeks! |
|
|
dump_unified_vec(f1, f2); |
|
|
} |
|
|
} |
|
577 |
|
|
578 |
/* |
do { |
579 |
* The following code uses an algorithm due to Harold Stone, |
bool nonempty = false; |
|
* which finds a pair of longest identical subsequences in |
|
|
* the two files. |
|
|
* |
|
|
* The major goal is to generate the match vector J. |
|
|
* J[i] is the index of the line in file1 corresponding |
|
|
* to line i in file0. J[i] = 0 if there is no |
|
|
* such line in file1. |
|
|
* |
|
|
* Lines are hashed so as to work in core. All potential |
|
|
* matches are located by sorting the lines of each file |
|
|
* on the hash (called "value"). In particular, this |
|
|
* collects the equivalence classes in file1 together. |
|
|
* Subroutine equiv replaces the value of each line in |
|
|
* file0 by the index of the first element of its |
|
|
* matching equivalence in (the reordered) file1. |
|
|
* To save space equiv squeezes file1 into a single |
|
|
* array member in which the equivalence classes |
|
|
* are simply concatenated, except that their first |
|
|
* members are flagged by changing sign. |
|
|
* |
|
|
* Next the indices that point into member are unsorted into |
|
|
* array class according to the original order of file0. |
|
|
* |
|
|
* The cleverness lies in routine stone. This marches |
|
|
* through the lines of file0, developing a vector klist |
|
|
* of "k-candidates". At step i a k-candidate is a matched |
|
|
* pair of lines x,y (x in file0, y in file1) such that |
|
|
* there is a common subsequence of length k |
|
|
* between the first i lines of file0 and the first y |
|
|
* lines of file1, but there is no such subsequence for |
|
|
* any smaller y. x is the earliest possible mate to y |
|
|
* that occurs in such a subsequence. |
|
|
* |
|
|
* Whenever any of the members of the equivalence class of |
|
|
* lines in file1 matable to a line in file0 has serial number |
|
|
* less than the y of some k-candidate, that k-candidate |
|
|
* with the smallest such y is replaced. The new |
|
|
* k-candidate is chained (via pred) to the current |
|
|
* k-1 candidate so that the actual subsequence can |
|
|
* be recovered. When a member has serial number greater |
|
|
* that the y of all k-candidates, the klist is extended. |
|
|
* At the end, the longest subsequence is pulled out |
|
|
* and placed in the array J by unravel |
|
|
* |
|
|
* With J in hand, the matches there recorded are |
|
|
* checked against reality to assure that no spurious |
|
|
* matches have crept in due to hashing. If they have, |
|
|
* they are broken, and "jackpot" is recorded--a harmless |
|
|
* matter except that a true match for a spuriously |
|
|
* mated line may now be unnecessarily reported as a change. |
|
|
* |
|
|
* Much of the complexity of the program comes simply |
|
|
* from trying to minimize core utilization and |
|
|
* maximize the range of doable problems by dynamically |
|
|
* allocating what is needed and reusing what is not. |
|
|
* The core requirements for problems larger than somewhat |
|
|
* are (in words) 2*length(file0) + length(file1) + |
|
|
* 3*(number of k-candidates installed), typically about |
|
|
* 6n words for files of length n. |
|
|
*/ |
|
|
/* NB: files can be not REGular. The only sure thing that they |
|
|
* are not both DIRectories. */ |
|
|
static unsigned diffreg(char *file1, char *file2, int flags) |
|
|
{ |
|
|
int *member; /* will be overlaid on nfile[1] */ |
|
|
int *class; /* will be overlaid on nfile[0] */ |
|
|
int *klist; /* will be overlaid on nfile[0] after class */ |
|
|
FILE *f1; |
|
|
FILE *f2; |
|
|
unsigned rval; |
|
|
int i; |
|
580 |
|
|
581 |
anychange = 0; |
while (1) { |
582 |
context_vec_ptr = context_vec_start - 1; |
vec_t v; |
|
tempname1 = tempname2 = NULL; |
|
|
|
|
|
/* Is any of them a directory? Then it's simple */ |
|
|
if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) |
|
|
return (S_ISDIR(stb1.st_mode) ? D_ISDIR1 : D_ISDIR2); |
|
|
|
|
|
/* None of them are directories */ |
|
|
rval = D_SAME; |
|
|
|
|
|
if (flags & D_EMPTY1) |
|
|
/* can't be stdin, but xfopen_stdin() is smaller code */ |
|
|
f1 = xfopen_stdin(bb_dev_null); |
|
|
else |
|
|
f1 = xfopen_stdin(file1); |
|
|
if (flags & D_EMPTY2) |
|
|
f2 = xfopen_stdin(bb_dev_null); |
|
|
else |
|
|
f2 = xfopen_stdin(file2); |
|
|
|
|
|
/* NB: if D_EMPTY1/2 is set, other file is always a regular file, |
|
|
* not pipe/fifo/chardev/etc - D_EMPTY is used by "diff -r" only, |
|
|
* and it never diffs non-ordinary files in subdirs. */ |
|
|
if (!(flags & (D_EMPTY1 | D_EMPTY2))) { |
|
|
/* Quick check whether they are different */ |
|
|
/* NB: copies non-REG files to tempfiles and fills tempname1/2 */ |
|
|
i = files_differ(f1, f2); |
|
|
if (i != 1) { /* not different? */ |
|
|
if (i != 0) /* error? */ |
|
|
exit_status |= 2; |
|
|
goto closem; |
|
|
} |
|
|
} |
|
583 |
|
|
584 |
if (!asciifile(f1) || !asciifile(f2)) { |
for (v[0].a = i; v[0].a <= nlen[0] && J[v[0].a] == J[v[0].a - 1] + 1; v[0].a++) |
585 |
rval = D_BINARY; |
continue; |
586 |
exit_status |= 1; |
v[1].a = J[v[0].a - 1] + 1; |
|
goto closem; |
|
|
} |
|
587 |
|
|
588 |
// Rewind inside! |
for (v[0].b = v[0].a - 1; v[0].b < nlen[0] && !J[v[0].b + 1]; v[0].b++) |
589 |
prepare(0, f1 /*, stb1.st_size*/); |
continue; |
590 |
prepare(1, f2 /*, stb2.st_size*/); |
v[1].b = J[v[0].b + 1] - 1; |
591 |
prune(); |
/* |
592 |
sort(sfile[0], slen[0]); |
* Indicate that there is a difference between lines a and b of the 'from' file |
593 |
sort(sfile[1], slen[1]); |
* to get to lines c to d of the 'to' file. If a is greater than b then there |
594 |
|
* are no lines in the 'from' file involved and this means that there were |
595 |
|
* lines appended (beginning at b). If c is greater than d then there are |
596 |
|
* lines missing from the 'to' file. |
597 |
|
*/ |
598 |
|
if (v[0].a <= v[0].b || v[1].a <= v[1].b) { |
599 |
|
/* |
600 |
|
* If this change is more than 'context' lines from the |
601 |
|
* previous change, dump the record and reset it. |
602 |
|
*/ |
603 |
|
int ct = (2 * opt_U_context) + 1; |
604 |
|
if (idx >= 0 |
605 |
|
&& v[0].a > vec[idx][0].b + ct |
606 |
|
&& v[1].a > vec[idx][1].b + ct |
607 |
|
) { |
608 |
|
break; |
609 |
|
} |
610 |
|
|
611 |
member = (int *) nfile[1]; |
for (j = 0; j < 2; j++) |
612 |
equiv(sfile[0], slen[0], sfile[1], slen[1], member); |
for (k = v[j].a; k < v[j].b; k++) |
613 |
//TODO: xrealloc_vector? |
nonempty |= (ix[j][k+1] - ix[j][k] != 1); |
|
member = xrealloc(member, (slen[1] + 2) * sizeof(int)); |
|
614 |
|
|
615 |
class = (int *) nfile[0]; |
vec = xrealloc_vector(vec, 6, ++idx); |
616 |
unsort(sfile[0], slen[0], class); |
memcpy(vec[idx], v, sizeof(v)); |
617 |
class = xrealloc(class, (slen[0] + 2) * sizeof(int)); |
} |
|
|
|
|
klist = xmalloc((slen[0] + 2) * sizeof(int)); |
|
|
clen = 0; |
|
|
clistlen = 100; |
|
|
clist = xmalloc(clistlen * sizeof(struct cand)); |
|
|
i = stone(class, slen[0], member, klist); |
|
|
free(member); |
|
|
free(class); |
|
618 |
|
|
619 |
J = xrealloc(J, (nlen[0] + 2) * sizeof(int)); |
i = v[0].b + 1; |
620 |
unravel(klist[i]); |
if (i > nlen[0]) |
621 |
free(clist); |
break; |
622 |
free(klist); |
J[v[0].b] = v[1].b; |
623 |
|
} |
624 |
|
if (idx < 0 || ((option_mask32 & FLAG(B)) && !nonempty)) |
625 |
|
goto cont; |
626 |
|
if (!(option_mask32 & FLAG(q))) { |
627 |
|
int lowa; |
628 |
|
vec_t span, *cvp = vec; |
629 |
|
|
630 |
|
if (!anychange) { |
631 |
|
/* Print the context/unidiff header first time through */ |
632 |
|
printf("--- %s\n", label[0] ? label[0] : file[0]); |
633 |
|
printf("+++ %s\n", label[1] ? label[1] : file[1]); |
634 |
|
} |
635 |
|
|
636 |
ixold = xrealloc(ixold, (nlen[0] + 2) * sizeof(long)); |
printf("@@"); |
637 |
ixnew = xrealloc(ixnew, (nlen[1] + 2) * sizeof(long)); |
for (j = 0; j < 2; j++) { |
638 |
// Rewind inside! |
int a = span[j].a = MAX(1, (*cvp)[j].a - opt_U_context); |
639 |
check(f1, f2); |
int b = span[j].b = MIN(nlen[j], vec[idx][j].b + opt_U_context); |
|
// Rewind inside! |
|
|
output(file1, f1, file2, f2); |
|
640 |
|
|
641 |
closem: |
printf(" %c%d", j ? '+' : '-', MIN(a, b)); |
642 |
if (anychange) { |
if (a == b) |
643 |
exit_status |= 1; |
continue; |
644 |
if (rval == D_SAME) |
printf(",%d", (a < b) ? b - a + 1 : 0); |
645 |
rval = D_DIFFER; |
} |
646 |
} |
printf(" @@\n"); |
647 |
fclose_if_not_stdin(f1); |
/* |
648 |
fclose_if_not_stdin(f2); |
* Output changes in "unified" diff format--the old and new lines |
649 |
if (tempname1) { |
* are printed together. |
650 |
unlink(tempname1); |
*/ |
651 |
free(tempname1); |
for (lowa = span[0].a; ; lowa = (*cvp++)[0].b + 1) { |
652 |
} |
bool end = cvp > &vec[idx]; |
653 |
if (tempname2) { |
fetch(&ft[0], ix[0], lowa, end ? span[0].b : (*cvp)[0].a - 1, ' '); |
654 |
unlink(tempname2); |
if (end) |
655 |
free(tempname2); |
break; |
656 |
|
for (j = 0; j < 2; j++) |
657 |
|
fetch(&ft[j], ix[j], (*cvp)[j].a, (*cvp)[j].b, j ? '+' : '-'); |
658 |
|
} |
659 |
|
} |
660 |
|
anychange = true; |
661 |
|
cont: |
662 |
|
idx = -1; |
663 |
|
} while (i <= nlen[0]); |
664 |
|
|
665 |
|
free(vec); |
666 |
|
free(ix[0]); |
667 |
|
free(ix[1]); |
668 |
|
free(J); |
669 |
|
return anychange; |
670 |
|
} |
671 |
|
|
672 |
|
static int diffreg(char *file[2]) |
673 |
|
{ |
674 |
|
FILE *fp[2] = { stdin, stdin }; |
675 |
|
bool binary = false, differ = false; |
676 |
|
int status = STATUS_SAME, i; |
677 |
|
|
678 |
|
for (i = 0; i < 2; i++) { |
679 |
|
int fd = open_or_warn_stdin(file[i]); |
680 |
|
if (fd == -1) |
681 |
|
goto out; |
682 |
|
/* Our diff implementation is using seek. |
683 |
|
* When we meet non-seekable file, we must make a temp copy. |
684 |
|
*/ |
685 |
|
if (lseek(fd, 0, SEEK_SET) == -1 && errno == ESPIPE) { |
686 |
|
char name[] = "/tmp/difXXXXXX"; |
687 |
|
int fd_tmp = mkstemp(name); |
688 |
|
if (fd_tmp < 0) |
689 |
|
bb_perror_msg_and_die("mkstemp"); |
690 |
|
unlink(name); |
691 |
|
if (bb_copyfd_eof(fd, fd_tmp) < 0) |
692 |
|
xfunc_die(); |
693 |
|
if (fd) /* Prevents closing of stdin */ |
694 |
|
close(fd); |
695 |
|
fd = fd_tmp; |
696 |
|
} |
697 |
|
fp[i] = fdopen(fd, "r"); |
698 |
} |
} |
|
return rval; |
|
|
} |
|
699 |
|
|
700 |
|
while (1) { |
701 |
#if ENABLE_FEATURE_DIFF_DIR |
const size_t sz = COMMON_BUFSIZE / 2; |
702 |
static void do_diff(char *dir1, char *path1, char *dir2, char *path2) |
char *const buf0 = bb_common_bufsiz1; |
703 |
{ |
char *const buf1 = buf0 + sz; |
704 |
int flags = 0; /*D_HEADER;*/ |
int j, k; |
705 |
int val; |
i = fread(buf0, 1, sz, fp[0]); |
706 |
char *fullpath1 = NULL; /* if -N */ |
j = fread(buf1, 1, sz, fp[1]); |
707 |
char *fullpath2 = NULL; |
if (i != j) { |
708 |
|
differ = true; |
709 |
if (path1) |
i = MIN(i, j); |
|
fullpath1 = concat_path_file(dir1, path1); |
|
|
if (path2) |
|
|
fullpath2 = concat_path_file(dir2, path2); |
|
|
|
|
|
if (!fullpath1 || stat(fullpath1, &stb1) != 0) { |
|
|
flags |= D_EMPTY1; |
|
|
memset(&stb1, 0, sizeof(stb1)); |
|
|
if (path2) { |
|
|
free(fullpath1); |
|
|
fullpath1 = concat_path_file(dir1, path2); |
|
710 |
} |
} |
711 |
} |
if (i == 0) |
712 |
if (!fullpath2 || stat(fullpath2, &stb2) != 0) { |
break; |
713 |
flags |= D_EMPTY2; |
for (k = 0; k < i; k++) { |
714 |
memset(&stb2, 0, sizeof(stb2)); |
if (!buf0[k] || !buf1[k]) |
715 |
stb2.st_mode = stb1.st_mode; |
binary = true; |
716 |
if (path1) { |
if (buf0[k] != buf1[k]) |
717 |
free(fullpath2); |
differ = true; |
|
fullpath2 = concat_path_file(dir2, path1); |
|
718 |
} |
} |
719 |
} |
} |
720 |
|
if (differ) { |
721 |
|
if (binary && !(option_mask32 & FLAG(a))) |
722 |
|
status = STATUS_BINARY; |
723 |
|
else if (diff(fp, file)) |
724 |
|
status = STATUS_DIFFER; |
725 |
|
} |
726 |
|
if (status != STATUS_SAME) |
727 |
|
exit_status |= 1; |
728 |
|
out: |
729 |
|
fclose_if_not_stdin(fp[0]); |
730 |
|
fclose_if_not_stdin(fp[1]); |
731 |
|
|
732 |
if (stb1.st_mode == 0) |
return status; |
|
stb1.st_mode = stb2.st_mode; |
|
|
|
|
|
if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { |
|
|
printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); |
|
|
goto ret; |
|
|
} |
|
|
|
|
|
if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) |
|
|
val = D_SKIPPED1; |
|
|
else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) |
|
|
val = D_SKIPPED2; |
|
|
else { |
|
|
/* Both files are either REGular or DIRectories */ |
|
|
val = diffreg(fullpath1, fullpath2, flags); |
|
|
} |
|
|
|
|
|
print_status(val, fullpath1, fullpath2 /*, NULL*/); |
|
|
ret: |
|
|
free(fullpath1); |
|
|
free(fullpath2); |
|
733 |
} |
} |
|
#endif |
|
734 |
|
|
735 |
|
static void print_status(int status, char *path[2]) |
736 |
|
{ |
737 |
|
switch (status) { |
738 |
|
case STATUS_BINARY: |
739 |
|
case STATUS_DIFFER: |
740 |
|
if ((option_mask32 & FLAG(q)) || status == STATUS_BINARY) |
741 |
|
printf("Files %s and %s differ\n", path[0], path[1]); |
742 |
|
break; |
743 |
|
case STATUS_SAME: |
744 |
|
if (option_mask32 & FLAG(s)) |
745 |
|
printf("Files %s and %s are identical\n", path[0], path[1]); |
746 |
|
break; |
747 |
|
} |
748 |
|
} |
749 |
|
|
750 |
#if ENABLE_FEATURE_DIFF_DIR |
#if ENABLE_FEATURE_DIFF_DIR |
751 |
|
struct dlist { |
752 |
|
size_t len; |
753 |
|
int s, e; |
754 |
|
char **dl; |
755 |
|
}; |
756 |
|
|
757 |
/* This function adds a filename to dl, the directory listing. */ |
/* This function adds a filename to dl, the directory listing. */ |
758 |
static int FAST_FUNC add_to_dirlist(const char *filename, |
static int FAST_FUNC add_to_dirlist(const char *filename, |
759 |
struct stat *sb UNUSED_PARAM, |
struct stat *sb UNUSED_PARAM, |
760 |
void *userdata, |
void *userdata, int depth UNUSED_PARAM) |
|
int depth UNUSED_PARAM) |
|
761 |
{ |
{ |
762 |
dl = xrealloc_vector(dl, 5, dl_count); |
struct dlist *const l = userdata; |
763 |
dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata); |
l->dl = xrealloc_vector(l->dl, 6, l->e); |
764 |
dl_count++; |
/* + 1 skips "/" after dirname */ |
765 |
|
l->dl[l->e] = xstrdup(filename + l->len + 1); |
766 |
|
l->e++; |
767 |
return TRUE; |
return TRUE; |
768 |
} |
} |
769 |
|
|
770 |
|
/* If recursion is not set, this function adds the directory |
771 |
/* This returns a sorted directory listing. */ |
* to the list and prevents recursive_action from recursing into it. |
772 |
static char **get_recursive_dirlist(char *path) |
*/ |
773 |
{ |
static int FAST_FUNC skip_dir(const char *filename, |
774 |
dl_count = 0; |
struct stat *sb, void *userdata, |
775 |
dl = xzalloc(sizeof(dl[0])); |
int depth) |
776 |
|
{ |
777 |
/* We need to trim root directory prefix. |
if (!(option_mask32 & FLAG(r)) && depth) { |
778 |
* Using void *userdata to specify its length, |
add_to_dirlist(filename, sb, userdata, depth); |
779 |
* add_to_dirlist will remove it. */ |
return SKIP; |
|
if (option_mask32 & FLAG_r) { |
|
|
recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS, |
|
|
add_to_dirlist, /* file_action */ |
|
|
NULL, /* dir_action */ |
|
|
(void*)(ptrdiff_t)(strlen(path) + 1), |
|
|
0); |
|
|
} else { |
|
|
DIR *dp; |
|
|
struct dirent *ep; |
|
|
|
|
|
dp = warn_opendir(path); |
|
|
while ((ep = readdir(dp))) { |
|
|
if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.')) |
|
|
continue; |
|
|
add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0); |
|
|
} |
|
|
closedir(dp); |
|
780 |
} |
} |
781 |
|
return TRUE; |
|
/* Sort dl alphabetically. */ |
|
|
qsort_string_vector(dl, dl_count); |
|
|
|
|
|
dl[dl_count] = NULL; |
|
|
return dl; |
|
782 |
} |
} |
783 |
|
|
784 |
|
static void diffdir(char *p[2], const char *s_start) |
|
static void diffdir(char *p1, char *p2) |
|
785 |
{ |
{ |
786 |
char **dirlist1, **dirlist2; |
struct dlist list[2]; |
787 |
char *dp1, *dp2; |
int i; |
|
int pos; |
|
|
|
|
|
/* Check for trailing slashes. */ |
|
|
dp1 = last_char_is(p1, '/'); |
|
|
if (dp1 != NULL) |
|
|
*dp1 = '\0'; |
|
|
dp2 = last_char_is(p2, '/'); |
|
|
if (dp2 != NULL) |
|
|
*dp2 = '\0'; |
|
|
|
|
|
/* Get directory listings for p1 and p2. */ |
|
|
dirlist1 = get_recursive_dirlist(p1); |
|
|
dirlist2 = get_recursive_dirlist(p2); |
|
|
|
|
|
/* If -S was set, find the starting point. */ |
|
|
if (opt_S_start) { |
|
|
while (*dirlist1 != NULL && strcmp(*dirlist1, opt_S_start) < 0) |
|
|
dirlist1++; |
|
|
while (*dirlist2 != NULL && strcmp(*dirlist2, opt_S_start) < 0) |
|
|
dirlist2++; |
|
|
if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) |
|
|
bb_error_msg(bb_msg_invalid_arg, "NULL", "-S"); |
|
|
} |
|
788 |
|
|
789 |
|
memset(&list, 0, sizeof(list)); |
790 |
|
for (i = 0; i < 2; i++) { |
791 |
|
/*list[i].s = list[i].e = 0; - memset did it */ |
792 |
|
/*list[i].dl = NULL; */ |
793 |
|
|
794 |
|
/* We need to trim root directory prefix. |
795 |
|
* Using list.len to specify its length, |
796 |
|
* add_to_dirlist will remove it. */ |
797 |
|
list[i].len = strlen(p[i]); |
798 |
|
recursive_action(p[i], ACTION_RECURSE | ACTION_FOLLOWLINKS, |
799 |
|
add_to_dirlist, skip_dir, &list[i], 0); |
800 |
|
/* Sort dl alphabetically. |
801 |
|
* GNU diff does this ignoring any number of trailing dots. |
802 |
|
* We don't, so for us dotted files almost always are |
803 |
|
* first on the list. |
804 |
|
*/ |
805 |
|
qsort_string_vector(list[i].dl, list[i].e); |
806 |
|
/* If -S was set, find the starting point. */ |
807 |
|
if (!s_start) |
808 |
|
continue; |
809 |
|
while (list[i].s < list[i].e && strcmp(list[i].dl[list[i].s], s_start) < 0) |
810 |
|
list[i].s++; |
811 |
|
} |
812 |
/* Now that both dirlist1 and dirlist2 contain sorted directory |
/* Now that both dirlist1 and dirlist2 contain sorted directory |
813 |
* listings, we can start to go through dirlist1. If both listings |
* listings, we can start to go through dirlist1. If both listings |
814 |
* contain the same file, then do a normal diff. Otherwise, behaviour |
* contain the same file, then do a normal diff. Otherwise, behaviour |
815 |
* is determined by whether the -N flag is set. */ |
* is determined by whether the -N flag is set. */ |
816 |
while (*dirlist1 != NULL || *dirlist2 != NULL) { |
while (1) { |
817 |
dp1 = *dirlist1; |
char *dp[2]; |
818 |
dp2 = *dirlist2; |
int pos; |
819 |
pos = dp1 == NULL ? 1 : (dp2 == NULL ? -1 : strcmp(dp1, dp2)); |
int k; |
820 |
|
|
821 |
|
dp[0] = list[0].s < list[0].e ? list[0].dl[list[0].s] : NULL; |
822 |
|
dp[1] = list[1].s < list[1].e ? list[1].dl[list[1].s] : NULL; |
823 |
|
if (!dp[0] && !dp[1]) |
824 |
|
break; |
825 |
|
pos = !dp[0] ? 1 : (!dp[1] ? -1 : strcmp(dp[0], dp[1])); |
826 |
|
k = pos > 0; |
827 |
|
if (pos && !(option_mask32 & FLAG(N))) |
828 |
|
printf("Only in %s: %s\n", p[k], dp[k]); |
829 |
|
else { |
830 |
|
char *fullpath[2], *path[2]; /* if -N */ |
831 |
|
|
832 |
|
for (i = 0; i < 2; i++) { |
833 |
|
if (pos == 0 || i == k) { |
834 |
|
path[i] = fullpath[i] = concat_path_file(p[i], dp[i]); |
835 |
|
stat(fullpath[i], &stb[i]); |
836 |
|
} else { |
837 |
|
fullpath[i] = concat_path_file(p[i], dp[1 - i]); |
838 |
|
path[i] = (char *)bb_dev_null; |
839 |
|
} |
840 |
|
} |
841 |
|
if (pos) |
842 |
|
stat(fullpath[k], &stb[1 - k]); |
843 |
|
|
844 |
|
if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) |
845 |
|
printf("Common subdirectories: %s and %s\n", fullpath[0], fullpath[1]); |
846 |
|
else if (!S_ISREG(stb[0].st_mode) && !S_ISDIR(stb[0].st_mode)) |
847 |
|
printf("File %s is not a regular file or directory and was skipped\n", fullpath[0]); |
848 |
|
else if (!S_ISREG(stb[1].st_mode) && !S_ISDIR(stb[1].st_mode)) |
849 |
|
printf("File %s is not a regular file or directory and was skipped\n", fullpath[1]); |
850 |
|
else if (S_ISDIR(stb[0].st_mode) != S_ISDIR(stb[1].st_mode)) { |
851 |
|
if (S_ISDIR(stb[0].st_mode)) |
852 |
|
printf("File %s is a %s while file %s is a %s\n", fullpath[0], "directory", fullpath[1], "regular file"); |
853 |
|
else |
854 |
|
printf("File %s is a %s while file %s is a %s\n", fullpath[0], "regular file", fullpath[1], "directory"); |
855 |
|
} else |
856 |
|
print_status(diffreg(path), fullpath); |
857 |
|
|
858 |
|
free(fullpath[0]); |
859 |
|
free(fullpath[1]); |
860 |
|
} |
861 |
|
free(dp[k]); |
862 |
|
list[k].s++; |
863 |
if (pos == 0) { |
if (pos == 0) { |
864 |
do_diff(p1, dp1, p2, dp2); |
free(dp[1 - k]); |
865 |
dirlist1++; |
list[1 - k].s++; |
|
dirlist2++; |
|
|
} else if (pos < 0) { |
|
|
if (option_mask32 & FLAG_N) |
|
|
do_diff(p1, dp1, p2, NULL); |
|
|
else |
|
|
print_only(p1, dp1); |
|
|
dirlist1++; |
|
|
} else { |
|
|
if (option_mask32 & FLAG_N) |
|
|
do_diff(p1, NULL, p2, dp2); |
|
|
else |
|
|
print_only(p2, dp2); |
|
|
dirlist2++; |
|
866 |
} |
} |
867 |
} |
} |
868 |
|
if (ENABLE_FEATURE_CLEAN_UP) { |
869 |
|
free(list[0].dl); |
870 |
|
free(list[1].dl); |
871 |
|
} |
872 |
} |
} |
873 |
#endif |
#endif |
874 |
|
|
875 |
|
#if ENABLE_FEATURE_DIFF_LONG_OPTIONS |
876 |
|
static const char diff_longopts[] ALIGN1 = |
877 |
|
"ignore-case\0" No_argument "i" |
878 |
|
"ignore-tab-expansion\0" No_argument "E" |
879 |
|
"ignore-space-change\0" No_argument "b" |
880 |
|
"ignore-all-space\0" No_argument "w" |
881 |
|
"ignore-blank-lines\0" No_argument "B" |
882 |
|
"text\0" No_argument "a" |
883 |
|
"unified\0" Required_argument "U" |
884 |
|
"label\0" Required_argument "L" |
885 |
|
"show-c-function\0" No_argument "p" |
886 |
|
"brief\0" No_argument "q" |
887 |
|
"expand-tabs\0" No_argument "t" |
888 |
|
"initial-tab\0" No_argument "T" |
889 |
|
"recursive\0" No_argument "r" |
890 |
|
"new-file\0" No_argument "N" |
891 |
|
"report-identical-files\0" No_argument "s" |
892 |
|
"starting-file\0" Required_argument "S" |
893 |
|
"minimal\0" No_argument "d" |
894 |
|
; |
895 |
|
#endif |
896 |
|
|
897 |
int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
898 |
int diff_main(int argc UNUSED_PARAM, char **argv) |
int diff_main(int argc UNUSED_PARAM, char **argv) |
899 |
{ |
{ |
900 |
int gotstdin = 0; |
int gotstdin = 0, i; |
901 |
char *f1, *f2; |
char *file[2], *s_start = NULL; |
902 |
llist_t *L_arg = NULL; |
llist_t *L_arg = NULL; |
903 |
|
|
904 |
INIT_G(); |
INIT_G(); |
905 |
|
|
906 |
/* exactly 2 params; collect multiple -L <label>; -U N */ |
/* exactly 2 params; collect multiple -L <label>; -U N */ |
907 |
opt_complementary = "=2:L::U+"; |
opt_complementary = "=2:L::U+"; |
908 |
getopt32(argv, "abdiL:NqrsS:tTU:wu" |
#if ENABLE_FEATURE_DIFF_LONG_OPTIONS |
909 |
"p" /* ignored (for compatibility) */, |
applet_long_options = diff_longopts; |
910 |
&L_arg, &opt_S_start, &opt_U_context); |
#endif |
911 |
/*argc -= optind;*/ |
getopt32(argv, "abdiL:NqrsS:tTU:wupBE", |
912 |
|
&L_arg, &s_start, &opt_U_context); |
913 |
argv += optind; |
argv += optind; |
914 |
while (L_arg) { |
while (L_arg) |
915 |
if (label1 && label2) |
label[!!label[0]] = llist_pop(&L_arg); |
916 |
bb_show_usage(); |
xfunc_error_retval = 2; |
917 |
if (label1) /* then label2 is NULL */ |
for (i = 0; i < 2; i++) { |
918 |
label2 = label1; |
file[i] = argv[i]; |
919 |
label1 = llist_pop(&L_arg); |
/* Compat: "diff file name_which_doesnt_exist" exits with 2 */ |
920 |
|
if (LONE_DASH(file[i])) { |
921 |
|
fstat(STDIN_FILENO, &stb[i]); |
922 |
|
gotstdin++; |
923 |
|
} else |
924 |
|
xstat(file[i], &stb[i]); |
925 |
} |
} |
926 |
|
xfunc_error_retval = 1; |
927 |
/* |
if (gotstdin && (S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode))) |
|
* Do sanity checks, fill in stb1 and stb2 and call the appropriate |
|
|
* driver routine. Both drivers use the contents of stb1 and stb2. |
|
|
*/ |
|
|
f1 = argv[0]; |
|
|
f2 = argv[1]; |
|
|
if (LONE_DASH(f1)) { |
|
|
fstat(STDIN_FILENO, &stb1); |
|
|
gotstdin++; |
|
|
} else |
|
|
xstat(f1, &stb1); |
|
|
if (LONE_DASH(f2)) { |
|
|
fstat(STDIN_FILENO, &stb2); |
|
|
gotstdin++; |
|
|
} else |
|
|
xstat(f2, &stb2); |
|
|
|
|
|
if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) |
|
928 |
bb_error_msg_and_die("can't compare stdin to a directory"); |
bb_error_msg_and_die("can't compare stdin to a directory"); |
929 |
|
|
930 |
if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { |
if (S_ISDIR(stb[0].st_mode) && S_ISDIR(stb[1].st_mode)) { |
931 |
#if ENABLE_FEATURE_DIFF_DIR |
#if ENABLE_FEATURE_DIFF_DIR |
932 |
diffdir(f1, f2); |
diffdir(file, s_start); |
|
return exit_status; |
|
933 |
#else |
#else |
934 |
bb_error_msg_and_die("no support for directory comparison"); |
bb_error_msg_and_die("no support for directory comparison"); |
935 |
#endif |
#endif |
936 |
|
} else { |
937 |
|
bool dirfile = S_ISDIR(stb[0].st_mode) || S_ISDIR(stb[1].st_mode); |
938 |
|
bool dir = S_ISDIR(stb[1].st_mode); |
939 |
|
if (dirfile) { |
940 |
|
const char *slash = strrchr(file[!dir], '/'); |
941 |
|
file[dir] = concat_path_file(file[dir], slash ? slash + 1 : file[!dir]); |
942 |
|
xstat(file[dir], &stb[dir]); |
943 |
|
} |
944 |
|
/* diffreg can get non-regular files here */ |
945 |
|
print_status(gotstdin > 1 ? STATUS_SAME : diffreg(file), file); |
946 |
|
|
947 |
|
if (dirfile) |
948 |
|
free(file[dir]); |
949 |
} |
} |
950 |
|
|
|
if (S_ISDIR(stb1.st_mode)) { /* "diff dir file" */ |
|
|
/* NB: "diff dir dir2/dir3/file" must become |
|
|
* "diff dir/file dir2/dir3/file" */ |
|
|
char *slash = strrchr(f2, '/'); |
|
|
f1 = concat_path_file(f1, slash ? slash + 1 : f2); |
|
|
xstat(f1, &stb1); |
|
|
} |
|
|
if (S_ISDIR(stb2.st_mode)) { |
|
|
char *slash = strrchr(f1, '/'); |
|
|
f2 = concat_path_file(f2, slash ? slash + 1 : f1); |
|
|
xstat(f2, &stb2); |
|
|
} |
|
|
|
|
|
/* diffreg can get non-regular files here, |
|
|
* they are not both DIRestories */ |
|
|
print_status((gotstdin > 1 ? D_SAME : diffreg(f1, f2, 0)), |
|
|
f1, f2 /*, NULL*/); |
|
951 |
return exit_status; |
return exit_status; |
952 |
} |
} |