Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/diff/src/analyze.c
39530 views
1
/* Analyze file differences for GNU DIFF.
2
3
Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4
2004 Free Software Foundation, Inc.
5
6
This file is part of GNU DIFF.
7
8
GNU DIFF is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2, or (at your option)
11
any later version.
12
13
GNU DIFF is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
17
18
You should have received a copy of the GNU General Public License
19
along with this program; see the file COPYING.
20
If not, write to the Free Software Foundation,
21
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23
/* The basic algorithm is described in:
24
"An O(ND) Difference Algorithm and its Variations", Eugene Myers,
25
Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
26
see especially section 4.2, which describes the variation used below.
27
Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
28
heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
29
at the price of producing suboptimal output for large inputs with
30
many differences.
31
32
The basic algorithm was independently discovered as described in:
33
"Algorithms for Approximate String Matching", E. Ukkonen,
34
Information and Control Vol. 64, 1985, pp. 100-118. */
35
36
#include "diff.h"
37
#include <cmpbuf.h>
38
#include <error.h>
39
#include <file-type.h>
40
#include <xalloc.h>
41
42
static lin *xvec, *yvec; /* Vectors being compared. */
43
static lin *fdiag; /* Vector, indexed by diagonal, containing
44
1 + the X coordinate of the point furthest
45
along the given diagonal in the forward
46
search of the edit matrix. */
47
static lin *bdiag; /* Vector, indexed by diagonal, containing
48
the X coordinate of the point furthest
49
along the given diagonal in the backward
50
search of the edit matrix. */
51
static lin too_expensive; /* Edit scripts longer than this are too
52
expensive to compute. */
53
54
#define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
55
56
struct partition
57
{
58
lin xmid, ymid; /* Midpoints of this partition. */
59
bool lo_minimal; /* Nonzero if low half will be analyzed minimally. */
60
bool hi_minimal; /* Likewise for high half. */
61
};
62
63
/* Find the midpoint of the shortest edit script for a specified
64
portion of the two files.
65
66
Scan from the beginnings of the files, and simultaneously from the ends,
67
doing a breadth-first search through the space of edit-sequence.
68
When the two searches meet, we have found the midpoint of the shortest
69
edit sequence.
70
71
If FIND_MINIMAL is nonzero, find the minimal edit script regardless
72
of expense. Otherwise, if the search is too expensive, use
73
heuristics to stop the search and report a suboptimal answer.
74
75
Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
76
XMID - YMID equals the number of inserted lines minus the number
77
of deleted lines (counting only lines before the midpoint).
78
79
Set PART->lo_minimal to true iff the minimal edit script for the
80
left half of the partition is known; similarly for PART->hi_minimal.
81
82
This function assumes that the first lines of the specified portions
83
of the two files do not match, and likewise that the last lines do not
84
match. The caller must trim matching lines from the beginning and end
85
of the portions it is going to specify.
86
87
If we return the "wrong" partitions,
88
the worst this can do is cause suboptimal diff output.
89
It cannot cause incorrect diff output. */
90
91
static void
92
diag (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal,
93
struct partition *part)
94
{
95
lin *const fd = fdiag; /* Give the compiler a chance. */
96
lin *const bd = bdiag; /* Additional help for the compiler. */
97
lin const *const xv = xvec; /* Still more help for the compiler. */
98
lin const *const yv = yvec; /* And more and more . . . */
99
lin const dmin = xoff - ylim; /* Minimum valid diagonal. */
100
lin const dmax = xlim - yoff; /* Maximum valid diagonal. */
101
lin const fmid = xoff - yoff; /* Center diagonal of top-down search. */
102
lin const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
103
lin fmin = fmid, fmax = fmid; /* Limits of top-down search. */
104
lin bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
105
lin c; /* Cost. */
106
bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
107
diagonal with respect to the northwest. */
108
109
fd[fmid] = xoff;
110
bd[bmid] = xlim;
111
112
for (c = 1;; ++c)
113
{
114
lin d; /* Active diagonal. */
115
bool big_snake = false;
116
117
/* Extend the top-down search by an edit step in each diagonal. */
118
fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
119
fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
120
for (d = fmax; d >= fmin; d -= 2)
121
{
122
lin x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
123
124
if (tlo >= thi)
125
x = tlo + 1;
126
else
127
x = thi;
128
oldx = x;
129
y = x - d;
130
while (x < xlim && y < ylim && xv[x] == yv[y])
131
++x, ++y;
132
if (x - oldx > SNAKE_LIMIT)
133
big_snake = true;
134
fd[d] = x;
135
if (odd && bmin <= d && d <= bmax && bd[d] <= x)
136
{
137
part->xmid = x;
138
part->ymid = y;
139
part->lo_minimal = part->hi_minimal = true;
140
return;
141
}
142
}
143
144
/* Similarly extend the bottom-up search. */
145
bmin > dmin ? bd[--bmin - 1] = LIN_MAX : ++bmin;
146
bmax < dmax ? bd[++bmax + 1] = LIN_MAX : --bmax;
147
for (d = bmax; d >= bmin; d -= 2)
148
{
149
lin x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
150
151
if (tlo < thi)
152
x = tlo;
153
else
154
x = thi - 1;
155
oldx = x;
156
y = x - d;
157
while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
158
--x, --y;
159
if (oldx - x > SNAKE_LIMIT)
160
big_snake = true;
161
bd[d] = x;
162
if (!odd && fmin <= d && d <= fmax && x <= fd[d])
163
{
164
part->xmid = x;
165
part->ymid = y;
166
part->lo_minimal = part->hi_minimal = true;
167
return;
168
}
169
}
170
171
if (find_minimal)
172
continue;
173
174
/* Heuristic: check occasionally for a diagonal that has made
175
lots of progress compared with the edit distance.
176
If we have any such, find the one that has made the most
177
progress and return it as if it had succeeded.
178
179
With this heuristic, for files with a constant small density
180
of changes, the algorithm is linear in the file size. */
181
182
if (200 < c && big_snake && speed_large_files)
183
{
184
lin best = 0;
185
186
for (d = fmax; d >= fmin; d -= 2)
187
{
188
lin dd = d - fmid;
189
lin x = fd[d];
190
lin y = x - d;
191
lin v = (x - xoff) * 2 - dd;
192
if (v > 12 * (c + (dd < 0 ? -dd : dd)))
193
{
194
if (v > best
195
&& xoff + SNAKE_LIMIT <= x && x < xlim
196
&& yoff + SNAKE_LIMIT <= y && y < ylim)
197
{
198
/* We have a good enough best diagonal;
199
now insist that it end with a significant snake. */
200
int k;
201
202
for (k = 1; xv[x - k] == yv[y - k]; k++)
203
if (k == SNAKE_LIMIT)
204
{
205
best = v;
206
part->xmid = x;
207
part->ymid = y;
208
break;
209
}
210
}
211
}
212
}
213
if (best > 0)
214
{
215
part->lo_minimal = true;
216
part->hi_minimal = false;
217
return;
218
}
219
220
best = 0;
221
for (d = bmax; d >= bmin; d -= 2)
222
{
223
lin dd = d - bmid;
224
lin x = bd[d];
225
lin y = x - d;
226
lin v = (xlim - x) * 2 + dd;
227
if (v > 12 * (c + (dd < 0 ? -dd : dd)))
228
{
229
if (v > best
230
&& xoff < x && x <= xlim - SNAKE_LIMIT
231
&& yoff < y && y <= ylim - SNAKE_LIMIT)
232
{
233
/* We have a good enough best diagonal;
234
now insist that it end with a significant snake. */
235
int k;
236
237
for (k = 0; xv[x + k] == yv[y + k]; k++)
238
if (k == SNAKE_LIMIT - 1)
239
{
240
best = v;
241
part->xmid = x;
242
part->ymid = y;
243
break;
244
}
245
}
246
}
247
}
248
if (best > 0)
249
{
250
part->lo_minimal = false;
251
part->hi_minimal = true;
252
return;
253
}
254
}
255
256
/* Heuristic: if we've gone well beyond the call of duty,
257
give up and report halfway between our best results so far. */
258
if (c >= too_expensive)
259
{
260
lin fxybest, fxbest;
261
lin bxybest, bxbest;
262
263
fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
264
265
/* Find forward diagonal that maximizes X + Y. */
266
fxybest = -1;
267
for (d = fmax; d >= fmin; d -= 2)
268
{
269
lin x = MIN (fd[d], xlim);
270
lin y = x - d;
271
if (ylim < y)
272
x = ylim + d, y = ylim;
273
if (fxybest < x + y)
274
{
275
fxybest = x + y;
276
fxbest = x;
277
}
278
}
279
280
/* Find backward diagonal that minimizes X + Y. */
281
bxybest = LIN_MAX;
282
for (d = bmax; d >= bmin; d -= 2)
283
{
284
lin x = MAX (xoff, bd[d]);
285
lin y = x - d;
286
if (y < yoff)
287
x = yoff + d, y = yoff;
288
if (x + y < bxybest)
289
{
290
bxybest = x + y;
291
bxbest = x;
292
}
293
}
294
295
/* Use the better of the two diagonals. */
296
if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
297
{
298
part->xmid = fxbest;
299
part->ymid = fxybest - fxbest;
300
part->lo_minimal = true;
301
part->hi_minimal = false;
302
}
303
else
304
{
305
part->xmid = bxbest;
306
part->ymid = bxybest - bxbest;
307
part->lo_minimal = false;
308
part->hi_minimal = true;
309
}
310
return;
311
}
312
}
313
}
314
315
/* Compare in detail contiguous subsequences of the two files
316
which are known, as a whole, to match each other.
317
318
The results are recorded in the vectors files[N].changed, by
319
storing 1 in the element for each line that is an insertion or deletion.
320
321
The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
322
323
Note that XLIM, YLIM are exclusive bounds.
324
All line numbers are origin-0 and discarded lines are not counted.
325
326
If FIND_MINIMAL, find a minimal difference no matter how
327
expensive it is. */
328
329
static void
330
compareseq (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal)
331
{
332
lin const *xv = xvec; /* Help the compiler. */
333
lin const *yv = yvec;
334
335
/* Slide down the bottom initial diagonal. */
336
while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
337
++xoff, ++yoff;
338
/* Slide up the top initial diagonal. */
339
while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
340
--xlim, --ylim;
341
342
/* Handle simple cases. */
343
if (xoff == xlim)
344
while (yoff < ylim)
345
files[1].changed[files[1].realindexes[yoff++]] = 1;
346
else if (yoff == ylim)
347
while (xoff < xlim)
348
files[0].changed[files[0].realindexes[xoff++]] = 1;
349
else
350
{
351
struct partition part;
352
353
/* Find a point of correspondence in the middle of the files. */
354
diag (xoff, xlim, yoff, ylim, find_minimal, &part);
355
356
/* Use the partitions to split this problem into subproblems. */
357
compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
358
compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
359
}
360
}
361
362
/* Discard lines from one file that have no matches in the other file.
363
364
A line which is discarded will not be considered by the actual
365
comparison algorithm; it will be as if that line were not in the file.
366
The file's `realindexes' table maps virtual line numbers
367
(which don't count the discarded lines) into real line numbers;
368
this is how the actual comparison algorithm produces results
369
that are comprehensible when the discarded lines are counted.
370
371
When we discard a line, we also mark it as a deletion or insertion
372
so that it will be printed in the output. */
373
374
static void
375
discard_confusing_lines (struct file_data filevec[])
376
{
377
int f;
378
lin i;
379
char *discarded[2];
380
lin *equiv_count[2];
381
lin *p;
382
383
/* Allocate our results. */
384
p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
385
* (2 * sizeof *p));
386
for (f = 0; f < 2; f++)
387
{
388
filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
389
filevec[f].realindexes = p; p += filevec[f].buffered_lines;
390
}
391
392
/* Set up equiv_count[F][I] as the number of lines in file F
393
that fall in equivalence class I. */
394
395
p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
396
equiv_count[0] = p;
397
equiv_count[1] = p + filevec[0].equiv_max;
398
399
for (i = 0; i < filevec[0].buffered_lines; ++i)
400
++equiv_count[0][filevec[0].equivs[i]];
401
for (i = 0; i < filevec[1].buffered_lines; ++i)
402
++equiv_count[1][filevec[1].equivs[i]];
403
404
/* Set up tables of which lines are going to be discarded. */
405
406
discarded[0] = zalloc (filevec[0].buffered_lines
407
+ filevec[1].buffered_lines);
408
discarded[1] = discarded[0] + filevec[0].buffered_lines;
409
410
/* Mark to be discarded each line that matches no line of the other file.
411
If a line matches many lines, mark it as provisionally discardable. */
412
413
for (f = 0; f < 2; f++)
414
{
415
size_t end = filevec[f].buffered_lines;
416
char *discards = discarded[f];
417
lin *counts = equiv_count[1 - f];
418
lin *equivs = filevec[f].equivs;
419
size_t many = 5;
420
size_t tem = end / 64;
421
422
/* Multiply MANY by approximate square root of number of lines.
423
That is the threshold for provisionally discardable lines. */
424
while ((tem = tem >> 2) > 0)
425
many *= 2;
426
427
for (i = 0; i < end; i++)
428
{
429
lin nmatch;
430
if (equivs[i] == 0)
431
continue;
432
nmatch = counts[equivs[i]];
433
if (nmatch == 0)
434
discards[i] = 1;
435
else if (nmatch > many)
436
discards[i] = 2;
437
}
438
}
439
440
/* Don't really discard the provisional lines except when they occur
441
in a run of discardables, with nonprovisionals at the beginning
442
and end. */
443
444
for (f = 0; f < 2; f++)
445
{
446
lin end = filevec[f].buffered_lines;
447
register char *discards = discarded[f];
448
449
for (i = 0; i < end; i++)
450
{
451
/* Cancel provisional discards not in middle of run of discards. */
452
if (discards[i] == 2)
453
discards[i] = 0;
454
else if (discards[i] != 0)
455
{
456
/* We have found a nonprovisional discard. */
457
register lin j;
458
lin length;
459
lin provisional = 0;
460
461
/* Find end of this run of discardable lines.
462
Count how many are provisionally discardable. */
463
for (j = i; j < end; j++)
464
{
465
if (discards[j] == 0)
466
break;
467
if (discards[j] == 2)
468
++provisional;
469
}
470
471
/* Cancel provisional discards at end, and shrink the run. */
472
while (j > i && discards[j - 1] == 2)
473
discards[--j] = 0, --provisional;
474
475
/* Now we have the length of a run of discardable lines
476
whose first and last are not provisional. */
477
length = j - i;
478
479
/* If 1/4 of the lines in the run are provisional,
480
cancel discarding of all provisional lines in the run. */
481
if (provisional * 4 > length)
482
{
483
while (j > i)
484
if (discards[--j] == 2)
485
discards[j] = 0;
486
}
487
else
488
{
489
register lin consec;
490
lin minimum = 1;
491
lin tem = length >> 2;
492
493
/* MINIMUM is approximate square root of LENGTH/4.
494
A subrun of two or more provisionals can stand
495
when LENGTH is at least 16.
496
A subrun of 4 or more can stand when LENGTH >= 64. */
497
while (0 < (tem >>= 2))
498
minimum <<= 1;
499
minimum++;
500
501
/* Cancel any subrun of MINIMUM or more provisionals
502
within the larger run. */
503
for (j = 0, consec = 0; j < length; j++)
504
if (discards[i + j] != 2)
505
consec = 0;
506
else if (minimum == ++consec)
507
/* Back up to start of subrun, to cancel it all. */
508
j -= consec;
509
else if (minimum < consec)
510
discards[i + j] = 0;
511
512
/* Scan from beginning of run
513
until we find 3 or more nonprovisionals in a row
514
or until the first nonprovisional at least 8 lines in.
515
Until that point, cancel any provisionals. */
516
for (j = 0, consec = 0; j < length; j++)
517
{
518
if (j >= 8 && discards[i + j] == 1)
519
break;
520
if (discards[i + j] == 2)
521
consec = 0, discards[i + j] = 0;
522
else if (discards[i + j] == 0)
523
consec = 0;
524
else
525
consec++;
526
if (consec == 3)
527
break;
528
}
529
530
/* I advances to the last line of the run. */
531
i += length - 1;
532
533
/* Same thing, from end. */
534
for (j = 0, consec = 0; j < length; j++)
535
{
536
if (j >= 8 && discards[i - j] == 1)
537
break;
538
if (discards[i - j] == 2)
539
consec = 0, discards[i - j] = 0;
540
else if (discards[i - j] == 0)
541
consec = 0;
542
else
543
consec++;
544
if (consec == 3)
545
break;
546
}
547
}
548
}
549
}
550
}
551
552
/* Actually discard the lines. */
553
for (f = 0; f < 2; f++)
554
{
555
char *discards = discarded[f];
556
lin end = filevec[f].buffered_lines;
557
lin j = 0;
558
for (i = 0; i < end; ++i)
559
if (minimal || discards[i] == 0)
560
{
561
filevec[f].undiscarded[j] = filevec[f].equivs[i];
562
filevec[f].realindexes[j++] = i;
563
}
564
else
565
filevec[f].changed[i] = 1;
566
filevec[f].nondiscarded_lines = j;
567
}
568
569
free (discarded[0]);
570
free (equiv_count[0]);
571
}
572
573
/* Adjust inserts/deletes of identical lines to join changes
574
as much as possible.
575
576
We do something when a run of changed lines include a
577
line at one end and have an excluded, identical line at the other.
578
We are free to choose which identical line is included.
579
`compareseq' usually chooses the one at the beginning,
580
but usually it is cleaner to consider the following identical line
581
to be the "change". */
582
583
static void
584
shift_boundaries (struct file_data filevec[])
585
{
586
int f;
587
588
for (f = 0; f < 2; f++)
589
{
590
char *changed = filevec[f].changed;
591
char *other_changed = filevec[1 - f].changed;
592
lin const *equivs = filevec[f].equivs;
593
lin i = 0;
594
lin j = 0;
595
lin i_end = filevec[f].buffered_lines;
596
597
while (1)
598
{
599
lin runlength, start, corresponding;
600
601
/* Scan forwards to find beginning of another run of changes.
602
Also keep track of the corresponding point in the other file. */
603
604
while (i < i_end && !changed[i])
605
{
606
while (other_changed[j++])
607
continue;
608
i++;
609
}
610
611
if (i == i_end)
612
break;
613
614
start = i;
615
616
/* Find the end of this run of changes. */
617
618
while (changed[++i])
619
continue;
620
while (other_changed[j])
621
j++;
622
623
do
624
{
625
/* Record the length of this run of changes, so that
626
we can later determine whether the run has grown. */
627
runlength = i - start;
628
629
/* Move the changed region back, so long as the
630
previous unchanged line matches the last changed one.
631
This merges with previous changed regions. */
632
633
while (start && equivs[start - 1] == equivs[i - 1])
634
{
635
changed[--start] = 1;
636
changed[--i] = 0;
637
while (changed[start - 1])
638
start--;
639
while (other_changed[--j])
640
continue;
641
}
642
643
/* Set CORRESPONDING to the end of the changed run, at the last
644
point where it corresponds to a changed run in the other file.
645
CORRESPONDING == I_END means no such point has been found. */
646
corresponding = other_changed[j - 1] ? i : i_end;
647
648
/* Move the changed region forward, so long as the
649
first changed line matches the following unchanged one.
650
This merges with following changed regions.
651
Do this second, so that if there are no merges,
652
the changed region is moved forward as far as possible. */
653
654
while (i != i_end && equivs[start] == equivs[i])
655
{
656
changed[start++] = 0;
657
changed[i++] = 1;
658
while (changed[i])
659
i++;
660
while (other_changed[++j])
661
corresponding = i;
662
}
663
}
664
while (runlength != i - start);
665
666
/* If possible, move the fully-merged run of changes
667
back to a corresponding run in the other file. */
668
669
while (corresponding < i)
670
{
671
changed[--start] = 1;
672
changed[--i] = 0;
673
while (other_changed[--j])
674
continue;
675
}
676
}
677
}
678
}
679
680
/* Cons an additional entry onto the front of an edit script OLD.
681
LINE0 and LINE1 are the first affected lines in the two files (origin 0).
682
DELETED is the number of lines deleted here from file 0.
683
INSERTED is the number of lines inserted here in file 1.
684
685
If DELETED is 0 then LINE0 is the number of the line before
686
which the insertion was done; vice versa for INSERTED and LINE1. */
687
688
static struct change *
689
add_change (lin line0, lin line1, lin deleted, lin inserted,
690
struct change *old)
691
{
692
struct change *new = xmalloc (sizeof *new);
693
694
new->line0 = line0;
695
new->line1 = line1;
696
new->inserted = inserted;
697
new->deleted = deleted;
698
new->link = old;
699
return new;
700
}
701
702
/* Scan the tables of which lines are inserted and deleted,
703
producing an edit script in reverse order. */
704
705
static struct change *
706
build_reverse_script (struct file_data const filevec[])
707
{
708
struct change *script = 0;
709
char *changed0 = filevec[0].changed;
710
char *changed1 = filevec[1].changed;
711
lin len0 = filevec[0].buffered_lines;
712
lin len1 = filevec[1].buffered_lines;
713
714
/* Note that changedN[len0] does exist, and is 0. */
715
716
lin i0 = 0, i1 = 0;
717
718
while (i0 < len0 || i1 < len1)
719
{
720
if (changed0[i0] | changed1[i1])
721
{
722
lin line0 = i0, line1 = i1;
723
724
/* Find # lines changed here in each file. */
725
while (changed0[i0]) ++i0;
726
while (changed1[i1]) ++i1;
727
728
/* Record this change. */
729
script = add_change (line0, line1, i0 - line0, i1 - line1, script);
730
}
731
732
/* We have reached lines in the two files that match each other. */
733
i0++, i1++;
734
}
735
736
return script;
737
}
738
739
/* Scan the tables of which lines are inserted and deleted,
740
producing an edit script in forward order. */
741
742
static struct change *
743
build_script (struct file_data const filevec[])
744
{
745
struct change *script = 0;
746
char *changed0 = filevec[0].changed;
747
char *changed1 = filevec[1].changed;
748
lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
749
750
/* Note that changedN[-1] does exist, and is 0. */
751
752
while (i0 >= 0 || i1 >= 0)
753
{
754
if (changed0[i0 - 1] | changed1[i1 - 1])
755
{
756
lin line0 = i0, line1 = i1;
757
758
/* Find # lines changed here in each file. */
759
while (changed0[i0 - 1]) --i0;
760
while (changed1[i1 - 1]) --i1;
761
762
/* Record this change. */
763
script = add_change (i0, i1, line0 - i0, line1 - i1, script);
764
}
765
766
/* We have reached lines in the two files that match each other. */
767
i0--, i1--;
768
}
769
770
return script;
771
}
772
773
/* If CHANGES, briefly report that two files differed.
774
Return 2 if trouble, CHANGES otherwise. */
775
static int
776
briefly_report (int changes, struct file_data const filevec[])
777
{
778
if (changes)
779
{
780
char const *label0 = file_label[0] ? file_label[0] : filevec[0].name;
781
char const *label1 = file_label[1] ? file_label[1] : filevec[1].name;
782
message ("Files %s and %s differ\n", label0, label1);
783
if (! brief)
784
changes = 2;
785
}
786
787
return changes;
788
}
789
790
/* Report the differences of two files. */
791
int
792
diff_2_files (struct comparison *cmp)
793
{
794
lin diags;
795
int f;
796
struct change *e, *p;
797
struct change *script;
798
int changes;
799
800
801
/* If we have detected that either file is binary,
802
compare the two files as binary. This can happen
803
only when the first chunk is read.
804
Also, --brief without any --ignore-* options means
805
we can speed things up by treating the files as binary. */
806
807
if (read_files (cmp->file, files_can_be_treated_as_binary))
808
{
809
/* Files with different lengths must be different. */
810
if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
811
&& (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
812
&& (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
813
changes = 1;
814
815
/* Standard input equals itself. */
816
else if (cmp->file[0].desc == cmp->file[1].desc)
817
changes = 0;
818
819
else
820
/* Scan both files, a buffer at a time, looking for a difference. */
821
{
822
/* Allocate same-sized buffers for both files. */
823
size_t lcm_max = PTRDIFF_MAX - 1;
824
size_t buffer_size =
825
buffer_lcm (sizeof (word),
826
buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
827
STAT_BLOCKSIZE (cmp->file[1].stat),
828
lcm_max),
829
lcm_max);
830
for (f = 0; f < 2; f++)
831
cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
832
833
for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
834
{
835
/* Read a buffer's worth from both files. */
836
for (f = 0; f < 2; f++)
837
if (0 <= cmp->file[f].desc)
838
file_block_read (&cmp->file[f],
839
buffer_size - cmp->file[f].buffered);
840
841
/* If the buffers differ, the files differ. */
842
if (cmp->file[0].buffered != cmp->file[1].buffered
843
|| memcmp (cmp->file[0].buffer,
844
cmp->file[1].buffer,
845
cmp->file[0].buffered))
846
{
847
changes = 1;
848
break;
849
}
850
851
/* If we reach end of file, the files are the same. */
852
if (cmp->file[0].buffered != buffer_size)
853
{
854
changes = 0;
855
break;
856
}
857
}
858
}
859
860
changes = briefly_report (changes, cmp->file);
861
}
862
else
863
{
864
/* Allocate vectors for the results of comparison:
865
a flag for each line of each file, saying whether that line
866
is an insertion or deletion.
867
Allocate an extra element, always 0, at each end of each vector. */
868
869
size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
870
char *flag_space = zalloc (s);
871
cmp->file[0].changed = flag_space + 1;
872
cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
873
874
/* Some lines are obviously insertions or deletions
875
because they don't match anything. Detect them now, and
876
avoid even thinking about them in the main comparison algorithm. */
877
878
discard_confusing_lines (cmp->file);
879
880
/* Now do the main comparison algorithm, considering just the
881
undiscarded lines. */
882
883
xvec = cmp->file[0].undiscarded;
884
yvec = cmp->file[1].undiscarded;
885
diags = (cmp->file[0].nondiscarded_lines
886
+ cmp->file[1].nondiscarded_lines + 3);
887
fdiag = xmalloc (diags * (2 * sizeof *fdiag));
888
bdiag = fdiag + diags;
889
fdiag += cmp->file[1].nondiscarded_lines + 1;
890
bdiag += cmp->file[1].nondiscarded_lines + 1;
891
892
/* Set TOO_EXPENSIVE to be approximate square root of input size,
893
bounded below by 256. */
894
too_expensive = 1;
895
for (; diags != 0; diags >>= 2)
896
too_expensive <<= 1;
897
too_expensive = MAX (256, too_expensive);
898
899
files[0] = cmp->file[0];
900
files[1] = cmp->file[1];
901
902
compareseq (0, cmp->file[0].nondiscarded_lines,
903
0, cmp->file[1].nondiscarded_lines, minimal);
904
905
free (fdiag - (cmp->file[1].nondiscarded_lines + 1));
906
907
/* Modify the results slightly to make them prettier
908
in cases where that can validly be done. */
909
910
shift_boundaries (cmp->file);
911
912
/* Get the results of comparison in the form of a chain
913
of `struct change's -- an edit script. */
914
915
if (output_style == OUTPUT_ED)
916
script = build_reverse_script (cmp->file);
917
else
918
script = build_script (cmp->file);
919
920
/* Set CHANGES if we had any diffs.
921
If some changes are ignored, we must scan the script to decide. */
922
if (ignore_blank_lines || ignore_regexp.fastmap)
923
{
924
struct change *next = script;
925
changes = 0;
926
927
while (next && changes == 0)
928
{
929
struct change *this, *end;
930
lin first0, last0, first1, last1;
931
932
/* Find a set of changes that belong together. */
933
this = next;
934
end = find_change (next);
935
936
/* Disconnect them from the rest of the changes, making them
937
a hunk, and remember the rest for next iteration. */
938
next = end->link;
939
end->link = 0;
940
941
/* Determine whether this hunk is really a difference. */
942
if (analyze_hunk (this, &first0, &last0, &first1, &last1))
943
changes = 1;
944
945
/* Reconnect the script so it will all be freed properly. */
946
end->link = next;
947
}
948
}
949
else
950
changes = (script != 0);
951
952
if (brief)
953
changes = briefly_report (changes, cmp->file);
954
else
955
{
956
if (changes | !no_diff_means_no_output)
957
{
958
/* Record info for starting up output,
959
to be used if and when we have some output to print. */
960
setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
961
file_label[1] ? file_label[1] : cmp->file[1].name,
962
cmp->parent != 0);
963
964
switch (output_style)
965
{
966
case OUTPUT_CONTEXT:
967
print_context_script (script, false);
968
break;
969
970
case OUTPUT_UNIFIED:
971
print_context_script (script, true);
972
break;
973
974
case OUTPUT_ED:
975
print_ed_script (script);
976
break;
977
978
case OUTPUT_FORWARD_ED:
979
pr_forward_ed_script (script);
980
break;
981
982
case OUTPUT_RCS:
983
print_rcs_script (script);
984
break;
985
986
case OUTPUT_NORMAL:
987
print_normal_script (script);
988
break;
989
990
case OUTPUT_IFDEF:
991
print_ifdef_script (script);
992
break;
993
994
case OUTPUT_SDIFF:
995
print_sdiff_script (script);
996
break;
997
998
default:
999
abort ();
1000
}
1001
1002
finish_output ();
1003
}
1004
}
1005
1006
free (cmp->file[0].undiscarded);
1007
1008
free (flag_space);
1009
1010
for (f = 0; f < 2; f++)
1011
{
1012
free (cmp->file[f].equivs);
1013
free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
1014
}
1015
1016
for (e = script; e; e = p)
1017
{
1018
p = e->link;
1019
free (e);
1020
}
1021
1022
if (! ROBUST_OUTPUT_STYLE (output_style))
1023
for (f = 0; f < 2; ++f)
1024
if (cmp->file[f].missing_newline)
1025
{
1026
error (0, 0, "%s: %s\n",
1027
file_label[f] ? file_label[f] : cmp->file[f].name,
1028
_("No newline at end of file"));
1029
changes = 2;
1030
}
1031
}
1032
1033
if (cmp->file[0].buffer != cmp->file[1].buffer)
1034
free (cmp->file[0].buffer);
1035
free (cmp->file[1].buffer);
1036
1037
return changes;
1038
}
1039
1040