Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/diff/src/io.c
39530 views
1
/* File I/O for GNU DIFF.
2
3
Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4
2004 Free Software Foundation, Inc.
5
6
This file is part of GNU DIFF.
7
8
GNU DIFF is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2, or (at your option)
11
any later version.
12
13
GNU DIFF is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
17
18
You should have received a copy of the GNU General Public License
19
along with this program; see the file COPYING.
20
If not, write to the Free Software Foundation,
21
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23
#include "diff.h"
24
#include <cmpbuf.h>
25
#include <file-type.h>
26
#include <setmode.h>
27
#include <xalloc.h>
28
29
/* Rotate an unsigned value to the left. */
30
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
31
32
/* Given a hash value and a new character, return a new hash value. */
33
#define HASH(h, c) ((c) + ROL (h, 7))
34
35
/* The type of a hash value. */
36
typedef size_t hash_value;
37
verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
38
39
/* Lines are put into equivalence classes of lines that match in lines_differ.
40
Each equivalence class is represented by one of these structures,
41
but only while the classes are being computed.
42
Afterward, each class is represented by a number. */
43
struct equivclass
44
{
45
lin next; /* Next item in this bucket. */
46
hash_value hash; /* Hash of lines in this class. */
47
char const *line; /* A line that fits this class. */
48
size_t length; /* That line's length, not counting its newline. */
49
};
50
51
/* Hash-table: array of buckets, each being a chain of equivalence classes.
52
buckets[-1] is reserved for incomplete lines. */
53
static lin *buckets;
54
55
/* Number of buckets in the hash table array, not counting buckets[-1]. */
56
static size_t nbuckets;
57
58
/* Array in which the equivalence classes are allocated.
59
The bucket-chains go through the elements in this array.
60
The number of an equivalence class is its index in this array. */
61
static struct equivclass *equivs;
62
63
/* Index of first free element in the array `equivs'. */
64
static lin equivs_index;
65
66
/* Number of elements allocated in the array `equivs'. */
67
static lin equivs_alloc;
68
69
/* Read a block of data into a file buffer, checking for EOF and error. */
70
71
void
72
file_block_read (struct file_data *current, size_t size)
73
{
74
if (size && ! current->eof)
75
{
76
size_t s = block_read (current->desc,
77
FILE_BUFFER (current) + current->buffered, size);
78
if (s == SIZE_MAX)
79
pfatal_with_name (current->name);
80
current->buffered += s;
81
current->eof = s < size;
82
}
83
}
84
85
/* Check for binary files and compare them for exact identity. */
86
87
/* Return 1 if BUF contains a non text character.
88
SIZE is the number of characters in BUF. */
89
90
#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
91
92
/* Get ready to read the current file.
93
Return nonzero if SKIP_TEST is zero,
94
and if it appears to be a binary file. */
95
96
static bool
97
sip (struct file_data *current, bool skip_test)
98
{
99
/* If we have a nonexistent file at this stage, treat it as empty. */
100
if (current->desc < 0)
101
{
102
/* Leave room for a sentinel. */
103
current->bufsize = sizeof (word);
104
current->buffer = xmalloc (current->bufsize);
105
}
106
else
107
{
108
current->bufsize = buffer_lcm (sizeof (word),
109
STAT_BLOCKSIZE (current->stat),
110
PTRDIFF_MAX - 2 * sizeof (word));
111
current->buffer = xmalloc (current->bufsize);
112
113
if (! skip_test)
114
{
115
/* Check first part of file to see if it's a binary file. */
116
117
bool was_binary = set_binary_mode (current->desc, true);
118
off_t buffered;
119
file_block_read (current, current->bufsize);
120
buffered = current->buffered;
121
122
if (! was_binary)
123
{
124
/* Revert to text mode and seek back to the beginning to
125
reread the file. Use relative seek, since file
126
descriptors like stdin might not start at offset
127
zero. */
128
129
if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
130
pfatal_with_name (current->name);
131
set_binary_mode (current->desc, false);
132
current->buffered = 0;
133
current->eof = false;
134
}
135
136
return binary_file_p (current->buffer, buffered);
137
}
138
}
139
140
current->buffered = 0;
141
current->eof = false;
142
return false;
143
}
144
145
/* Slurp the rest of the current file completely into memory. */
146
147
static void
148
slurp (struct file_data *current)
149
{
150
size_t cc;
151
152
if (current->desc < 0)
153
{
154
/* The file is nonexistent. */
155
return;
156
}
157
158
if (S_ISREG (current->stat.st_mode))
159
{
160
/* It's a regular file; slurp in the rest all at once. */
161
162
/* Get the size out of the stat block.
163
Allocate just enough room for appended newline plus word sentinel,
164
plus word-alignment since we want the buffer word-aligned. */
165
size_t file_size = current->stat.st_size;
166
cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
167
if (file_size != current->stat.st_size || cc < file_size
168
|| PTRDIFF_MAX <= cc)
169
xalloc_die ();
170
171
if (current->bufsize < cc)
172
{
173
current->bufsize = cc;
174
current->buffer = xrealloc (current->buffer, cc);
175
}
176
177
/* Try to read at least 1 more byte than the size indicates, to
178
detect whether the file is growing. This is a nicety for
179
users who run 'diff' on files while they are changing. */
180
181
if (current->buffered <= file_size)
182
{
183
file_block_read (current, file_size + 1 - current->buffered);
184
if (current->buffered <= file_size)
185
return;
186
}
187
}
188
189
/* It's not a regular file, or it's a growing regular file; read it,
190
growing the buffer as needed. */
191
192
file_block_read (current, current->bufsize - current->buffered);
193
194
if (current->buffered)
195
{
196
while (current->buffered == current->bufsize)
197
{
198
if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
199
xalloc_die ();
200
current->bufsize *= 2;
201
current->buffer = xrealloc (current->buffer, current->bufsize);
202
file_block_read (current, current->bufsize - current->buffered);
203
}
204
205
/* Allocate just enough room for appended newline plus word
206
sentinel, plus word-alignment. */
207
cc = current->buffered + 2 * sizeof (word);
208
current->bufsize = cc - cc % sizeof (word);
209
current->buffer = xrealloc (current->buffer, current->bufsize);
210
}
211
}
212
213
/* Split the file into lines, simultaneously computing the equivalence
214
class for each line. */
215
216
static void
217
find_and_hash_each_line (struct file_data *current)
218
{
219
hash_value h;
220
char const *p = current->prefix_end;
221
unsigned char c;
222
lin i, *bucket;
223
size_t length;
224
225
/* Cache often-used quantities in local variables to help the compiler. */
226
char const **linbuf = current->linbuf;
227
lin alloc_lines = current->alloc_lines;
228
lin line = 0;
229
lin linbuf_base = current->linbuf_base;
230
lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
231
struct equivclass *eqs = equivs;
232
lin eqs_index = equivs_index;
233
lin eqs_alloc = equivs_alloc;
234
char const *suffix_begin = current->suffix_begin;
235
char const *bufend = FILE_BUFFER (current) + current->buffered;
236
bool diff_length_compare_anyway =
237
ignore_white_space != IGNORE_NO_WHITE_SPACE;
238
bool same_length_diff_contents_compare_anyway =
239
diff_length_compare_anyway | ignore_case;
240
241
while (p < suffix_begin)
242
{
243
char const *ip = p;
244
245
h = 0;
246
247
/* Hash this line until we find a newline. */
248
if (ignore_case)
249
switch (ignore_white_space)
250
{
251
case IGNORE_ALL_SPACE:
252
while ((c = *p++) != '\n')
253
if (! isspace (c))
254
h = HASH (h, tolower (c));
255
break;
256
257
case IGNORE_SPACE_CHANGE:
258
while ((c = *p++) != '\n')
259
{
260
if (isspace (c))
261
{
262
do
263
if ((c = *p++) == '\n')
264
goto hashing_done;
265
while (isspace (c));
266
267
h = HASH (h, ' ');
268
}
269
270
/* C is now the first non-space. */
271
h = HASH (h, tolower (c));
272
}
273
break;
274
275
case IGNORE_TAB_EXPANSION:
276
{
277
size_t column = 0;
278
while ((c = *p++) != '\n')
279
{
280
size_t repetitions = 1;
281
282
switch (c)
283
{
284
case '\b':
285
column -= 0 < column;
286
break;
287
288
case '\t':
289
c = ' ';
290
repetitions = tabsize - column % tabsize;
291
column = (column + repetitions < column
292
? 0
293
: column + repetitions);
294
break;
295
296
case '\r':
297
column = 0;
298
break;
299
300
default:
301
c = tolower (c);
302
column++;
303
break;
304
}
305
306
do
307
h = HASH (h, c);
308
while (--repetitions != 0);
309
}
310
}
311
break;
312
313
default:
314
while ((c = *p++) != '\n')
315
h = HASH (h, tolower (c));
316
break;
317
}
318
else
319
switch (ignore_white_space)
320
{
321
case IGNORE_ALL_SPACE:
322
while ((c = *p++) != '\n')
323
if (! isspace (c))
324
h = HASH (h, c);
325
break;
326
327
case IGNORE_SPACE_CHANGE:
328
while ((c = *p++) != '\n')
329
{
330
if (isspace (c))
331
{
332
do
333
if ((c = *p++) == '\n')
334
goto hashing_done;
335
while (isspace (c));
336
337
h = HASH (h, ' ');
338
}
339
340
/* C is now the first non-space. */
341
h = HASH (h, c);
342
}
343
break;
344
345
case IGNORE_TAB_EXPANSION:
346
{
347
size_t column = 0;
348
while ((c = *p++) != '\n')
349
{
350
size_t repetitions = 1;
351
352
switch (c)
353
{
354
case '\b':
355
column -= 0 < column;
356
break;
357
358
case '\t':
359
c = ' ';
360
repetitions = tabsize - column % tabsize;
361
column = (column + repetitions < column
362
? 0
363
: column + repetitions);
364
break;
365
366
case '\r':
367
column = 0;
368
break;
369
370
default:
371
column++;
372
break;
373
}
374
375
do
376
h = HASH (h, c);
377
while (--repetitions != 0);
378
}
379
}
380
break;
381
382
default:
383
while ((c = *p++) != '\n')
384
h = HASH (h, c);
385
break;
386
}
387
388
hashing_done:;
389
390
bucket = &buckets[h % nbuckets];
391
length = p - ip - 1;
392
393
if (p == bufend
394
&& current->missing_newline
395
&& ROBUST_OUTPUT_STYLE (output_style))
396
{
397
/* This line is incomplete. If this is significant,
398
put the line into buckets[-1]. */
399
if (ignore_white_space < IGNORE_SPACE_CHANGE)
400
bucket = &buckets[-1];
401
402
/* Omit the inserted newline when computing linbuf later. */
403
p--;
404
bufend = suffix_begin = p;
405
}
406
407
for (i = *bucket; ; i = eqs[i].next)
408
if (!i)
409
{
410
/* Create a new equivalence class in this bucket. */
411
i = eqs_index++;
412
if (i == eqs_alloc)
413
{
414
if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
415
xalloc_die ();
416
eqs_alloc *= 2;
417
eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
418
}
419
eqs[i].next = *bucket;
420
eqs[i].hash = h;
421
eqs[i].line = ip;
422
eqs[i].length = length;
423
*bucket = i;
424
break;
425
}
426
else if (eqs[i].hash == h)
427
{
428
char const *eqline = eqs[i].line;
429
430
/* Reuse existing class if lines_differ reports the lines
431
equal. */
432
if (eqs[i].length == length)
433
{
434
/* Reuse existing equivalence class if the lines are identical.
435
This detects the common case of exact identity
436
faster than lines_differ would. */
437
if (memcmp (eqline, ip, length) == 0)
438
break;
439
if (!same_length_diff_contents_compare_anyway)
440
continue;
441
}
442
else if (!diff_length_compare_anyway)
443
continue;
444
445
if (! lines_differ (eqline, ip))
446
break;
447
}
448
449
/* Maybe increase the size of the line table. */
450
if (line == alloc_lines)
451
{
452
/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
453
if (PTRDIFF_MAX / 3 <= alloc_lines
454
|| PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
455
|| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
456
xalloc_die ();
457
alloc_lines = 2 * alloc_lines - linbuf_base;
458
cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
459
linbuf += linbuf_base;
460
linbuf = xrealloc (linbuf,
461
(alloc_lines - linbuf_base) * sizeof *linbuf);
462
linbuf -= linbuf_base;
463
}
464
linbuf[line] = ip;
465
cureqs[line] = i;
466
++line;
467
}
468
469
current->buffered_lines = line;
470
471
for (i = 0; ; i++)
472
{
473
/* Record the line start for lines in the suffix that we care about.
474
Record one more line start than lines,
475
so that we can compute the length of any buffered line. */
476
if (line == alloc_lines)
477
{
478
/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
479
if (PTRDIFF_MAX / 3 <= alloc_lines
480
|| PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
481
|| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
482
xalloc_die ();
483
alloc_lines = 2 * alloc_lines - linbuf_base;
484
linbuf += linbuf_base;
485
linbuf = xrealloc (linbuf,
486
(alloc_lines - linbuf_base) * sizeof *linbuf);
487
linbuf -= linbuf_base;
488
}
489
linbuf[line] = p;
490
491
if (p == bufend)
492
break;
493
494
if (context <= i && no_diff_means_no_output)
495
break;
496
497
line++;
498
499
while (*p++ != '\n')
500
continue;
501
}
502
503
/* Done with cache in local variables. */
504
current->linbuf = linbuf;
505
current->valid_lines = line;
506
current->alloc_lines = alloc_lines;
507
current->equivs = cureqs;
508
equivs = eqs;
509
equivs_alloc = eqs_alloc;
510
equivs_index = eqs_index;
511
}
512
513
/* Prepare the text. Make sure the text end is initialized.
514
Make sure text ends in a newline,
515
but remember that we had to add one.
516
Strip trailing CRs, if that was requested. */
517
518
static void
519
prepare_text (struct file_data *current)
520
{
521
size_t buffered = current->buffered;
522
char *p = FILE_BUFFER (current);
523
char *dst;
524
525
if (buffered == 0 || p[buffered - 1] == '\n')
526
current->missing_newline = false;
527
else
528
{
529
p[buffered++] = '\n';
530
current->missing_newline = true;
531
}
532
533
if (!p)
534
return;
535
536
/* Don't use uninitialized storage when planting or using sentinels. */
537
memset (p + buffered, 0, sizeof (word));
538
539
if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
540
{
541
char const *src = dst;
542
char const *srclim = p + buffered;
543
544
do
545
dst += ! ((*dst = *src++) == '\r' && *src == '\n');
546
while (src < srclim);
547
548
buffered -= src - dst;
549
}
550
551
current->buffered = buffered;
552
}
553
554
/* We have found N lines in a buffer of size S; guess the
555
proportionate number of lines that will be found in a buffer of
556
size T. However, do not guess a number of lines so large that the
557
resulting line table might cause overflow in size calculations. */
558
static lin
559
guess_lines (lin n, size_t s, size_t t)
560
{
561
size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
562
lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
563
return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
564
}
565
566
/* Given a vector of two file_data objects, find the identical
567
prefixes and suffixes of each object. */
568
569
static void
570
find_identical_ends (struct file_data filevec[])
571
{
572
word *w0, *w1;
573
char *p0, *p1, *buffer0, *buffer1;
574
char const *end0, *beg0;
575
char const **linbuf0, **linbuf1;
576
lin i, lines;
577
size_t n0, n1;
578
lin alloc_lines0, alloc_lines1;
579
lin buffered_prefix, prefix_count, prefix_mask;
580
lin middle_guess, suffix_guess;
581
582
slurp (&filevec[0]);
583
prepare_text (&filevec[0]);
584
if (filevec[0].desc != filevec[1].desc)
585
{
586
slurp (&filevec[1]);
587
prepare_text (&filevec[1]);
588
}
589
else
590
{
591
filevec[1].buffer = filevec[0].buffer;
592
filevec[1].bufsize = filevec[0].bufsize;
593
filevec[1].buffered = filevec[0].buffered;
594
filevec[1].missing_newline = filevec[0].missing_newline;
595
}
596
597
/* Find identical prefix. */
598
599
w0 = filevec[0].buffer;
600
w1 = filevec[1].buffer;
601
p0 = buffer0 = (char *) w0;
602
p1 = buffer1 = (char *) w1;
603
n0 = filevec[0].buffered;
604
n1 = filevec[1].buffered;
605
606
if (p0 == p1)
607
/* The buffers are the same; sentinels won't work. */
608
p0 = p1 += n1;
609
else
610
{
611
/* Insert end sentinels, in this case characters that are guaranteed
612
to make the equality test false, and thus terminate the loop. */
613
614
if (n0 < n1)
615
p0[n0] = ~p1[n0];
616
else
617
p1[n1] = ~p0[n1];
618
619
/* Loop until first mismatch, or to the sentinel characters. */
620
621
/* Compare a word at a time for speed. */
622
while (*w0 == *w1)
623
w0++, w1++;
624
625
/* Do the last few bytes of comparison a byte at a time. */
626
p0 = (char *) w0;
627
p1 = (char *) w1;
628
while (*p0 == *p1)
629
p0++, p1++;
630
631
/* Don't mistakenly count missing newline as part of prefix. */
632
if (ROBUST_OUTPUT_STYLE (output_style)
633
&& ((buffer0 + n0 - filevec[0].missing_newline < p0)
634
!=
635
(buffer1 + n1 - filevec[1].missing_newline < p1)))
636
p0--, p1--;
637
}
638
639
/* Now P0 and P1 point at the first nonmatching characters. */
640
641
/* Skip back to last line-beginning in the prefix,
642
and then discard up to HORIZON_LINES lines from the prefix. */
643
i = horizon_lines;
644
while (p0 != buffer0 && (p0[-1] != '\n' || i--))
645
p0--, p1--;
646
647
/* Record the prefix. */
648
filevec[0].prefix_end = p0;
649
filevec[1].prefix_end = p1;
650
651
/* Find identical suffix. */
652
653
/* P0 and P1 point beyond the last chars not yet compared. */
654
p0 = buffer0 + n0;
655
p1 = buffer1 + n1;
656
657
if (! ROBUST_OUTPUT_STYLE (output_style)
658
|| filevec[0].missing_newline == filevec[1].missing_newline)
659
{
660
end0 = p0; /* Addr of last char in file 0. */
661
662
/* Get value of P0 at which we should stop scanning backward:
663
this is when either P0 or P1 points just past the last char
664
of the identical prefix. */
665
beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
666
667
/* Scan back until chars don't match or we reach that point. */
668
for (; p0 != beg0; p0--, p1--)
669
if (*p0 != *p1)
670
{
671
/* Point at the first char of the matching suffix. */
672
beg0 = p0;
673
break;
674
}
675
676
/* Are we at a line-beginning in both files? If not, add the rest of
677
this line to the main body. Discard up to HORIZON_LINES lines from
678
the identical suffix. Also, discard one extra line,
679
because shift_boundaries may need it. */
680
i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
681
&&
682
(buffer1 == p1 || p1[-1] == '\n'));
683
while (i-- && p0 != end0)
684
while (*p0++ != '\n')
685
continue;
686
687
p1 += p0 - beg0;
688
}
689
690
/* Record the suffix. */
691
filevec[0].suffix_begin = p0;
692
filevec[1].suffix_begin = p1;
693
694
/* Calculate number of lines of prefix to save.
695
696
prefix_count == 0 means save the whole prefix;
697
we need this for options like -D that output the whole file,
698
or for enormous contexts (to avoid worrying about arithmetic overflow).
699
We also need it for options like -F that output some preceding line;
700
at least we will need to find the last few lines,
701
but since we don't know how many, it's easiest to find them all.
702
703
Otherwise, prefix_count != 0. Save just prefix_count lines at start
704
of the line buffer; they'll be moved to the proper location later.
705
Handle 1 more line than the context says (because we count 1 too many),
706
rounded up to the next power of 2 to speed index computation. */
707
708
if (no_diff_means_no_output && ! function_regexp.fastmap
709
&& context < LIN_MAX / 4 && context < n0)
710
{
711
middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
712
suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
713
for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
714
continue;
715
alloc_lines0 = (prefix_count + middle_guess
716
+ MIN (context, suffix_guess));
717
}
718
else
719
{
720
prefix_count = 0;
721
alloc_lines0 = guess_lines (0, 0, n0);
722
}
723
724
prefix_mask = prefix_count - 1;
725
lines = 0;
726
linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
727
p0 = buffer0;
728
729
/* If the prefix is needed, find the prefix lines. */
730
if (! (no_diff_means_no_output
731
&& filevec[0].prefix_end == p0
732
&& filevec[1].prefix_end == p1))
733
{
734
end0 = filevec[0].prefix_end;
735
while (p0 != end0)
736
{
737
lin l = lines++ & prefix_mask;
738
if (l == alloc_lines0)
739
{
740
if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
741
xalloc_die ();
742
alloc_lines0 *= 2;
743
linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
744
}
745
linbuf0[l] = p0;
746
while (*p0++ != '\n')
747
continue;
748
}
749
}
750
buffered_prefix = prefix_count && context < lines ? context : lines;
751
752
/* Allocate line buffer 1. */
753
754
middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
755
suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
756
alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
757
if (alloc_lines1 < buffered_prefix
758
|| PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
759
xalloc_die ();
760
linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
761
762
if (buffered_prefix != lines)
763
{
764
/* Rotate prefix lines to proper location. */
765
for (i = 0; i < buffered_prefix; i++)
766
linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
767
for (i = 0; i < buffered_prefix; i++)
768
linbuf0[i] = linbuf1[i];
769
}
770
771
/* Initialize line buffer 1 from line buffer 0. */
772
for (i = 0; i < buffered_prefix; i++)
773
linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
774
775
/* Record the line buffer, adjusted so that
776
linbuf[0] points at the first differing line. */
777
filevec[0].linbuf = linbuf0 + buffered_prefix;
778
filevec[1].linbuf = linbuf1 + buffered_prefix;
779
filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
780
filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
781
filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
782
filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
783
}
784
785
/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
786
than 2**k. This table is derived from Chris K. Caldwell's list
787
<http://www.utm.edu/research/primes/lists/2small/>. */
788
789
static unsigned char const prime_offset[] =
790
{
791
0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
792
15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
793
11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
794
55, 93, 1, 57, 25
795
};
796
797
/* Verify that this host's size_t is not too wide for the above table. */
798
799
verify (enough_prime_offsets,
800
sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
801
802
/* Given a vector of two file_data objects, read the file associated
803
with each one, and build the table of equivalence classes.
804
Return nonzero if either file appears to be a binary file.
805
If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
806
807
bool
808
read_files (struct file_data filevec[], bool pretend_binary)
809
{
810
int i;
811
bool skip_test = text | pretend_binary;
812
bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
813
814
if (filevec[0].desc != filevec[1].desc)
815
appears_binary |= sip (&filevec[1], skip_test | appears_binary);
816
else
817
{
818
filevec[1].buffer = filevec[0].buffer;
819
filevec[1].bufsize = filevec[0].bufsize;
820
filevec[1].buffered = filevec[0].buffered;
821
}
822
if (appears_binary)
823
{
824
set_binary_mode (filevec[0].desc, true);
825
set_binary_mode (filevec[1].desc, true);
826
return true;
827
}
828
829
find_identical_ends (filevec);
830
831
equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
832
if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
833
xalloc_die ();
834
equivs = xmalloc (equivs_alloc * sizeof *equivs);
835
/* Equivalence class 0 is permanently safe for lines that were not
836
hashed. Real equivalence classes start at 1. */
837
equivs_index = 1;
838
839
/* Allocate (one plus) a prime number of hash buckets. Use a prime
840
number between 1/3 and 2/3 of the value of equiv_allocs,
841
approximately. */
842
for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
843
continue;
844
nbuckets = ((size_t) 1 << i) - prime_offset[i];
845
if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
846
xalloc_die ();
847
buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
848
buckets++;
849
850
for (i = 0; i < 2; i++)
851
find_and_hash_each_line (&filevec[i]);
852
853
filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
854
855
free (equivs);
856
free (buckets - 1);
857
858
return false;
859
}
860
861