CoCalc -- io.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/diff/src/io.c
³⁹⁵³⁰ views
/* File I/O for GNU DIFF.

   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
   2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

23
#include "diff.h"
24
#include <cmpbuf.h>
25
#include <file-type.h>
26
#include <setmode.h>
27
#include <xalloc.h>
28

29
/* Rotate an unsigned value to the left.  */
30
/* N may be anything from 0 up to one less than the width of V.  The
   right-shift count is reduced modulo the width so that N == 0 does
   not shift by the full width, which C leaves undefined.  */
#define ROL(v, n) \
  ((v) << (n) \
   | (v) >> ((sizeof (v) * CHAR_BIT - (n)) % (sizeof (v) * CHAR_BIT)))
31

32
/* Given a hash value and a new character, return a new hash value.  */
33
#define HASH(h, c) (ROL (h, 7) + (c))
34

35
/* The type of a hash value.  */
36
typedef size_t hash_value;
37
verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
38

39
/* Lines are put into equivalence classes of lines that match in lines_differ.
40
   Each equivalence class is represented by one of these structures,
41
   but only while the classes are being computed.
42
   Afterward, each class is represented by a number.  */
43
struct equivclass
44
{
45
  lin next;		/* Next item in this bucket.  */
46
  hash_value hash;	/* Hash of lines in this class.  */
47
  char const *line;	/* A line that fits this class.  */
48
  size_t length;	/* That line's length, not counting its newline.  */
49
};
50

51
/* Hash-table: array of buckets, each being a chain of equivalence classes.
52
   buckets[-1] is reserved for incomplete lines.  */
53
static lin *buckets;
54

55
/* Number of buckets in the hash table array, not counting buckets[-1].  */
56
static size_t nbuckets;
57

58
/* Array in which the equivalence classes are allocated.
59
   The bucket-chains go through the elements in this array.
60
   The number of an equivalence class is its index in this array.  */
61
static struct equivclass *equivs;
62

63
/* Index of first free element in the array `equivs'.  */
64
static lin equivs_index;
65

66
/* Number of elements allocated in the array `equivs'.  */
67
static lin equivs_alloc;
68

69
/* Read a block of data into a file buffer, checking for EOF and error.  */
70

71
void
72
file_block_read (struct file_data *current, size_t size)
73
{
74
  if (size && ! current->eof)
75
    {
76
      size_t s = block_read (current->desc,
77
			     FILE_BUFFER (current) + current->buffered, size);
78
      if (s == SIZE_MAX)
79
	pfatal_with_name (current->name);
80
      current->buffered += s;
81
      current->eof = s < size;
82
    }
83
}
84

85
/* Check for binary files and compare them for exact identity.  */
86

87
/* Return 1 if BUF contains a non text character.
88
   SIZE is the number of characters in BUF.  */
89

90
#define binary_file_p(buf, size) (memchr ((buf), '\0', (size)) != NULL)
91

92
/* Get ready to read the current file.
93
   Return nonzero if SKIP_TEST is zero,
94
   and if it appears to be a binary file.  */
95

96
static bool
97
sip (struct file_data *current, bool skip_test)
98
{
99
  /* If we have a nonexistent file at this stage, treat it as empty.  */
100
  if (current->desc < 0)
101
    {
102
      /* Leave room for a sentinel.  */
103
      current->bufsize = sizeof (word);
104
      current->buffer = xmalloc (current->bufsize);
105
    }
106
  else
107
    {
108
      current->bufsize = buffer_lcm (sizeof (word),
109
				     STAT_BLOCKSIZE (current->stat),
110
				     PTRDIFF_MAX - 2 * sizeof (word));
111
      current->buffer = xmalloc (current->bufsize);
112

113
      if (! skip_test)
114
	{
115
	  /* Check first part of file to see if it's a binary file.  */
116

117
	  bool was_binary = set_binary_mode (current->desc, true);
118
	  off_t buffered;
119
	  file_block_read (current, current->bufsize);
120
	  buffered = current->buffered;
121

122
	  if (! was_binary)
123
	    {
124
	      /* Revert to text mode and seek back to the beginning to
125
		 reread the file.  Use relative seek, since file
126
		 descriptors like stdin might not start at offset
127
		 zero.  */
128

129
	      if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
130
		pfatal_with_name (current->name);
131
	      set_binary_mode (current->desc, false);
132
	      current->buffered = 0;
133
	      current->eof = false;
134
	    }
135

136
	  return binary_file_p (current->buffer, buffered);
137
	}
138
    }
139

140
  current->buffered = 0;
141
  current->eof = false;
142
  return false;
143
}
144

145
/* Slurp the rest of the current file completely into memory.  */
146

147
static void
148
slurp (struct file_data *current)
149
{
150
  size_t cc;
151

152
  if (current->desc < 0)
153
    {
154
      /* The file is nonexistent.  */
155
      return;
156
    }
157

158
  if (S_ISREG (current->stat.st_mode))
159
    {
160
      /* It's a regular file; slurp in the rest all at once.  */
161

162
      /* Get the size out of the stat block.
163
	 Allocate just enough room for appended newline plus word sentinel,
164
	 plus word-alignment since we want the buffer word-aligned.  */
165
      size_t file_size = current->stat.st_size;
166
      cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
167
      if (file_size != current->stat.st_size || cc < file_size
168
	  || PTRDIFF_MAX <= cc)
169
	xalloc_die ();
170

171
      if (current->bufsize < cc)
172
	{
173
	  current->bufsize = cc;
174
	  current->buffer = xrealloc (current->buffer, cc);
175
	}
176

177
      /* Try to read at least 1 more byte than the size indicates, to
178
	 detect whether the file is growing.  This is a nicety for
179
	 users who run 'diff' on files while they are changing.  */
180

181
      if (current->buffered <= file_size)
182
	{
183
	  file_block_read (current, file_size + 1 - current->buffered);
184
	  if (current->buffered <= file_size)
185
	    return;
186
	}
187
    }
188

189
  /* It's not a regular file, or it's a growing regular file; read it,
190
     growing the buffer as needed.  */
191

192
  file_block_read (current, current->bufsize - current->buffered);
193

194
  if (current->buffered)
195
    {
196
      while (current->buffered == current->bufsize)
197
	{
198
	  if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
199
	    xalloc_die ();
200
	  current->bufsize *= 2;
201
	  current->buffer = xrealloc (current->buffer, current->bufsize);
202
	  file_block_read (current, current->bufsize - current->buffered);
203
	}
204

205
      /* Allocate just enough room for appended newline plus word
206
	 sentinel, plus word-alignment.  */
207
      cc = current->buffered + 2 * sizeof (word);
208
      current->bufsize = cc - cc % sizeof (word);
209
      current->buffer = xrealloc (current->buffer, current->bufsize);
210
    }
211
}
212

213
/* Split the file into lines, simultaneously computing the equivalence
214
   class for each line.  */
215

216
static void
217
find_and_hash_each_line (struct file_data *current)
218
{
219
  hash_value h;
220
  char const *p = current->prefix_end;
221
  unsigned char c;
222
  lin i, *bucket;
223
  size_t length;
224

225
  /* Cache often-used quantities in local variables to help the compiler.  */
226
  char const **linbuf = current->linbuf;
227
  lin alloc_lines = current->alloc_lines;
228
  lin line = 0;
229
  lin linbuf_base = current->linbuf_base;
230
  lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
231
  struct equivclass *eqs = equivs;
232
  lin eqs_index = equivs_index;
233
  lin eqs_alloc = equivs_alloc;
234
  char const *suffix_begin = current->suffix_begin;
235
  char const *bufend = FILE_BUFFER (current) + current->buffered;
236
  bool diff_length_compare_anyway =
237
    ignore_white_space != IGNORE_NO_WHITE_SPACE;
238
  bool same_length_diff_contents_compare_anyway =
239
    diff_length_compare_anyway | ignore_case;
240

241
  while (p < suffix_begin)
242
    {
243
      char const *ip = p;
244

245
      h = 0;
246

247
      /* Hash this line until we find a newline.  */
248
      if (ignore_case)
249
	switch (ignore_white_space)
250
	  {
251
	  case IGNORE_ALL_SPACE:
252
	    while ((c = *p++) != '\n')
253
	      if (! isspace (c))
254
		h = HASH (h, tolower (c));
255
	    break;
256

257
	  case IGNORE_SPACE_CHANGE:
258
	    while ((c = *p++) != '\n')
259
	      {
260
		if (isspace (c))
261
		  {
262
		    do
263
		      if ((c = *p++) == '\n')
264
			goto hashing_done;
265
		    while (isspace (c));
266

267
		    h = HASH (h, ' ');
268
		  }
269

270
		/* C is now the first non-space.  */
271
		h = HASH (h, tolower (c));
272
	      }
273
	    break;
274

275
	  case IGNORE_TAB_EXPANSION:
276
	    {
277
	      size_t column = 0;
278
	      while ((c = *p++) != '\n')
279
		{
280
		  size_t repetitions = 1;
281

282
		  switch (c)
283
		    {
284
		    case '\b':
285
		      column -= 0 < column;
286
		      break;
287

288
		    case '\t':
289
		      c = ' ';
290
		      repetitions = tabsize - column % tabsize;
291
		      column = (column + repetitions < column
292
				? 0
293
				: column + repetitions);
294
		      break;
295

296
		    case '\r':
297
		      column = 0;
298
		      break;
299

300
		    default:
301
		      c = tolower (c);
302
		      column++;
303
		      break;
304
		    }
305

306
		  do
307
		    h = HASH (h, c);
308
		  while (--repetitions != 0);
309
		}
310
	    }
311
	    break;
312

313
	  default:
314
	    while ((c = *p++) != '\n')
315
	      h = HASH (h, tolower (c));
316
	    break;
317
	  }
318
      else
319
	switch (ignore_white_space)
320
	  {
321
	  case IGNORE_ALL_SPACE:
322
	    while ((c = *p++) != '\n')
323
	      if (! isspace (c))
324
		h = HASH (h, c);
325
	    break;
326

327
	  case IGNORE_SPACE_CHANGE:
328
	    while ((c = *p++) != '\n')
329
	      {
330
		if (isspace (c))
331
		  {
332
		    do
333
		      if ((c = *p++) == '\n')
334
			goto hashing_done;
335
		    while (isspace (c));
336

337
		    h = HASH (h, ' ');
338
		  }
339

340
		/* C is now the first non-space.  */
341
		h = HASH (h, c);
342
	      }
343
	    break;
344

345
	  case IGNORE_TAB_EXPANSION:
346
	    {
347
	      size_t column = 0;
348
	      while ((c = *p++) != '\n')
349
		{
350
		  size_t repetitions = 1;
351

352
		  switch (c)
353
		    {
354
		    case '\b':
355
		      column -= 0 < column;
356
		      break;
357

358
		    case '\t':
359
		      c = ' ';
360
		      repetitions = tabsize - column % tabsize;
361
		      column = (column + repetitions < column
362
				? 0
363
				: column + repetitions);
364
		      break;
365

366
		    case '\r':
367
		      column = 0;
368
		      break;
369

370
		    default:
371
		      column++;
372
		      break;
373
		    }
374

375
		  do
376
		    h = HASH (h, c);
377
		  while (--repetitions != 0);
378
		}
379
	    }
380
	    break;
381

382
	  default:
383
	    while ((c = *p++) != '\n')
384
	      h = HASH (h, c);
385
	    break;
386
	  }
387

388
   hashing_done:;
389

390
      bucket = &buckets[h % nbuckets];
391
      length = p - ip - 1;
392

393
      if (p == bufend
394
	  && current->missing_newline
395
	  && ROBUST_OUTPUT_STYLE (output_style))
396
	{
397
	  /* This line is incomplete.  If this is significant,
398
	     put the line into buckets[-1].  */
399
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
400
	    bucket = &buckets[-1];
401

402
	  /* Omit the inserted newline when computing linbuf later.  */
403
	  p--;
404
	  bufend = suffix_begin = p;
405
	}
406

407
      for (i = *bucket;  ;  i = eqs[i].next)
408
	if (!i)
409
	  {
410
	    /* Create a new equivalence class in this bucket.  */
411
	    i = eqs_index++;
412
	    if (i == eqs_alloc)
413
	      {
414
		if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
415
		  xalloc_die ();
416
		eqs_alloc *= 2;
417
		eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
418
	      }
419
	    eqs[i].next = *bucket;
420
	    eqs[i].hash = h;
421
	    eqs[i].line = ip;
422
	    eqs[i].length = length;
423
	    *bucket = i;
424
	    break;
425
	  }
426
	else if (eqs[i].hash == h)
427
	  {
428
	    char const *eqline = eqs[i].line;
429

430
	    /* Reuse existing class if lines_differ reports the lines
431
               equal.  */
432
	    if (eqs[i].length == length)
433
	      {
434
		/* Reuse existing equivalence class if the lines are identical.
435
		   This detects the common case of exact identity
436
		   faster than lines_differ would.  */
437
		if (memcmp (eqline, ip, length) == 0)
438
		  break;
439
		if (!same_length_diff_contents_compare_anyway)
440
		  continue;
441
	      }
442
	    else if (!diff_length_compare_anyway)
443
	      continue;
444

445
	    if (! lines_differ (eqline, ip))
446
	      break;
447
	  }
448

449
      /* Maybe increase the size of the line table.  */
450
      if (line == alloc_lines)
451
	{
452
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
453
	  if (PTRDIFF_MAX / 3 <= alloc_lines
454
	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
455
	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
456
	    xalloc_die ();
457
	  alloc_lines = 2 * alloc_lines - linbuf_base;
458
	  cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
459
	  linbuf += linbuf_base;
460
	  linbuf = xrealloc (linbuf,
461
			     (alloc_lines - linbuf_base) * sizeof *linbuf);
462
	  linbuf -= linbuf_base;
463
	}
464
      linbuf[line] = ip;
465
      cureqs[line] = i;
466
      ++line;
467
    }
468

469
  current->buffered_lines = line;
470

471
  for (i = 0;  ;  i++)
472
    {
473
      /* Record the line start for lines in the suffix that we care about.
474
	 Record one more line start than lines,
475
	 so that we can compute the length of any buffered line.  */
476
      if (line == alloc_lines)
477
	{
478
	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
479
	  if (PTRDIFF_MAX / 3 <= alloc_lines
480
	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
481
	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
482
	    xalloc_die ();
483
	  alloc_lines = 2 * alloc_lines - linbuf_base;
484
	  linbuf += linbuf_base;
485
	  linbuf = xrealloc (linbuf,
486
			     (alloc_lines - linbuf_base) * sizeof *linbuf);
487
	  linbuf -= linbuf_base;
488
	}
489
      linbuf[line] = p;
490

491
      if (p == bufend)
492
	break;
493

494
      if (context <= i && no_diff_means_no_output)
495
	break;
496

497
      line++;
498

499
      while (*p++ != '\n')
500
	continue;
501
    }
502

503
  /* Done with cache in local variables.  */
504
  current->linbuf = linbuf;
505
  current->valid_lines = line;
506
  current->alloc_lines = alloc_lines;
507
  current->equivs = cureqs;
508
  equivs = eqs;
509
  equivs_alloc = eqs_alloc;
510
  equivs_index = eqs_index;
511
}
512

513
/* Prepare the text.  Make sure the text end is initialized.
514
   Make sure text ends in a newline,
515
   but remember that we had to add one.
516
   Strip trailing CRs, if that was requested.  */
517

518
static void
519
prepare_text (struct file_data *current)
520
{
521
  size_t buffered = current->buffered;
522
  char *p = FILE_BUFFER (current);
523
  char *dst;
524

525
  if (buffered == 0 || p[buffered - 1] == '\n')
526
    current->missing_newline = false;
527
  else
528
    {
529
      p[buffered++] = '\n';
530
      current->missing_newline = true;
531
    }
532

533
  if (!p)
534
    return;
535

536
  /* Don't use uninitialized storage when planting or using sentinels.  */
537
  memset (p + buffered, 0, sizeof (word));
538

539
  if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
540
    {
541
      char const *src = dst;
542
      char const *srclim = p + buffered;
543

544
      do
545
	dst += ! ((*dst = *src++) == '\r' && *src == '\n');
546
      while (src < srclim);
547

548
      buffered -= src - dst;
549
    }
550

551
  current->buffered = buffered;
552
}
553

554
/* We have found N lines in a buffer of size S; guess the
555
   proportionate number of lines that will be found in a buffer of
556
   size T.  However, do not guess a number of lines so large that the
557
   resulting line table might cause overflow in size calculations.  */
558
static lin
559
guess_lines (lin n, size_t s, size_t t)
560
{
561
  size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
562
  lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
563
  return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
564
}
565

566
/* Given a vector of two file_data objects, find the identical
567
   prefixes and suffixes of each object.  */
568

569
static void
570
find_identical_ends (struct file_data filevec[])
571
{
572
  word *w0, *w1;
573
  char *p0, *p1, *buffer0, *buffer1;
574
  char const *end0, *beg0;
575
  char const **linbuf0, **linbuf1;
576
  lin i, lines;
577
  size_t n0, n1;
578
  lin alloc_lines0, alloc_lines1;
579
  lin buffered_prefix, prefix_count, prefix_mask;
580
  lin middle_guess, suffix_guess;
581

582
  slurp (&filevec[0]);
583
  prepare_text (&filevec[0]);
584
  if (filevec[0].desc != filevec[1].desc)
585
    {
586
      slurp (&filevec[1]);
587
      prepare_text (&filevec[1]);
588
    }
589
  else
590
    {
591
      filevec[1].buffer = filevec[0].buffer;
592
      filevec[1].bufsize = filevec[0].bufsize;
593
      filevec[1].buffered = filevec[0].buffered;
594
      filevec[1].missing_newline = filevec[0].missing_newline;
595
    }
596

597
  /* Find identical prefix.  */
598

599
  w0 = filevec[0].buffer;
600
  w1 = filevec[1].buffer;
601
  p0 = buffer0 = (char *) w0;
602
  p1 = buffer1 = (char *) w1;
603
  n0 = filevec[0].buffered;
604
  n1 = filevec[1].buffered;
605

606
  if (p0 == p1)
607
    /* The buffers are the same; sentinels won't work.  */
608
    p0 = p1 += n1;
609
  else
610
    {
611
      /* Insert end sentinels, in this case characters that are guaranteed
612
	 to make the equality test false, and thus terminate the loop.  */
613

614
      if (n0 < n1)
615
	p0[n0] = ~p1[n0];
616
      else
617
	p1[n1] = ~p0[n1];
618

619
      /* Loop until first mismatch, or to the sentinel characters.  */
620

621
      /* Compare a word at a time for speed.  */
622
      while (*w0 == *w1)
623
	w0++, w1++;
624

625
      /* Do the last few bytes of comparison a byte at a time.  */
626
      p0 = (char *) w0;
627
      p1 = (char *) w1;
628
      while (*p0 == *p1)
629
	p0++, p1++;
630

631
      /* Don't mistakenly count missing newline as part of prefix.  */
632
      if (ROBUST_OUTPUT_STYLE (output_style)
633
	  && ((buffer0 + n0 - filevec[0].missing_newline < p0)
634
	      !=
635
	      (buffer1 + n1 - filevec[1].missing_newline < p1)))
636
	p0--, p1--;
637
    }
638

639
  /* Now P0 and P1 point at the first nonmatching characters.  */
640

641
  /* Skip back to last line-beginning in the prefix,
642
     and then discard up to HORIZON_LINES lines from the prefix.  */
643
  i = horizon_lines;
644
  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
645
    p0--, p1--;
646

647
  /* Record the prefix.  */
648
  filevec[0].prefix_end = p0;
649
  filevec[1].prefix_end = p1;
650

651
  /* Find identical suffix.  */
652

653
  /* P0 and P1 point beyond the last chars not yet compared.  */
654
  p0 = buffer0 + n0;
655
  p1 = buffer1 + n1;
656

657
  if (! ROBUST_OUTPUT_STYLE (output_style)
658
      || filevec[0].missing_newline == filevec[1].missing_newline)
659
    {
660
      end0 = p0;	/* Addr of last char in file 0.  */
661

662
      /* Get value of P0 at which we should stop scanning backward:
663
	 this is when either P0 or P1 points just past the last char
664
	 of the identical prefix.  */
665
      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
666

667
      /* Scan back until chars don't match or we reach that point.  */
668
      for (; p0 != beg0; p0--, p1--)
669
	if (*p0 != *p1)
670
	  {
671
	    /* Point at the first char of the matching suffix.  */
672
	    beg0 = p0;
673
	    break;
674
	  }
675

676
      /* Are we at a line-beginning in both files?  If not, add the rest of
677
	 this line to the main body.  Discard up to HORIZON_LINES lines from
678
	 the identical suffix.  Also, discard one extra line,
679
	 because shift_boundaries may need it.  */
680
      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
681
			    &&
682
			    (buffer1 == p1 || p1[-1] == '\n'));
683
      while (i-- && p0 != end0)
684
	while (*p0++ != '\n')
685
	  continue;
686

687
      p1 += p0 - beg0;
688
    }
689

690
  /* Record the suffix.  */
691
  filevec[0].suffix_begin = p0;
692
  filevec[1].suffix_begin = p1;
693

694
  /* Calculate number of lines of prefix to save.
695

696
     prefix_count == 0 means save the whole prefix;
697
     we need this for options like -D that output the whole file,
698
     or for enormous contexts (to avoid worrying about arithmetic overflow).
699
     We also need it for options like -F that output some preceding line;
700
     at least we will need to find the last few lines,
701
     but since we don't know how many, it's easiest to find them all.
702

703
     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
704
     of the line buffer; they'll be moved to the proper location later.
705
     Handle 1 more line than the context says (because we count 1 too many),
706
     rounded up to the next power of 2 to speed index computation.  */
707

708
  if (no_diff_means_no_output && ! function_regexp.fastmap
709
      && context < LIN_MAX / 4 && context < n0)
710
    {
711
      middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
712
      suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
713
      for (prefix_count = 1;  prefix_count <= context;  prefix_count *= 2)
714
	continue;
715
      alloc_lines0 = (prefix_count + middle_guess
716
		      + MIN (context, suffix_guess));
717
    }
718
  else
719
    {
720
      prefix_count = 0;
721
      alloc_lines0 = guess_lines (0, 0, n0);
722
    }
723

724
  prefix_mask = prefix_count - 1;
725
  lines = 0;
726
  linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
727
  p0 = buffer0;
728

729
  /* If the prefix is needed, find the prefix lines.  */
730
  if (! (no_diff_means_no_output
731
	 && filevec[0].prefix_end == p0
732
	 && filevec[1].prefix_end == p1))
733
    {
734
      end0 = filevec[0].prefix_end;
735
      while (p0 != end0)
736
	{
737
	  lin l = lines++ & prefix_mask;
738
	  if (l == alloc_lines0)
739
	    {
740
	      if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
741
		xalloc_die ();
742
	      alloc_lines0 *= 2;
743
	      linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
744
	    }
745
	  linbuf0[l] = p0;
746
	  while (*p0++ != '\n')
747
	    continue;
748
	}
749
    }
750
  buffered_prefix = prefix_count && context < lines ? context : lines;
751

752
  /* Allocate line buffer 1.  */
753

754
  middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
755
  suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
756
  alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
757
  if (alloc_lines1 < buffered_prefix
758
      || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
759
    xalloc_die ();
760
  linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
761

762
  if (buffered_prefix != lines)
763
    {
764
      /* Rotate prefix lines to proper location.  */
765
      for (i = 0;  i < buffered_prefix;  i++)
766
	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
767
      for (i = 0;  i < buffered_prefix;  i++)
768
	linbuf0[i] = linbuf1[i];
769
    }
770

771
  /* Initialize line buffer 1 from line buffer 0.  */
772
  for (i = 0; i < buffered_prefix; i++)
773
    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
774

775
  /* Record the line buffer, adjusted so that
776
     linbuf[0] points at the first differing line.  */
777
  filevec[0].linbuf = linbuf0 + buffered_prefix;
778
  filevec[1].linbuf = linbuf1 + buffered_prefix;
779
  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
780
  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
781
  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
782
  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
783
}
784

785
/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
   than 2**k.  This table is derived from Chris K. Caldwell's list
   <http://www.utm.edu/research/primes/lists/2small/>.
   The table has one entry for each k from 0 through 63.  */

static unsigned char const prime_offset[] =
{
  0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
  15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
  11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
  55, 93, 1, 57, 25
};
796

797
/* Verify that this host's size_t is not too wide for the above table.  */
798

799
verify (enough_prime_offsets,
800
	sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
801

802
/* Given a vector of two file_data objects, read the file associated
803
   with each one, and build the table of equivalence classes.
804
   Return nonzero if either file appears to be a binary file.
805
   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
806

807
bool
808
read_files (struct file_data filevec[], bool pretend_binary)
809
{
810
  int i;
811
  bool skip_test = text | pretend_binary;
812
  bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
813

814
  if (filevec[0].desc != filevec[1].desc)
815
    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
816
  else
817
    {
818
      filevec[1].buffer = filevec[0].buffer;
819
      filevec[1].bufsize = filevec[0].bufsize;
820
      filevec[1].buffered = filevec[0].buffered;
821
    }
822
  if (appears_binary)
823
    {
824
      set_binary_mode (filevec[0].desc, true);
825
      set_binary_mode (filevec[1].desc, true);
826
      return true;
827
    }
828

829
  find_identical_ends (filevec);
830

831
  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
832
  if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
833
    xalloc_die ();
834
  equivs = xmalloc (equivs_alloc * sizeof *equivs);
835
  /* Equivalence class 0 is permanently safe for lines that were not
836
     hashed.  Real equivalence classes start at 1.  */
837
  equivs_index = 1;
838

839
  /* Allocate (one plus) a prime number of hash buckets.  Use a prime
840
     number between 1/3 and 2/3 of the value of equiv_allocs,
841
     approximately.  */
842
  for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
843
    continue;
844
  nbuckets = ((size_t) 1 << i) - prime_offset[i];
845
  if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
846
    xalloc_die ();
847
  buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
848
  buckets++;
849

850
  for (i = 0; i < 2; i++)
851
    find_and_hash_each_line (&filevec[i]);
852

853
  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
854

855
  free (equivs);
856
  free (buckets - 1);
857

858
  return false;
859
}
860

861
Product

Resources

Company