/* Source: GitHub repository freebsd/freebsd-src,
   path contrib/diff/src/cmp.c (branch main).  */
/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include "system.h"
22
#include "paths.h"
23
24
#include <stdio.h>
25
26
#include <c-stack.h>
27
#include <cmpbuf.h>
28
#include <error.h>
29
#include <exit.h>
30
#include <exitfail.h>
31
#include <file-type.h>
32
#include <getopt.h>
33
#include <hard-locale.h>
34
#include <inttostr.h>
35
#include <setmode.h>
36
#include <unlocked-io.h>
37
#include <version-etc.h>
38
#include <xalloc.h>
39
#include <xstrtol.h>
40
41
/* hard_locale_LC_MESSAGES expands to hard_locale (LC_MESSAGES) when
   LC_MESSAGES is defined and ENABLE_NLS is nonzero, and to the constant 0
   otherwise.  cmp () uses it to choose between the "char" and "byte"
   wording of the first-difference message.  */
#if defined LC_MESSAGES && ENABLE_NLS
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#else
# define hard_locale_LC_MESSAGES 0
#endif

static int cmp (void);
48
static off_t file_position (int);
49
static size_t block_compare (word const *, word const *);
50
static size_t block_compare_and_count (word const *, word const *, off_t *);
51
static void sprintc (char *, unsigned char);
52
53
/* Name under which this program was invoked. */
54
char *program_name;
55
56
/* Filenames of the compared files. */
57
static char const *file[2];
58
59
/* File descriptors of the files. */
60
static int file_desc[2];
61
62
/* Status of the files. */
63
static struct stat stat_buf[2];
64
65
/* Read buffers for the files. */
66
static word *buffer[2];
67
68
/* Optimal block size for the files. */
69
static size_t buf_size;
70
71
/* Initial prefix to ignore for each file. */
72
static off_t ignore_initial[2];
73
74
/* Number of bytes to compare. */
75
static uintmax_t bytes = UINTMAX_MAX;
76
77
/* Output format.  type_first_diff must stay first: its value 0 doubles as
   "no format chosen yet" in specify_comparison_type.  */
static enum comparison_type
  {
    type_first_diff,    /* Print the first difference.  */
    type_all_diffs,     /* Print all differences.  */
    type_status         /* Exit status only.  */
  } comparison_type;

/* If nonzero, print values of bytes quoted like cat -t does.  */
static bool opt_print_bytes;

/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so they cannot collide with any option
   character returned by getopt_long.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

static struct option const long_options[] =
95
{
96
{"print-bytes", 0, 0, 'b'},
97
{"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
98
{"ignore-initial", 1, 0, 'i'},
99
{"verbose", 0, 0, 'l'},
100
{"bytes", 1, 0, 'n'},
101
{"silent", 0, 0, 's'},
102
{"quiet", 0, 0, 's'},
103
{"version", 0, 0, 'v'},
104
{"help", 0, 0, HELP_OPTION},
105
{0, 0, 0, 0}
106
};
107
108
static void try_help (char const *, char const *) __attribute__((noreturn));
109
static void
110
try_help (char const *reason_msgid, char const *operand)
111
{
112
if (reason_msgid)
113
error (0, 0, _(reason_msgid), operand);
114
error (EXIT_TROUBLE, 0,
115
_("Try `%s --help' for more information."), program_name);
116
abort ();
117
}
118
119
/* Suffix characters passed to xstrtoumax when parsing the --ignore-initial
   and --bytes values.  */
static char const valid_suffixes[] = "kKMGTPEZY0";

/* Update ignore_initial[F] according to the result of parsing an
122
*operand ARGPTR of --ignore-initial, updating *ARGPTR to point
123
*after the operand. If DELIMITER is nonzero, the operand may be
124
*followed by DELIMITER; otherwise it must be null-terminated. */
125
static void
126
specify_ignore_initial (int f, char **argptr, char delimiter)
127
{
128
uintmax_t val;
129
off_t o;
130
char const *arg = *argptr;
131
strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
132
if (! (e == LONGINT_OK
133
|| (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134
|| (o = val) < 0 || o != val || val == UINTMAX_MAX)
135
try_help ("invalid --ignore-initial value `%s'", arg);
136
if (ignore_initial[f] < o)
137
ignore_initial[f] = o;
138
}
139
140
/* Specify the output format. */
141
static void
142
specify_comparison_type (enum comparison_type t)
143
{
144
if (comparison_type && comparison_type != t)
145
try_help ("options -l and -s are incompatible", 0);
146
comparison_type = t;
147
}
148
149
static void
150
check_stdout (void)
151
{
152
if (ferror (stdout))
153
error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154
else if (fclose (stdout) != 0)
155
error (EXIT_TROUBLE, errno, "%s", _("standard output"));
156
}
157
158
static char const * const option_help_msgid[] = {
159
N_("-b --print-bytes Print differing bytes."),
160
N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
161
N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
162
N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
163
N_("-l --verbose Output byte numbers and values of all differing bytes."),
164
N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
165
N_("-s --quiet --silent Output nothing; yield exit status only."),
166
N_("-v --version Output version info."),
167
N_("--help Output this help."),
168
0
169
};
170
171
static void
172
usage (void)
173
{
174
char const * const *p;
175
176
printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
177
program_name);
178
printf ("%s\n\n", _("Compare two files byte by byte."));
179
for (p = option_help_msgid; *p; p++)
180
printf (" %s\n", _(*p));
181
printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182
_("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183
_("SKIP values may be followed by the following multiplicative suffixes:\n\
184
kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185
GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186
_("If a FILE is `-' or missing, read standard input."),
187
_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188
_("Report bugs to <[email protected]>."));
189
}
190
191
int
192
main (int argc, char **argv)
193
{
194
int c, f, exit_status;
195
size_t words_per_buffer;
196
197
exit_failure = EXIT_TROUBLE;
198
initialize_main (&argc, &argv);
199
program_name = argv[0];
200
setlocale (LC_ALL, "");
201
bindtextdomain (PACKAGE, LOCALEDIR);
202
textdomain (PACKAGE);
203
c_stack_action (0);
204
205
/* Parse command line options. */
206
207
while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208
!= -1)
209
switch (c)
210
{
211
case 'b':
212
case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213
opt_print_bytes = true;
214
break;
215
216
case 'i':
217
specify_ignore_initial (0, &optarg, ':');
218
if (*optarg++ == ':')
219
specify_ignore_initial (1, &optarg, 0);
220
else if (ignore_initial[1] < ignore_initial[0])
221
ignore_initial[1] = ignore_initial[0];
222
break;
223
224
case 'l':
225
specify_comparison_type (type_all_diffs);
226
break;
227
228
case 'n':
229
{
230
uintmax_t n;
231
if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232
try_help ("invalid --bytes value `%s'", optarg);
233
if (n < bytes)
234
bytes = n;
235
}
236
break;
237
238
case 's':
239
specify_comparison_type (type_status);
240
break;
241
242
case 'v':
243
/* TRANSLATORS: Please translate the second "o" in "Torbjorn
244
Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245
WITH DIAERESIS) if possible. */
246
version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247
_("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
248
check_stdout ();
249
return EXIT_SUCCESS;
250
251
case HELP_OPTION:
252
usage ();
253
check_stdout ();
254
return EXIT_SUCCESS;
255
256
default:
257
try_help (0, 0);
258
}
259
260
if (optind == argc)
261
try_help ("missing operand after `%s'", argv[argc - 1]);
262
263
file[0] = argv[optind++];
264
file[1] = optind < argc ? argv[optind++] : "-";
265
266
for (f = 0; f < 2 && optind < argc; f++)
267
{
268
char *arg = argv[optind++];
269
specify_ignore_initial (f, &arg, 0);
270
}
271
272
if (optind < argc)
273
try_help ("extra operand `%s'", argv[optind]);
274
275
for (f = 0; f < 2; f++)
276
{
277
/* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278
stdin is closed and opening file[0] yields file descriptor 0. */
279
int f1 = f ^ (strcmp (file[1], "-") == 0);
280
281
/* Two files with the same name and offset are identical.
282
But wait until we open the file once, for proper diagnostics. */
283
if (f && ignore_initial[0] == ignore_initial[1]
284
&& file_name_cmp (file[0], file[1]) == 0)
285
return EXIT_SUCCESS;
286
287
file_desc[f1] = (strcmp (file[f1], "-") == 0
288
? STDIN_FILENO
289
: open (file[f1], O_RDONLY, 0));
290
if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291
{
292
if (file_desc[f1] < 0 && comparison_type == type_status)
293
exit (EXIT_TROUBLE);
294
else
295
error (EXIT_TROUBLE, errno, "%s", file[f1]);
296
}
297
298
set_binary_mode (file_desc[f1], true);
299
}
300
301
/* If the files are links to the same inode and have the same file position,
302
they are identical. */
303
304
if (0 < same_file (&stat_buf[0], &stat_buf[1])
305
&& same_file_attributes (&stat_buf[0], &stat_buf[1])
306
&& file_position (0) == file_position (1))
307
return EXIT_SUCCESS;
308
309
/* If output is redirected to the null device, we may assume `-s'. */
310
311
if (comparison_type != type_status)
312
{
313
struct stat outstat, nullstat;
314
315
if (fstat (STDOUT_FILENO, &outstat) == 0
316
&& stat (NULL_DEVICE, &nullstat) == 0
317
&& 0 < same_file (&outstat, &nullstat))
318
comparison_type = type_status;
319
}
320
321
/* If only a return code is needed,
322
and if both input descriptors are associated with plain files,
323
conclude that the files differ if they have different sizes
324
and if more bytes will be compared than are in the smaller file. */
325
326
if (comparison_type == type_status
327
&& S_ISREG (stat_buf[0].st_mode)
328
&& S_ISREG (stat_buf[1].st_mode))
329
{
330
off_t s0 = stat_buf[0].st_size - file_position (0);
331
off_t s1 = stat_buf[1].st_size - file_position (1);
332
if (s0 < 0)
333
s0 = 0;
334
if (s1 < 0)
335
s1 = 0;
336
if (s0 != s1 && MIN (s0, s1) < bytes)
337
exit (EXIT_FAILURE);
338
}
339
340
/* Get the optimal block size of the files. */
341
342
buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343
STAT_BLOCKSIZE (stat_buf[1]),
344
PTRDIFF_MAX - sizeof (word));
345
346
/* Allocate word-aligned buffers, with space for sentinels at the end. */
347
348
words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349
buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350
buffer[1] = buffer[0] + words_per_buffer;
351
352
exit_status = cmp ();
353
354
for (f = 0; f < 2; f++)
355
if (close (file_desc[f]) != 0)
356
error (EXIT_TROUBLE, errno, "%s", file[f]);
357
if (exit_status != 0 && comparison_type != type_status)
358
check_stdout ();
359
exit (exit_status);
360
return exit_status;
361
}
362
363
/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364
using `buffer[0]' and `buffer[1]'.
365
Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
366
>1 if error. */
367
368
static int
369
cmp (void)
370
{
371
off_t line_number = 1; /* Line number (1...) of difference. */
372
off_t byte_number = 1; /* Byte number (1...) of difference. */
373
uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
374
size_t read0, read1; /* Number of bytes read from each file. */
375
size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
376
size_t smaller; /* The lesser of `read0' and `read1'. */
377
word *buffer0 = buffer[0];
378
word *buffer1 = buffer[1];
379
char *buf0 = (char *) buffer0;
380
char *buf1 = (char *) buffer1;
381
int ret = EXIT_SUCCESS;
382
int f;
383
int offset_width;
384
385
if (comparison_type == type_all_diffs)
386
{
387
off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
388
389
for (f = 0; f < 2; f++)
390
if (S_ISREG (stat_buf[f].st_mode))
391
{
392
off_t file_bytes = stat_buf[f].st_size - file_position (f);
393
if (file_bytes < byte_number_max)
394
byte_number_max = file_bytes;
395
}
396
397
for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
398
continue;
399
}
400
401
for (f = 0; f < 2; f++)
402
{
403
off_t ig = ignore_initial[f];
404
if (ig && file_position (f) == -1)
405
{
406
/* lseek failed; read and discard the ignored initial prefix. */
407
do
408
{
409
size_t bytes_to_read = MIN (ig, buf_size);
410
size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411
if (r != bytes_to_read)
412
{
413
if (r == SIZE_MAX)
414
error (EXIT_TROUBLE, errno, "%s", file[f]);
415
break;
416
}
417
ig -= r;
418
}
419
while (ig);
420
}
421
}
422
423
do
424
{
425
size_t bytes_to_read = buf_size;
426
427
if (remaining != UINTMAX_MAX)
428
{
429
if (remaining < bytes_to_read)
430
bytes_to_read = remaining;
431
remaining -= bytes_to_read;
432
}
433
434
read0 = block_read (file_desc[0], buf0, bytes_to_read);
435
if (read0 == SIZE_MAX)
436
error (EXIT_TROUBLE, errno, "%s", file[0]);
437
read1 = block_read (file_desc[1], buf1, bytes_to_read);
438
if (read1 == SIZE_MAX)
439
error (EXIT_TROUBLE, errno, "%s", file[1]);
440
441
/* Insert sentinels for the block compare. */
442
443
buf0[read0] = ~buf1[read0];
444
buf1[read1] = ~buf0[read1];
445
446
/* If the line number should be written for differing files,
447
compare the blocks and count the number of newlines
448
simultaneously. */
449
first_diff = (comparison_type == type_first_diff
450
? block_compare_and_count (buffer0, buffer1, &line_number)
451
: block_compare (buffer0, buffer1));
452
453
byte_number += first_diff;
454
smaller = MIN (read0, read1);
455
456
if (first_diff < smaller)
457
{
458
switch (comparison_type)
459
{
460
case type_first_diff:
461
{
462
char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463
char line_buf[INT_BUFSIZE_BOUND (off_t)];
464
char const *byte_num = offtostr (byte_number, byte_buf);
465
char const *line_num = offtostr (line_number, line_buf);
466
if (!opt_print_bytes)
467
{
468
/* See POSIX 1003.1-2001 for this format. This
469
message is used only in the POSIX locale, so it
470
need not be translated. */
471
static char const char_message[] =
472
"%s %s differ: char %s, line %s\n";
473
474
/* The POSIX rationale recommends using the word
475
"byte" outside the POSIX locale. Some gettext
476
implementations translate even in the POSIX
477
locale if certain other environment variables
478
are set, so use "byte" if a translation is
479
available, or if outside the POSIX locale. */
480
static char const byte_msgid[] =
481
N_("%s %s differ: byte %s, line %s\n");
482
char const *byte_message = _(byte_msgid);
483
bool use_byte_message = (byte_message != byte_msgid
484
|| hard_locale_LC_MESSAGES);
485
486
printf (use_byte_message ? byte_message : char_message,
487
file[0], file[1], byte_num, line_num);
488
}
489
else
490
{
491
unsigned char c0 = buf0[first_diff];
492
unsigned char c1 = buf1[first_diff];
493
char s0[5];
494
char s1[5];
495
sprintc (s0, c0);
496
sprintc (s1, c1);
497
printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498
file[0], file[1], byte_num, line_num,
499
c0, s0, c1, s1);
500
}
501
}
502
/* Fall through. */
503
case type_status:
504
return EXIT_FAILURE;
505
506
case type_all_diffs:
507
do
508
{
509
unsigned char c0 = buf0[first_diff];
510
unsigned char c1 = buf1[first_diff];
511
if (c0 != c1)
512
{
513
char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514
char const *byte_num = offtostr (byte_number, byte_buf);
515
if (!opt_print_bytes)
516
{
517
/* See POSIX 1003.1-2001 for this format. */
518
printf ("%*s %3o %3o\n",
519
offset_width, byte_num, c0, c1);
520
}
521
else
522
{
523
char s0[5];
524
char s1[5];
525
sprintc (s0, c0);
526
sprintc (s1, c1);
527
printf ("%*s %3o %-4s %3o %s\n",
528
offset_width, byte_num, c0, s0, c1, s1);
529
}
530
}
531
byte_number++;
532
first_diff++;
533
}
534
while (first_diff < smaller);
535
ret = EXIT_FAILURE;
536
break;
537
}
538
}
539
540
if (read0 != read1)
541
{
542
if (comparison_type != type_status)
543
{
544
/* See POSIX 1003.1-2001 for this format. */
545
fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546
}
547
548
return EXIT_FAILURE;
549
}
550
}
551
while (read0 == buf_size);
552
553
return ret;
554
}
555
556
/* Compare two blocks of memory P0 and P1 until they differ,
557
and count the number of '\n' occurrences in the common
558
part of P0 and P1.
559
If the blocks are not guaranteed to be different, put sentinels at the ends
560
of the blocks before calling this function.
561
562
Return the offset of the first byte that differs.
563
Increment *COUNT by the count of '\n' occurrences. */
564
565
static size_t
566
block_compare_and_count (word const *p0, word const *p1, off_t *count)
567
{
568
word l; /* One word from first buffer. */
569
word const *l0, *l1; /* Pointers into each buffer. */
570
char const *c0, *c1; /* Pointers for finding exact address. */
571
size_t cnt = 0; /* Number of '\n' occurrences. */
572
word nnnn; /* Newline, sizeof (word) times. */
573
int i;
574
575
nnnn = 0;
576
for (i = 0; i < sizeof nnnn; i++)
577
nnnn = (nnnn << CHAR_BIT) | '\n';
578
579
/* Find the rough position of the first difference by reading words,
580
not bytes. */
581
582
for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583
{
584
l ^= nnnn;
585
for (i = 0; i < sizeof l; i++)
586
{
587
unsigned char uc = l;
588
cnt += ! uc;
589
l >>= CHAR_BIT;
590
}
591
}
592
593
/* Find the exact differing position (endianness independent). */
594
595
for (c0 = (char const *) l0, c1 = (char const *) l1;
596
*c0 == *c1;
597
c0++, c1++)
598
cnt += *c0 == '\n';
599
600
*count += cnt;
601
return c0 - (char const *) p0;
602
}
603
604
/* Compare two blocks of memory P0 and P1 until they differ.
605
If the blocks are not guaranteed to be different, put sentinels at the ends
606
of the blocks before calling this function.
607
608
Return the offset of the first byte that differs. */
609
610
static size_t
611
block_compare (word const *p0, word const *p1)
612
{
613
word const *l0, *l1;
614
char const *c0, *c1;
615
616
/* Find the rough position of the first difference by reading words,
617
not bytes. */
618
619
for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
620
continue;
621
622
/* Find the exact differing position (endianness independent). */
623
624
for (c0 = (char const *) l0, c1 = (char const *) l1;
625
*c0 == *c1;
626
c0++, c1++)
627
continue;
628
629
return c0 - (char const *) p0;
630
}
631
632
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.

   Bytes of 128 and above that are not printable get an "M-" prefix and
   are then treated as C - 128; control characters below 32 become '^'
   followed by C + 64, and 127 becomes "^?".  BUF must have room for
   5 bytes: the longest result is "M-^?" plus the terminating null.  */

static void
sprintc (char *buf, unsigned char c)
{
  if (! isprint (c))
    {
      if (c >= 128)
        {
          *buf++ = 'M';
          *buf++ = '-';
          c -= 128;
        }
      if (c < 32)
        {
          *buf++ = '^';
          c += 64;
        }
      else if (c == 127)
        {
          *buf++ = '^';
          c = '?';
        }
    }

  *buf++ = c;
  *buf = 0;
}

/* Position file F to ignore_initial[F] bytes from its initial position,
663
and yield its new position. Don't try more than once. */
664
665
static off_t
666
file_position (int f)
667
{
668
static bool positioned[2];
669
static off_t position[2];
670
671
if (! positioned[f])
672
{
673
positioned[f] = true;
674
position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675
}
676
return position[f];
677
}
678
679