/* Page provenance (not part of the program):
   GitHub repository freebsd/freebsd-src,
   path blob/main/contrib/diff/src/diff.c.  */
1
/* diff - compare files line by line
2
3
Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002,
4
2004 Free Software Foundation, Inc.
5
6
This file is part of GNU DIFF.
7
8
GNU DIFF is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2, or (at your option)
11
any later version.
12
13
GNU DIFF is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
See the GNU General Public License for more details.
17
18
You should have received a copy of the GNU General Public License
19
along with GNU DIFF; see the file COPYING.
20
If not, write to the Free Software Foundation,
21
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23
#define GDIFF_MAIN
24
#include "diff.h"
25
#include "paths.h"
26
#include <c-stack.h>
27
#include <dirname.h>
28
#include <error.h>
29
#include <exclude.h>
30
#include <exit.h>
31
#include <exitfail.h>
32
#include <file-type.h>
33
#include <fnmatch.h>
34
#include <getopt.h>
35
#include <hard-locale.h>
36
#include <posixver.h>
37
#include <prepargs.h>
38
#include <quotesys.h>
39
#include <setmode.h>
40
#include <version-etc.h>
41
#include <xalloc.h>
42
43
/* Smallest gutter, in print columns, left between the two halves of
   side-by-side output.  May be overridden at build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
46
47
/* A growing list of regexps given on the command line, kept both as
   text (so that their disjunction can be compiled later) and as a
   compiled pattern buffer.  */
struct regexp_list
{
  char *regexps;	/* chars representing disjunction of the regexps */
  size_t len;		/* chars used in `regexps' */
  size_t size;		/* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* where the compiled pattern is stored */
};
55
56
/* Forward declarations of the file-local functions defined below.  */
static int compare_files (struct comparison const *parent,
			  char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
			   char const *option);
static void try_help (char const *reason_msgid, char const *operand)
     __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
64
65
/* If comparing directories, compare their common subdirectories
66
recursively. */
67
static bool recursive;
68
69
/* In context diffs, show previous lines that match these regexps. */
70
static struct regexp_list function_regexp_list;
71
72
/* Ignore changes affecting only lines that match these regexps. */
73
static struct regexp_list ignore_regexp_list;
74
75
#if HAVE_SETMODE_DOS
76
/* Use binary I/O when reading and writing data (--binary).
77
On POSIX hosts, this has no effect. */
78
static bool binary;
79
#else
80
enum { binary = true };
81
#endif
82
83
/* When comparing directories, if a file appears only in one
84
directory, treat it as present but empty in the other (-N).
85
Then `patch' would create the file with appropriate contents. */
86
static bool new_file;
87
88
/* When comparing directories, if a file appears only in the second
89
directory of the two, treat it as present but empty in the other
90
(--unidirectional-new-file).
91
Then `patch' would create the file with appropriate contents. */
92
static bool unidirectional_new_file;
93
94
/* Report files compared that are the same (-s).
95
Normally nothing is output when that happens. */
96
static bool report_identical_files;
97
98
99
/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.

   The result is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  int i;
  size_t size = 1;	/* room for the terminating null byte */
  char *result;
  char *p;

  /* First pass: with a null destination, quote_system_arg's return
     value is used as the length each quoted argument needs; add one
     for the separating space.  */
  for (i = 0; i < count; i++)
    size += 1 + quote_system_arg ((char *) 0, optionvec[i]);

  p = result = xmalloc (size);

  /* Second pass: emit " ARG" for each argument.  */
  for (i = 0; i < count; i++)
    {
      *p++ = ' ';
      p += quote_system_arg (p, optionvec[i]);
    }

  *p = 0;
  return result;
}
129
130
131
/* Return an option value suitable for add_exclude. */
132
133
static int
134
exclude_options (void)
135
{
136
return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
137
}
138
139
/* Short options accepted by getopt_long; a letter followed by `:'
   takes an argument.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y";
141
142
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so that they cannot collide with any
   short option character.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence: the option value minus
     UNCHANGED_LINE_FORMAT_OPTION is used as an array index.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence: the option value minus
     UNCHANGED_GROUP_FORMAT_OPTION is used as an array index.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
172
173
/* Option names for the group formats, in the same order as the
   *_GROUP_FORMAT_OPTION values; used when reporting conflicts.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };
180
181
/* Option names for the line formats, in the same order as the
   *_LINE_FORMAT_OPTION values; used when reporting conflicts.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
187
188
static struct option const longopts[] =
189
{
190
{"binary", 0, 0, BINARY_OPTION},
191
{"brief", 0, 0, 'q'},
192
{"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
193
{"context", 2, 0, 'C'},
194
{"ed", 0, 0, 'e'},
195
{"exclude", 1, 0, 'x'},
196
{"exclude-from", 1, 0, 'X'},
197
{"expand-tabs", 0, 0, 't'},
198
{"forward-ed", 0, 0, 'f'},
199
{"from-file", 1, 0, FROM_FILE_OPTION},
200
{"help", 0, 0, HELP_OPTION},
201
{"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
202
{"ifdef", 1, 0, 'D'},
203
{"ignore-all-space", 0, 0, 'w'},
204
{"ignore-blank-lines", 0, 0, 'B'},
205
{"ignore-case", 0, 0, 'i'},
206
{"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
207
{"ignore-matching-lines", 1, 0, 'I'},
208
{"ignore-space-change", 0, 0, 'b'},
209
{"ignore-tab-expansion", 0, 0, 'E'},
210
{"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
211
{"initial-tab", 0, 0, 'T'},
212
{"label", 1, 0, 'L'},
213
{"left-column", 0, 0, LEFT_COLUMN_OPTION},
214
{"line-format", 1, 0, LINE_FORMAT_OPTION},
215
{"minimal", 0, 0, 'd'},
216
{"new-file", 0, 0, 'N'},
217
{"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
218
{"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
219
{"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
220
{"normal", 0, 0, NORMAL_OPTION},
221
{"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
222
{"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
223
{"paginate", 0, 0, 'l'},
224
{"rcs", 0, 0, 'n'},
225
{"recursive", 0, 0, 'r'},
226
{"report-identical-files", 0, 0, 's'},
227
{"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
228
{"show-c-function", 0, 0, 'p'},
229
{"show-function-line", 1, 0, 'F'},
230
{"side-by-side", 0, 0, 'y'},
231
{"speed-large-files", 0, 0, 'H'},
232
{"starting-file", 1, 0, 'S'},
233
{"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
234
{"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
235
{"tabsize", 1, 0, TABSIZE_OPTION},
236
{"text", 0, 0, 'a'},
237
{"to-file", 1, 0, TO_FILE_OPTION},
238
{"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
239
{"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
240
{"unidirectional-new-file", 0, 0, 'P'},
241
{"unified", 2, 0, 'U'},
242
{"version", 0, 0, 'v'},
243
{"width", 1, 0, 'W'},
244
{0, 0, 0, 0}
245
};
246
247
int
248
main (int argc, char **argv)
249
{
250
int exit_status = EXIT_SUCCESS;
251
int c;
252
int i;
253
int prev = -1;
254
lin ocontext = -1;
255
bool explicit_context = false;
256
size_t width = 0;
257
bool show_c_function = false;
258
char const *from_file = 0;
259
char const *to_file = 0;
260
uintmax_t numval;
261
char *numend;
262
263
/* Do our initializations. */
264
exit_failure = 2;
265
initialize_main (&argc, &argv);
266
program_name = argv[0];
267
setlocale (LC_ALL, "");
268
textdomain (PACKAGE);
269
c_stack_action (0);
270
function_regexp_list.buf = &function_regexp;
271
ignore_regexp_list.buf = &ignore_regexp;
272
re_set_syntax (RE_SYNTAX_GREP);
273
excluded = new_exclude ();
274
275
prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);
276
277
/* Decode the options. */
278
279
while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
280
{
281
switch (c)
282
{
283
case 0:
284
break;
285
286
case '0':
287
case '1':
288
case '2':
289
case '3':
290
case '4':
291
case '5':
292
case '6':
293
case '7':
294
case '8':
295
case '9':
296
if (! ISDIGIT (prev))
297
ocontext = c - '0';
298
else if (LIN_MAX / 10 < ocontext
299
|| ((ocontext = 10 * ocontext + c - '0') < 0))
300
ocontext = LIN_MAX;
301
break;
302
303
case 'a':
304
text = true;
305
break;
306
307
case 'b':
308
if (ignore_white_space < IGNORE_SPACE_CHANGE)
309
ignore_white_space = IGNORE_SPACE_CHANGE;
310
break;
311
312
case 'B':
313
ignore_blank_lines = true;
314
break;
315
316
case 'C':
317
case 'U':
318
{
319
if (optarg)
320
{
321
numval = strtoumax (optarg, &numend, 10);
322
if (*numend)
323
try_help ("invalid context length `%s'", optarg);
324
if (LIN_MAX < numval)
325
numval = LIN_MAX;
326
}
327
else
328
numval = 3;
329
330
specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
331
if (context < numval)
332
context = numval;
333
explicit_context = true;
334
}
335
break;
336
337
case 'c':
338
specify_style (OUTPUT_CONTEXT);
339
if (context < 3)
340
context = 3;
341
break;
342
343
case 'd':
344
minimal = true;
345
break;
346
347
case 'D':
348
specify_style (OUTPUT_IFDEF);
349
{
350
static char const C_ifdef_group_formats[] =
351
"%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
352
char *b = xmalloc (sizeof C_ifdef_group_formats
353
+ 7 * strlen (optarg) - 14 /* 7*"%s" */
354
- 8 /* 5*"%%" + 3*"%c" */);
355
sprintf (b, C_ifdef_group_formats,
356
0,
357
optarg, optarg, 0,
358
optarg, optarg, 0,
359
optarg, optarg, optarg);
360
for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
361
{
362
specify_value (&group_format[i], b, "-D");
363
b += strlen (b) + 1;
364
}
365
}
366
break;
367
368
case 'e':
369
specify_style (OUTPUT_ED);
370
break;
371
372
case 'E':
373
if (ignore_white_space < IGNORE_TAB_EXPANSION)
374
ignore_white_space = IGNORE_TAB_EXPANSION;
375
break;
376
377
case 'f':
378
specify_style (OUTPUT_FORWARD_ED);
379
break;
380
381
case 'F':
382
add_regexp (&function_regexp_list, optarg);
383
break;
384
385
case 'h':
386
/* Split the files into chunks for faster processing.
387
Usually does not change the result.
388
389
This currently has no effect. */
390
break;
391
392
case 'H':
393
speed_large_files = true;
394
break;
395
396
case 'i':
397
ignore_case = true;
398
break;
399
400
case 'I':
401
add_regexp (&ignore_regexp_list, optarg);
402
break;
403
404
case 'l':
405
if (!pr_program[0])
406
try_help ("pagination not supported on this host", 0);
407
paginate = true;
408
#ifdef SIGCHLD
409
/* Pagination requires forking and waiting, and
410
System V fork+wait does not work if SIGCHLD is ignored. */
411
signal (SIGCHLD, SIG_DFL);
412
#endif
413
break;
414
415
case 'L':
416
if (!file_label[0])
417
file_label[0] = optarg;
418
else if (!file_label[1])
419
file_label[1] = optarg;
420
else
421
fatal ("too many file label options");
422
break;
423
424
case 'n':
425
specify_style (OUTPUT_RCS);
426
break;
427
428
case 'N':
429
new_file = true;
430
break;
431
432
case 'o':
433
/* Output in the old tradition style. */
434
specify_style (OUTPUT_NORMAL);
435
break;
436
437
case 'p':
438
show_c_function = true;
439
add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
440
break;
441
442
case 'P':
443
unidirectional_new_file = true;
444
break;
445
446
case 'q':
447
brief = true;
448
break;
449
450
case 'r':
451
recursive = true;
452
break;
453
454
case 's':
455
report_identical_files = true;
456
break;
457
458
case 'S':
459
specify_value (&starting_file, optarg, "-S");
460
break;
461
462
case 't':
463
expand_tabs = true;
464
break;
465
466
case 'T':
467
initial_tab = true;
468
break;
469
470
case 'u':
471
specify_style (OUTPUT_UNIFIED);
472
if (context < 3)
473
context = 3;
474
break;
475
476
case 'v':
477
version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION,
478
"Paul Eggert", "Mike Haertel", "David Hayes",
479
"Richard Stallman", "Len Tower", (char *) 0);
480
check_stdout ();
481
return EXIT_SUCCESS;
482
483
case 'w':
484
ignore_white_space = IGNORE_ALL_SPACE;
485
break;
486
487
case 'x':
488
add_exclude (excluded, optarg, exclude_options ());
489
break;
490
491
case 'X':
492
if (add_exclude_file (add_exclude, excluded, optarg,
493
exclude_options (), '\n'))
494
pfatal_with_name (optarg);
495
break;
496
497
case 'y':
498
specify_style (OUTPUT_SDIFF);
499
break;
500
501
case 'W':
502
numval = strtoumax (optarg, &numend, 10);
503
if (! (0 < numval && numval <= SIZE_MAX) || *numend)
504
try_help ("invalid width `%s'", optarg);
505
if (width != numval)
506
{
507
if (width)
508
fatal ("conflicting width options");
509
width = numval;
510
}
511
break;
512
513
case BINARY_OPTION:
514
#if HAVE_SETMODE_DOS
515
binary = true;
516
set_binary_mode (STDOUT_FILENO, true);
517
#endif
518
break;
519
520
case FROM_FILE_OPTION:
521
specify_value (&from_file, optarg, "--from-file");
522
break;
523
524
case HELP_OPTION:
525
usage ();
526
check_stdout ();
527
return EXIT_SUCCESS;
528
529
case HORIZON_LINES_OPTION:
530
numval = strtoumax (optarg, &numend, 10);
531
if (*numend)
532
try_help ("invalid horizon length `%s'", optarg);
533
horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
534
break;
535
536
case IGNORE_FILE_NAME_CASE_OPTION:
537
ignore_file_name_case = true;
538
break;
539
540
case INHIBIT_HUNK_MERGE_OPTION:
541
/* This option is obsolete, but accept it for backward
542
compatibility. */
543
break;
544
545
case LEFT_COLUMN_OPTION:
546
left_column = true;
547
break;
548
549
case LINE_FORMAT_OPTION:
550
specify_style (OUTPUT_IFDEF);
551
for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
552
specify_value (&line_format[i], optarg, "--line-format");
553
break;
554
555
case NO_IGNORE_FILE_NAME_CASE_OPTION:
556
ignore_file_name_case = false;
557
break;
558
559
case NORMAL_OPTION:
560
specify_style (OUTPUT_NORMAL);
561
break;
562
563
case SDIFF_MERGE_ASSIST_OPTION:
564
specify_style (OUTPUT_SDIFF);
565
sdiff_merge_assist = true;
566
break;
567
568
case STRIP_TRAILING_CR_OPTION:
569
strip_trailing_cr = true;
570
break;
571
572
case SUPPRESS_COMMON_LINES_OPTION:
573
suppress_common_lines = true;
574
break;
575
576
case TABSIZE_OPTION:
577
numval = strtoumax (optarg, &numend, 10);
578
if (! (0 < numval && numval <= SIZE_MAX) || *numend)
579
try_help ("invalid tabsize `%s'", optarg);
580
if (tabsize != numval)
581
{
582
if (tabsize)
583
fatal ("conflicting tabsize options");
584
tabsize = numval;
585
}
586
break;
587
588
case TO_FILE_OPTION:
589
specify_value (&to_file, optarg, "--to-file");
590
break;
591
592
case UNCHANGED_LINE_FORMAT_OPTION:
593
case OLD_LINE_FORMAT_OPTION:
594
case NEW_LINE_FORMAT_OPTION:
595
specify_style (OUTPUT_IFDEF);
596
c -= UNCHANGED_LINE_FORMAT_OPTION;
597
specify_value (&line_format[c], optarg, line_format_option[c]);
598
break;
599
600
case UNCHANGED_GROUP_FORMAT_OPTION:
601
case OLD_GROUP_FORMAT_OPTION:
602
case NEW_GROUP_FORMAT_OPTION:
603
case CHANGED_GROUP_FORMAT_OPTION:
604
specify_style (OUTPUT_IFDEF);
605
c -= UNCHANGED_GROUP_FORMAT_OPTION;
606
specify_value (&group_format[c], optarg, group_format_option[c]);
607
break;
608
609
default:
610
try_help (0, 0);
611
}
612
prev = c;
613
}
614
615
if (output_style == OUTPUT_UNSPECIFIED)
616
{
617
if (show_c_function)
618
{
619
specify_style (OUTPUT_CONTEXT);
620
if (ocontext < 0)
621
context = 3;
622
}
623
else
624
specify_style (OUTPUT_NORMAL);
625
}
626
627
if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
628
{
629
#ifdef ST_MTIM_NSEC
630
time_format = "%Y-%m-%d %H:%M:%S.%N %z";
631
#else
632
time_format = "%Y-%m-%d %H:%M:%S %z";
633
#endif
634
}
635
else
636
{
637
/* See POSIX 1003.1-2001 for this format. */
638
time_format = "%a %b %e %T %Y";
639
}
640
641
if (0 <= ocontext)
642
{
643
bool modern_usage = 200112 <= posix2_version ();
644
645
if ((output_style == OUTPUT_CONTEXT
646
|| output_style == OUTPUT_UNIFIED)
647
&& (context < ocontext
648
|| (ocontext < context && ! explicit_context)))
649
{
650
if (modern_usage)
651
{
652
error (0, 0,
653
_("`-%ld' option is obsolete; use `-%c %ld'"),
654
(long int) ocontext,
655
output_style == OUTPUT_CONTEXT ? 'C' : 'U',
656
(long int) ocontext);
657
try_help (0, 0);
658
}
659
context = ocontext;
660
}
661
else
662
{
663
if (modern_usage)
664
{
665
error (0, 0, _("`-%ld' option is obsolete; omit it"),
666
(long int) ocontext);
667
try_help (0, 0);
668
}
669
}
670
}
671
672
if (! tabsize)
673
tabsize = 8;
674
if (! width)
675
width = 130;
676
677
{
678
/* Maximize first the half line width, and then the gutter width,
679
according to the following constraints:
680
681
1. Two half lines plus a gutter must fit in a line.
682
2. If the half line width is nonzero:
683
a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
684
b. If tabs are not expanded to spaces,
685
a half line plus a gutter is an integral number of tabs,
686
so that tabs in the right column line up. */
687
688
intmax_t t = expand_tabs ? 1 : tabsize;
689
intmax_t w = width;
690
intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
691
sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
692
sdiff_column2_offset = sdiff_half_width ? off : w;
693
}
694
695
/* Make the horizon at least as large as the context, so that
696
shift_boundaries has more freedom to shift the first and last hunks. */
697
if (horizon_lines < context)
698
horizon_lines = context;
699
700
summarize_regexp_list (&function_regexp_list);
701
summarize_regexp_list (&ignore_regexp_list);
702
703
if (output_style == OUTPUT_IFDEF)
704
{
705
for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
706
if (!line_format[i])
707
line_format[i] = "%l\n";
708
if (!group_format[OLD])
709
group_format[OLD]
710
= group_format[CHANGED] ? group_format[CHANGED] : "%<";
711
if (!group_format[NEW])
712
group_format[NEW]
713
= group_format[CHANGED] ? group_format[CHANGED] : "%>";
714
if (!group_format[UNCHANGED])
715
group_format[UNCHANGED] = "%=";
716
if (!group_format[CHANGED])
717
group_format[CHANGED] = concat (group_format[OLD],
718
group_format[NEW], "");
719
}
720
721
no_diff_means_no_output =
722
(output_style == OUTPUT_IFDEF ?
723
(!*group_format[UNCHANGED]
724
|| (strcmp (group_format[UNCHANGED], "%=") == 0
725
&& !*line_format[UNCHANGED]))
726
: (output_style != OUTPUT_SDIFF) | suppress_common_lines);
727
728
files_can_be_treated_as_binary =
729
(brief & binary
730
& ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
731
| (ignore_regexp_list.regexps || ignore_white_space)));
732
733
switch_string = option_list (argv + 1, optind - 1);
734
735
if (from_file)
736
{
737
if (to_file)
738
fatal ("--from-file and --to-file both specified");
739
else
740
for (; optind < argc; optind++)
741
{
742
int status = compare_files ((struct comparison *) 0,
743
from_file, argv[optind]);
744
if (exit_status < status)
745
exit_status = status;
746
}
747
}
748
else
749
{
750
if (to_file)
751
for (; optind < argc; optind++)
752
{
753
int status = compare_files ((struct comparison *) 0,
754
argv[optind], to_file);
755
if (exit_status < status)
756
exit_status = status;
757
}
758
else
759
{
760
if (argc - optind != 2)
761
{
762
if (argc - optind < 2)
763
try_help ("missing operand after `%s'", argv[argc - 1]);
764
else
765
try_help ("extra operand `%s'", argv[optind + 2]);
766
}
767
768
exit_status = compare_files ((struct comparison *) 0,
769
argv[optind], argv[optind + 1]);
770
}
771
}
772
773
/* Print any messages that were saved up for last. */
774
print_message_queue ();
775
776
check_stdout ();
777
exit (exit_status);
778
return exit_status;
779
}
780
781
/* Append to REGLIST the regexp PATTERN. */
782
783
static void
784
add_regexp (struct regexp_list *reglist, char const *pattern)
785
{
786
size_t patlen = strlen (pattern);
787
char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
788
789
if (m != 0)
790
error (0, 0, "%s: %s", pattern, m);
791
else
792
{
793
char *regexps = reglist->regexps;
794
size_t len = reglist->len;
795
bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
796
size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
797
size_t size = reglist->size;
798
799
if (size <= newlen)
800
{
801
if (!size)
802
size = 1;
803
804
do size *= 2;
805
while (size <= newlen);
806
807
reglist->size = size;
808
reglist->regexps = regexps = xrealloc (regexps, size);
809
}
810
if (multiple_regexps)
811
{
812
regexps[len++] = '\\';
813
regexps[len++] = '|';
814
}
815
memcpy (regexps + len, pattern, patlen + 1);
816
}
817
}
818
819
/* Ensure that REGLIST represents the disjunction of its regexps.
820
This is done here, rather than earlier, to avoid O(N^2) behavior. */
821
822
static void
823
summarize_regexp_list (struct regexp_list *reglist)
824
{
825
if (reglist->regexps)
826
{
827
/* At least one regexp was specified. Allocate a fastmap for it. */
828
reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
829
if (reglist->multiple_regexps)
830
{
831
/* Compile the disjunction of the regexps.
832
(If just one regexp was specified, it is already compiled.) */
833
char const *m = re_compile_pattern (reglist->regexps, reglist->len,
834
reglist->buf);
835
if (m != 0)
836
error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
837
}
838
}
839
}
840
841
static void
842
try_help (char const *reason_msgid, char const *operand)
843
{
844
if (reason_msgid)
845
error (0, 0, _(reason_msgid), operand);
846
error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
847
program_name);
848
abort ();
849
}
850
851
/* Close standard output, reporting a fatal error if an earlier write
   to it failed or if closing it fails.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    fatal ("write failed");
  else if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
859
860
static char const * const option_help_msgid[] = {
861
N_("Compare files line by line."),
862
"",
863
N_("-i --ignore-case Ignore case differences in file contents."),
864
N_("--ignore-file-name-case Ignore case when comparing file names."),
865
N_("--no-ignore-file-name-case Consider case when comparing file names."),
866
N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."),
867
N_("-b --ignore-space-change Ignore changes in the amount of white space."),
868
N_("-w --ignore-all-space Ignore all white space."),
869
N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."),
870
N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."),
871
N_("--strip-trailing-cr Strip trailing carriage return on input."),
872
#if HAVE_SETMODE_DOS
873
N_("--binary Read and write data in binary mode."),
874
#endif
875
N_("-a --text Treat all files as text."),
876
"",
877
N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\
878
-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\
879
--label LABEL Use LABEL instead of file name.\n\
880
-p --show-c-function Show which C function each change is in.\n\
881
-F RE --show-function-line=RE Show the most recent line matching RE."),
882
N_("-q --brief Output only whether files differ."),
883
N_("-e --ed Output an ed script."),
884
N_("--normal Output a normal diff."),
885
N_("-n --rcs Output an RCS format diff."),
886
N_("-y --side-by-side Output in two columns.\n\
887
-W NUM --width=NUM Output at most NUM (default 130) print columns.\n\
888
--left-column Output only the left column of common lines.\n\
889
--suppress-common-lines Do not output common lines."),
890
N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."),
891
N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."),
892
N_("--line-format=LFMT Similar, but format all input lines with LFMT."),
893
N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."),
894
N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."),
895
N_(" GFMT may contain:\n\
896
%< lines from FILE1\n\
897
%> lines from FILE2\n\
898
%= lines common to FILE1 and FILE2\n\
899
%[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
900
LETTERs are as follows for new group, lower case for old group:\n\
901
F first line number\n\
902
L last line number\n\
903
N number of lines = L-F+1\n\
904
E F-1\n\
905
M L+1"),
906
N_(" LFMT may contain:\n\
907
%L contents of line\n\
908
%l contents of line, excluding any trailing newline\n\
909
%[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
910
N_(" Either GFMT or LFMT may contain:\n\
911
%% %\n\
912
%c'C' the single character C\n\
913
%c'\\OOO' the character with octal code OOO"),
914
"",
915
N_("-l --paginate Pass the output through `pr' to paginate it."),
916
N_("-t --expand-tabs Expand tabs to spaces in output."),
917
N_("-T --initial-tab Make tabs line up by prepending a tab."),
918
N_("--tabsize=NUM Tab stops are every NUM (default 8) print columns."),
919
"",
920
N_("-r --recursive Recursively compare any subdirectories found."),
921
N_("-N --new-file Treat absent files as empty."),
922
N_("--unidirectional-new-file Treat absent first files as empty."),
923
N_("-s --report-identical-files Report when two files are the same."),
924
N_("-x PAT --exclude=PAT Exclude files that match PAT."),
925
N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."),
926
N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."),
927
N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."),
928
N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."),
929
"",
930
N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."),
931
N_("-d --minimal Try hard to find a smaller set of changes."),
932
N_("--speed-large-files Assume large files and many scattered small changes."),
933
"",
934
N_("-v --version Output version info."),
935
N_("--help Output this help."),
936
"",
937
N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
938
N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
939
N_("If a FILE is `-', read standard input."),
940
N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
941
"",
942
N_("Report bugs to <[email protected]>."),
943
0
944
};
945
946
static void
947
usage (void)
948
{
949
char const * const *p;
950
951
printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
952
953
for (p = option_help_msgid; *p; p++)
954
{
955
if (!**p)
956
putchar ('\n');
957
else
958
{
959
char const *msg = _(*p);
960
char const *nl;
961
while ((nl = strchr (msg, '\n')))
962
{
963
int msglen = nl + 1 - msg;
964
printf (" %.*s", msglen, msg);
965
msg = nl + 1;
966
}
967
968
printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
969
}
970
}
971
}
972
973
/* Set *VAR to VALUE, reporting an OPTION error if this is a
   conflict, i.e. if *VAR is already set to a different string.
   Setting the same value again is allowed.  On conflict the program
   exits via try_help.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  if (*var && strcmp (*var, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }
  *var = value;
}
985
986
/* Set the output style to STYLE, diagnosing conflicts. */
987
static void
988
specify_style (enum output_style style)
989
{
990
if (output_style != style)
991
{
992
output_style = style;
993
}
994
}
995
996
/* Set the last-modified time of *ST to be the current time.

   When the stat structure has a nanosecond field (ST_MTIM_NSEC),
   try clock_gettime and then gettimeofday, where available, to get a
   subsecond timestamp; fall back on time if neither is available or
   both fail.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval timeval;
    if (gettimeofday (&timeval, 0) == 0)
      {
        st->st_mtime = timeval.tv_sec;
        /* Convert microseconds to nanoseconds.  */
        st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  time (&st->st_mtime);
}
1024
1025
/* Compare two files (or dirs) with parent comparison PARENT
   and names NAME0 and NAME1.
   (If PARENT is 0, then the first name is just NAME0, etc.)
   This is self-contained; it opens the files and closes them.

   NAME0 or NAME1 may be 0 to denote an entry that has no counterpart
   on that side.  Unless new_file (or unidirectional_new_file, for a
   missing NAME0) is set, such an entry is reported with an "Only in"
   message; otherwise it is compared as a nonexistent file.
   NOTE(review): the "Only in" paths read PARENT->file, so a null name
   is presumably passed only with a nonnull PARENT -- confirm against
   the callers.

   If PARENT is 0 and exactly one of the two names is a directory,
   the comparison uses the file in that directory that has the other
   file's base name.

   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
   different, EXIT_TROUBLE if there is a problem opening them. */
1032
1033
static int
1034
compare_files (struct comparison const *parent,
1035
char const *name0,
1036
char const *name1)
1037
{
1038
struct comparison cmp;
1039
/* DIR_P (F) is 1 if the stat data of cmp.file[F] describe a
   directory, and 0 otherwise. */
#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1040
register int f;
1041
int status = EXIT_SUCCESS;
1042
/* Set as a side effect of the `else if' chain below; read only in
   the final branch of that chain. */
bool same_files;
1043
/* Names built by this function that are handed to free before
   returning, or 0 if there is nothing to free. */
char *free0, *free1;
1044
1045
/* If this is directory comparison, perhaps we have a file
   that exists only in one of the directories.
   If so, just print a message to that effect. */
1048
1049
if (! ((name0 && name1)
1050
|| (unidirectional_new_file && name1)
1051
|| new_file))
1052
{
1053
char const *name = name0 == 0 ? name1 : name0;
1054
/* The index is 1 (the second directory) when NAME0 is missing,
   and 0 (the first directory) otherwise. */
char const *dir = parent->file[name0 == 0].name;
1055
1056
/* See POSIX 1003.1-2001 for this format. */
1057
message ("Only in %s: %s\n", dir, name);
1058
1059
/* Return EXIT_FAILURE so that diff_dirs will return
   EXIT_FAILURE ("some files differ"). */
1061
return EXIT_FAILURE;
1062
}
1063
1064
memset (cmp.file, 0, sizeof cmp.file);
1065
cmp.parent = parent;
1066
1067
/* cmp.file[f].desc markers.
   Values of 0 and above are real file descriptors (standard input
   or the result of open).  -1 and -2 are the two markers below, and
   values of -3 and below hold an errno value E encoded as -3 - E. */
1068
#define NONEXISTENT (-1) /* nonexistent file */
1069
#define UNOPENED (-2) /* unopened file (e.g. directory) */
1070
#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1071
1072
#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1073
1074
cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1075
cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1076
1077
/* Now record the full name of each file, including nonexistent ones.
   A missing name is replaced by the name from the other side. */
1078
1079
if (name0 == 0)
1080
name0 = name1;
1081
if (name1 == 0)
1082
name1 = name0;
1083
1084
if (!parent)
1085
{
1086
/* Top-level comparison: the names are used as given, so there is
   nothing to free. */
free0 = 0;
1087
free1 = 0;
1088
cmp.file[0].name = name0;
1089
cmp.file[1].name = name1;
1090
}
1091
else
1092
{
1093
/* Build each name from the parent's name on that side and the
   entry name.  The results are remembered in free0 and free1,
   which are passed to free at the end of this function. */
cmp.file[0].name = free0
1094
= dir_file_pathname (parent->file[0].name, name0);
1095
cmp.file[1].name = free1
1096
= dir_file_pathname (parent->file[1].name, name1);
1097
}
1098
1099
/* Stat the files.  Files already marked NONEXISTENT are skipped. */
1100
1101
for (f = 0; f < 2; f++)
1102
{
1103
if (cmp.file[f].desc != NONEXISTENT)
1104
{
1105
/* If the second name compares equal to the first, reuse the
   first file's desc value and stat data instead of statting
   again. */
if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1106
{
1107
cmp.file[f].desc = cmp.file[0].desc;
1108
cmp.file[f].stat = cmp.file[0].stat;
1109
}
1110
/* The name "-" selects standard input. */
else if (strcmp (cmp.file[f].name, "-") == 0)
1111
{
1112
cmp.file[f].desc = STDIN_FILENO;
1113
if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1114
cmp.file[f].desc = ERRNO_ENCODE (errno);
1115
else
1116
{
1117
if (S_ISREG (cmp.file[f].stat.st_mode))
1118
{
1119
/* For a regular file, reduce the recorded size by the
   current file offset, but never below zero. */
off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1120
if (pos < 0)
1121
cmp.file[f].desc = ERRNO_ENCODE (errno);
1122
else
1123
cmp.file[f].stat.st_size =
1124
MAX (0, cmp.file[f].stat.st_size - pos);
1125
}
1126
1127
/* POSIX 1003.1-2001 requires current time for
   stdin. */
1129
set_mtime_to_now (&cmp.file[f].stat);
1130
}
1131
}
1132
/* Any other name is statted by name; a failure is recorded as an
   encoded errno in desc and reported further below. */
else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1133
cmp.file[f].desc = ERRNO_ENCODE (errno);
1134
}
1135
}
1136
1137
/* Mark files as nonexistent as needed for -N and -P, if they are
   inaccessible empty regular files (the kind of files that 'patch'
   creates to indicate nonexistent backups), or if they are
   top-level files that do not exist but their counterparts do
   exist. */
1142
for (f = 0; f < 2; f++)
1143
if ((new_file || (f == 0 && unidirectional_new_file))
1144
&& (cmp.file[f].desc == UNOPENED
1145
? (S_ISREG (cmp.file[f].stat.st_mode)
1146
&& ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1147
&& cmp.file[f].stat.st_size == 0)
1148
: (cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1149
&& ! parent
1150
&& cmp.file[1 - f].desc == UNOPENED)))
1151
cmp.file[f].desc = NONEXISTENT;
1152
1153
/* Give each nonexistent file zeroed stat data, except that its mode
   is copied from the other file, so the file-type tests below see
   the same type on both sides. */
for (f = 0; f < 2; f++)
1154
if (cmp.file[f].desc == NONEXISTENT)
1155
{
1156
memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1157
cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1158
}
1159
1160
/* Report any stat failure recorded above.  Only an encoded errno
   decodes to a nonnegative value; the two markers and real
   descriptors all decode to negative values. */
for (f = 0; f < 2; f++)
1161
{
1162
int e = ERRNO_DECODE (cmp.file[f].desc);
1163
if (0 <= e)
1164
{
1165
errno = e;
1166
perror_with_name (cmp.file[f].name);
1167
status = EXIT_TROUBLE;
1168
}
1169
}
1170
1171
if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1172
{
1173
/* If one is a directory, and it was specified in the command line,
   use the file in that dir with the other file's basename. */
1175
1176
/* DIR_P yields 0 or 1, so when file 0 is the directory the plain
   file is file 1, and vice versa. */
int fnm_arg = DIR_P (0);
1177
int dir_arg = 1 - fnm_arg;
1178
char const *fnm = cmp.file[fnm_arg].name;
1179
char const *dir = cmp.file[dir_arg].name;
1180
/* free0 is 0 on this path because PARENT is 0, so it is reused to
   remember the newly built name, whichever side that name is on. */
char const *filename = cmp.file[dir_arg].name = free0
1181
= dir_file_pathname (dir, base_name (fnm));
1182
1183
if (strcmp (fnm, "-") == 0)
1184
fatal ("cannot compare `-' to a directory");
1185
1186
/* Replace the directory's stat data with that of the file found
   inside it. */
if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1187
{
1188
perror_with_name (filename);
1189
status = EXIT_TROUBLE;
1190
}
1191
}
1192
1193
/* Decide what kind of comparison to make.  The branches are tried in
   order, and each later branch relies on the earlier conditions
   having been false. */
if (status != EXIT_SUCCESS)
1194
{
1195
/* One of the files should exist but does not. */
1196
}
1197
else if (cmp.file[0].desc == NONEXISTENT
1198
&& cmp.file[1].desc == NONEXISTENT)
1199
{
1200
/* Neither file "exists", so there's nothing to compare. */
1201
}
1202
/* Note the assignment inside this condition: same_files is set here
   whenever the condition is evaluated, and the final `else' branch
   reads it. */
else if ((same_files
1203
= (cmp.file[0].desc != NONEXISTENT
1204
&& cmp.file[1].desc != NONEXISTENT
1205
&& 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1206
&& same_file_attributes (&cmp.file[0].stat,
1207
&cmp.file[1].stat)))
1208
&& no_diff_means_no_output)
1209
{
1210
/* The two named files are actually the same physical file.
   We know they are identical without actually reading them. */
1212
}
1213
else if (DIR_P (0) & DIR_P (1))
1214
{
1215
if (output_style == OUTPUT_IFDEF)
1216
fatal ("-D option not supported with directories");
1217
1218
/* If both are directories, compare the files in them. */
1219
1220
if (parent && !recursive)
1221
{
1222
/* But don't compare dir contents one level down
   unless -r was specified.
   See POSIX 1003.1-2001 for this format. */
1225
message ("Common subdirectories: %s and %s\n",
1226
cmp.file[0].name, cmp.file[1].name);
1227
}
1228
else
1229
status = diff_dirs (&cmp, compare_files);
1230
}
1231
/* Exactly one side is a directory, or, within a directory
   comparison, at least one side is not a regular file. */
else if ((DIR_P (0) | DIR_P (1))
1232
|| (parent
1233
&& (! S_ISREG (cmp.file[0].stat.st_mode)
1234
|| ! S_ISREG (cmp.file[1].stat.st_mode))))
1235
{
1236
if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1237
{
1238
/* We have a subdirectory that exists only in one directory. */
1239
1240
if ((DIR_P (0) | DIR_P (1))
1241
&& recursive
1242
&& (new_file
1243
|| (unidirectional_new_file
1244
&& cmp.file[0].desc == NONEXISTENT)))
1245
status = diff_dirs (&cmp, compare_files);
1246
else
1247
{
1248
/* The index is 1 (the second directory) when file 0 is the
   nonexistent one, and 0 otherwise. */
char const *dir
1249
= parent->file[cmp.file[0].desc == NONEXISTENT].name;
1250
1251
/* See POSIX 1003.1-2001 for this format. */
1252
message ("Only in %s: %s\n", dir, name0);
1253
1254
status = EXIT_FAILURE;
1255
}
1256
}
1257
else
1258
{
1259
/* We have two files that are not to be compared. */
1260
1261
/* See POSIX 1003.1-2001 for this format. */
1262
message5 ("File %s is a %s while file %s is a %s\n",
1263
file_label[0] ? file_label[0] : cmp.file[0].name,
1264
file_type (&cmp.file[0].stat),
1265
file_label[1] ? file_label[1] : cmp.file[1].name,
1266
file_type (&cmp.file[1].stat));
1267
1268
/* This is a difference. */
1269
status = EXIT_FAILURE;
1270
}
1271
}
1272
/* When files_can_be_treated_as_binary is set, two regular files
   whose sizes differ are reported as differing without being
   opened. */
else if (files_can_be_treated_as_binary
1273
&& S_ISREG (cmp.file[0].stat.st_mode)
1274
&& S_ISREG (cmp.file[1].stat.st_mode)
1275
&& cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
1276
{
1277
message ("Files %s and %s differ\n",
1278
file_label[0] ? file_label[0] : cmp.file[0].name,
1279
file_label[1] ? file_label[1] : cmp.file[1].name);
1280
status = EXIT_FAILURE;
1281
}
1282
else
1283
{
1284
/* Both exist and neither is a directory. */
1285
1286
/* Open the files and record their descriptors.  An UNOPENED marker
   is replaced by the value open returns, which is negative on
   failure. */
1287
1288
if (cmp.file[0].desc == UNOPENED)
1289
if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1290
{
1291
perror_with_name (cmp.file[0].name);
1292
status = EXIT_TROUBLE;
1293
}
1294
if (cmp.file[1].desc == UNOPENED)
1295
{
1296
/* When both names denote the same file, share file 0's
   descriptor instead of opening the file a second time. */
if (same_files)
1297
cmp.file[1].desc = cmp.file[0].desc;
1298
else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1299
< 0)
1300
{
1301
perror_with_name (cmp.file[1].name);
1302
status = EXIT_TROUBLE;
1303
}
1304
}
1305
1306
#if HAVE_SETMODE_DOS
1307
if (binary)
1308
for (f = 0; f < 2; f++)
1309
if (0 <= cmp.file[f].desc)
1310
set_binary_mode (cmp.file[f].desc, true);
1311
#endif
1312
1313
/* Compare the files, if no error was found. */
1314
1315
if (status == EXIT_SUCCESS)
1316
status = diff_2_files (&cmp);
1317
1318
/* Close the file descriptors.  Negative desc values are markers or
   failed opens, not descriptors, so they are skipped. */
1319
1320
if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1321
{
1322
perror_with_name (cmp.file[0].name);
1323
status = EXIT_TROUBLE;
1324
}
/* Skip the second close when both entries hold the same
   descriptor. */
1325
if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1326
&& close (cmp.file[1].desc) != 0)
1327
{
1328
perror_with_name (cmp.file[1].name);
1329
status = EXIT_TROUBLE;
1330
}
1331
}
1332
1333
/* Now the comparison has been done, if no error prevented it,
   and STATUS is the value this function will return. */
1335
1336
if (status == EXIT_SUCCESS)
1337
{
1338
/* The "identical" report is given only when file 0 is not a
   directory. */
if (report_identical_files && !DIR_P (0))
1339
message ("Files %s and %s are identical\n",
1340
file_label[0] ? file_label[0] : cmp.file[0].name,
1341
file_label[1] ? file_label[1] : cmp.file[1].name);
1342
}
1343
else
1344
{
1345
/* Flush stdout so that the user sees differences immediately.
   This can hurt performance, unfortunately. */
1347
if (fflush (stdout) != 0)
1348
pfatal_with_name (_("standard output"));
1349
}
1350
1351
/* Release any names that were built by this function. */
if (free0)
1352
free (free0);
1353
if (free1)
1354
free (free1);
1355
1356
return status;
1357
}
1358
1359