Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/less/pattern.c
39476 views
/*
 * Copyright (C) 1984-2025 Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */

/*
 * Routines to do pattern matching.
 */
13
14
#include "less.h"
15
16
extern int caseless;
17
extern int is_caseless;
18
extern int utf_mode;
19
20
/*
21
* Compile a search pattern, for future use by match_pattern.
22
*/
23
static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24
{
25
if (search_type & SRCH_NO_REGEX)
26
return (0);
27
{
28
#if HAVE_GNU_REGEX
29
struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30
ecalloc(1, sizeof(struct re_pattern_buffer));
31
re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32
if (re_compile_pattern(pattern, strlen(pattern), comp))
33
{
34
free(comp);
35
if (show_error)
36
error("Invalid pattern", NULL_PARG);
37
return (-1);
38
}
39
if (*comp_pattern != NULL)
40
{
41
regfree(*comp_pattern);
42
free(*comp_pattern);
43
}
44
*comp_pattern = comp;
45
#endif
46
#if HAVE_POSIX_REGCOMP
47
regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48
if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49
{
50
free(comp);
51
if (show_error)
52
error("Invalid pattern", NULL_PARG);
53
return (-1);
54
}
55
if (*comp_pattern != NULL)
56
{
57
regfree(*comp_pattern);
58
free(*comp_pattern);
59
}
60
*comp_pattern = comp;
61
#endif
62
#if HAVE_PCRE
63
constant char *errstring;
64
int erroffset;
65
PARG parg;
66
pcre *comp = pcre_compile(pattern,
67
((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68
(is_caseless ? PCRE_CASELESS : 0),
69
&errstring, &erroffset, NULL);
70
if (comp == NULL)
71
{
72
parg.p_string = (char *) errstring;
73
if (show_error)
74
error("%s", &parg);
75
return (-1);
76
}
77
*comp_pattern = comp;
78
#endif
79
#if HAVE_PCRE2
80
int errcode;
81
PCRE2_SIZE erroffset;
82
PARG parg;
83
pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84
((utf_mode) ? PCRE2_UTF | PCRE2_NO_UTF_CHECK : 0) |
85
(is_caseless ? PCRE2_CASELESS : 0),
86
&errcode, &erroffset, NULL);
87
if (comp == NULL)
88
{
89
if (show_error)
90
{
91
char msg[160];
92
pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
93
parg.p_string = msg;
94
error("%s", &parg);
95
}
96
return (-1);
97
}
98
*comp_pattern = comp;
99
#endif
100
#if HAVE_RE_COMP
101
PARG parg;
102
if ((parg.p_string = re_comp(pattern)) != NULL)
103
{
104
if (show_error)
105
error("%s", &parg);
106
return (-1);
107
}
108
*comp_pattern = 1;
109
#endif
110
#if HAVE_REGCMP
111
char *comp;
112
if ((comp = regcmp(pattern, 0)) == NULL)
113
{
114
if (show_error)
115
error("Invalid pattern", NULL_PARG);
116
return (-1);
117
}
118
if (comp_pattern != NULL)
119
free(*comp_pattern);
120
*comp_pattern = comp;
121
#endif
122
#if HAVE_V8_REGCOMP
123
struct regexp *comp;
124
reg_show_error = show_error;
125
comp = regcomp(pattern);
126
reg_show_error = 1;
127
if (comp == NULL)
128
{
129
/*
130
* regcomp has already printed an error message
131
* via regerror().
132
*/
133
return (-1);
134
}
135
if (*comp_pattern != NULL)
136
free(*comp_pattern);
137
*comp_pattern = comp;
138
#endif
139
}
140
return (0);
141
}
142
143
/*
144
* Like compile_pattern2, but convert the pattern to lowercase if necessary.
145
*/
146
public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
147
{
148
int result;
149
150
if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
151
{
152
result = compile_pattern2(pattern, search_type, comp_pattern, show_error);
153
} else
154
{
155
char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
156
cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC);
157
result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
158
free(cvt_pattern);
159
}
160
return (result);
161
}
162
163
/*
164
* Forget that we have a compiled pattern.
165
*/
166
public void uncompile_pattern(PATTERN_TYPE *pattern)
167
{
168
#if HAVE_GNU_REGEX
169
if (*pattern != NULL)
170
{
171
regfree(*pattern);
172
free(*pattern);
173
}
174
*pattern = NULL;
175
#endif
176
#if HAVE_POSIX_REGCOMP
177
if (*pattern != NULL)
178
{
179
regfree(*pattern);
180
free(*pattern);
181
}
182
*pattern = NULL;
183
#endif
184
#if HAVE_PCRE
185
if (*pattern != NULL)
186
pcre_free(*pattern);
187
*pattern = NULL;
188
#endif
189
#if HAVE_PCRE2
190
if (*pattern != NULL)
191
pcre2_code_free(*pattern);
192
*pattern = NULL;
193
#endif
194
#if HAVE_RE_COMP
195
*pattern = 0;
196
#endif
197
#if HAVE_REGCMP
198
if (*pattern != NULL)
199
free(*pattern);
200
*pattern = NULL;
201
#endif
202
#if HAVE_V8_REGCOMP
203
if (*pattern != NULL)
204
free(*pattern);
205
*pattern = NULL;
206
#endif
207
}
208
209
#if 0
/*
 * Check whether a pattern compiles.
 * Returns 1 if it does, 0 if it does not; no error message is shown
 * and the trial compilation is discarded again.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE probe;

	SET_NULL_PATTERN(probe);
	if (compile_pattern2(pattern, 0, &probe, 0) != 0)
		return (0);
	uncompile_pattern(&probe);
	return (1);
}
#endif
226
227
/*
228
* Is a compiled pattern null?
229
*/
230
public lbool is_null_pattern(PATTERN_TYPE pattern)
231
{
232
#if HAVE_GNU_REGEX
233
return (pattern == NULL);
234
#endif
235
#if HAVE_POSIX_REGCOMP
236
return (pattern == NULL);
237
#endif
238
#if HAVE_PCRE
239
return (pattern == NULL);
240
#endif
241
#if HAVE_PCRE2
242
return (pattern == NULL);
243
#endif
244
#if HAVE_RE_COMP
245
return (pattern == 0);
246
#endif
247
#if HAVE_REGCMP
248
return (pattern == NULL);
249
#endif
250
#if HAVE_V8_REGCOMP
251
return (pattern == NULL);
252
#endif
253
#if NO_REGEX
254
return (pattern == NULL);
255
#endif
256
}
257
/*
258
* Simple pattern matching function.
259
* It supports no metacharacters like *, etc.
260
*/
261
static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs)
262
{
263
constant char *pp;
264
constant char *lp;
265
constant char *pattern_end = pattern + pattern_len;
266
constant char *buf_end = buf + buf_len;
267
268
(void) nsubs;
269
for ( ; buf < buf_end; buf++)
270
{
271
for (pp = pattern, lp = buf; ; pp++, lp++)
272
{
273
char cp = *pp;
274
char cl = *lp;
275
if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
276
cp = ASCII_TO_LOWER(cp);
277
if (cp != cl)
278
break;
279
if (pp == pattern_end || lp == buf_end)
280
break;
281
}
282
if (pp == pattern_end)
283
{
284
*(*sp)++ = buf;
285
*(*ep)++ = lp;
286
return (1);
287
}
288
}
289
**sp = **ep = NULL;
290
return (0);
291
}
292
293
/*
294
* Perform a pattern match with the previously compiled pattern.
295
* Set sp[0] and ep[0] to the start and end of the matched string.
296
* Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
297
* Subpatterns are defined by parentheses in the regex language.
298
*/
299
static lbool match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, size_t line_off, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
300
{
301
int matched;
302
int line_len = (int) aline_len; /*{{type-issue}}*/
303
304
#if NO_REGEX
305
search_type |= SRCH_NO_REGEX;
306
#endif
307
if (search_type & SRCH_NO_REGEX)
308
matched = match(tpattern, strlen(tpattern), line + line_off, line_len - line_off, &sp, &ep, nsp);
309
else
310
{
311
#if HAVE_GNU_REGEX
312
{
313
struct re_registers search_regs;
314
pattern->not_bol = notbol;
315
pattern->regs_allocated = REGS_UNALLOCATED;
316
matched = re_search(pattern, line, line_len, line_off, line_len - line_off, &search_regs) >= 0;
317
if (matched)
318
{
319
*sp++ = line + search_regs.start[0];
320
*ep++ = line + search_regs.end[0];
321
}
322
}
323
#endif
324
#if HAVE_POSIX_REGCOMP
325
{
326
#define RM_COUNT (NUM_SEARCH_COLORS+2)
327
regmatch_t rm[RM_COUNT];
328
int flags = (notbol) ? REG_NOTBOL : 0;
329
#ifdef REG_STARTEND
330
flags |= REG_STARTEND;
331
rm[0].rm_so = line_off;
332
rm[0].rm_eo = line_len;
333
#else
334
line += line_off;
335
#endif
336
matched = !regexec(pattern, line, RM_COUNT, rm, flags);
337
if (matched)
338
{
339
int i;
340
int ecount;
341
for (ecount = RM_COUNT; ecount > 0; ecount--)
342
if (rm[ecount-1].rm_so >= 0)
343
break;
344
if (ecount >= nsp)
345
ecount = nsp-1;
346
for (i = 0; i < ecount; i++)
347
{
348
if (rm[i].rm_so < 0)
349
{
350
*sp++ = *ep++ = line;
351
} else
352
{
353
#ifndef __WATCOMC__
354
*sp++ = line + rm[i].rm_so;
355
*ep++ = line + rm[i].rm_eo;
356
#else
357
*sp++ = rm[i].rm_sp;
358
*ep++ = rm[i].rm_ep;
359
#endif
360
}
361
}
362
}
363
}
364
#endif
365
#if HAVE_PCRE
366
{
367
#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
368
int ovector[OVECTOR_COUNT];
369
int flags = (notbol) ? PCRE_NOTBOL : 0;
370
int i;
371
int ecount;
372
int mcount = pcre_exec(pattern, NULL, line, line_len,
373
line_off, flags, ovector, OVECTOR_COUNT);
374
matched = (mcount > 0);
375
ecount = nsp-1;
376
if (ecount > mcount) ecount = mcount;
377
for (i = 0; i < ecount*2; )
378
{
379
if (ovector[i] < 0 || ovector[i+1] < 0)
380
{
381
*sp++ = *ep++ = line;
382
i += 2;
383
} else
384
{
385
*sp++ = line + ovector[i++];
386
*ep++ = line + ovector[i++];
387
}
388
}
389
}
390
#endif
391
#if HAVE_PCRE2
392
{
393
int flags = (notbol) ? PCRE2_NOTBOL : 0;
394
pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
395
int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
396
line_off, flags, md, NULL);
397
matched = (mcount > 0);
398
if (matched)
399
{
400
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
401
int i;
402
int ecount = nsp-1;
403
if (ecount > mcount) ecount = mcount;
404
for (i = 0; i < ecount*2; )
405
{
406
if (ovector[i] < 0 || ovector[i+1] < 0)
407
{
408
*sp++ = *ep++ = line;
409
i += 2;
410
} else
411
{
412
*sp++ = line + ovector[i++];
413
*ep++ = line + ovector[i++];
414
}
415
}
416
}
417
pcre2_match_data_free(md);
418
}
419
#endif
420
#if HAVE_RE_COMP
421
matched = (re_exec(line + line_off) == 1);
422
/*
423
* re_exec doesn't seem to provide a way to get the matched string.
424
*/
425
#endif
426
#if HAVE_REGCMP
427
matched = ((*ep++ = regex(pattern, line + line_off)) != NULL);
428
if (matched)
429
*sp++ = __loc1;
430
#endif
431
#if HAVE_V8_REGCOMP
432
#if HAVE_REGEXEC2
433
matched = regexec2(pattern, line + line_off, notbol);
434
#else
435
matched = regexec(pattern, line + line_off);
436
#endif
437
if (matched)
438
{
439
*sp++ = pattern->startp[0];
440
*ep++ = pattern->endp[0];
441
}
442
#endif
443
}
444
*sp = *ep = NULL;
445
matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
446
((search_type & SRCH_NO_MATCH) && !matched);
447
return (matched != 0);
448
}
449
450
/*
451
* Return TRUE if the match satisfies all SUBSEARCH conditions.
452
*/
453
static lbool subsearch_ok(constant char **sp, constant char **ep, int search_type)
454
{
455
int i;
456
for (i = 1; i <= NUM_SEARCH_COLORS; i++)
457
{
458
if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
459
return FALSE;
460
}
461
return TRUE;
462
}
463
464
/*
 * Match a line against a compiled pattern, honoring SUBSEARCH conditions.
 *
 * Calls match_pattern1 and, while it reports a match that does not
 * satisfy subsearch_ok, retries on the part of the line that follows
 * that match.  Parameters are those of match_pattern1: line_len is the
 * total length of line and line_off the offset at which searching starts.
 *
 * Returns the result of the first match that passes subsearch_ok, or
 * a false value if no such match exists.
 */
public lbool match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, size_t line_off, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
{
	/* Declared here so this function does not depend on declaration order. */
	extern int utf_mode;

	for (;;)
	{
		size_t mlen;
		lbool matched = match_pattern1(pattern, tpattern, line, line_len, line_off, sp, ep, nsp, notbol, search_type);
		if (!matched || subsearch_ok(sp, ep, search_type))
			return matched;
		/*
		 * Without a recorded match position there is nothing to
		 * resume from, so the subsearch conditions cannot be met.
		 */
		if (ep[0] == NULL)
			return (0);
		mlen = (size_t) (ep[0] - line);
		if (ep[0] == sp[0])
		{
			/*
			 * An empty match would be found again at the same
			 * place; step over one character to make progress.
			 */
			if (mlen >= line_len)
				return (0);
			mlen++;
			/* In UTF-8 mode, do not stop inside a multibyte character. */
			while (utf_mode && mlen < line_len &&
			       (((unsigned char) line[mlen]) & 0xC0) == 0x80)
				mlen++;
		}
		/*
		 * Resume right after the rejected match.  The line now starts
		 * there, so the original start offset no longer applies.
		 */
		line += mlen;
		line_len -= mlen;
		line_off = 0;
		notbol = 1;
	}
}
478
479
/*
480
* Return the name of the pattern matching library.
481
*/
482
public constant char * pattern_lib_name(void)
483
{
484
#if HAVE_GNU_REGEX
485
return ("GNU");
486
#else
487
#if HAVE_POSIX_REGCOMP
488
return ("POSIX");
489
#else
490
#if HAVE_PCRE2
491
return ("PCRE2");
492
#else
493
#if HAVE_PCRE
494
return ("PCRE");
495
#else
496
#if HAVE_RE_COMP
497
return ("BSD");
498
#else
499
#if HAVE_REGCMP
500
return ("V8");
501
#else
502
#if HAVE_V8_REGCOMP
503
return ("Spencer V8");
504
#else
505
return ("no");
506
#endif
507
#endif
508
#endif
509
#endif
510
#endif
511
#endif
512
#endif
513
}
514
515