CoCalc -- fmtutils.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/libpcap/fmtutils.c
104102 views
1
/*
2
 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3
 *	The Regents of the University of California.  All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 * 3. All advertising materials mentioning features or use of this software
14
 *    must display the following acknowledgement:
15
 *	This product includes software developed by the Computer Systems
16
 *	Engineering Group at Lawrence Berkeley Laboratory.
17
 * 4. Neither the name of the University nor of the Laboratory may be used
18
 *    to endorse or promote products derived from this software without
19
 *    specific prior written permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31
 * SUCH DAMAGE.
32
 */
33

34
/*
35
 * Utilities for message formatting used both by libpcap and rpcapd.
36
 */
37

38
#include <config.h>
39

40
#include "ftmacros.h"
41

42
#include <stddef.h>
43
#include <stdarg.h>
44
#include <stdio.h>
45
#include <string.h>
46
#include <errno.h>
47

48
#include "pcap-int.h"
49

50
#include "portability.h"
51

52
#include "fmtutils.h"
53

54
#ifdef _WIN32
55
#include "charconv.h"
56
#endif
57

58
/*
59
 * Set the encoding.
60
 */
61
#ifdef _WIN32
62
/*
63
 * True if we should use UTF-8.
64
 */
65
static int use_utf_8;
66

67
void
68
pcapint_fmt_set_encoding(unsigned int opts)
69
{
70
	if (opts == PCAP_CHAR_ENC_UTF_8)
71
		use_utf_8 = 1;
72
}
73
#else
74
void
75
pcapint_fmt_set_encoding(unsigned int opts _U_)
76
{
77
	/*
78
	 * Nothing to do here.
79
	 */
80
}
81
#endif
82

83
#ifdef _WIN32
84
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences, and the output is always null-terminated unless the
 * buffer size is zero.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * utf_16     - null-terminated input string of UTF-16 code units.
 * utf_8      - output buffer.
 * utf_8_len  - size of the output buffer in bytes, including room
 *              for the terminating '\0'.
 *
 * Returns a pointer to the terminating '\0' written into the output
 * buffer (or utf_8 itself, with nothing written, if utf_8_len is 0).
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;
	size_t nbytes;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (IS_TRAILING_SURROGATE(c2)) {
				/*
				 * Well-formed pair; consume the trailing
				 * surrogate too.  The result is between
				 * 0x10000 and 0x10ffff, so it's always
				 * a valid Unicode code point.
				 */
				utf_16++;
				uc = (uint32_t)SURROGATE_VALUE(c, c2);
			} else {
				/*
				 * Unpaired leading surrogate (followed by
				 * the end of the string or by something
				 * that isn't a trailing surrogate).  Emit
				 * a REPLACEMENT CHARACTER for it alone and
				 * do NOT consume the following unit, so
				 * that it is processed (or terminates the
				 * loop) on the next trip through.
				 */
				uc = REPLACEMENT_CHARACTER;
			}
		} else if (IS_TRAILING_SURROGATE(c)) {
			/*
			 * Trailing surrogate without a preceding leading
			 * surrogate.  Emit a REPLACEMENT CHARACTER.
			 */
			uc = REPLACEMENT_CHARACTER;
		} else {
			/*
			 * This is a valid BMP character; use it as is.
			 */
			uc = (uint32_t)c;
		}

		/*
		 * OK, uc is a valid Unicode character; how
		 * many bytes worth of UTF-8 does it require?
		 */
		if (uc < 0x0080)
			nbytes = 1;
		else if (uc < 0x0800)
			nbytes = 2;
		else if (uc < 0x010000)
			nbytes = 3;
		else
			nbytes = 4;

		if (utf_8_len < nbytes + 1) {
			/*
			 * Not enough room for those bytes plus a
			 * trailing '\0'; stop on a character boundary.
			 */
			break;
		}

		switch (nbytes) {

		case 1:
			*utf_8++ = (char)uc;
			break;

		case 2:
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		case 3:
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		default:
			/*
			 * 4 bytes.  The lead byte carries 3 payload bits
			 * (code points go up to 0x10ffff, so uc >> 18 can
			 * be as large as 4).
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;
		}

		/*
		 * Account for exactly the number of bytes just written,
		 * so the space check above stays accurate.
		 */
		utf_8_len -= nbytes;
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and if
	 * there wasn't enough room for any character we wanted
	 * to put into the buffer *plus* a trailing '\0',
	 * we'd have quit before putting it into the buffer,
	 * and thus would have left enough room for the trailing
	 * '\0'.)
	 *
	 * Drop it in.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
260
#endif /* _WIN32 */
261

262
/*
 * Build an error message in errbuf from a printf-style format and its
 * arguments, followed by a message for the errno value errnum.
 *
 * This is the variadic front end; it packages its arguments into a
 * va_list and hands them to pcapint_vfmt_errmsg_for_errno().
 */
void
pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
276

277
/*
 * va_list version of pcapint_fmt_errmsg_for_errno().
 *
 * Formats fmt/ap into errbuf (at most errbuflen bytes, including the
 * terminating '\0'), then, if there is room, appends ": " and a
 * message for the errno value errnum.  If the formatted text leaves no
 * room for ": " plus a terminator, only the formatted text is
 * returned.  A zero-length buffer is left untouched.
 */
void
pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	if (errbuflen == 0) {
		/*
		 * No room for anything, not even a '\0'; vsnprintf()
		 * would write nothing, so the buffer's contents can't
		 * be examined with strlen().
		 */
		return;
	}

	if (vsnprintf(errbuf, errbuflen, fmt, ap) < 0) {
		/*
		 * Formatting failed; the buffer's contents are not
		 * guaranteed to be a string, so start from an empty one.
		 */
		errbuf[0] = '\0';
	}
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s() are, so
		 * just report the raw error number.
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#else
	/*
	 * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
	 * solve the non-UTF-16 part of this problem space.
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
343

344
#ifdef _WIN32
345
/*
346
 * Generate an error message based on a format, arguments, and a
347
 * Win32 error, with a message for the Win32 error after the formatted output.
348
 */
349
void
350
pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
351
    const char *fmt, ...)
352
{
353
	va_list ap;
354

355
	va_start(ap, fmt);
356
	pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
357
	va_end(ap);
358
}
359

360
void
361
pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
362
    const char *fmt, va_list ap)
363
{
364
	size_t msglen;
365
	char *p;
366
	size_t errbuflen_remaining;
367
	DWORD retval;
368
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
369
	size_t utf_8_len;
370

371
	vsnprintf(errbuf, errbuflen, fmt, ap);
372
	msglen = strlen(errbuf);
373

374
	/*
375
	 * Do we have enough space to append ": "?
376
	 * Including the terminating '\0', that's 3 bytes.
377
	 */
378
	if (msglen + 3 > errbuflen) {
379
		/* No - just give them what we've produced. */
380
		return;
381
	}
382
	p = errbuf + msglen;
383
	errbuflen_remaining = errbuflen - msglen;
384
	*p++ = ':';
385
	*p++ = ' ';
386
	*p = '\0';
387
	msglen += 2;
388
	errbuflen_remaining -= 2;
389

390
	/*
391
	 * Now append the string for the error code.
392
	 *
393
	 * XXX - what language ID to use?
394
	 *
395
	 * For UN*Xes, pcap_strerror() may or may not return localized
396
	 * strings.
397
	 *
398
	 * We currently don't have localized messages for libpcap, but
399
	 * we might want to do so.  On the other hand, if most of these
400
	 * messages are going to be read by libpcap developers and
401
	 * perhaps by developers of libpcap-based applications, English
402
	 * might be a better choice, so the developer doesn't have to
403
	 * get the message translated if it's in a language they don't
404
	 * happen to understand.
405
	 */
406
	retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
407
	    NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
408
	    utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
409
	if (retval == 0) {
410
		/*
411
		 * Failed.
412
		 */
413
		snprintf(p, errbuflen_remaining,
414
		    "Couldn't get error message for error (%lu)", errnum);
415
		return;
416
	}
417

418
	/*
419
	 * Now convert it from UTF-16LE to UTF-8.
420
	 */
421
	p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
422

423
	/*
424
	 * Now append the error number, if it fits.
425
	 */
426
	utf_8_len = p - errbuf;
427
	errbuflen_remaining -= utf_8_len;
428
	if (utf_8_len == 0) {
429
		/* The message was empty. */
430
		snprintf(p, errbuflen_remaining, "(%lu)", errnum);
431
	} else
432
		snprintf(p, errbuflen_remaining, " (%lu)", errnum);
433

434
	/*
435
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
436
	 * local code page.
437
	 */
438
	if (!use_utf_8)
439
		utf_8_to_acp_truncated(errbuf);
440
}
441
#endif
442

443
Product

Resources

Company