/*1* Copyright (c) 1993, 1994, 1995, 1996, 1997, 19982* The Regents of the University of California. All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12* 3. All advertising materials mentioning features or use of this software13* must display the following acknowledgement:14* This product includes software developed by the Computer Systems15* Engineering Group at Lawrence Berkeley Laboratory.16* 4. Neither the name of the University nor of the Laboratory may be used17* to endorse or promote products derived from this software without18* specific prior written permission.19*20* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND21* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE22* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE23* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE24* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL25* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS26* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)27* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT28* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY29* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF30* SUCH DAMAGE.31*/3233/*34* Utilities for message formatting used both by libpcap and rpcapd.35*/3637#include <config.h>3839#include "ftmacros.h"4041#include <stddef.h>42#include <stdarg.h>43#include <stdio.h>44#include <string.h>45#include <errno.h>4647#include "pcap-int.h"4849#include "portability.h"5051#include "fmtutils.h"5253#ifdef _WIN3254#include "charconv.h"55#endif5657/*58* Set the encoding.59*/60#ifdef _WIN3261/*62* True if we should use UTF-8.63*/64static int use_utf_8;6566void67pcapint_fmt_set_encoding(unsigned int opts)68{69if (opts == PCAP_CHAR_ENC_UTF_8)70use_utf_8 = 1;71}72#else73void74pcapint_fmt_set_encoding(unsigned int opts _U_)75{76/*77* Nothing to do here.78*/79}80#endif8182#ifdef _WIN3283/*84* Convert a null-terminated UTF-16LE string to UTF-8, putting it into85* a buffer starting at the specified location and stopping if we go86* past the specified size. This will only put out complete UTF-887* sequences.88*89* We do this ourselves because Microsoft doesn't offer a "convert and90* stop at a UTF-8 character boundary if we run out of space" routine.91*/92#define IS_LEADING_SURROGATE(c) \93((c) >= 0xd800 && (c) < 0xdc00)94#define IS_TRAILING_SURROGATE(c) \95((c) >= 0xdc00 && (c) < 0xe000)96#define SURROGATE_VALUE(leading, trailing) \97(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)98#define REPLACEMENT_CHARACTER 0x0FFFD99100static char *101utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,102size_t utf_8_len)103{104wchar_t c, c2;105uint32_t uc;106107if (utf_8_len == 0) {108/*109* Not even enough room for a trailing '\0'.110* Don't put anything into the buffer.111*/112return (utf_8);113}114115while ((c = *utf_16++) != '\0') {116if (IS_LEADING_SURROGATE(c)) {117/*118* Leading surrogate. Must be followed by119* a trailing surrogate.120*/121c2 = *utf_16;122if (c2 == '\0') {123/*124* Oops, string ends with a lead125* surrogate. Try to drop in126* a REPLACEMENT CHARACTER, and127* don't move the string pointer,128* so on the next trip through129* the loop we grab the terminating130* '\0' and quit.131*/132uc = REPLACEMENT_CHARACTER;133} else {134/*135* OK, we can consume this 2-octet136* value.137*/138utf_16++;139if (IS_TRAILING_SURROGATE(c2)) {140/*141* Trailing surrogate.142* This calculation will,143* for c being a leading144* surrogate and c2 being145* a trailing surrogate,146* produce a value between147* 0x100000 and 0x10ffff,148* so it's always going to be149* a valid Unicode code point.150*/151uc = SURROGATE_VALUE(c, c2);152} else {153/*154* Not a trailing surrogate;155* try to drop in a156* REPLACEMENT CHARACTER.157*/158uc = REPLACEMENT_CHARACTER;159}160}161} else {162/*163* Not a leading surrogate.164*/165if (IS_TRAILING_SURROGATE(c)) {166/*167* Trailing surrogate without168* a preceding leading surrogate.169* Try to drop in a REPLACEMENT170* CHARACTER.171*/172uc = REPLACEMENT_CHARACTER;173} else {174/*175* This is a valid BMP character;176* drop it in.177*/178uc = c;179}180}181182/*183* OK, uc is a valid Unicode character; how184* many bytes worth of UTF-8 does it require?185*/186if (uc < 0x0080) {187/* 1 byte. */188if (utf_8_len < 2) {189/*190* Not enough room for that byte191* plus a trailing '\0'.192*/193break;194}195*utf_8++ = (char)uc;196utf_8_len--;197} else if (uc < 0x0800) {198/* 2 bytes. */199if (utf_8_len < 3) {200/*201* Not enough room for those bytes202* plus a trailing '\0'.203*/204break;205}206*utf_8++ = ((uc >> 6) & 0x3F) | 0xC0;207*utf_8++ = ((uc >> 0) & 0x3F) | 0x80;208utf_8_len -= 2;209} else if (uc < 0x010000) {210/* 3 bytes. */211if (utf_8_len < 4) {212/*213* Not enough room for those bytes214* plus a trailing '\0'.215*/216break;217}218*utf_8++ = ((uc >> 12) & 0x0F) | 0xE0;219*utf_8++ = ((uc >> 6) & 0x3F) | 0x80;220*utf_8++ = ((uc >> 0) & 0x3F) | 0x80;221utf_8_len -= 3;222} else {223/* 4 bytes. */224if (utf_8_len < 5) {225/*226* Not enough room for those bytes227* plus a trailing '\0'.228*/229break;230}231*utf_8++ = ((uc >> 18) & 0x03) | 0xF0;232*utf_8++ = ((uc >> 12) & 0x3F) | 0x80;233*utf_8++ = ((uc >> 6) & 0x3F) | 0x80;234*utf_8++ = ((uc >> 0) & 0x3F) | 0x80;235utf_8_len -= 3;236}237}238239/*240* OK, we have enough room for (at least) a trailing '\0'.241* (We started out with enough room, thanks to the test242* for a zero-length buffer at the beginning, and if243* there wasn't enough room for any character we wanted244* to put into the buffer *plus* a trailing '\0',245* we'd have quit before putting it into the buffer,246* and thus would have left enough room for the trailing247* '\0'.)248*249* Drop it in.250*/251*utf_8 = '\0';252253/*254* Return a pointer to the terminating '\0', in case we255* want to drop something in after that.256*/257return (utf_8);258}259#endif /* _WIN32 */260261/*262* Generate an error message based on a format, arguments, and an263* errno, with a message for the errno after the formatted output.264*/265void266pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,267const char *fmt, ...)268{269va_list ap;270271va_start(ap, fmt);272pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap);273va_end(ap);274}275276void277pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,278const char *fmt, va_list ap)279{280size_t msglen;281char *p;282size_t errbuflen_remaining;283284(void)vsnprintf(errbuf, errbuflen, fmt, ap);285msglen = strlen(errbuf);286287/*288* Do we have enough space to append ": "?289* Including the terminating '\0', that's 3 bytes.290*/291if (msglen + 3 > errbuflen) {292/* No - just give them what we've produced. */293return;294}295p = errbuf + msglen;296errbuflen_remaining = errbuflen - msglen;297*p++ = ':';298*p++ = ' ';299*p = '\0';300errbuflen_remaining -= 2;301302/*303* Now append the string for the error code.304*/305#if defined(HAVE__WCSERROR_S)306/*307* We have a Windows-style _wcserror_s().308* Generate a UTF-16LE error message.309*/310wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];311errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);312if (err != 0) {313/*314* It doesn't appear to be documented anywhere obvious315* what the error returns from _wcserror_s().316*/317snprintf(p, errbuflen_remaining, "Error %d", errnum);318return;319}320321/*322* Now convert it from UTF-16LE to UTF-8, dropping it in the323* remaining space in the buffer, and truncating it - cleanly,324* on a UTF-8 character boundary - if it doesn't fit.325*/326utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);327328/*329* Now, if we're not in UTF-8 mode, convert errbuf to the330* local code page.331*/332if (!use_utf_8)333utf_8_to_acp_truncated(errbuf);334#else335/*336* Either Windows without _wcserror_s() or not Windows. Let pcap_strerror()337* solve the non-UTF-16 part of this problem space.338*/339snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));340#endif341}342343#ifdef _WIN32344/*345* Generate an error message based on a format, arguments, and a346* Win32 error, with a message for the Win32 error after the formatted output.347*/348void349pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,350const char *fmt, ...)351{352va_list ap;353354va_start(ap, fmt);355pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);356va_end(ap);357}358359void360pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,361const char *fmt, va_list ap)362{363size_t msglen;364char *p;365size_t errbuflen_remaining;366DWORD retval;367wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];368size_t utf_8_len;369370vsnprintf(errbuf, errbuflen, fmt, ap);371msglen = strlen(errbuf);372373/*374* Do we have enough space to append ": "?375* Including the terminating '\0', that's 3 bytes.376*/377if (msglen + 3 > errbuflen) {378/* No - just give them what we've produced. */379return;380}381p = errbuf + msglen;382errbuflen_remaining = errbuflen - msglen;383*p++ = ':';384*p++ = ' ';385*p = '\0';386msglen += 2;387errbuflen_remaining -= 2;388389/*390* Now append the string for the error code.391*392* XXX - what language ID to use?393*394* For UN*Xes, pcap_strerror() may or may not return localized395* strings.396*397* We currently don't have localized messages for libpcap, but398* we might want to do so. On the other hand, if most of these399* messages are going to be read by libpcap developers and400* perhaps by developers of libpcap-based applications, English401* might be a better choice, so the developer doesn't have to402* get the message translated if it's in a language they don't403* happen to understand.404*/405retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,406NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),407utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);408if (retval == 0) {409/*410* Failed.411*/412snprintf(p, errbuflen_remaining,413"Couldn't get error message for error (%lu)", errnum);414return;415}416417/*418* Now convert it from UTF-16LE to UTF-8.419*/420p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);421422/*423* Now append the error number, if it fits.424*/425utf_8_len = p - errbuf;426errbuflen_remaining -= utf_8_len;427if (utf_8_len == 0) {428/* The message was empty. */429snprintf(p, errbuflen_remaining, "(%lu)", errnum);430} else431snprintf(p, errbuflen_remaining, " (%lu)", errnum);432433/*434* Now, if we're not in UTF-8 mode, convert errbuf to the435* local code page.436*/437if (!use_utf_8)438utf_8_to_acp_truncated(errbuf);439}440#endif441442443