/*1* string.c : an XML string utilities module2*3* This module provides various utility functions for manipulating4* the xmlChar* type. All functions named xmlStr* have been moved here5* from the parser.c file (their original home).6*7* See Copyright for the status of this software.8*9* UTF8 string routines from:10* William Brack <[email protected]>11*12* [email protected]13*/1415#define IN_LIBXML16#include "libxml.h"1718#include <stdlib.h>19#include <string.h>20#include <limits.h>21#include <libxml/xmlmemory.h>22#include <libxml/parserInternals.h>23#include <libxml/xmlstring.h>2425#include "private/parser.h"26#include "private/string.h"2728/************************************************************************29* *30* Commodity functions to handle xmlChars *31* *32************************************************************************/3334/**35* xmlStrndup:36* @cur: the input xmlChar *37* @len: the len of @cur38*39* a strndup for array of xmlChar's40*41* Returns a new xmlChar * or NULL42*/43xmlChar *44xmlStrndup(const xmlChar *cur, int len) {45xmlChar *ret;4647if ((cur == NULL) || (len < 0)) return(NULL);48ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);49if (ret == NULL) {50return(NULL);51}52memcpy(ret, cur, len);53ret[len] = 0;54return(ret);55}5657/**58* xmlStrdup:59* @cur: the input xmlChar *60*61* a strdup for array of xmlChar's. Since they are supposed to be62* encoded in UTF-8 or an encoding with 8bit based chars, we assume63* a termination mark of '0'.64*65* Returns a new xmlChar * or NULL66*/67xmlChar *68xmlStrdup(const xmlChar *cur) {69const xmlChar *p = cur;7071if (cur == NULL) return(NULL);72while (*p != 0) p++; /* non input consuming */73return(xmlStrndup(cur, p - cur));74}7576/**77* xmlCharStrndup:78* @cur: the input char *79* @len: the len of @cur80*81* a strndup for char's to xmlChar's82*83* Returns a new xmlChar * or NULL84*/8586xmlChar *87xmlCharStrndup(const char *cur, int len) {88int i;89xmlChar *ret;9091if ((cur == NULL) || (len < 0)) return(NULL);92ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);93if (ret == NULL) {94return(NULL);95}96for (i = 0;i < len;i++) {97/* Explicit sign change */98ret[i] = (xmlChar) cur[i];99if (ret[i] == 0) return(ret);100}101ret[len] = 0;102return(ret);103}104105/**106* xmlCharStrdup:107* @cur: the input char *108*109* a strdup for char's to xmlChar's110*111* Returns a new xmlChar * or NULL112*/113114xmlChar *115xmlCharStrdup(const char *cur) {116const char *p = cur;117118if (cur == NULL) return(NULL);119while (*p != '\0') p++; /* non input consuming */120return(xmlCharStrndup(cur, p - cur));121}122123/**124* xmlStrcmp:125* @str1: the first xmlChar *126* @str2: the second xmlChar *127*128* a strcmp for xmlChar's129*130* Returns the integer result of the comparison131*/132133int134xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {135if (str1 == str2) return(0);136if (str1 == NULL) return(-1);137if (str2 == NULL) return(1);138#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION139return(strcmp((const char *)str1, (const char *)str2));140#else141do {142int tmp = *str1++ - *str2;143if (tmp != 0) return(tmp);144} while (*str2++ != 0);145return 0;146#endif147}148149/**150* xmlStrEqual:151* @str1: the first xmlChar *152* @str2: the second xmlChar *153*154* Check if both strings are equal of have same content.155* Should be a bit more readable and faster than xmlStrcmp()156*157* Returns 1 if they are equal, 0 if they are different158*/159160int161xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {162if (str1 == str2) return(1);163if (str1 == NULL) return(0);164if (str2 == NULL) return(0);165#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION166return(strcmp((const char *)str1, (const char *)str2) == 0);167#else168do {169if (*str1++ != *str2) return(0);170} while (*str2++);171return(1);172#endif173}174175/**176* xmlStrQEqual:177* @pref: the prefix of the QName178* @name: the localname of the QName179* @str: the second xmlChar *180*181* Check if a QName is Equal to a given string182*183* Returns 1 if they are equal, 0 if they are different184*/185186int187xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {188if (pref == NULL) return(xmlStrEqual(name, str));189if (name == NULL) return(0);190if (str == NULL) return(0);191192do {193if (*pref++ != *str) return(0);194} while ((*str++) && (*pref));195if (*str++ != ':') return(0);196do {197if (*name++ != *str) return(0);198} while (*str++);199return(1);200}201202/**203* xmlStrncmp:204* @str1: the first xmlChar *205* @str2: the second xmlChar *206* @len: the max comparison length207*208* a strncmp for xmlChar's209*210* Returns the integer result of the comparison211*/212213int214xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {215if (len <= 0) return(0);216if (str1 == str2) return(0);217if (str1 == NULL) return(-1);218if (str2 == NULL) return(1);219#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION220return(strncmp((const char *)str1, (const char *)str2, len));221#else222do {223int tmp = *str1++ - *str2;224if (tmp != 0 || --len == 0) return(tmp);225} while (*str2++ != 0);226return 0;227#endif228}229230static const xmlChar casemap[256] = {2310x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,2320x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,2330x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,2340x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,2350x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,2360x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,2370x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,2380x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,2390x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,2400x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,2410x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,2420x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,2430x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,2440x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,2450x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,2460x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,2470x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,2480x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,2490x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,2500x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,2510xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,2520xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,2530xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,2540xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,2550xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,2560xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,2570xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,2580xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,2590xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,2600xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,2610xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,2620xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF263};264265/**266* xmlStrcasecmp:267* @str1: the first xmlChar *268* @str2: the second xmlChar *269*270* a strcasecmp for xmlChar's271*272* Returns the integer result of the comparison273*/274275int276xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {277register int tmp;278279if (str1 == str2) return(0);280if (str1 == NULL) return(-1);281if (str2 == NULL) return(1);282do {283tmp = casemap[*str1++] - casemap[*str2];284if (tmp != 0) return(tmp);285} while (*str2++ != 0);286return 0;287}288289/**290* xmlStrncasecmp:291* @str1: the first xmlChar *292* @str2: the second xmlChar *293* @len: the max comparison length294*295* a strncasecmp for xmlChar's296*297* Returns the integer result of the comparison298*/299300int301xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {302register int tmp;303304if (len <= 0) return(0);305if (str1 == str2) return(0);306if (str1 == NULL) return(-1);307if (str2 == NULL) return(1);308do {309tmp = casemap[*str1++] - casemap[*str2];310if (tmp != 0 || --len == 0) return(tmp);311} while (*str2++ != 0);312return 0;313}314315/**316* xmlStrchr:317* @str: the xmlChar * array318* @val: the xmlChar to search319*320* a strchr for xmlChar's321*322* Returns the xmlChar * for the first occurrence or NULL.323*/324325const xmlChar *326xmlStrchr(const xmlChar *str, xmlChar val) {327if (str == NULL) return(NULL);328while (*str != 0) { /* non input consuming */329if (*str == val) return((xmlChar *) str);330str++;331}332return(NULL);333}334335/**336* xmlStrstr:337* @str: the xmlChar * array (haystack)338* @val: the xmlChar to search (needle)339*340* a strstr for xmlChar's341*342* Returns the xmlChar * for the first occurrence or NULL.343*/344345const xmlChar *346xmlStrstr(const xmlChar *str, const xmlChar *val) {347int n;348349if (str == NULL) return(NULL);350if (val == NULL) return(NULL);351n = xmlStrlen(val);352353if (n == 0) return(str);354while (*str != 0) { /* non input consuming */355if (*str == *val) {356if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);357}358str++;359}360return(NULL);361}362363/**364* xmlStrcasestr:365* @str: the xmlChar * array (haystack)366* @val: the xmlChar to search (needle)367*368* a case-ignoring strstr for xmlChar's369*370* Returns the xmlChar * for the first occurrence or NULL.371*/372373const xmlChar *374xmlStrcasestr(const xmlChar *str, const xmlChar *val) {375int n;376377if (str == NULL) return(NULL);378if (val == NULL) return(NULL);379n = xmlStrlen(val);380381if (n == 0) return(str);382while (*str != 0) { /* non input consuming */383if (casemap[*str] == casemap[*val])384if (!xmlStrncasecmp(str, val, n)) return(str);385str++;386}387return(NULL);388}389390/**391* xmlStrsub:392* @str: the xmlChar * array (haystack)393* @start: the index of the first char (zero based)394* @len: the length of the substring395*396* Extract a substring of a given string397*398* Returns the xmlChar * for the first occurrence or NULL.399*/400401xmlChar *402xmlStrsub(const xmlChar *str, int start, int len) {403int i;404405if (str == NULL) return(NULL);406if (start < 0) return(NULL);407if (len < 0) return(NULL);408409for (i = 0;i < start;i++) {410if (*str == 0) return(NULL);411str++;412}413if (*str == 0) return(NULL);414return(xmlStrndup(str, len));415}416417/**418* xmlStrlen:419* @str: the xmlChar * array420*421* length of a xmlChar's string422*423* Returns the number of xmlChar contained in the ARRAY.424*/425426int427xmlStrlen(const xmlChar *str) {428size_t len = str ? strlen((const char *)str) : 0;429return(len > INT_MAX ? 0 : len);430}431432/**433* xmlStrncat:434* @cur: the original xmlChar * array435* @add: the xmlChar * array added436* @len: the length of @add437*438* a strncat for array of xmlChar's, it will extend @cur with the len439* first bytes of @add. Note that if @len < 0 then this is an API error440* and NULL will be returned.441*442* Returns a new xmlChar *, the original @cur is reallocated and should443* not be freed.444*/445446xmlChar *447xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {448int size;449xmlChar *ret;450451if ((add == NULL) || (len == 0))452return(cur);453if (len < 0)454return(NULL);455if (cur == NULL)456return(xmlStrndup(add, len));457458size = xmlStrlen(cur);459if ((size < 0) || (size > INT_MAX - len))460return(NULL);461ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);462if (ret == NULL) {463return(cur);464}465memcpy(&ret[size], add, len);466ret[size + len] = 0;467return(ret);468}469470/**471* xmlStrncatNew:472* @str1: first xmlChar string473* @str2: second xmlChar string474* @len: the len of @str2 or < 0475*476* same as xmlStrncat, but creates a new string. The original477* two strings are not freed. If @len is < 0 then the length478* will be calculated automatically.479*480* Returns a new xmlChar * or NULL481*/482xmlChar *483xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {484int size;485xmlChar *ret;486487if (len < 0) {488len = xmlStrlen(str2);489if (len < 0)490return(NULL);491}492if ((str2 == NULL) || (len == 0))493return(xmlStrdup(str1));494if (str1 == NULL)495return(xmlStrndup(str2, len));496497size = xmlStrlen(str1);498if ((size < 0) || (size > INT_MAX - len))499return(NULL);500ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);501if (ret == NULL) {502return(xmlStrndup(str1, size));503}504memcpy(ret, str1, size);505memcpy(&ret[size], str2, len);506ret[size + len] = 0;507return(ret);508}509510/**511* xmlStrcat:512* @cur: the original xmlChar * array513* @add: the xmlChar * array added514*515* a strcat for array of xmlChar's. Since they are supposed to be516* encoded in UTF-8 or an encoding with 8bit based chars, we assume517* a termination mark of '0'.518*519* Returns a new xmlChar * containing the concatenated string. The original520* @cur is reallocated and should not be freed.521*/522xmlChar *523xmlStrcat(xmlChar *cur, const xmlChar *add) {524const xmlChar *p = add;525526if (add == NULL) return(cur);527if (cur == NULL)528return(xmlStrdup(add));529530while (*p != 0) p++; /* non input consuming */531return(xmlStrncat(cur, add, p - add));532}533534/**535* xmlStrPrintf:536* @buf: the result buffer.537* @len: the result buffer length.538* @msg: the message with printf formatting.539* @...: extra parameters for the message.540*541* Formats @msg and places result into @buf.542*543* Returns the number of characters written to @buf or -1 if an error occurs.544*/545int546xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {547va_list args;548int ret;549550if((buf == NULL) || (msg == NULL)) {551return(-1);552}553554va_start(args, msg);555ret = vsnprintf((char *) buf, len, (const char *) msg, args);556va_end(args);557buf[len - 1] = 0; /* be safe ! */558559return(ret);560}561562/**563* xmlStrVPrintf:564* @buf: the result buffer.565* @len: the result buffer length.566* @msg: the message with printf formatting.567* @ap: extra parameters for the message.568*569* Formats @msg and places result into @buf.570*571* Returns the number of characters written to @buf or -1 if an error occurs.572*/573int574xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {575int ret;576577if((buf == NULL) || (msg == NULL)) {578return(-1);579}580581ret = vsnprintf((char *) buf, len, (const char *) msg, ap);582buf[len - 1] = 0; /* be safe ! */583584return(ret);585}586587/************************************************************************588* *589* Generic UTF8 handling routines *590* *591* From rfc2044: encoding of the Unicode values on UTF-8: *592* *593* UCS-4 range (hex.) UTF-8 octet sequence (binary) *594* 0000 0000-0000 007F 0xxxxxxx *595* 0000 0080-0000 07FF 110xxxxx 10xxxxxx *596* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *597* *598* I hope we won't use values > 0xFFFF anytime soon ! *599* *600************************************************************************/601602603/**604* xmlUTF8Size:605* @utf: pointer to the UTF8 character606*607* calculates the internal size of a UTF8 character608*609* returns the numbers of bytes in the character, -1 on format error610*/611int612xmlUTF8Size(const xmlChar *utf) {613xmlChar mask;614int len;615616if (utf == NULL)617return -1;618if (*utf < 0x80)619return 1;620/* check valid UTF8 character */621if (!(*utf & 0x40))622return -1;623/* determine number of bytes in char */624len = 2;625for (mask=0x20; mask != 0; mask>>=1) {626if (!(*utf & mask))627return len;628len++;629}630return -1;631}632633/**634* xmlUTF8Charcmp:635* @utf1: pointer to first UTF8 char636* @utf2: pointer to second UTF8 char637*638* compares the two UCS4 values639*640* returns result of the compare as with xmlStrncmp641*/642int643xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {644645if (utf1 == NULL ) {646if (utf2 == NULL)647return 0;648return -1;649}650return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));651}652653/**654* xmlUTF8Strlen:655* @utf: a sequence of UTF-8 encoded bytes656*657* compute the length of an UTF8 string, it doesn't do a full UTF8658* checking of the content of the string.659*660* Returns the number of characters in the string or -1 in case of error661*/662int663xmlUTF8Strlen(const xmlChar *utf) {664size_t ret = 0;665666if (utf == NULL)667return(-1);668669while (*utf != 0) {670if (utf[0] & 0x80) {671if ((utf[1] & 0xc0) != 0x80)672return(-1);673if ((utf[0] & 0xe0) == 0xe0) {674if ((utf[2] & 0xc0) != 0x80)675return(-1);676if ((utf[0] & 0xf0) == 0xf0) {677if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)678return(-1);679utf += 4;680} else {681utf += 3;682}683} else {684utf += 2;685}686} else {687utf++;688}689ret++;690}691return(ret > INT_MAX ? 0 : ret);692}693694/**695* xmlGetUTF8Char:696* @utf: a sequence of UTF-8 encoded bytes697* @len: a pointer to the minimum number of bytes present in698* the sequence. This is used to assure the next character699* is completely contained within the sequence.700*701* Read the first UTF8 character from @utf702*703* Returns the char value or -1 in case of error, and sets *len to704* the actual number of bytes consumed (0 in case of error)705*/706int707xmlGetUTF8Char(const unsigned char *utf, int *len) {708unsigned int c;709710if (utf == NULL)711goto error;712if (len == NULL)713goto error;714715c = utf[0];716if (c < 0x80) {717if (*len < 1)718goto error;719/* 1-byte code */720*len = 1;721} else {722if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))723goto error;724if (c < 0xe0) {725if (c < 0xc2)726goto error;727/* 2-byte code */728*len = 2;729c = (c & 0x1f) << 6;730c |= utf[1] & 0x3f;731} else {732if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))733goto error;734if (c < 0xf0) {735/* 3-byte code */736*len = 3;737c = (c & 0xf) << 12;738c |= (utf[1] & 0x3f) << 6;739c |= utf[2] & 0x3f;740if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))741goto error;742} else {743if ((*len < 4) || ((utf[3] & 0xc0) != 0x80))744goto error;745*len = 4;746/* 4-byte code */747c = (c & 0x7) << 18;748c |= (utf[1] & 0x3f) << 12;749c |= (utf[2] & 0x3f) << 6;750c |= utf[3] & 0x3f;751if ((c < 0x10000) || (c >= 0x110000))752goto error;753}754}755}756return(c);757758error:759if (len != NULL)760*len = 0;761return(-1);762}763764/**765* xmlCheckUTF8:766* @utf: Pointer to putative UTF-8 encoded string.767*768* Checks @utf for being valid UTF-8. @utf is assumed to be769* null-terminated. This function is not super-strict, as it will770* allow longer UTF-8 sequences than necessary. Note that Java is771* capable of producing these sequences if provoked. Also note, this772* routine checks for the 4-byte maximum size, but does not check for773* 0x10ffff maximum value.774*775* Return value: true if @utf is valid.776**/777int778xmlCheckUTF8(const unsigned char *utf)779{780int ix;781unsigned char c;782783if (utf == NULL)784return(0);785/*786* utf is a string of 1, 2, 3 or 4 bytes. The valid strings787* are as follows (in "bit format"):788* 0xxxxxxx valid 1-byte789* 110xxxxx 10xxxxxx valid 2-byte790* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte791* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte792*/793while ((c = utf[0])) { /* string is 0-terminated */794ix = 0;795if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */796ix = 1;797} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */798if ((utf[1] & 0xc0 ) != 0x80)799return 0;800ix = 2;801} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */802if (((utf[1] & 0xc0) != 0x80) ||803((utf[2] & 0xc0) != 0x80))804return 0;805ix = 3;806} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */807if (((utf[1] & 0xc0) != 0x80) ||808((utf[2] & 0xc0) != 0x80) ||809((utf[3] & 0xc0) != 0x80))810return 0;811ix = 4;812} else /* unknown encoding */813return 0;814utf += ix;815}816return(1);817}818819/**820* xmlUTF8Strsize:821* @utf: a sequence of UTF-8 encoded bytes822* @len: the number of characters in the array823*824* storage size of an UTF8 string825* the behaviour is not guaranteed if the input string is not UTF-8826*827* Returns the storage size of828* the first 'len' characters of ARRAY829*/830831int832xmlUTF8Strsize(const xmlChar *utf, int len) {833const xmlChar *ptr=utf;834int ch;835size_t ret;836837if (utf == NULL)838return(0);839840if (len <= 0)841return(0);842843while ( len-- > 0) {844if ( !*ptr )845break;846if ( (ch = *ptr++) & 0x80)847while ((ch<<=1) & 0x80 ) {848if (*ptr == 0) break;849ptr++;850}851}852ret = ptr - utf;853return (ret > INT_MAX ? 0 : ret);854}855856857/**858* xmlUTF8Strndup:859* @utf: the input UTF8 *860* @len: the len of @utf (in chars)861*862* a strndup for array of UTF8's863*864* Returns a new UTF8 * or NULL865*/866xmlChar *867xmlUTF8Strndup(const xmlChar *utf, int len) {868xmlChar *ret;869int i;870871if ((utf == NULL) || (len < 0)) return(NULL);872i = xmlUTF8Strsize(utf, len);873ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);874if (ret == NULL) {875return(NULL);876}877memcpy(ret, utf, i);878ret[i] = 0;879return(ret);880}881882/**883* xmlUTF8Strpos:884* @utf: the input UTF8 *885* @pos: the position of the desired UTF8 char (in chars)886*887* a function to provide the equivalent of fetching a888* character from a string array889*890* Returns a pointer to the UTF8 character or NULL891*/892const xmlChar *893xmlUTF8Strpos(const xmlChar *utf, int pos) {894int ch;895896if (utf == NULL) return(NULL);897if (pos < 0)898return(NULL);899while (pos--) {900if ((ch=*utf++) == 0) return(NULL);901if ( ch & 0x80 ) {902/* if not simple ascii, verify proper format */903if ( (ch & 0xc0) != 0xc0 )904return(NULL);905/* then skip over remaining bytes for this char */906while ( (ch <<= 1) & 0x80 )907if ( (*utf++ & 0xc0) != 0x80 )908return(NULL);909}910}911return((xmlChar *)utf);912}913914/**915* xmlUTF8Strloc:916* @utf: the input UTF8 *917* @utfchar: the UTF8 character to be found918*919* a function to provide the relative location of a UTF8 char920*921* Returns the relative character position of the desired char922* or -1 if not found923*/924int925xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {926size_t i;927int size;928int ch;929930if (utf==NULL || utfchar==NULL) return -1;931size = xmlUTF8Strsize(utfchar, 1);932for(i=0; (ch=*utf) != 0; i++) {933if (xmlStrncmp(utf, utfchar, size)==0)934return(i > INT_MAX ? 0 : i);935utf++;936if ( ch & 0x80 ) {937/* if not simple ascii, verify proper format */938if ( (ch & 0xc0) != 0xc0 )939return(-1);940/* then skip over remaining bytes for this char */941while ( (ch <<= 1) & 0x80 )942if ( (*utf++ & 0xc0) != 0x80 )943return(-1);944}945}946947return(-1);948}949/**950* xmlUTF8Strsub:951* @utf: a sequence of UTF-8 encoded bytes952* @start: relative pos of first char953* @len: total number to copy954*955* Create a substring from a given UTF-8 string956* Note: positions are given in units of UTF-8 chars957*958* Returns a pointer to a newly created string959* or NULL if any problem960*/961962xmlChar *963xmlUTF8Strsub(const xmlChar *utf, int start, int len) {964int i;965int ch;966967if (utf == NULL) return(NULL);968if (start < 0) return(NULL);969if (len < 0) return(NULL);970971/*972* Skip over any leading chars973*/974for (i = 0;i < start;i++) {975if ((ch=*utf++) == 0) return(NULL);976if ( ch & 0x80 ) {977/* if not simple ascii, verify proper format */978if ( (ch & 0xc0) != 0xc0 )979return(NULL);980/* then skip over remaining bytes for this char */981while ( (ch <<= 1) & 0x80 )982if ( (*utf++ & 0xc0) != 0x80 )983return(NULL);984}985}986987return(xmlUTF8Strndup(utf, len));988}989990/**991* xmlEscapeFormatString:992* @msg: a pointer to the string in which to escape '%' characters.993* Must be a heap-allocated buffer created by libxml2 that may be994* returned, or that may be freed and replaced.995*996* Replaces the string pointed to by 'msg' with an escaped string.997* Returns the same string with all '%' characters escaped.998*/999xmlChar *1000xmlEscapeFormatString(xmlChar **msg)1001{1002xmlChar *msgPtr = NULL;1003xmlChar *result = NULL;1004xmlChar *resultPtr = NULL;1005size_t count = 0;1006size_t msgLen = 0;1007size_t resultLen = 0;10081009if (!msg || !*msg)1010return(NULL);10111012for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {1013++msgLen;1014if (*msgPtr == '%')1015++count;1016}10171018if (count == 0)1019return(*msg);10201021if ((count > INT_MAX) || (msgLen > INT_MAX - count))1022return(NULL);1023resultLen = msgLen + count + 1;1024result = (xmlChar *) xmlMallocAtomic(resultLen);1025if (result == NULL) {1026/* Clear *msg to prevent format string vulnerabilities in1027out-of-memory situations. */1028xmlFree(*msg);1029*msg = NULL;1030return(NULL);1031}10321033for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {1034*resultPtr = *msgPtr;1035if (*msgPtr == '%')1036*(++resultPtr) = '%';1037}1038result[resultLen - 1] = '\0';10391040xmlFree(*msg);1041*msg = result;10421043return *msg;1044}104510461047