Path: blob/master/thirdparty/icu4c/common/cstring.cpp
9902 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3******************************************************************************4*5* Copyright (C) 1997-2011, International Business Machines6* Corporation and others. All Rights Reserved.7*8******************************************************************************9*10* File CSTRING.C11*12* @author Helena Shih13*14* Modification History:15*16* Date Name Description17* 6/18/98 hshih Created18* 09/08/98 stephen Added include for ctype, for Mac Port19* 11/15/99 helena Integrated S/390 IEEE changes.20******************************************************************************21*/22232425#include <stdlib.h>26#include <stdio.h>27#include "unicode/utypes.h"28#include "cmemory.h"29#include "cstring.h"30#include "uassert.h"3132/*33* We hardcode case conversion for invariant characters to match our expectation34* and the compiler execution charset.35* This prevents problems on systems36* - with non-default casing behavior, like Turkish system locales where37* tolower('I') maps to dotless i and toupper('i') maps to dotted I38* - where there are no lowercase Latin characters at all, or using different39* codes (some old EBCDIC codepages)40*41* This works because the compiler usually runs on a platform where the execution42* charset includes all of the invariant characters at their expected43* code positions, so that the char * string literals in ICU code match44* the char literals here.45*46* Note that the set of lowercase Latin letters is discontiguous in EBCDIC47* and the set of uppercase Latin letters is discontiguous as well.48*/4950U_CAPI UBool U_EXPORT251uprv_isASCIILetter(char c) {52#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY53return54('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||55('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');56#else57return ('a'<=c && c<='z') || ('A'<=c && c<='Z');58#endif59}6061U_CAPI char U_EXPORT262uprv_toupper(char c) {63#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY64if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {65c=(char)(c+('A'-'a'));66}67#else68if('a'<=c && c<='z') {69c=(char)(c+('A'-'a'));70}71#endif72return c;73}747576#if 077/*78* Commented out because cstring.h defines uprv_tolower() to be79* the same as either uprv_asciitolower() or uprv_ebcdictolower()80* to reduce the amount of code to cover with tests.81*82* Note that this uprv_tolower() definition is likely to work for most83* charset families, not just ASCII and EBCDIC, because its #else branch84* is written generically.85*/86U_CAPI char U_EXPORT287uprv_tolower(char c) {88#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY89if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {90c=(char)(c+('a'-'A'));91}92#else93if('A'<=c && c<='Z') {94c=(char)(c+('a'-'A'));95}96#endif97return c;98}99#endif100101U_CAPI char U_EXPORT2102uprv_asciitolower(char c) {103if(0x41<=c && c<=0x5a) {104c=(char)(c+0x20);105}106return c;107}108109U_CAPI char U_EXPORT2110uprv_ebcdictolower(char c) {111if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||112(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||113(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)114) {115c=(char)(c-0x40);116}117return c;118}119120121U_CAPI char* U_EXPORT2122T_CString_toLowerCase(char* str)123{124char* origPtr = str;125126if (str) {127do128*str = uprv_tolower(*str);129while (*(str++));130}131132return origPtr;133}134135U_CAPI char* U_EXPORT2136T_CString_toUpperCase(char* str)137{138char* origPtr = str;139140if (str) {141do142*str = uprv_toupper(*str);143while (*(str++));144}145146return origPtr;147}148149/*150* Takes a int32_t and fills in a char* string with that number "radix"-based.151* Does not handle negative values (makes an empty string for them).152* Writes at most 12 chars ("-2147483647" plus NUL).153* Returns the length of the string (not including the NUL).154*/155U_CAPI int32_t U_EXPORT2156T_CString_integerToString(char* buffer, int32_t v, int32_t radix)157{158char tbuf[30];159int32_t tbx = sizeof(tbuf);160uint8_t digit;161int32_t length = 0;162uint32_t uval;163164U_ASSERT(radix>=2 && radix<=16);165uval = (uint32_t) v;166if(v<0 && radix == 10) {167/* Only in base 10 do we conside numbers to be signed. */168uval = (uint32_t)(-v);169buffer[length++] = '-';170}171172tbx = sizeof(tbuf)-1;173tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */174do {175digit = (uint8_t)(uval % radix);176tbuf[--tbx] = (char)(T_CString_itosOffset(digit));177uval = uval / radix;178} while (uval != 0);179180/* copy converted number into user buffer */181uprv_strcpy(buffer+length, tbuf+tbx);182length += sizeof(tbuf) - tbx -1;183return length;184}185186187188/*189* Takes a int64_t and fills in a char* string with that number "radix"-based.190* Writes at most 21: chars ("-9223372036854775807" plus NUL).191* Returns the length of the string, not including the terminating NUL.192*/193U_CAPI int32_t U_EXPORT2194T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)195{196char tbuf[30];197int32_t tbx = sizeof(tbuf);198uint8_t digit;199int32_t length = 0;200uint64_t uval;201202U_ASSERT(radix>=2 && radix<=16);203uval = (uint64_t) v;204if(v<0 && radix == 10) {205/* Only in base 10 do we conside numbers to be signed. */206uval = (uint64_t)(-v);207buffer[length++] = '-';208}209210tbx = sizeof(tbuf)-1;211tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */212do {213digit = (uint8_t)(uval % radix);214tbuf[--tbx] = (char)(T_CString_itosOffset(digit));215uval = uval / radix;216} while (uval != 0);217218/* copy converted number into user buffer */219uprv_strcpy(buffer+length, tbuf+tbx);220length += sizeof(tbuf) - tbx -1;221return length;222}223224225U_CAPI int32_t U_EXPORT2226T_CString_stringToInteger(const char *integerString, int32_t radix)227{228char *end;229return uprv_strtoul(integerString, &end, radix);230231}232233U_CAPI int U_EXPORT2234uprv_stricmp(const char *str1, const char *str2) {235if(str1==nullptr) {236if(str2==nullptr) {237return 0;238} else {239return -1;240}241} else if(str2==nullptr) {242return 1;243} else {244/* compare non-nullptr strings lexically with lowercase */245int rc;246unsigned char c1, c2;247248for(;;) {249c1=(unsigned char)*str1;250c2=(unsigned char)*str2;251if(c1==0) {252if(c2==0) {253return 0;254} else {255return -1;256}257} else if(c2==0) {258return 1;259} else {260/* compare non-zero characters with lowercase */261rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);262if(rc!=0) {263return rc;264}265}266++str1;267++str2;268}269}270}271272U_CAPI int U_EXPORT2273uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {274if(str1==nullptr) {275if(str2==nullptr) {276return 0;277} else {278return -1;279}280} else if(str2==nullptr) {281return 1;282} else {283/* compare non-nullptr strings lexically with lowercase */284int rc;285unsigned char c1, c2;286287for(; n--;) {288c1=(unsigned char)*str1;289c2=(unsigned char)*str2;290if(c1==0) {291if(c2==0) {292return 0;293} else {294return -1;295}296} else if(c2==0) {297return 1;298} else {299/* compare non-zero characters with lowercase */300rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);301if(rc!=0) {302return rc;303}304}305++str1;306++str2;307}308}309310return 0;311}312313U_CAPI char* U_EXPORT2314uprv_strdup(const char *src) {315size_t len = uprv_strlen(src) + 1;316char *dup = (char *) uprv_malloc(len);317318if (dup) {319uprv_memcpy(dup, src, len);320}321322return dup;323}324325U_CAPI char* U_EXPORT2326uprv_strndup(const char *src, int32_t n) {327char *dup;328329if(n < 0) {330dup = uprv_strdup(src);331} else {332dup = (char*)uprv_malloc(n+1);333if (dup) {334uprv_memcpy(dup, src, n);335dup[n] = 0;336}337}338339return dup;340}341342343