Path: blob/master/waterbox/libc/internals/_PDCLIB_encoding.h
2 views
/* Encoding support <_PDCLIB_encoding.h>12This file is part of the Public Domain C Library (PDCLib).3Permission is granted to use, modify, and / or redistribute at will.4*/56#ifndef __PDCLIB_ENCODING_H7#define __PDCLIB_ENCODING_H __PDCLIB_ENCODING_H89#include <uchar.h>1011/* Must be cauued with bufsize >= 1, in != NULL, out != NULL, ps != NULL12*13* Converts a UTF-16 (char16_t) to a UCS4 (char32_t) value. Returns14* 1, 2 : Valid character (converted to UCS-4)15* -1 : Encoding error16* -2 : Partial character (only lead surrogate in buffer)17*/18static inline int _PDCLIB_c16rtoc32(19_PDCLIB_char32_t *_PDCLIB_restrict out,20const _PDCLIB_char16_t *_PDCLIB_restrict in,21_PDCLIB_size_t bufsize,22_PDCLIB_mbstate_t *_PDCLIB_restrict ps23)24{25if(ps->_Surrogate) {26// We already have a lead surrogate27if((*in & ~0x3FF) != 0xDC00) {28// Encoding error29return -1;30} else {31// Decode and reset state32*out = (ps->_Surrogate & 0x3FF) << 10 | (*in & 0x3FF);33ps->_Surrogate = 0;34return 1;35}36} if((*in & ~0x3FF) == 0xD800) {37// Lead surrogate38if(bufsize >= 2) {39// Buffer big enough40if((in[1] & ~0x3FF) != 0xDC00) {41// Encoding error42return -1;43} else {44*out = (in[0] & 0x3FF) << 10 | (in[1] & 0x3FF);45return 2;46}47} else {48// Buffer too small - update state49ps->_Surrogate = *in;50return -2;51}52} else {53// BMP character54*out = *in;55return 1;56}57}5859static inline _PDCLIB_size_t _PDCLIB_c32rtoc16(60_PDCLIB_wchar_t *_PDCLIB_restrict out,61const _PDCLIB_char32_t *_PDCLIB_restrict in,62_PDCLIB_size_t bufsize,63_PDCLIB_mbstate_t *_PDCLIB_restrict ps64)65{66if(ps->_Surrogate) {67*out = ps->_Surrogate;68ps->_Surrogate = 0;69return 0;70}7172if(*in <= 0xFFFF) {73// BMP character74*out = *in;75return 1;76} else {77// Supplementary plane character78*out = 0xD800 | (*in >> 10);79if(bufsize >= 2) {80out[1] = 0xDC00 | (*in & 0x3FF);81return 2;82} else {83ps->_Surrogate = 0xDC00 | (*in & 0x3FF);84return 1;85}86}87}8889struct _PDCLIB_charcodec_t {90/* Reads at most *_P_insz code units from *_P_inbuf and writes the result91* into *_P_outbuf, writing at most *_P_outsz code units. Updates92* *_P_outbuf, *_P_outsz, *_P_inbuf, *_P_outsz with the resulting state93*94* If _P_outbuf is NULL, then the input must be processed but no output95* generated. _P_outsz may be processed as normal.96*97* Returns true if the conversion completed successfully (i.e. one of98* _P_outsize or _P_insize reached zero and no coding errors were99* encountered), else return false.100*/101102/* mbsinit. Mandatory. */103_PDCLIB_bool (*__mbsinit)(const _PDCLIB_mbstate_t *_P_ps);104105/* UCS-4 variants. Mandatory. */106107_PDCLIB_bool (*__mbstoc32s)(108_PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,109_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,110const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,111_PDCLIB_size_t *_PDCLIB_restrict _P_insz,112_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps113);114115_PDCLIB_bool (*__c32stombs)(116char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,117_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,118const _PDCLIB_char32_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,119_PDCLIB_size_t *_PDCLIB_restrict _P_insz,120_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps121);122123/* UTF-16 variants; same as above except optional.124*125* If not provided, _PDCLib will internally synthesize on top of the UCS-4126* variants above, albeit at a performance cost.127*/128129_PDCLIB_bool (*__mbstoc16s)(130_PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,131_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,132const char *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,133_PDCLIB_size_t *_PDCLIB_restrict _P_insz,134_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps135);136137_PDCLIB_bool (*__c16stombs)(138char *_PDCLIB_restrict *_PDCLIB_restrict _P_outbuf,139_PDCLIB_size_t *_PDCLIB_restrict _P_outsz,140const _PDCLIB_char16_t *_PDCLIB_restrict *_PDCLIB_restrict _P_inbuf,141_PDCLIB_size_t *_PDCLIB_restrict _P_insz,142_PDCLIB_mbstate_t *_PDCLIB_restrict _P_ps143);144145size_t __mb_max;146};147148/* mbstate _PendState values */149enum {150/* Nothing pending; _PendChar ignored */151_PendClear = 0,152153/* Process the character stored in _PendChar before reading the buffer154* passed for the conversion155*/156_PendPrefix = 1,157};158159/* XXX Defining these here is temporary - will move to xlocale in future */160size_t mbrtoc16_l(161char16_t *_PDCLIB_restrict pc16,162const char *_PDCLIB_restrict s,163size_t n,164mbstate_t *_PDCLIB_restrict ps,165_PDCLIB_locale_t _PDCLIB_restrict l);166167size_t c16rtomb_l(168char *_PDCLIB_restrict s,169char16_t c16,170mbstate_t *_PDCLIB_restrict ps,171_PDCLIB_locale_t _PDCLIB_restrict l);172173size_t mbrtoc32_l(174char32_t *_PDCLIB_restrict pc32,175const char *_PDCLIB_restrict s,176size_t n,177mbstate_t *_PDCLIB_restrict ps,178_PDCLIB_locale_t _PDCLIB_restrict l);179180size_t c32rtomb_l(181char *_PDCLIB_restrict s,182char32_t c32,183mbstate_t *_PDCLIB_restrict ps,184_PDCLIB_locale_t _PDCLIB_restrict l);185186#define _PDCLIB_WCHAR_ENCODING_UTF16 16187#define _PDCLIB_WCHAR_ENCODING_UCS4 32188189#if !defined(_PDCLIB_WCHAR_ENCODING)190#define _PDCLIB_WCHAR_ENCODING 0191#endif192193#if _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UTF16194#define _PDCLIB_mbrtocwc_l mbrtoc16_l195#define _PDCLIB_mbrtocwc mbrtoc16196#define _PDCLIB_cwcrtomb_l c16rtomb_l197#define _PDCLIB_cwcrtomb c16rtomb198#elif _PDCLIB_WCHAR_ENCODING == _PDCLIB_WCHAR_ENCODING_UCS4199#define _PDCLIB_mbrtocwc_l mbrtoc32_l200#define _PDCLIB_mbrtocwc mbrtoc32201#define _PDCLIB_cwcrtomb_l c32rtomb_l202#define _PDCLIB_cwcrtomb c32rtomb203#else204#error _PDCLIB_WCHAR_ENCODING not defined correctly205#error Define to one of _PDCLIB_WCHAR_ENCODING_UCS4 or _PDCLIB_WCHAR_ENCODING_UTF16206#endif207208#endif209210211