CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Data/Encoding/Utf16.h
Views: 1401
1
#pragma once
2
3
#include <cstdint>
4
5
#include "Common/BitSet.h"
6
7
// True when the host is little-endian. The check should optimize out to a constant.
8
#define UTF16_IS_LITTLE_ENDIAN (*(const uint16_t *)"\0\xff" >= 0x100)
9
10
template <bool is_little>
11
uint16_t UTF16_Swap(uint16_t u) {
12
if (is_little) {
13
return UTF16_IS_LITTLE_ENDIAN ? u : swap16(u);
14
} else {
15
return UTF16_IS_LITTLE_ENDIAN ? swap16(u) : u;
16
}
17
}
18
19
template <bool is_little>
20
struct UTF16_Type {
21
public:
22
static const char32_t INVALID = (char32_t)-1;
23
24
UTF16_Type(const char16_t *c) : c_(c), index_(0) {}
25
26
char32_t next() {
27
const char32_t u = UTF16_Swap<is_little>(c_[index_++]);
28
29
// Surrogate pair. UTF-16 is so simple. We assume it's valid.
30
if ((u & 0xF800) == 0xD800) {
31
return 0x10000 + (((u & 0x3FF) << 10) | (UTF16_Swap<is_little>(c_[index_++]) & 0x3FF));
32
}
33
return u;
34
}
35
36
bool end() const {
37
return c_[index_] == 0;
38
}
39
40
int length() const {
41
int len = 0;
42
for (UTF16_Type<is_little> dec(c_); !dec.end(); dec.next())
43
++len;
44
return len;
45
}
46
47
int shortIndex() const {
48
return index_;
49
}
50
51
static int encode(char16_t *dest, char32_t u) {
52
if (u >= 0x10000) {
53
u -= 0x10000;
54
*dest++ = UTF16_Swap<is_little>(0xD800 + ((u >> 10) & 0x3FF));
55
*dest = UTF16_Swap<is_little>(0xDC00 + ((u >> 0) & 0x3FF));
56
return 2;
57
} else {
58
*dest = UTF16_Swap<is_little>((char16_t)u);
59
return 1;
60
}
61
}
62
63
// Rejects non-UCS2 codepoints.
64
static int encodeUCS2(char16_t *dest, char32_t u) {
65
if (u >= 0x10000 || (u >= 0xD800 && u <= 0xDFFF)) {
66
return 0;
67
} else {
68
*dest = UTF16_Swap<is_little>((char16_t)u);
69
return 1;
70
}
71
}
72
73
static int encodeUnits(char32_t u) {
74
if (u >= 0x10000) {
75
return 2;
76
} else {
77
return 1;
78
}
79
}
80
81
static int encodeUnitsUCS2(char32_t u) {
82
if (u >= 0x10000 || (u >= 0xD800 && u <= 0xDFFF)) {
83
return 0;
84
} else {
85
return 1;
86
}
87
}
88
private:
89
const char16_t *c_;
90
int index_;
91
};
92
93
// Convenience names for the two byte orders.
using UTF16LE = UTF16_Type<true>;   // little-endian UTF-16
using UTF16BE = UTF16_Type<false>;  // big-endian UTF-16
95
96