CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Data/Encoding/Shiftjis.h
Views: 1401
1
#pragma once
2
3
#include <cstdint>
4
5
// Warning: decodes/encodes JIS code points, not Unicode.
// Use a lookup table to map between JIS and Unicode.
7
// Sequential decoder (and static encoder) for Shift-JIS byte strings.
//
// Decoded values are JIS codes, not Unicode: single-byte characters are
// returned as-is, double-byte characters are returned as
// ((row + 0x20) << 8) | (cell + 0x20).
struct ShiftJIS {
	// Returned by next() for a byte sequence that cannot be decoded.
	static const uint32_t INVALID = (uint32_t) -1;

	// c is read up to its terminating 0 byte (see end()); it is not copied.
	ShiftJIS(const char *c) : c_(c), index_(0) {}

	// Decodes the character at the current position and advances past it.
	// Returns the JIS code, the raw byte for single-byte characters, or
	// INVALID for a bad lead byte (0x80) or a bad/missing trail byte.
	uint32_t next() {
		uint32_t j = (uint8_t)c_[index_++];

		int row;
		bool emojiAdjust = false;
		switch (j >> 4) {
		case 0x8:
			if (j == 0x80) {
				return INVALID;
			}
			// Intentional fall-through.
		case 0x9:
		case 0xE:
			row = ((j & 0x3F) << 1) - 0x01;
			break;

		case 0xF:
			emojiAdjust = true;
			if (j < 0xF4) {
				row = ((j & 0x7F) << 1) - 0x59;
			} else if (j < 0xFD) {
				row = ((j & 0x7F) << 1) - 0x1B;
			} else {
				return j;
			}
			break;

		// Anything else (i.e. <= 0x7x, 0xAx, 0xBx, 0xCx, and 0xDx) is JIS X 0201, return directly.
		default:
			return j;
		}

		// Okay, if we didn't return, it's time for the second byte (the cell.)
		// Peek first: a lead byte directly followed by the terminator is a
		// truncated sequence.  Leaving index_ on the terminator keeps end()
		// true, so callers never step past the end of the string.
		j = (uint8_t)c_[index_];
		if (j == 0) {
			return INVALID;
		}
		++index_;

		// Not a valid second byte.
		if (j < 0x40 || j == 0x7F || j >= 0xFD) {
			return INVALID;
		}

		if (j >= 0x9F) {
			// This range means the row was even.
			++row;
			j -= 0x7E;
		} else {
			if (j >= 0x80) {
				j -= 0x20;
			} else {
				// Yuck. They wrapped around 0x7F, so we subtract one less.
				j -= 0x20 - 1;
			}

			if (emojiAdjust) {
				// These are shoved in where they'll fit.
				if (row == 0x87) {
					// First byte was 0xF0.
					row = 0x81;
				} else if (row == 0x8B) {
					// First byte was 0xF2.
					row = 0x85;
				} else if (row == 0xCD) {
					// First byte was 0xF4.
					row = 0x8F;
				}
			}
		}

		// j is already the cell + 0x20.
		return ((row + 0x20) << 8) | j;
	}

	// True once the current position is on the terminating 0 byte.
	bool end() const {
		return c_[index_] == 0;
	}

	// Number of characters (not bytes) in the whole string, counted from the
	// start regardless of the current position.
	int length() const {
		int len = 0;
		for (ShiftJIS dec(c_); !dec.end(); dec.next())
			++len;
		return len;
	}

	// Byte offset of the current position from the start of the string.
	int byteIndex() const {
		return index_;
	}

	// Writes the Shift-JIS encoding of JIS code j to dest (no terminator).
	// Returns the number of bytes written: 1 for single-byte values, 2 for
	// double-byte values, or 0 (dest untouched) when the row has no lead byte
	// mapping here.  The cell byte is not range-checked.
	static int encode(char *dest, uint32_t j) {
		// JIS X 0201.
		if ((j & ~0xFF) == 0) {
			*dest = (char)j;
			return 1;
		}

		int row = (int)(j >> 8) - 0x20;
		int offsetCell = j & 0xFF;

		int lead;
		if (row >= 1 && row < 0x3F) {
			lead = 0x80 + ((row + 1) >> 1);
		} else if (row >= 0x3F && row < 0x5F) {
			// Reduce by 0x40 to account for the above range.
			lead = 0xE0 + ((row - 0x40 + 1) >> 1);
		} else {
			// TODO: rows >= 0x80 (decoded from lead bytes 0xF0-0xFC) are not
			// handled yet.  Emit nothing rather than a trail byte without a
			// lead byte.
			return 0;
		}
		*dest++ = (char)lead;

		if (row & 1) {
			if (offsetCell < 0x60) {
				// Subtract one to shift around 0x7F.
				*dest++ = (char)(offsetCell + 0x20 - 1);
			} else {
				*dest++ = (char)(offsetCell + 0x20);
			}
		} else {
			*dest++ = (char)(offsetCell + 0x7E);
		}

		return 2;
	}

	// Upper bound on the number of bytes encode() writes for j.
	static int encodeUnits(uint32_t j) {
		if ((j & ~0xFF) == 0) {
			return 1;
		}
		return 2;
	}

private:
	const char *c_;
	int index_;
};
141
142