CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Data/Text/WrapText.cpp
Views: 1401
1
#include <cstring>
2
#include "Common/Render/DrawBuffer.h"
3
#include "Common/Data/Encoding/Utf8.h"
4
#include "Common/Data/Text/WrapText.h"
5
6
// Returns true when code point c lies in one of the CJK ranges tested below.
// Wrap() ends the current word after every such character, so a line may break
// between any two of them without needing a space.
//
// The ideograph ranges use the full spans that UAX #14 reserves for ideographs
// (U+3400..4DBF, U+4E00..9FFF, U+F900..FAFF and planes 2 and 3), so ideographs
// added by later Unicode versions are recognized as well.
bool WordWrapper::IsCJK(uint32_t c) {
	// Cheap rejection: every range below starts at U+1100 or higher.
	if (c < 0x1000) {
		return false;
	}

	// CJK characters can be wrapped more freely.
	bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.
	result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.
	// One contiguous span covering, in order:
	//   0x3040-0x31FF  Hiragana, Katakana, Hangul Compatibility Jamo etc.
	//   0x3200-0x32FF  CJK Enclosed
	//   0x3300-0x33FF  CJK Compatibility
	//   0x3400-0x4DBF  CJK Unified Ideographs Extension A
	result = result || (c >= 0x3040 && c <= 0x4DBF);
	result = result || (c >= 0x4E00 && c <= 0x9FFF); // CJK Unified Ideographs
	result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables
	result = result || (c >= 0xF900 && c <= 0xFAFF); // CJK Compatibility Ideographs
	// Plane 2: Extensions B through F etc. and the CJK Compatibility Supplement.
	result = result || (c >= 0x20000 && c <= 0x2FFFD);
	// Plane 3: Extension G and later.
	result = result || (c >= 0x30000 && c <= 0x3FFFD);
	return result;
}
29
30
// Returns true when c is one of the punctuation code points after which
// Wrap() ends the current word.
bool WordWrapper::IsPunctuation(uint32_t c) {
	// TODO: This list of punctuation is very incomplete.
	static constexpr uint32_t kPunctuation[] = {
		',',
		'.',
		':',
		'!',
		')',
		'?',
		0x00AD, // SOFT HYPHEN
		0x3001, // IDEOGRAPHIC COMMA
		0x3002, // IDEOGRAPHIC FULL STOP
		0x06D4, // ARABIC FULL STOP
		0xFF01, // FULLWIDTH EXCLAMATION MARK
		0xFF09, // FULLWIDTH RIGHT PARENTHESIS
		0xFF1F, // FULLWIDTH QUESTION MARK
	};

	for (const uint32_t candidate : kPunctuation) {
		if (candidate == c) {
			return true;
		}
	}
	return false;
}
52
53
// Returns true for the whitespace code points recognized by the wrapper:
// tab, ASCII space, EN SPACE, EM SPACE and IDEOGRAPHIC SPACE.
bool WordWrapper::IsSpace(uint32_t c) {
	const bool isAsciiBlank = (c == '\t') || (c == ' ');
	const bool isWideBlank =
		(c == 0x2002) ||  // EN SPACE
		(c == 0x2003) ||  // EM SPACE
		(c == 0x3000);    // IDEOGRAPHIC SPACE
	return isAsciiBlank || isWideBlank;
}
66
67
// Returns true when c is U+00AD SOFT HYPHEN.
bool WordWrapper::IsShy(uint32_t c) {
	constexpr uint32_t kSoftHyphen = 0x00AD;
	return kSoftHyphen == c;
}
70
71
std::string WordWrapper::Wrapped() {
72
if (out_.empty()) {
73
Wrap();
74
}
75
return out_;
76
}
77
78
bool WordWrapper::WrapBeforeWord() {
79
if (flags_ & FLAG_WRAP_TEXT) {
80
if (x_ + wordWidth_ > maxW_ && !out_.empty()) {
81
if (IsShy(lastChar_)) {
82
// Soft hyphen, replace it with a real hyphen since we wrapped at it.
83
// TODO: There's an edge case here where the hyphen might not fit.
84
out_[out_.size() - 2] = '-';
85
out_[out_.size() - 1] = '\n';
86
} else {
87
out_ += "\n";
88
}
89
lastChar_ = '\n';
90
lastLineStart_ = out_.size();
91
x_ = 0.0f;
92
forceEarlyWrap_ = false;
93
return true;
94
}
95
}
96
if (flags_ & FLAG_ELLIPSIZE_TEXT) {
97
const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";
98
if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {
99
AddEllipsis();
100
skipNextWord_ = true;
101
if ((flags_ & FLAG_WRAP_TEXT) == 0) {
102
scanForNewline_ = true;
103
}
104
}
105
}
106
return false;
107
}
108
109
void WordWrapper::AddEllipsis() {
110
if (!out_.empty() && IsSpaceOrShy(lastChar_)) {
111
UTF8 utf(out_.c_str(), (int)out_.size());
112
utf.bwd();
113
out_.resize(utf.byteIndex());
114
out_ += "...";
115
} else {
116
out_ += "...";
117
}
118
lastChar_ = '.';
119
x_ += ellipsisWidth_;
120
}
121
122
void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
123
int lastWordStartIndex = lastIndex_;
124
if (WrapBeforeWord()) {
125
// Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace
126
UTF8 utf8Word(str_, lastWordStartIndex);
127
while (lastWordStartIndex < endIndex) {
128
const uint32_t c = utf8Word.next();
129
if (!IsSpace(c)) {
130
break;
131
}
132
lastWordStartIndex = utf8Word.byteIndex();
133
}
134
}
135
136
lastEllipsisIndex_ = -1;
137
if (skipNextWord_) {
138
lastIndex_ = endIndex;
139
return;
140
}
141
142
// This will include the newline.
143
if (x_ <= maxW_) {
144
out_.append(str_.data() + lastWordStartIndex, str_.data() + endIndex);
145
} else {
146
scanForNewline_ = true;
147
}
148
if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {
149
out_ += "\n";
150
lastChar_ = '\n';
151
lastLineStart_ = out_.size();
152
scanForNewline_ = false;
153
x_ = 0.0f;
154
} else {
155
// We may have appended a newline - check.
156
size_t pos = out_.find_last_of('\n');
157
if (pos != out_.npos) {
158
lastLineStart_ = pos + 1;
159
}
160
161
if (lastChar == -1 && !out_.empty()) {
162
UTF8 utf(out_.c_str(), (int)out_.size());
163
utf.bwd();
164
lastChar = utf.next();
165
}
166
lastChar_ = lastChar;
167
168
if (lastLineStart_ != out_.size()) {
169
// To account for kerning around spaces, we recalculate the entire line width.
170
x_ = MeasureWidth(std::string_view(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_));
171
} else {
172
x_ = 0.0f;
173
}
174
}
175
lastIndex_ = endIndex;
176
wordWidth_ = 0.0f;
177
}
178
179
void WordWrapper::Wrap() {
180
// First, let's check if it fits as-is.
181
size_t len = str_.length();
182
if (MeasureWidth(str_) <= maxW_) {
183
// If it fits, we don't need to go through each character.
184
out_ = std::string(str_);
185
return;
186
}
187
188
out_.clear();
189
// We know it'll be approximately this size. It's fine if the guess is a little off.
190
out_.reserve(len + len / 16);
191
192
if (flags_ & FLAG_ELLIPSIZE_TEXT) {
193
ellipsisWidth_ = MeasureWidth("...");
194
}
195
196
for (UTF8 utf(str_); !utf.end(); ) {
197
int beforeIndex = utf.byteIndex();
198
uint32_t c = utf.next();
199
int afterIndex = utf.byteIndex();
200
201
// Is this a newline character, hard wrapping?
202
if (c == '\n') {
203
if (skipNextWord_) {
204
lastIndex_ = beforeIndex;
205
skipNextWord_ = false;
206
}
207
// This will include the newline character.
208
AppendWord(afterIndex, c, false);
209
// We wrapped once, so stop forcing.
210
forceEarlyWrap_ = false;
211
scanForNewline_ = false;
212
continue;
213
}
214
215
if (scanForNewline_) {
216
// We're discarding the rest of the characters until a newline (no wrapping.)
217
lastIndex_ = afterIndex;
218
continue;
219
}
220
221
// Measure the entire word for kerning purposes. May not be 100% perfect.
222
float newWordWidth = 0.0f;
223
if (afterIndex <= str_.length()) {
224
newWordWidth = MeasureWidth(str_.substr(lastIndex_, afterIndex - lastIndex_));
225
}
226
227
// Is this the end of a word (space)? We'll also output up to a soft hyphen.
228
if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {
229
AppendWord(afterIndex, c, false);
230
skipNextWord_ = false;
231
continue;
232
}
233
234
// We're scanning for the next word.
235
if (skipNextWord_)
236
continue;
237
238
if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {
239
float checkX = x_;
240
// If we allow wrapping, assume we'll wrap as needed.
241
if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {
242
checkX = 0;
243
}
244
245
// If we can only fit an ellipsis, time to output and skip ahead.
246
// Ignore x for newWordWidth, because we might wrap.
247
if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {
248
lastEllipsisIndex_ = beforeIndex;
249
continue;
250
}
251
}
252
253
// Can the word fit on a line even all by itself so far?
254
if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
255
// If we had a good place for an ellipsis, let's do that.
256
if (lastEllipsisIndex_ != -1) {
257
AppendWord(lastEllipsisIndex_, -1, false);
258
AddEllipsis();
259
skipNextWord_ = true;
260
if ((flags_ & FLAG_WRAP_TEXT) == 0) {
261
scanForNewline_ = true;
262
}
263
continue;
264
}
265
266
// Doesn't fit. Let's drop what's there so far onto its own line.
267
if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {
268
// Let's put as many characters as will fit on the previous line.
269
// This word can't fit on one line even, so it's going to be cut into pieces anyway.
270
// Better to avoid huge gaps, in that case.
271
forceEarlyWrap_ = true;
272
273
// Now rewind back to where the word started so we can wrap at the opportune moment.
274
wordWidth_ = 0.0f;
275
while (utf.byteIndex() > lastIndex_) {
276
utf.bwd();
277
}
278
continue;
279
}
280
// Now, add the word so far (without this latest character) and break.
281
AppendWord(beforeIndex, -1, true);
282
forceEarlyWrap_ = false;
283
// The current character will be handled as part of the next word.
284
continue;
285
}
286
287
if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {
288
if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {
289
// Now, add the word so far (without this latest character) and show the ellipsis.
290
AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);
291
AddEllipsis();
292
forceEarlyWrap_ = false;
293
skipNextWord_ = true;
294
if ((flags_ & FLAG_WRAP_TEXT) == 0) {
295
scanForNewline_ = true;
296
}
297
continue;
298
}
299
}
300
301
wordWidth_ = newWordWidth;
302
303
// Is this the end of a word via punctuation / CJK?
304
if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
305
// CJK doesn't require spaces, so we treat each letter as its own word.
306
AppendWord(afterIndex, c, false);
307
}
308
}
309
310
// Now insert the rest of the string - the last word.
311
AppendWord((int)len, 0, false);
312
}
313
314