CoCalc -- WrapText.cpp

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Data/Text/WrapText.cpp
Views: ¹⁴⁰¹
1
#include <cstring>
2
#include "Common/Render/DrawBuffer.h"
3
#include "Common/Data/Encoding/Utf8.h"
4
#include "Common/Data/Text/WrapText.h"
5

6
// Returns true when the code point c falls inside one of the CJK-related
// ranges listed in the table below. CJK characters can be wrapped more freely.
bool WordWrapper::IsCJK(uint32_t c) {
	// Every range in the table starts above this value, so reject early.
	if (c < 0x1000) {
		return false;
	}

	struct CodepointRange {
		uint32_t first;
		uint32_t last;
	};
	static const CodepointRange kCJKRanges[] = {
		{ 0x1100, 0x11FF },   // Hangul Jamo.
		{ 0x2E80, 0x2FFF },   // Kangxi Radicals etc.
		// A single span that collapses four adjacent ranges:
		//   0x3040 - 0x31FF: Hiragana, Katakana, Hangul Compatibility Jamo etc.
		//   0x3200 - 0x32FF: CJK Enclosed
		//   0x3300 - 0x33FF: CJK Compatibility
		//   0x3400 - 0x4DB5: CJK Unified Ideographs Extension A
		{ 0x3040, 0x4DB5 },
		{ 0x4E00, 0x9FBB },   // CJK Unified Ideographs
		{ 0xAC00, 0xD7AF },   // Hangul Syllables
		{ 0xF900, 0xFAD9 },   // CJK Compatibility Ideographs
		{ 0x20000, 0x2A6D6 }, // CJK Unified Ideographs Extension B
		{ 0x2F800, 0x2FA1D }, // CJK Compatibility Supplement
	};

	for (const CodepointRange &range : kCJKRanges) {
		if (c >= range.first && c <= range.last) {
			return true;
		}
	}
	return false;
}
29

30
// Returns true when c is one of the punctuation code points in the table below.
bool WordWrapper::IsPunctuation(uint32_t c) {
	// TODO: This list of punctuation is very incomplete.
	static const uint32_t kPunctuation[] = {
		',',
		'.',
		':',
		'!',
		')',
		'?',
		0x00AD, // SOFT HYPHEN
		0x3001, // IDEOGRAPHIC COMMA
		0x3002, // IDEOGRAPHIC FULL STOP
		0x06D4, // ARABIC FULL STOP
		0xFF01, // FULLWIDTH EXCLAMATION MARK
		0xFF09, // FULLWIDTH RIGHT PARENTHESIS
		0xFF1F, // FULLWIDTH QUESTION MARK
	};

	for (const uint32_t candidate : kPunctuation) {
		if (candidate == c) {
			return true;
		}
	}
	return false;
}
52

53
// Returns true when c is a tab, an ASCII space, or one of the listed
// Unicode space code points.
bool WordWrapper::IsSpace(uint32_t c) {
	return c == '\t'
		|| c == ' '
		|| c == 0x2002  // EN SPACE
		|| c == 0x2003  // EM SPACE
		|| c == 0x3000; // IDEOGRAPHIC SPACE
}
66

67
// Returns true when c is U+00AD (SOFT HYPHEN).
bool WordWrapper::IsShy(uint32_t c) {
	const uint32_t kSoftHyphen = 0x00AD;
	return kSoftHyphen == c;
}
70

71
std::string WordWrapper::Wrapped() {
72
	if (out_.empty()) {
73
		Wrap();
74
	}
75
	return out_;
76
}
77

78
bool WordWrapper::WrapBeforeWord() {
79
	if (flags_ & FLAG_WRAP_TEXT) {
80
		if (x_ + wordWidth_ > maxW_ && !out_.empty()) {
81
			if (IsShy(lastChar_)) {
82
				// Soft hyphen, replace it with a real hyphen since we wrapped at it.
83
				// TODO: There's an edge case here where the hyphen might not fit.
84
				out_[out_.size() - 2] = '-';
85
				out_[out_.size() - 1] = '\n';
86
			} else {
87
				out_ += "\n";
88
			}
89
			lastChar_ = '\n';
90
			lastLineStart_ = out_.size();
91
			x_ = 0.0f;
92
			forceEarlyWrap_ = false;
93
			return true;
94
		}
95
	}
96
	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
97
		const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";
98
		if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {
99
			AddEllipsis();
100
			skipNextWord_ = true;
101
			if ((flags_ & FLAG_WRAP_TEXT) == 0) {
102
				scanForNewline_ = true;
103
			}
104
		}
105
	}
106
	return false;
107
}
108

109
void WordWrapper::AddEllipsis() {
110
	if (!out_.empty() && IsSpaceOrShy(lastChar_)) {
111
		UTF8 utf(out_.c_str(), (int)out_.size());
112
		utf.bwd();
113
		out_.resize(utf.byteIndex());
114
		out_ += "...";
115
	} else {
116
		out_ += "...";
117
	}
118
	lastChar_ = '.';
119
	x_ += ellipsisWidth_;
120
}
121

122
void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {
123
	int lastWordStartIndex = lastIndex_;
124
	if (WrapBeforeWord()) {
125
		// Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace
126
		UTF8 utf8Word(str_, lastWordStartIndex);
127
		while (lastWordStartIndex < endIndex) {
128
			const uint32_t c = utf8Word.next();
129
			if (!IsSpace(c)) {
130
				break;
131
			}
132
			lastWordStartIndex = utf8Word.byteIndex();
133
		}
134
	}
135

136
	lastEllipsisIndex_ = -1;
137
	if (skipNextWord_) {
138
		lastIndex_ = endIndex;
139
		return;
140
	}
141

142
	// This will include the newline.
143
	if (x_ <= maxW_) {
144
		out_.append(str_.data() + lastWordStartIndex, str_.data() + endIndex);
145
	} else {
146
		scanForNewline_ = true;
147
	}
148
	if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {
149
		out_ += "\n";
150
		lastChar_ = '\n';
151
		lastLineStart_ = out_.size();
152
		scanForNewline_ = false;
153
		x_ = 0.0f;
154
	} else {
155
		// We may have appended a newline - check.
156
		size_t pos = out_.find_last_of('\n');
157
		if (pos != out_.npos) {
158
			lastLineStart_ = pos + 1;
159
		}
160

161
		if (lastChar == -1 && !out_.empty()) {
162
			UTF8 utf(out_.c_str(), (int)out_.size());
163
			utf.bwd();
164
			lastChar = utf.next();
165
		}
166
		lastChar_ = lastChar;
167

168
		if (lastLineStart_ != out_.size()) {
169
			// To account for kerning around spaces, we recalculate the entire line width.
170
			x_ = MeasureWidth(std::string_view(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_));
171
		} else {
172
			x_ = 0.0f;
173
		}
174
	}
175
	lastIndex_ = endIndex;
176
	wordWidth_ = 0.0f;
177
}
178

179
void WordWrapper::Wrap() {
180
	// First, let's check if it fits as-is.
181
	size_t len = str_.length();
182
	if (MeasureWidth(str_) <= maxW_) {
183
		// If it fits, we don't need to go through each character.
184
		out_ = std::string(str_);
185
		return;
186
	}
187

188
	out_.clear();
189
	// We know it'll be approximately this size. It's fine if the guess is a little off.
190
	out_.reserve(len + len / 16);
191

192
	if (flags_ & FLAG_ELLIPSIZE_TEXT) {
193
		ellipsisWidth_ = MeasureWidth("...");
194
	}
195

196
	for (UTF8 utf(str_); !utf.end(); ) {
197
		int beforeIndex = utf.byteIndex();
198
		uint32_t c = utf.next();
199
		int afterIndex = utf.byteIndex();
200

201
		// Is this a newline character, hard wrapping?
202
		if (c == '\n') {
203
			if (skipNextWord_) {
204
				lastIndex_ = beforeIndex;
205
				skipNextWord_ = false;
206
			}
207
			// This will include the newline character.
208
			AppendWord(afterIndex, c, false);
209
			// We wrapped once, so stop forcing.
210
			forceEarlyWrap_ = false;
211
			scanForNewline_ = false;
212
			continue;
213
		}
214

215
		if (scanForNewline_) {
216
			// We're discarding the rest of the characters until a newline (no wrapping.)
217
			lastIndex_ = afterIndex;
218
			continue;
219
		}
220

221
		// Measure the entire word for kerning purposes.  May not be 100% perfect.
222
		float newWordWidth = 0.0f;
223
		if (afterIndex <= str_.length()) {
224
			newWordWidth = MeasureWidth(str_.substr(lastIndex_, afterIndex - lastIndex_));
225
		}
226

227
		// Is this the end of a word (space)?  We'll also output up to a soft hyphen.
228
		if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {
229
			AppendWord(afterIndex, c, false);
230
			skipNextWord_ = false;
231
			continue;
232
		}
233

234
		// We're scanning for the next word.
235
		if (skipNextWord_)
236
			continue;
237

238
		if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {
239
			float checkX = x_;
240
			// If we allow wrapping, assume we'll wrap as needed.
241
			if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {
242
				checkX = 0;
243
			}
244

245
			// If we can only fit an ellipsis, time to output and skip ahead.
246
			// Ignore x for newWordWidth, because we might wrap.
247
			if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {
248
				lastEllipsisIndex_ = beforeIndex;
249
				continue;
250
			}
251
		}
252

253
		// Can the word fit on a line even all by itself so far?
254
		if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
255
			// If we had a good place for an ellipsis, let's do that.
256
			if (lastEllipsisIndex_ != -1) {
257
				AppendWord(lastEllipsisIndex_, -1, false);
258
				AddEllipsis();
259
				skipNextWord_ = true;
260
				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
261
					scanForNewline_ = true;
262
				}
263
				continue;
264
			}
265

266
			// Doesn't fit.  Let's drop what's there so far onto its own line.
267
			if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {
268
				// Let's put as many characters as will fit on the previous line.
269
				// This word can't fit on one line even, so it's going to be cut into pieces anyway.
270
				// Better to avoid huge gaps, in that case.
271
				forceEarlyWrap_ = true;
272

273
				// Now rewind back to where the word started so we can wrap at the opportune moment.
274
				wordWidth_ = 0.0f;
275
				while (utf.byteIndex() > lastIndex_) {
276
					utf.bwd();
277
				}
278
				continue;
279
			}
280
			// Now, add the word so far (without this latest character) and break.
281
			AppendWord(beforeIndex, -1, true);
282
			forceEarlyWrap_ = false;
283
			// The current character will be handled as part of the next word.
284
			continue;
285
		}
286

287
		if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {
288
			if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {
289
				// Now, add the word so far (without this latest character) and show the ellipsis.
290
				AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);
291
				AddEllipsis();
292
				forceEarlyWrap_ = false;
293
				skipNextWord_ = true;
294
				if ((flags_ & FLAG_WRAP_TEXT) == 0) {
295
					scanForNewline_ = true;
296
				}
297
				continue;
298
			}
299
		}
300

301
		wordWidth_ = newWordWidth;
302

303
		// Is this the end of a word via punctuation / CJK?
304
		if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
305
			// CJK doesn't require spaces, so we treat each letter as its own word.
306
			AppendWord(afterIndex, c, false);
307
		}
308
	}
309

310
	// Now insert the rest of the string - the last word.
311
	AppendWord((int)len, 0, false);
312
}
313

314
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company