CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/Data/Text/WrapText.cpp
Views: 1401
#include <cstring>1#include "Common/Render/DrawBuffer.h"2#include "Common/Data/Encoding/Utf8.h"3#include "Common/Data/Text/WrapText.h"45bool WordWrapper::IsCJK(uint32_t c) {6if (c < 0x1000) {7return false;8}910// CJK characters can be wrapped more freely.11bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.12result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.13#if 014result = result || (c >= 0x3040 && c <= 0x31FF); // Hiragana, Katakana, Hangul Compatibility Jamo etc.15result = result || (c >= 0x3200 && c <= 0x32FF); // CJK Enclosed16result = result || (c >= 0x3300 && c <= 0x33FF); // CJK Compatibility17result = result || (c >= 0x3400 && c <= 0x4DB5); // CJK Unified Ideographs Extension A18#else19result = result || (c >= 0x3040 && c <= 0x4DB5); // Above collapsed20#endif21result = result || (c >= 0x4E00 && c <= 0x9FBB); // CJK Unified Ideographs22result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables23result = result || (c >= 0xF900 && c <= 0xFAD9); // CJK Compatibility Ideographs24result = result || (c >= 0x20000 && c <= 0x2A6D6); // CJK Unified Ideographs Extension B25result = result || (c >= 0x2F800 && c <= 0x2FA1D); // CJK Compatibility Supplement26return result;27}2829bool WordWrapper::IsPunctuation(uint32_t c) {30switch (c) {31// TODO: This list of punctuation is very incomplete.32case ',':33case '.':34case ':':35case '!':36case ')':37case '?':38case 0x00AD: // SOFT HYPHEN39case 0x3001: // IDEOGRAPHIC COMMA40case 0x3002: // IDEOGRAPHIC FULL STOP41case 0x06D4: // ARABIC FULL STOP42case 0xFF01: // FULLWIDTH EXCLAMATION MARK43case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS44case 0xFF1F: // FULLWIDTH QUESTION MARK45return true;4647default:48return false;49}50}5152bool WordWrapper::IsSpace(uint32_t c) {53switch (c) {54case '\t':55case ' ':56case 0x2002: // EN SPACE57case 0x2003: // EM SPACE58case 0x3000: // IDEOGRAPHIC SPACE59return true;6061default:62return false;63}64}6566bool WordWrapper::IsShy(uint32_t c) {67return c == 0x00AD; // SOFT HYPHEN68}6970std::string WordWrapper::Wrapped() {71if (out_.empty()) {72Wrap();73}74return out_;75}7677bool WordWrapper::WrapBeforeWord() {78if (flags_ & FLAG_WRAP_TEXT) {79if (x_ + wordWidth_ > maxW_ && !out_.empty()) {80if (IsShy(lastChar_)) {81// Soft hyphen, replace it with a real hyphen since we wrapped at it.82// TODO: There's an edge case here where the hyphen might not fit.83out_[out_.size() - 2] = '-';84out_[out_.size() - 1] = '\n';85} else {86out_ += "\n";87}88lastChar_ = '\n';89lastLineStart_ = out_.size();90x_ = 0.0f;91forceEarlyWrap_ = false;92return true;93}94}95if (flags_ & FLAG_ELLIPSIZE_TEXT) {96const bool hasEllipsis = out_.size() > 3 && out_.substr(out_.size() - 3) == "...";97if (x_ + wordWidth_ > maxW_ && !hasEllipsis) {98AddEllipsis();99skipNextWord_ = true;100if ((flags_ & FLAG_WRAP_TEXT) == 0) {101scanForNewline_ = true;102}103}104}105return false;106}107108void WordWrapper::AddEllipsis() {109if (!out_.empty() && IsSpaceOrShy(lastChar_)) {110UTF8 utf(out_.c_str(), (int)out_.size());111utf.bwd();112out_.resize(utf.byteIndex());113out_ += "...";114} else {115out_ += "...";116}117lastChar_ = '.';118x_ += ellipsisWidth_;119}120121void WordWrapper::AppendWord(int endIndex, int lastChar, bool addNewline) {122int lastWordStartIndex = lastIndex_;123if (WrapBeforeWord()) {124// Advance to the first non-whitespace UTF-8 character in the following word (if any) to prevent starting the new line with a whitespace125UTF8 utf8Word(str_, lastWordStartIndex);126while (lastWordStartIndex < endIndex) {127const uint32_t c = utf8Word.next();128if (!IsSpace(c)) {129break;130}131lastWordStartIndex = utf8Word.byteIndex();132}133}134135lastEllipsisIndex_ = -1;136if (skipNextWord_) {137lastIndex_ = endIndex;138return;139}140141// This will include the newline.142if (x_ <= maxW_) {143out_.append(str_.data() + lastWordStartIndex, str_.data() + endIndex);144} else {145scanForNewline_ = true;146}147if (addNewline && (flags_ & FLAG_WRAP_TEXT)) {148out_ += "\n";149lastChar_ = '\n';150lastLineStart_ = out_.size();151scanForNewline_ = false;152x_ = 0.0f;153} else {154// We may have appended a newline - check.155size_t pos = out_.find_last_of('\n');156if (pos != out_.npos) {157lastLineStart_ = pos + 1;158}159160if (lastChar == -1 && !out_.empty()) {161UTF8 utf(out_.c_str(), (int)out_.size());162utf.bwd();163lastChar = utf.next();164}165lastChar_ = lastChar;166167if (lastLineStart_ != out_.size()) {168// To account for kerning around spaces, we recalculate the entire line width.169x_ = MeasureWidth(std::string_view(out_.c_str() + lastLineStart_, out_.size() - lastLineStart_));170} else {171x_ = 0.0f;172}173}174lastIndex_ = endIndex;175wordWidth_ = 0.0f;176}177178void WordWrapper::Wrap() {179// First, let's check if it fits as-is.180size_t len = str_.length();181if (MeasureWidth(str_) <= maxW_) {182// If it fits, we don't need to go through each character.183out_ = std::string(str_);184return;185}186187out_.clear();188// We know it'll be approximately this size. It's fine if the guess is a little off.189out_.reserve(len + len / 16);190191if (flags_ & FLAG_ELLIPSIZE_TEXT) {192ellipsisWidth_ = MeasureWidth("...");193}194195for (UTF8 utf(str_); !utf.end(); ) {196int beforeIndex = utf.byteIndex();197uint32_t c = utf.next();198int afterIndex = utf.byteIndex();199200// Is this a newline character, hard wrapping?201if (c == '\n') {202if (skipNextWord_) {203lastIndex_ = beforeIndex;204skipNextWord_ = false;205}206// This will include the newline character.207AppendWord(afterIndex, c, false);208// We wrapped once, so stop forcing.209forceEarlyWrap_ = false;210scanForNewline_ = false;211continue;212}213214if (scanForNewline_) {215// We're discarding the rest of the characters until a newline (no wrapping.)216lastIndex_ = afterIndex;217continue;218}219220// Measure the entire word for kerning purposes. May not be 100% perfect.221float newWordWidth = 0.0f;222if (afterIndex <= str_.length()) {223newWordWidth = MeasureWidth(str_.substr(lastIndex_, afterIndex - lastIndex_));224}225226// Is this the end of a word (space)? We'll also output up to a soft hyphen.227if (wordWidth_ > 0.0f && IsSpaceOrShy(c)) {228AppendWord(afterIndex, c, false);229skipNextWord_ = false;230continue;231}232233// We're scanning for the next word.234if (skipNextWord_)235continue;236237if ((flags_ & FLAG_ELLIPSIZE_TEXT) != 0 && wordWidth_ > 0.0f && lastEllipsisIndex_ == -1) {238float checkX = x_;239// If we allow wrapping, assume we'll wrap as needed.240if ((flags_ & FLAG_WRAP_TEXT) != 0 && x_ >= maxW_) {241checkX = 0;242}243244// If we can only fit an ellipsis, time to output and skip ahead.245// Ignore x for newWordWidth, because we might wrap.246if (checkX + wordWidth_ + ellipsisWidth_ <= maxW_ && newWordWidth + ellipsisWidth_ > maxW_) {247lastEllipsisIndex_ = beforeIndex;248continue;249}250}251252// Can the word fit on a line even all by itself so far?253if (wordWidth_ > 0.0f && newWordWidth > maxW_) {254// If we had a good place for an ellipsis, let's do that.255if (lastEllipsisIndex_ != -1) {256AppendWord(lastEllipsisIndex_, -1, false);257AddEllipsis();258skipNextWord_ = true;259if ((flags_ & FLAG_WRAP_TEXT) == 0) {260scanForNewline_ = true;261}262continue;263}264265// Doesn't fit. Let's drop what's there so far onto its own line.266if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_ && (flags_ & FLAG_WRAP_TEXT) != 0) {267// Let's put as many characters as will fit on the previous line.268// This word can't fit on one line even, so it's going to be cut into pieces anyway.269// Better to avoid huge gaps, in that case.270forceEarlyWrap_ = true;271272// Now rewind back to where the word started so we can wrap at the opportune moment.273wordWidth_ = 0.0f;274while (utf.byteIndex() > lastIndex_) {275utf.bwd();276}277continue;278}279// Now, add the word so far (without this latest character) and break.280AppendWord(beforeIndex, -1, true);281forceEarlyWrap_ = false;282// The current character will be handled as part of the next word.283continue;284}285286if ((flags_ & FLAG_ELLIPSIZE_TEXT) && wordWidth_ > 0.0f && x_ + newWordWidth + ellipsisWidth_ > maxW_) {287if ((flags_ & FLAG_WRAP_TEXT) == 0 && x_ + wordWidth_ + ellipsisWidth_ <= maxW_) {288// Now, add the word so far (without this latest character) and show the ellipsis.289AppendWord(lastEllipsisIndex_ != -1 ? lastEllipsisIndex_ : beforeIndex, -1, false);290AddEllipsis();291forceEarlyWrap_ = false;292skipNextWord_ = true;293if ((flags_ & FLAG_WRAP_TEXT) == 0) {294scanForNewline_ = true;295}296continue;297}298}299300wordWidth_ = newWordWidth;301302// Is this the end of a word via punctuation / CJK?303if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {304// CJK doesn't require spaces, so we treat each letter as its own word.305AppendWord(afterIndex, c, false);306}307}308309// Now insert the rest of the string - the last word.310AppendWord((int)len, 0, false);311}312313314