Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icui18n/brktrans.cpp
12343 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
* Copyright (C) 2008-2015, International Business Machines
6
* Corporation and others. All Rights Reserved.
7
**********************************************************************
8
* Date Name Description
9
* 05/11/2008 Andy Heninger Port from Java
10
**********************************************************************
11
*/
12
13
#include <utility>
14
15
#include "unicode/utypes.h"
16
17
#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
18
19
#include "unicode/brkiter.h"
20
#include "unicode/localpointer.h"
21
#include "unicode/uchar.h"
22
#include "unicode/unifilt.h"
23
#include "unicode/uniset.h"
24
25
#include "brktrans.h"
26
#include "cmemory.h"
27
#include "mutex.h"
28
#include "uprops.h"
29
#include "uinvchar.h"
30
#include "util.h"
31
#include "uvectr32.h"
32
33
U_NAMESPACE_BEGIN
34
35
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
36
37
static const UChar SPACE = 32; // ' ' (U+0020); used as the initial value of fInsertion by the constructor
38
39
40
/**
41
* Constructs a transliterator whose insertion string defaults to a
42
* single space (U+0020).
43
*/
44
BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
      cachedBI(nullptr),          // created on first use in handleTransliterate()
      cachedBoundaries(nullptr),  // created on first use in handleTransliterate()
      fInsertion(SPACE)           // text inserted at each selected boundary
{
    // All state is set up by the initializer list.
}
48
49
50
/**
51
* Destructor.
52
*/
53
// No explicit cleanup is performed here; members are destroyed by their own destructors.
BreakTransliterator::~BreakTransliterator() = default;
55
56
/**
57
* Copy constructor.
58
*/
59
BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
    : Transliterator(o),
      cachedBI(nullptr),          // caches are not copied; the new object starts with empty ones
      cachedBoundaries(nullptr),
      fInsertion(o.fInsertion)    // only the insertion string is carried over
{
    // All state is set up by the initializer list.
}
62
63
64
/**
65
* Transliterator API.
66
*/
67
BreakTransliterator* BreakTransliterator::clone() const {
    // Heap-allocate a copy through the copy constructor; this function
    // keeps no reference to the returned object.
    BreakTransliterator* const copy = new BreakTransliterator(*this);
    return copy;
}
70
71
/**
72
* Implements {@link Transliterator#handleTransliterate}.
73
*/
74
void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
75
UBool isIncremental ) const {
76
77
UErrorCode status = U_ZERO_ERROR;
78
LocalPointer<BreakIterator> bi;
79
LocalPointer<UVector32> boundaries;
80
81
{
82
Mutex m;
83
BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
84
boundaries = std::move(nonConstThis->cachedBoundaries);
85
bi = std::move(nonConstThis->cachedBI);
86
}
87
if (bi.isNull()) {
88
bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
89
}
90
if (boundaries.isNull()) {
91
boundaries.adoptInstead(new UVector32(status));
92
}
93
94
if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
95
return;
96
}
97
98
boundaries->removeAllElements();
99
UnicodeString sText = replaceableAsString(text);
100
bi->setText(sText);
101
bi->preceding(offsets.start);
102
103
// To make things much easier, we will stack the boundaries, and then insert at the end.
104
// generally, we won't need too many, since we will be filtered.
105
106
int32_t boundary;
107
for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
108
if (boundary == 0) continue;
109
// HACK: Check to see that preceding item was a letter
110
111
UChar32 cp = sText.char32At(boundary-1);
112
int type = u_charType(cp);
113
//System.out.println(Integer.toString(cp,16) + " (before): " + type);
114
if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
115
116
cp = sText.char32At(boundary);
117
type = u_charType(cp);
118
//System.out.println(Integer.toString(cp,16) + " (after): " + type);
119
if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
120
121
boundaries->addElement(boundary, status);
122
// printf("Boundary at %d\n", boundary);
123
}
124
125
int delta = 0;
126
int lastBoundary = 0;
127
128
if (boundaries->size() != 0) { // if we found something, adjust
129
delta = boundaries->size() * fInsertion.length();
130
lastBoundary = boundaries->lastElementi();
131
132
// we do this from the end backwards, so that we don't have to keep updating.
133
134
while (boundaries->size() > 0) {
135
boundary = boundaries->popi();
136
text.handleReplaceBetween(boundary, boundary, fInsertion);
137
}
138
}
139
140
// Now fix up the return values
141
offsets.contextLimit += delta;
142
offsets.limit += delta;
143
offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
144
145
// Return break iterator & boundaries vector to the cache.
146
{
147
Mutex m;
148
BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
149
if (nonConstThis->cachedBI.isNull()) {
150
nonConstThis->cachedBI = std::move(bi);
151
}
152
if (nonConstThis->cachedBoundaries.isNull()) {
153
nonConstThis->cachedBoundaries = std::move(boundaries);
154
}
155
}
156
157
// TODO: do something with U_FAILURE(status);
158
// (need to look at transliterators overall, not just here.)
159
}
160
161
//
162
// getInsertion()
163
//
164
const UnicodeString &BreakTransliterator::getInsertion() const {
    // Returns a reference to the member itself, not a copy.
    return this->fInsertion;
}
167
168
//
169
// setInsertion()
170
//
171
void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
172
this->fInsertion = insertion;
173
}
174
175
//
176
// replaceableAsString Hack to let break iterators work
177
// on the replaceable text from transliterators.
178
// In practice, the only real Replaceable type that we
179
// will be seeing is UnicodeString, so this function
180
// will normally be efficient.
181
//
182
UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
    UnicodeString result;
    if (const UnicodeString *asString = dynamic_cast<UnicodeString *>(&r)) {
        // The replaceable already is a UnicodeString: copy it directly.
        result = *asString;
    } else {
        // Any other Replaceable: pull out its whole contents.
        r.extractBetween(0, r.length(), result);
    }
    return result;
}
192
193
U_NAMESPACE_END
194
195
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
196
197