Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
singlestore-labs
GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/docs/_static/language_data.js
469 views
/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */
// Stopword list (part of the language-specific data used by searchtools.js).
var stopwords = [
  "a", "and", "are", "as", "at",
  "be", "but", "by",
  "for",
  "if", "in", "into", "is", "it",
  "near", "no", "not",
  "of", "on", "or",
  "such",
  "that", "the", "their", "then", "there", "these", "they", "this", "to",
  "was", "will", "with"
];
/* Non-minified version is copied as a separate JS file, if available */
/**
 * Porter Stemmer
 *
 * Constructor for an object exposing a single method, `stemWord(w)`, which
 * strips English suffixes from a word in a fixed sequence of steps
 * (1a, 1b, 1c, 2, 3, 4, 5).
 *
 * Usage: `new Stemmer().stemWord("running")` returns `"run"`.
 */
var Stemmer = function() {
  // Step 2: suffix -> replacement, applied when the remaining stem has m > 0.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3: suffix -> replacement, applied when the remaining stem has m > 0.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  // Only lowercase vowels are listed, so any uppercase letter (including the
  // temporary "Y" used by stemWord) falls into the consonant class.
  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // These patterns never change, so they are compiled once per Stemmer
  // rather than on every stemWord call. None carries the g/y flag, so
  // test()/exec() keep no state between calls.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                    // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);            // [C]VCVC... is m>1
  var s_v = new RegExp("^(" + C + ")?" + v);                         // vowel in stem
  var cvc = new RegExp("^" + C + v + "[^aeiouwxy]$");                // C, vowel, then one char not in [aeiouwxy]
  var doubled = /([^aeiouylsz])\1$/;                                 // doubled final char, excluding vowels, y, l, s, z

  var step1aEs = /^(.+?)(ss|i)es$/;
  var step1aS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bAddE = /(at|bl|iz)$/;
  var step1c = /^(.+?)y$/;
  var step2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5E = /^(.+?)e$/;
  var step5Ll = /ll$/;

  /**
   * Stem a single word.
   *
   * @param {string} w - The word to stem. Words shorter than three
   *   characters are returned unchanged.
   * @returns {string} The stemmed word.
   */
  this.stemWord = function (w) {
    var fp;
    var stem;
    var suffix;

    if (w.length < 3) {
      return w;
    }

    // A leading "y" is upper-cased for the duration of the algorithm so the
    // vowel class [aeiouy] does not match it; it is restored at the end.
    var firstch = w.charAt(0);
    if (firstch === "y") {
      w = firstch.toUpperCase() + w.slice(1);
    }

    // Step 1a: "sses" -> "ss", "ies" -> "i", otherwise drop a final "s"
    // that follows a non-"s" character.
    if (step1aEs.test(w)) {
      w = w.replace(step1aEs, "$1$2");
    } else if (step1aS.test(w)) {
      w = w.replace(step1aS, "$1$2");
    }

    // Step 1b: "eed" -> "ee" when the stem has m > 0; otherwise strip
    // "ed"/"ing" when the stem contains a vowel, then tidy the new ending.
    fp = step1bEed.exec(w);
    if (fp) {
      if (mgr0.test(fp[1])) {
        w = w.slice(0, -1);
      }
    } else {
      fp = step1bEdIng.exec(w);
      if (fp) {
        stem = fp[1];
        if (s_v.test(stem)) {
          w = stem;
          if (step1bAddE.test(w)) {
            w = w + "e";
          } else if (doubled.test(w)) {
            w = w.slice(0, -1);
          } else if (cvc.test(w)) {
            w = w + "e";
          }
        }
      }
    }

    // Step 1c: final "y" -> "i" when the stem contains a vowel.
    fp = step1c.exec(w);
    if (fp) {
      stem = fp[1];
      if (s_v.test(stem)) {
        w = stem + "i";
      }
    }

    // Step 2: map suffixes through step2list when the stem has m > 0.
    fp = step2.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (mgr0.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3: map suffixes through step3list when the stem has m > 0.
    fp = step3.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (mgr0.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4: drop a listed suffix when the stem has m > 1. The "ion" rule
    // is only tried when no listed suffix matched, and keeps the preceding
    // "s"/"t" as part of the stem.
    fp = step4.exec(w);
    if (fp) {
      stem = fp[1];
      if (mgr1.test(stem)) {
        w = stem;
      }
    } else {
      fp = step4Ion.exec(w);
      if (fp) {
        stem = fp[1] + fp[2];
        if (mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5: drop a final "e" when m > 1, or when m = 1 and the stem does
    // not match the cvc pattern; then reduce a final "ll" to "l" when m > 1.
    fp = step5E.exec(w);
    if (fp) {
      stem = fp[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvc.test(stem))) {
        w = stem;
      }
    }
    if (step5Ll.test(w) && mgr1.test(w)) {
      w = w.slice(0, -1);
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = firstch.toLowerCase() + w.slice(1);
    }
    return w;
  };
};
178
179