// Source: GitHub repository singlestore-labs/singlestoredb-python
// Path: blob/main/docs/_static/language_data.js
/*
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 */
/**
 * Words listed here are the file's stopwords (see the header comment:
 * this data is consumed by searchtools.js). Order is preserved from the
 * original single-line declaration.
 */
var stopwords = [
  "a", "and", "are", "as", "at",
  "be", "but", "by",
  "for",
  "if", "in", "into", "is", "it",
  "near", "no", "not",
  "of", "on", "or",
  "such",
  "that", "the", "their", "then", "there", "these", "they", "this", "to",
  "was", "will", "with"
];
/* Non-minified version is copied as a separate JS file, if available */
/**
 * Porter Stemmer
 *
 * Constructor function: instantiate with `new Stemmer()` and call
 * `stemWord(word)` on the instance. All suffix tables and regular
 * expressions are built once per instance and shared by every call.
 */
var Stemmer = function() {
  // Step 2: suffix -> replacement, applied when the remaining stem has m > 0.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3: suffix -> replacement, applied when the remaining stem has m > 0.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Measure / shape predicates. None of these use the `g` or `y` flag, so
  // `test` and `exec` are stateless and the objects can be reused safely.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                    // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);            // [C]VCVC... is m>1
  var hasVowel = new RegExp("^(" + C + ")?" + v);                    // vowel in stem
  var doubledConsonant = new RegExp("([^aeiouylsz])\\1$");
  var cvcEnding = new RegExp("^" + C + v + "[^aeiouwxy]$");

  // Suffix matchers for the individual steps.
  var lastChar = /.$/;
  var step1aEs = /^(.+?)(ss|i)es$/;
  var step1aS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bAtBlIz = /(at|bl|iz)$/;
  var step1cY = /^(.+?)y$/;
  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5E = /^(.+?)e$/;
  var step5Ll = /ll$/;

  /**
   * Reduce a word to its stem.
   *
   * @param {string} w - The word to stem. The vowel/consonant classes only
   *   list lowercase letters, so callers presumably pass lowercase input.
   * @returns {string} The stemmed word; words shorter than three characters
   *   are returned unchanged.
   */
  this.stemWord = function (w) {
    if (w.length < 3) {
      return w;
    }

    var match;
    var stem;

    // A leading "y" is upper-cased for the duration of the algorithm so the
    // vowel class (which contains only lowercase "y") does not match it.
    var firstch = w.charAt(0);
    if (firstch === "y") {
      w = firstch.toUpperCase() + w.slice(1);
    }

    // Step 1a
    if (step1aEs.test(w)) {
      w = w.replace(step1aEs, "$1$2");
    } else if (step1aS.test(w)) {
      w = w.replace(step1aS, "$1$2");
    }

    // Step 1b
    match = step1bEed.exec(w);
    if (match) {
      if (mgr0.test(match[1])) {
        w = w.replace(lastChar, "");
      }
    } else {
      match = step1bEdIng.exec(w);
      if (match) {
        stem = match[1];
        if (hasVowel.test(stem)) {
          w = stem;
          if (step1bAtBlIz.test(w)) {
            w = w + "e";
          } else if (doubledConsonant.test(w)) {
            w = w.replace(lastChar, "");
          } else if (cvcEnding.test(w)) {
            w = w + "e";
          }
        }
      }
    }

    // Step 1c
    match = step1cY.exec(w);
    if (match) {
      stem = match[1];
      if (hasVowel.test(stem)) {
        w = stem + "i";
      }
    }

    // Step 2
    match = step2Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem)) {
        w = stem + step2list[match[2]];
      }
    }

    // Step 3
    match = step3Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem)) {
        w = stem + step3list[match[2]];
      }
    }

    // Step 4 ("ion" is only considered when no other step-4 suffix matched)
    match = step4Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem)) {
        w = stem;
      }
    } else {
      match = step4Ion.exec(w);
      if (match) {
        stem = match[1] + match[2];
        if (mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    match = step5E.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !cvcEnding.test(stem))) {
        w = stem;
      }
    }
    if (step5Ll.test(w) && mgr1.test(w)) {
      w = w.replace(lastChar, "");
    }

    // and turn initial Y back to y
    if (firstch === "y") {
      w = firstch.toLowerCase() + w.slice(1);
    }
    return w;
  };
};