Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
MR414N-ID
GitHub Repository: MR414N-ID/botku2
Path: blob/master/node_modules/@bochilteam/scraper/lib/cjs/others/wikipedia.js
1126 views
1
"use strict";
2
// Interop helper: reuse an already-installed `__importDefault` found on `this`
// when there is one; otherwise define it. A module flagged with `__esModule`
// is returned untouched, anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
5
Object.defineProperty(exports, "__esModule", { value: true });
6
const cheerio_1 = __importDefault(require("cheerio"));
7
const got_1 = __importDefault(require("got"));
8
const utils_js_1 = require("../utils.js");
9
/**
 * Scrape a Wikipedia article and return its title, infobox image and text.
 *
 * The page HTML is obtained from `getHtml(lang, query)`. Direct children of
 * `#mw-content-text > div.mw-parser-output` are walked in document order:
 * collection starts at the first non-empty `<p>` and stops at the first
 * heading whose id looks like a references section.
 *
 * @param {string} query - Article title or search phrase, forwarded to getHtml.
 * @param {string} [lang='id'] - Wikipedia language code, forwarded to getHtml.
 * @returns {Promise<{title: string, img: string, articles: string}>}
 *   `title` is the page heading text, `img` the infobox image URL (empty
 *   string when the page has no infobox image) and `articles` the collected
 *   blocks joined with blank lines. Thumbnail blocks contribute their image
 *   URL instead of text.
 */
async function wikipedia(query, lang = 'id') {
    const html = await getHtml(lang, query);
    const $ = cheerio_1.default.load(html);
    // `#firstHeading > i` only matches headings whose text sits in an <i>
    // child; fall back to the whole heading so other pages still get a title.
    const title = $('#firstHeading > i').text().trim() ||
        $('#firstHeading').text().trim();
    // Only build a URL when an infobox image exists; calling getImgLink with
    // no src would otherwise produce a bare scheme instead of "no image".
    const imgSrc = $('td.infobox-image > a.image > img[src]').attr('src');
    const img = imgSrc ? getImgLink(imgSrc) : '';
    const articles = [];
    let start = false;
    $('#mw-content-text > div.mw-parser-output')
        .children()
        .each(function () {
        const tag = this.name;
        // Anchored on purpose: an unanchored /p|.../ also accepts tag names
        // that merely contain a "p", such as "span" or "pre".
        if (!/^(?:p|h[2-4]|div)$/.test(tag))
            return true;
        const el = $(this);
        const div = tag === 'div';
        // NOTE(review): newer MediaWiki output wraps section headings in
        // `div.mw-heading` and puts the id on the heading element itself
        // rather than on `span.mw-headline` — confirm against live markup.
        const h = /^h[2-4]$/.test(tag) || (div && el.hasClass('mw-heading'));
        if (h) {
            const headingId = el.find('span.mw-headline').attr('id') ||
                el.find('h2, h3, h4').attr('id') ||
                el.attr('id') ||
                '';
            // Returning false stops the cheerio iteration: nothing after the
            // references heading is collected.
            if (/referen|Примечания|Notes_et_références/i.test(headingId))
                return false;
        }
        // Replace every formula with its own cleaned text. Handling them one
        // at a time keeps several formulas in a block from being merged, and
        // `.text()` inserts the formula as text instead of parsing it as HTML.
        el.find('span.mwe-math-element').each(function () {
            const formula = $(this);
            const cleaned = formula
                .text()
                .trim()
                .replace(/(.*displaystyle.*|\\n)/, '');
            formula.replaceWith($('<span></span>').text(cleaned));
        });
        let text = '';
        const thumbImg = div && el.hasClass('thumb')
            ? el.find('div.thumbinner > a > img[src]')
            : null;
        if (thumbImg && thumbImg.length) {
            // Thumbnail block: emit the image URL in place of its caption.
            text = getImgLink(thumbImg.attr('src'));
        }
        else if (div && el.find('div > ol > li[id]').length) {
            // Ordered list with id-bearing items: one trimmed line per item.
            el.find('div > ol > li[id]').each(function () {
                text += $(this).text().trim() + '\n';
            });
        }
        else {
            text = el.text().trim();
        }
        // Everything before the first non-empty paragraph is dropped.
        if (!start && tag === 'p' && text)
            start = true;
        if (text && start && !el.find('div > ul > li').length) {
            // Headings get an extra leading newline to separate sections.
            articles.push((h ? '\n' : '') + text);
        }
        return true;
    });
    return {
        title,
        img,
        articles: articles.join('\n\n')
    };
}
62
exports.default = wikipedia;
63
/**
 * Tell whether `lang` is one of the Wikipedia language codes this module
 * handles directly ('en' or 'id').
 *
 * @param {*} lang - Candidate language code.
 * @returns {boolean} true only for exactly 'en' or 'id'.
 */
function isSupportLang(lang) {
    switch (lang) {
        case 'en':
        case 'id':
            return true;
        default:
            return false;
    }
}
66
/**
 * Download the HTML of the Wikipedia page that best matches `query`.
 *
 * `/wiki/<query>` is requested first. When that answers with HTTP 404 the
 * wiki's full-text search is queried and the first search result is fetched
 * instead.
 *
 * @param {string} lang - Wikipedia language code; anything isSupportLang
 *   rejects falls back to 'id'.
 * @param {string} query - Article title or search phrase.
 * @returns {Promise<string>} Body of the article page.
 * @throws {ScraperError} When the search yields no result link. Request
 *   failures other than a 404 on the first request are rethrown unchanged.
 */
async function getHtml(lang, query) {
    // Resolve the effective language once so the host and the search
    // parameters below always refer to the same wiki.
    const site = isSupportLang(lang) ? lang : 'id';
    const defaultLink = `https://${site}.wikipedia.org`;
    const encodedQuery = encodeURIComponent(query.trim());
    try {
        return (await (0, got_1.default)(`${defaultLink}/wiki/${encodedQuery}`)).body;
    }
    catch (err) {
        // got rejects with an HTTPError on non-2xx responses by default, so a
        // missing article arrives here as an exception rather than as a
        // response with statusCode 404. Only that case triggers the search.
        const status = err && err.response ? err.response.statusCode : undefined;
        if (status !== 404)
            throw err;
    }
    const searchParams = site === 'id'
        ? `title=Istimewa:Pencarian&search=${encodedQuery}&fulltext=1&ns0=1`
        : `search=${encodedQuery}&title=Special:Search&profile=advanced&fulltext=1&ns0=1`;
    const searchPage = await (0, got_1.default)(`${defaultLink}/w/index.php?${searchParams}`);
    const $ = cheerio_1.default.load(searchPage.body);
    let firstHref = '';
    $('ul.mw-search-results > li.mw-search-result').each(function () {
        const href = $(this)
            .find('div.mw-search-result-heading > a[href]')
            .attr('href');
        firstHref = href ? href.trim() : '';
        // Keep iterating only until the first usable link is found.
        return !firstHref;
    });
    if (!firstHref)
        throw new utils_js_1.ScraperError('404 Not Found!!');
    // Resolve against the wiki origin without re-encoding: running encodeURI
    // on an href that already contains percent-escapes would turn "%C3" into
    // "%25C3" and request a different (non-existent) title.
    return (await (0, got_1.default)(new URL(firstHref, defaultLink).href)).body;
}
91
/**
 * Turn an image `src` value into an absolute URL.
 *
 * - Empty or missing input yields '' (instead of a bare "https:" scheme).
 * - A value that already starts with `http:` or `https:` is returned as is.
 * - Anything else (typically a protocol-relative "//host/path") is prefixed
 *   with "https:" and URI-encoded.
 *
 * @param {string} [link=''] - Raw `src` attribute value.
 * @returns {string} Absolute URL, or '' when no link was supplied.
 */
function getImgLink(link = '') {
    if (!link)
        return '';
    // Anchored so that only a leading scheme counts; "https:" appearing
    // somewhere inside a path or query string must not suppress the prefix.
    if (/^https?:/i.test(link))
        return link;
    // encodeURI escapes "%" itself, which would double-encode escapes that
    // are already present ("%C3" -> "%25C3"); undo that for valid %XX pairs.
    return encodeURI(`https:${link}`).replace(/%25([0-9a-f]{2})/gi, '%$1');
}
96
//# sourceMappingURL=wikipedia.js.map
97