Path: blob/master/node_modules/@bochilteam/scraper/lib/cjs/others/wikipedia.js
1126 views
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const cheerio_1 = __importDefault(require("cheerio"));
const got_1 = __importDefault(require("got"));
const utils_js_1 = require("../utils.js");

/**
 * Scrape a Wikipedia article and return its title, lead image and body text.
 *
 * @param {string} query - Article title or search phrase.
 * @param {string} [lang='id'] - Wikipedia language code; values rejected by
 *   `isSupportLang` fall back to `'id'` inside `getHtml`.
 * @returns {Promise<{title: string, img: string, articles: string}>}
 *   `img` is `''` when the page has no infobox image; `articles` is the
 *   collected paragraphs/headings/thumbnail links joined by blank lines.
 * @throws {ScraperError} When neither the article nor a search result exists.
 */
async function wikipedia(query, lang = 'id') {
    const html = await getHtml(lang, query);
    const $ = cheerio_1.default.load(html);
    // Only italicised titles live inside <i>; fall back to the whole heading
    // so ordinary articles do not end up with an empty title.
    const title = $('#firstHeading > i').text().trim() ||
        $('#firstHeading').text().trim();
    const img = getImgLink($('td.infobox-image > a.image > img[src]').attr('src'));
    const articles = [];
    // Becomes true at the first non-empty <p>; everything before it is skipped.
    let start = false;
    $('#mw-content-text > div.mw-parser-output')
        .children()
        .each(function () {
            if (!/p|h[2-4]|div/.test(this.name)) {
                return true;
            }
            const el = $(this);
            const isHeading = /h[2-4]/.test(this.name);
            const isDiv = /div/.test(this.name);
            if (isHeading &&
                /referen|Примечания|Notes_et_références/i.test(el.find('span.mw-headline').attr('id') || '')) {
                // The references section marks the end of the article body;
                // returning false stops the cheerio iteration.
                return false;
            }
            // Replace every formula individually with its own plain-text
            // rendering. Inserting it as a text node keeps characters such
            // as "<" from being parsed as markup.
            el.find('span.mwe-math-element').each(function () {
                const mathEl = $(this);
                const formula = mathEl
                    .text()
                    .trim()
                    .replace(/(.*displaystyle.*|\\n)/, '');
                mathEl.replaceWith($('<span></span>').text(formula));
            });
            let text = '';
            const thumbImg = el.find('div.thumbinner > a > img[src]');
            const noteItems = el.find('div > ol > li[id]');
            if (isDiv && el.hasClass('thumb') && thumbImg.length) {
                // Thumbnails are represented by the link to their image.
                text = getImgLink(thumbImg.attr('src'));
            }
            else if (isDiv && noteItems.length) {
                noteItems.each(function () {
                    text += $(this).text().trim() + '\n';
                });
            }
            else {
                text = el.text().trim();
            }
            if (!start && this.name === 'p' && text) {
                start = true;
            }
            if (text && start && !el.find('div > ul > li').length) {
                // Headings get an extra leading newline to set them apart.
                articles.push((isHeading ? '\n' : '') + text);
            }
            return true;
        });
    return {
        title,
        img,
        articles: articles.join('\n\n')
    };
}
exports.default = wikipedia;

/**
 * Tell whether a Wikipedia language edition is handled by this scraper.
 *
 * @param {string} lang - Language code to check.
 * @returns {boolean} True for `'en'` and `'id'`.
 */
function isSupportLang(lang) {
    return ['en', 'id'].includes(lang);
}

/**
 * Download the HTML of the article matching `query`, falling back to the
 * first full-text search hit when no article with that exact title exists.
 *
 * @param {string} lang - Requested language; unsupported values use `'id'`.
 * @param {string} query - Article title or search phrase.
 * @returns {Promise<string>} Raw HTML of the article page.
 * @throws {ScraperError} When the search yields no result either.
 */
async function getHtml(lang, query) {
    // Resolve the language once so the host and the search parameters agree.
    const wikiLang = isSupportLang(lang) ? lang : 'id';
    const encodedQuery = encodeURIComponent(query.trim());
    const defaultLink = `https://${wikiLang}.wikipedia.org`;
    try {
        const res = await (0, got_1.default)(defaultLink + '/wiki/' + encodedQuery);
        return res.body;
    }
    catch (err) {
        // got rejects on non-2xx responses by default, so a missing article
        // surfaces here as an HTTPError. Only a 404 may fall through to the
        // search; every other failure is propagated unchanged.
        const status = err && err.response ? err.response.statusCode : undefined;
        if (status !== 404) {
            throw err;
        }
    }
    const searchParams = wikiLang === 'id'
        ? `title=Istimewa:Pencarian&search=${encodedQuery}&fulltext=1&ns0=1`
        : `search=${encodedQuery}&title=Special:Search&profile=advanced&fulltext=1&ns0=1`;
    const searchRes = await (0, got_1.default)(`${defaultLink}/w/index.php?${searchParams}`);
    const $ = cheerio_1.default.load(searchRes.body);
    // Only the first usable hit is followed, so stop scanning once found.
    let firstResult = '';
    $('ul.mw-search-results > li.mw-search-result').each(function () {
        const href = ($(this)
            .find('div.mw-search-result-heading > a[href]')
            .attr('href') || '').trim();
        if (!href) {
            return true;
        }
        firstResult = encodeUriOnce(href);
        return false;
    });
    if (firstResult) {
        return (await (0, got_1.default)(defaultLink + firstResult)).body;
    }
    throw new utils_js_1.ScraperError('404 Not Found!!');
}

/**
 * Turn an image `src` taken from the page into an absolute https URL.
 *
 * @param {string} [link=''] - Raw `src` value (typically protocol-relative).
 * @returns {string} The link prefixed with `https:` when it does not already
 *   contain it, or `''` when no link was supplied.
 */
function getImgLink(link = '') {
    if (!link) {
        // No image on the page: report "no link" instead of a bare "https:".
        return '';
    }
    if (/https:/i.test(link)) {
        return link;
    }
    return encodeUriOnce('https:' + link);
}

/**
 * `encodeURI` that leaves existing percent-escapes intact.
 *
 * Plain `encodeURI` escapes "%" itself, so an already encoded sequence such
 * as "%C3%A9" would become "%25C3%25A9"; the replace step undoes that.
 *
 * @param {string} uri - URI that may already contain percent-escapes.
 * @returns {string} The URI with unsafe characters escaped exactly once.
 */
function encodeUriOnce(uri) {
    return encodeURI(uri).replace(/%25([0-9a-f]{2})/gi, '%$1');
}
//# sourceMappingURL=wikipedia.js.map