Path: blob/master/node_modules/@bochilteam/scraper/lib/esm/others/wikipedia.js
1126 views
import cheerio from 'cheerio';
import got from 'got';
import { ScraperError } from '../utils.js';

/**
 * Scrape a Wikipedia article and return its title, lead image and body text.
 *
 * @param {string} query - Article name or search phrase.
 * @param {string} [lang='id'] - Wikipedia language subdomain. Only the
 *   languages accepted by `isSupportLang` are used; anything else falls back
 *   to `'id'`.
 * @returns {Promise<{title: string, img: string, articles: string}>}
 *   `title` is the text of `#firstHeading > i`, `img` is the infobox image
 *   URL run through `getImgLink`, and `articles` is the collected text
 *   chunks joined with blank lines.
 * @throws {ScraperError} When neither the direct page nor the search
 *   fallback yields an article.
 */
export default async function wikipedia(query, lang = 'id') {
    const html = await getHtml(lang, query);
    const $ = cheerio.load(html);
    const title = $('#firstHeading > i').text().trim();
    const img = getImgLink($('td.infobox-image > a.image > img[src]').attr('src'));
    const articles = [];
    // Becomes true at the first non-empty <p>; everything before it is skipped.
    let started = false;

    $('#mw-content-text > div.mw-parser-output')
        .children()
        .each(function () {
            // NOTE(review): these tag-name patterns are unanchored, so they also
            // match names that merely contain "p" or "div" (e.g. `span`, `pre`).
            // Kept as-is to preserve which elements are scraped.
            if (!/p|h[2-4]|div/.test(this.name)) {
                return true;
            }
            const isHeading = /h[2-4]/.test(this.name);
            const isDiv = /div/.test(this.name);
            const el = $(this);

            // Stop at the references/notes heading: returning false ends the
            // cheerio iteration, so nothing after it is collected.
            if (isHeading &&
                /referen|Примечания|Notes_et_références/i.test(el.find('span.mw-headline').attr('id'))) {
                return false;
            }

            // Replace every math element with its own cleaned-up text. Each
            // element is handled separately so that a paragraph with several
            // formulas does not get all of them concatenated into each slot,
            // and the text is set via .text() so it is never parsed as HTML.
            el.find('span.mwe-math-element').each(function () {
                const formula = $(this);
                const cleaned = formula
                    .text()
                    .trim()
                    .replace(/(.*displaystyle.*|\\n)/, '');
                formula.replaceWith($('<span></span>').text(cleaned));
            });

            let text = '';
            const thumbImage = el.find('div.thumbinner > a > img[src]');
            const listItems = el.find('div > ol > li[id]');
            if (isDiv && el.hasClass('thumb') && thumbImage.length) {
                // Thumbnail boxes contribute their image URL instead of text.
                text = getImgLink(thumbImage.attr('src'));
            }
            else if (isDiv && listItems.length) {
                listItems.each(function () {
                    text += $(this).text().trim() + '\n';
                });
            }
            else {
                text = el.text().trim();
            }

            if (!started && this.name === 'p' && text) {
                started = true;
            }
            if (text && started && !el.find('div > ul > li').length) {
                // Headings get an extra leading newline to set them apart.
                articles.push((isHeading ? '\n' : '') + text);
            }
            return true;
        });

    return {
        title,
        img,
        articles: articles.join('\n\n')
    };
}

/**
 * Tell whether `lang` is one of the Wikipedia language codes this scraper
 * handles directly.
 *
 * @param {string} lang - Language code to check.
 * @returns {boolean} True for `'en'` and `'id'`.
 */
function isSupportLang(lang) {
    return ['en', 'id'].includes(lang);
}

/**
 * Fetch a URL and return its body, or `null` when the server answers 404.
 *
 * got rejects with an HTTPError for 404 responses by default, so the 404
 * case has to be detected in the catch branch; any other failure is
 * rethrown unchanged.
 *
 * @param {string} url - Absolute URL to request.
 * @returns {Promise<string|null>} Response body, or `null` on 404.
 */
async function fetchPageOrNull(url) {
    try {
        const res = await got(url);
        return res.statusCode === 404 ? null : res.body;
    }
    catch (err) {
        if (err && err.response && err.response.statusCode === 404) {
            return null;
        }
        throw err;
    }
}

/**
 * Download the HTML of the article matching `query`.
 *
 * The direct `/wiki/<query>` page is tried first. If it answers 404, the
 * site's full-text search is queried and the first result is downloaded.
 *
 * @param {string} lang - Requested language; unsupported values fall back
 *   to `'id'`.
 * @param {string} query - Article name or search phrase.
 * @returns {Promise<string>} HTML of the article page.
 * @throws {ScraperError} When the search returns no usable result.
 */
async function getHtml(lang, query) {
    const encodedQuery = encodeURIComponent(query.trim());
    // Use the same effective language for the host and for the search URL.
    const siteLang = isSupportLang(lang) ? lang : 'id';
    const defaultLink = `https://${siteLang}.wikipedia.org`;

    const directPage = await fetchPageOrNull(defaultLink + '/wiki/' + encodedQuery);
    if (directPage !== null) {
        return directPage;
    }

    const searchParams = siteLang === 'id'
        ? `title=Istimewa:Pencarian&search=${encodedQuery}&fulltext=1&ns0=1`
        : `search=${encodedQuery}&title=Special:Search&profile=advanced&fulltext=1&ns0=1`;
    const searchRes = await got(`${defaultLink}/w/index.php?${searchParams}`);
    const $ = cheerio.load(searchRes.body);

    // Take the first search result that carries a non-empty href.
    let firstHref = '';
    $('ul.mw-search-results > li.mw-search-result').each(function () {
        const href = $(this)
            .find('div.mw-search-result-heading > a[href]')
            .attr('href');
        const trimmed = href === null || href === undefined ? '' : href.trim();
        if (trimmed) {
            firstHref = trimmed;
            return false;
        }
        return true;
    });

    if (firstHref) {
        // Resolve against the site origin with URL instead of encodeURI:
        // encodeURI escapes "%" again, which would double-encode an href
        // that is already percent-encoded.
        return (await got(new URL(firstHref, defaultLink).href)).body;
    }
    throw new ScraperError('404 Not Found!!');
}

/**
 * Turn an image `src` into an https URL.
 *
 * Links that do not contain `https:` are prefixed with `https:` and passed
 * through `encodeURI`; links that already contain it are returned untouched.
 *
 * NOTE(review): when `link` is omitted the result is the bare string
 * `'https:'`; callers presumably treat that as "no image" — confirm.
 *
 * @param {string} [link=''] - Image source, typically protocol-relative.
 * @returns {string} The normalized link.
 */
function getImgLink(link = '') {
    if (!/https:/i.test(link)) {
        link = encodeURI('https:' + link);
    }
    return link;
}
//# sourceMappingURL=wikipedia.js.map