Path: blob/master/tests/discovery/test_baidusearch.py
906 views
import pytest

from theHarvester.discovery import baidusearch


class TestBaiduSearch:
    """Unit tests for ``baidusearch.SearchBaidu``.

    All network I/O is stubbed out by monkeypatching
    ``theHarvester.lib.core.AsyncFetcher.fetch_all``, so these tests only
    exercise URL construction, pagination, and response parsing.
    """

    @pytest.mark.asyncio
    async def test_process_and_parsing(self, monkeypatch):
        """process() builds the expected paginated Baidu URLs, forwards the
        proxy flag, and parses emails/hostnames out of the fetched pages."""
        called = {}

        async def fake_fetch_all(urls, headers=None, proxy=False):
            # Record the arguments so we can assert on them after process().
            called["urls"] = urls
            called["headers"] = headers
            called["proxy"] = proxy
            return [
                "Contact [email protected] on a.example.com \n",
                " [email protected] is here and www.example.com appears \n",
                " Visit sub.a.example.com. [email protected] \n",
            ]

        # Patch the AsyncFetcher.fetch_all to avoid network I/O
        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        # Make user agent deterministic (not strictly necessary, but stable)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        search = baidusearch.SearchBaidu(word="example.com", limit=21)
        await search.process(proxy=True)

        # limit=21 -> range(0, 21, 10) yields offsets 0, 10, 20.
        expected_urls = [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=20&oq=example.com",
        ]
        assert called["urls"] == expected_urls
        assert called["proxy"] is True

        emails = await search.get_emails()
        hosts = await search.get_hostnames()

        # Ensure our expected values are present
        assert "[email protected]" in emails
        assert "[email protected]" in emails
        assert "[email protected]" in emails

        assert {"a.example.com", "www.example.com", "sub.a.example.com"} <= set(hosts)

    @pytest.mark.asyncio
    async def test_pagination_limit_exclusive(self, monkeypatch):
        """The pagination upper bound is exclusive: limit=20 must request
        offsets 0 and 10 only, never pn=20."""
        captured = {}

        async def fake_fetch_all(urls, headers=None, proxy=False):
            captured["urls"] = urls
            # One empty page per requested URL; content is irrelevant here.
            return [""] * len(urls)

        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        search = baidusearch.SearchBaidu(word="example.com", limit=20)
        await search.process()

        # For limit=20, range(0, 20, 10) yields 0 and 10 only (20 is excluded)
        assert captured["urls"] == [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
        ]