# tests/discovery/test_baidusearch.py
"""Unit tests for theHarvester's Baidu search discovery module.

Network I/O is avoided by monkeypatching ``AsyncFetcher.fetch_all`` with an
in-memory fake; the tests then verify search-URL construction, pagination
bounds, and the parsing of e-mails/hostnames out of the fetched pages.
"""

import pytest

from theHarvester.discovery import baidusearch

# Run every coroutine test in this module under pytest-asyncio.
# NOTE: pytest.MarkDecorator is the public alias (pytest >= 6.2) of the
# private _pytest.mark.structures.MarkDecorator this file used to import.
pytestmark: pytest.MarkDecorator = pytest.mark.asyncio


class TestBaiduSearch:
    async def test_process_and_parsing(self, monkeypatch):
        """process() builds the expected paginated URLs and the parser
        extracts the e-mails and hostnames present in the fetched pages."""
        called = {}

        async def fake_fetch_all(urls, headers=None, proxy=False):
            # Record the call so the test can assert on URL construction,
            # then return canned "pages" containing e-mails and hostnames.
            called["urls"] = urls
            called["headers"] = headers
            called["proxy"] = proxy
            return [
                "Contact [email protected] on a.example.com \n",
                " [email protected] is here and www.example.com appears \n",
                " Visit sub.a.example.com. [email protected] \n",
            ]

        # Patch AsyncFetcher.fetch_all to avoid real network I/O.
        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        # Make the user agent deterministic (not strictly necessary, but stable).
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        # limit=21 -> range(0, 21, 10) -> pages pn=0, 10 and 20.
        search = baidusearch.SearchBaidu(word="example.com", limit=21)
        await search.process(proxy=True)

        expected_urls = [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=20&oq=example.com",
        ]
        assert called["urls"] == expected_urls
        assert called["proxy"] is True

        emails = await search.get_emails()
        hosts = await search.get_hostnames()

        # Ensure our expected values are present.
        # NOTE(review): the e-mail literal appears redacted by an export tool
        # (the original file asserted three identical "[email protected]"
        # strings); restore the original distinct addresses if available.
        assert "[email protected]" in emails

        assert {"a.example.com", "www.example.com", "sub.a.example.com"} <= set(hosts)

    async def test_pagination_limit_exclusive(self, monkeypatch):
        """The upper pagination bound is exclusive: limit=20 yields pn=0 and
        pn=10 only, never pn=20."""
        captured = {}

        async def fake_fetch_all(urls, headers=None, proxy=False):
            # Capture the URLs; return one empty "page" per requested URL.
            captured["urls"] = urls
            return [""] * len(urls)

        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        search = baidusearch.SearchBaidu(word="example.com", limit=20)
        await search.process()

        # For limit=20, range(0, 20, 10) yields 0 and 10 only (20 is excluded).
        assert captured["urls"] == [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
        ]