Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
laramies
GitHub Repository: laramies/theHarvester
Path: blob/master/tests/discovery/test_baidusearch.py
609 views
1
import pytest
# MarkDecorator is public API on the pytest namespace (since pytest 6.2);
# avoid importing from the private _pytest internals, which can break
# across pytest releases.
from pytest import MarkDecorator

from theHarvester.discovery import baidusearch

# Mark every coroutine test in this module to run under pytest-asyncio.
pytestmark: MarkDecorator = pytest.mark.asyncio
7
8
9
class TestBaiduSearch:
    """Unit tests for SearchBaidu with all network I/O stubbed out."""

    async def test_process_and_parsing(self, monkeypatch):
        recorded = {}

        async def stub_fetch_all(urls, headers=None, proxy=False):
            # Record exactly what process() asked for, then hand back
            # canned response bodies containing emails and hostnames.
            recorded["urls"] = urls
            recorded["headers"] = headers
            recorded["proxy"] = proxy
            return [
                "Contact [email protected] on a.example.com \n",
                " [email protected] is here and www.example.com appears \n",
                " Visit sub.a.example.com. [email protected] \n",
            ]

        # Replace AsyncFetcher.fetch_all so no real requests are made,
        # and pin the user agent for deterministic headers.
        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", stub_fetch_all)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        searcher = baidusearch.SearchBaidu(word="example.com", limit=21)
        await searcher.process(proxy=True)

        # limit=21 paginates over pn=0, 10, 20.
        expected_urls = [
            f"https://www.baidu.com/s?wd=%40example.com&pn={page}&oq=example.com"
            for page in (0, 10, 20)
        ]
        assert recorded["urls"] == expected_urls
        assert recorded["proxy"] is True

        found_emails = await searcher.get_emails()
        found_hosts = await searcher.get_hostnames()

        # The canned bodies above must all have been parsed out.
        for expected_email in (
            "[email protected]",
            "[email protected]",
            "[email protected]",
        ):
            assert expected_email in found_emails

        assert {"a.example.com", "www.example.com", "sub.a.example.com"} <= set(found_hosts)

    async def test_pagination_limit_exclusive(self, monkeypatch):
        seen = {}

        async def stub_fetch_all(urls, headers=None, proxy=False):
            seen["urls"] = urls
            # One empty body per requested URL.
            return [""] * len(urls)

        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", stub_fetch_all)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        searcher = baidusearch.SearchBaidu(word="example.com", limit=20)
        await searcher.process()

        # For limit=20, range(0, 20, 10) yields 0 and 10 only (20 is excluded).
        assert seen["urls"] == [
            f"https://www.baidu.com/s?wd=%40example.com&pn={page}&oq=example.com"
            for page in (0, 10)
        ]