GitHub Repository: laramies/theHarvester
Path: blob/master/tests/discovery/test_baidusearch.py
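"""Tests for theHarvester.discovery.baidusearch.

These tests stub out network I/O and verify two behaviors of SearchBaidu:
construction of the paginated Baidu query URLs (including proxy pass-through),
and extraction of emails and hostnames from the fetched pages.
"""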
import pytest

from theHarvester.discovery import baidusearch


class TestBaiduSearch:
    @pytest.mark.asyncio
    async def test_process_and_parsing(self, monkeypatch):
        called = {}

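        # Stand-in for AsyncFetcher.fetch_all: records the call arguments and
        # returns canned "pages" containing the emails and hostnames the
        # parser is expected to pick out.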
        async def fake_fetch_all(urls, headers=None, proxy=False):
            called["urls"] = urls
            called["headers"] = headers
            called["proxy"] = proxy
            return [
                "Contact [email protected] on a.example.com \n",
                " [email protected] is here and www.example.com appears \n",
                " Visit sub.a.example.com. [email protected] \n",
            ]

        # Patch the AsyncFetcher.fetch_all to avoid network I/O
        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        # Make user agent deterministic (not strictly necessary, but stable)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        search = baidusearch.SearchBaidu(word="example.com", limit=21)
        await search.process(proxy=True)

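        # With limit=21, range(0, 21, 10) yields pn values 0, 10, and 20,
        # so three paginated query URLs are expected.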
        expected_urls = [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=20&oq=example.com",
        ]
        assert called["urls"] == expected_urls
        assert called["proxy"] is True

        emails = await search.get_emails()
        hosts = await search.get_hostnames()

        # Ensure our expected values are present
        assert "[email protected]" in emails
        assert "[email protected]" in emails
        assert "[email protected]" in emails

        assert {"a.example.com", "www.example.com", "sub.a.example.com"} <= set(hosts)

    @pytest.mark.asyncio
    async def test_pagination_limit_exclusive(self, monkeypatch):
        captured = {}

        async def fake_fetch_all(urls, headers=None, proxy=False):
            captured["urls"] = urls
            return [""] * len(urls)

        import theHarvester.lib.core as core_module

        monkeypatch.setattr(core_module.AsyncFetcher, "fetch_all", fake_fetch_all)
        monkeypatch.setattr(core_module.Core, "get_user_agent", staticmethod(lambda: "UA"), raising=True)

        search = baidusearch.SearchBaidu(word="example.com", limit=20)
        await search.process()

        # For limit=20, range(0, 20, 10) yields 0 and 10 only (20 is excluded)
        assert captured["urls"] == [
            "https://www.baidu.com/s?wd=%40example.com&pn=0&oq=example.com",
            "https://www.baidu.com/s?wd=%40example.com&pn=10&oq=example.com",
        ]
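
# The asyncio markers above rely on the pytest-asyncio plugin; with it
# installed, this module can be run directly, e.g.:
#   pytest tests/discovery/test_baidusearch.py -q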