Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
laramies
GitHub Repository: laramies/theHarvester
Path: blob/master/tests/discovery/test_thc.py
902 views
1
#!/usr/bin/env python3
2
# coding=utf-8
3
"""
4
Tests for THC (ip.thc.org) discovery module.
5
6
THC provides multiple endpoints:
7
- Subdomain enumeration
8
- CNAME lookup
9
- Reverse DNS lookup
10
11
API Documentation: https://ip.thc.org/docs/
12
"""
13
import os
14
from typing import Optional
15
16
import httpx
17
import pytest
18
19
from theHarvester.discovery import thc
20
from theHarvester.lib.core import Core
21
22
# Raw value of the GITHUB_ACTIONS environment variable (None when it is unset).
# It is compared against 'true' further down to skip the integration tests.
github_ci: Optional[str] = os.environ.get('GITHUB_ACTIONS')
23
24
25
# =============================================================================
26
# 1. Direct API Tests (Endpoint Validation)
27
# =============================================================================
28
class TestThcApi:
    """Live checks that the THC HTTP endpoints respond as expected.

    Every test sends a real request to ip.thc.org. A test is skipped (not
    failed) when the request times out or cannot be completed.
    """

    @staticmethod
    async def _get(url: str) -> httpx.Response:
        """Fetch ``url`` with theHarvester's User-Agent, or skip the test.

        The request goes through ``httpx.AsyncClient`` so it is awaited
        instead of blocking the event loop inside an ``async`` test.

        Args:
            url: Absolute URL of the endpoint to query.

        Returns:
            The HTTP response, whatever its status code.
        """
        headers = {'User-Agent': Core.get_user_agent()}
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                return await client.get(url, headers=headers)
        except (httpx.TimeoutException, httpx.RequestError):
            # Network trouble says nothing about the code under test.
            pytest.skip('Skipping due to network error')

    @pytest.mark.asyncio
    async def test_api_subdomains_download_endpoint_responds(self) -> None:
        """Verify that the subdomain download endpoint answers with HTTP 200."""
        response = await self._get(
            'https://ip.thc.org/api/v1/subdomains/download?domain=google.com&limit=10&hide_header=true'
        )
        assert response.status_code == 200

    @pytest.mark.asyncio
    async def test_api_subdomains_returns_text_format(self) -> None:
        """Verify that the download endpoint answers with a text-like payload."""
        response = await self._get(
            'https://ip.thc.org/api/v1/subdomains/download?domain=google.com&limit=5&hide_header=true'
        )
        content_type = response.headers.get('content-type', '')
        # NOTE(review): the status-code clause lets any 200 response pass,
        # whatever its content type. Kept as-is because the permissive check
        # looks deliberate; confirm before tightening.
        assert 'text' in content_type or 'octet-stream' in content_type or response.status_code == 200

    @pytest.mark.asyncio
    async def test_api_cli_subdomain_endpoint(self) -> None:
        """Verify that the CLI endpoint /sb/{domain} answers with HTTP 200."""
        response = await self._get('https://ip.thc.org/sb/google.com?l=5&noheader')
        assert response.status_code == 200

    @pytest.mark.asyncio
    async def test_api_returns_rate_limit_headers(self) -> None:
        """Verify that the response carries the rate limit headers."""
        response = await self._get(
            'https://ip.thc.org/api/v1/subdomains/download?domain=example.com&limit=1&hide_header=true'
        )
        assert 'x-ratelimit-limit' in response.headers
        assert 'x-ratelimit-remaining' in response.headers
76
77
78
# =============================================================================
79
# 2. Subdomain Search Tests (Main Functionality)
80
# =============================================================================
81
class TestThcSubdomainSearch:
    """Exercise SearchThc subdomain lookups against live data."""

    @staticmethod
    def domain() -> str:
        """Return the domain used by most tests in this class."""
        return 'tesla.com'

    @staticmethod
    def small_domain() -> str:
        """Return the domain used by the per-hostname containment check."""
        return 'thc.org'

    @staticmethod
    async def _hostnames_for(target: str):
        """Run a search for ``target`` and return the get_hostnames() result.

        The calling test is skipped when process() raises an httpx timeout
        or request error.
        """
        searcher = thc.SearchThc(target)
        try:
            await searcher.process()
        except (httpx.TimeoutException, httpx.RequestError):
            pytest.skip('Skipping due to network error')
        return await searcher.get_hostnames()

    @pytest.mark.asyncio
    async def test_search_returns_set(self) -> None:
        """get_hostnames() must hand back a set."""
        found = await self._hostnames_for(self.domain())
        assert isinstance(found, set)

    @pytest.mark.asyncio
    async def test_search_finds_subdomains(self) -> None:
        """A search for the main test domain must yield at least one hostname."""
        found = await self._hostnames_for(self.domain())
        assert len(found) >= 1, 'Should find at least one subdomain for tesla.com'

    @pytest.mark.asyncio
    async def test_search_results_contain_target_domain(self) -> None:
        """Every returned hostname must contain the searched domain."""
        target = self.small_domain()
        for name in await self._hostnames_for(target):
            assert target in name, f'{name} should contain {target}'

    @pytest.mark.asyncio
    async def test_search_no_duplicates(self) -> None:
        """The returned hostnames must not contain repeated entries."""
        found = await self._hostnames_for(self.domain())
        as_list = [*found]
        # When the result is already a set this comparison holds trivially.
        assert len(set(as_list)) == len(as_list)
137
138
139
# =============================================================================
140
# 3. Edge Case Tests
141
# =============================================================================
142
class TestThcEdgeCases:
    """Check that unusual inputs still leave get_hostnames() returning a set."""

    @staticmethod
    async def _hostnames_for(target: str, *, swallow_errors: bool):
        """Run a search for ``target`` and return the get_hostnames() result.

        An httpx timeout or request error from process() skips the calling
        test. Any other exception is ignored when ``swallow_errors`` is true
        and re-raised otherwise.
        """
        searcher = thc.SearchThc(target)
        try:
            await searcher.process()
        except (httpx.TimeoutException, httpx.RequestError):
            pytest.skip('Skipping due to network error')
        except Exception:
            # These tests only care that get_hostnames() still works after
            # process() was fed odd input, so a failure here is tolerated.
            if not swallow_errors:
                raise
        return await searcher.get_hostnames()

    @pytest.mark.asyncio
    async def test_search_nonexistent_domain(self) -> None:
        """A domain that should not exist still produces a set."""
        found = await self._hostnames_for(
            'this-domain-definitely-does-not-exist-12345.com', swallow_errors=True
        )
        assert isinstance(found, set)

    @pytest.mark.asyncio
    async def test_search_empty_domain(self) -> None:
        """An empty domain string still produces a set."""
        found = await self._hostnames_for('', swallow_errors=True)
        assert isinstance(found, set)

    @pytest.mark.asyncio
    async def test_search_special_characters_domain(self) -> None:
        """A domain carrying SQL-like special characters still produces a set."""
        found = await self._hostnames_for('example.com; DROP TABLE domains;--', swallow_errors=True)
        assert isinstance(found, set)

    @pytest.mark.asyncio
    async def test_search_unicode_domain(self) -> None:
        """A punycode (IDN) domain still produces a set."""
        found = await self._hostnames_for('xn--mnchen-3ya.de', swallow_errors=True)
        assert isinstance(found, set)

    @pytest.mark.asyncio
    async def test_search_subdomain_as_input(self) -> None:
        """Passing a subdomain instead of a domain still produces a set."""
        # Unlike the tests above, unexpected exceptions are not tolerated here.
        found = await self._hostnames_for('www.google.com', swallow_errors=False)
        assert isinstance(found, set)
207
208
209
# =============================================================================
210
# 4. Proxy Tests
211
# =============================================================================
212
class TestThcProxy:
    """Check the proxy-related surface of SearchThc."""

    @staticmethod
    def domain() -> str:
        """Return the domain used by the proxy tests."""
        return 'example.com'

    @pytest.mark.asyncio
    async def test_process_accepts_proxy_parameter(self) -> None:
        """process() must accept a ``proxy`` keyword argument."""
        searcher = thc.SearchThc(self.domain())
        try:
            await searcher.process(proxy=False)
        except (httpx.TimeoutException, httpx.RequestError):
            pytest.skip('Skipping due to network error')
        else:
            assert isinstance(await searcher.get_hostnames(), set)

    @pytest.mark.asyncio
    async def test_proxy_attribute_is_set(self) -> None:
        """A freshly built instance must expose ``proxy`` as False."""
        assert thc.SearchThc(self.domain()).proxy is False
235
236
237
# =============================================================================
238
# 5. Initialization and Attributes Tests
239
# =============================================================================
240
class TestThcInitialization:
    """Check the attributes and methods present right after construction."""

    def test_init_sets_word(self) -> None:
        """The constructor must store the domain in ``word``."""
        expected = 'test.com'
        assert thc.SearchThc(expected).word == expected

    def test_init_creates_empty_results(self) -> None:
        """``results`` must exist and start out empty."""
        instance = thc.SearchThc('test.com')
        assert hasattr(instance, 'results')
        assert len(instance.results) == 0

    def test_init_proxy_default_false(self) -> None:
        """``proxy`` must default to False."""
        instance = thc.SearchThc('test.com')
        assert instance.proxy is False

    def test_init_has_rate_limit_settings(self) -> None:
        """The retry settings must exist with the expected values."""
        instance = thc.SearchThc('test.com')
        for attribute, expected in (('max_retries', 3), ('base_delay', 2)):
            assert hasattr(instance, attribute)
            assert getattr(instance, attribute) == expected

    def test_class_has_required_methods(self) -> None:
        """The three methods used by these tests must exist and be callable."""
        instance = thc.SearchThc('test.com')
        for method_name in ('do_search', 'get_hostnames', 'process'):
            assert hasattr(instance, method_name)
            assert callable(getattr(instance, method_name))
277
278
279
# =============================================================================
280
# 6. Response Format Tests
281
# =============================================================================
282
class TestThcResponseFormat:
    """Check the shape of the individual hostnames returned by a search."""

    @staticmethod
    def domain() -> str:
        """Return the domain used by the format tests."""
        return 'github.com'

    async def _hostnames(self):
        """Search for ``domain()`` and return the get_hostnames() result.

        The calling test is skipped when process() raises an httpx timeout
        or request error.
        """
        searcher = thc.SearchThc(self.domain())
        try:
            await searcher.process()
        except (httpx.TimeoutException, httpx.RequestError):
            pytest.skip('Skipping due to network error')
        return await searcher.get_hostnames()

    @pytest.mark.asyncio
    async def test_hostnames_are_strings(self) -> None:
        """Every returned hostname must be a ``str``."""
        for entry in await self._hostnames():
            assert isinstance(entry, str)

    @pytest.mark.asyncio
    async def test_hostnames_are_valid_format(self) -> None:
        """No hostname may contain a space, newline or tab."""
        for entry in await self._hostnames():
            for forbidden in (' ', '\n', '\t'):
                assert forbidden not in entry

    @pytest.mark.asyncio
    async def test_hostnames_are_lowercase(self) -> None:
        """Every returned hostname must equal its lowercased form."""
        for entry in await self._hostnames():
            assert entry.lower() == entry
326
327
328
# =============================================================================
329
# 7. Integration Tests with theHarvester
330
# =============================================================================
331
@pytest.mark.skipif(github_ci == 'true', reason='Skip integration tests in CI')
class TestThcIntegration:
    """Smoke checks that the THC module plugs into theHarvester.

    The whole class is skipped when ``github_ci`` equals ``'true'``.
    """

    @pytest.mark.asyncio
    async def test_module_can_be_imported(self) -> None:
        """Importing the thc discovery module must succeed."""
        from theHarvester.discovery import thc as imported_module

        assert imported_module is not None

    @pytest.mark.asyncio
    async def test_search_class_exists(self) -> None:
        """The module must expose a ``SearchThc`` attribute."""
        from theHarvester.discovery import thc as imported_module

        assert hasattr(imported_module, 'SearchThc')

    @pytest.mark.asyncio
    async def test_compatible_with_store_function(self) -> None:
        """An instance must offer ``process`` and ``get_hostnames``.

        Per the original test description, these are what the store function
        in __main__.py relies on.
        """
        instance = thc.SearchThc('example.com')
        for required in ('process', 'get_hostnames'):
            assert hasattr(instance, required)
354
355
if __name__ == '__main__':
    # Propagate pytest's exit code so that running this file directly exits
    # non-zero when tests fail, instead of always exiting with 0.
    raise SystemExit(pytest.main())
357
358