# CoCalc -- test_security.py
#
# GitHub Repository: laramies/theHarvester
# Path: blob/master/tests/test_security.py
# 883 views
1
import os
2
import re
3
import tempfile
4
from pathlib import Path
5

6
import pytest
7
from fastapi.testclient import TestClient
8

9
from theHarvester.__main__ import sanitize_filename, sanitize_for_xml
10

11

12
class TestCORSConfiguration:
    """Test CORS security configuration."""

    @staticmethod
    def _cors_options():
        """
        Return the keyword arguments the API app registered its CORS middleware with.

        Fails the calling test when no CORS middleware is registered on the app.
        """
        from theHarvester.lib.api.api import app

        cors_middleware = next(
            (entry for entry in app.user_middleware if 'CORSMiddleware' in str(entry.cls)),
            None,
        )
        assert cors_middleware is not None, 'CORS middleware should be configured'
        return cors_middleware.kwargs

    @staticmethod
    def _as_set(option):
        """
        Normalise a CORS option to a set of strings.

        Accepts None, a single string, or any iterable of strings so that the
        wildcard checks below cannot be bypassed by the container type used in
        the middleware configuration.
        """
        if option is None:
            return set()
        if isinstance(option, str):
            return {option}
        return set(option)

    def test_cors_does_not_allow_credentials_with_wildcard_origins(self):
        """
        Security Test: CORS should not allow credentials with wildcard origins.

        This prevents credential theft attacks where any origin can make
        authenticated requests to the API.
        """
        options = self._cors_options()
        allow_origins = self._as_set(options.get('allow_origins', ()))
        allow_credentials = options.get('allow_credentials', False)

        # If allow_origins contains '*', allow_credentials must be False
        if '*' in allow_origins:
            assert (
                allow_credentials is False
            ), 'CRITICAL: CORS must not allow credentials with wildcard origins (CVE risk)'

    def test_cors_restricts_http_methods(self):
        """
        Security Test: CORS should restrict HTTP methods to only what's needed.

        Reduces attack surface by limiting available methods.
        """
        options = self._cors_options()
        allow_methods = self._as_set(options.get('allow_methods', ()))

        # A wildcard anywhere in the option (not only the exact list ['*'])
        # means "allow every method", so test membership rather than equality.
        assert '*' not in allow_methods, 'CORS should restrict HTTP methods, not allow all (*)'

        # Should only allow necessary methods (GET, POST for this API)
        dangerous_methods = {'DELETE', 'PUT', 'PATCH', 'TRACE', 'CONNECT'}
        exposed = {method.upper() for method in allow_methods} & dangerous_methods
        assert not exposed, f'Unnecessary HTTP methods detected: {exposed}'
73

74

75
class TestXMLInjectionPrevention:
76
    """Test XML injection prevention."""
77

78
    def test_sanitize_for_xml_escapes_special_characters(self):
79
        """
80
        Security Test: Verify XML special characters are properly escaped.
81

82
        Prevents XML injection attacks.
83
        """
84
        # Test all XML special characters
85
        test_cases = [
86
            ('&', '&amp;'),
87
            ('<', '&lt;'),
88
            ('>', '&gt;'),
89
            ('"', '&quot;'),
90
            ("'", '&apos;'),
91
            ('<script>alert("XSS")</script>', '&lt;script&gt;alert(&quot;XSS&quot;)&lt;/script&gt;'),
92
            ('[email protected] & <test>', '[email protected] &amp; &lt;test&gt;'),
93
            ('Normal text', 'Normal text'),
94
        ]
95

96
        for input_text, expected_output in test_cases:
97
            result = sanitize_for_xml(input_text)
98
            assert result == expected_output, f'Failed to properly escape: {input_text}'
99

100
    def test_sanitize_for_xml_prevents_xml_entity_injection(self):
101
        """
102
        Security Test: Prevent XML entity injection attempts.
103
        """
104
        malicious_inputs = [
105
            '<?xml version="1.0"?><!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>',
106
            '<!ENTITY xxe SYSTEM "file:///dev/random">',
107
            '<![CDATA[malicious]]>',
108
            '&#x3C;script&#x3E;',
109
        ]
110

111
        for malicious_input in malicious_inputs:
112
            result = sanitize_for_xml(malicious_input)
113
            # Ensure dangerous characters are escaped
114
            assert '&lt;' in result or '&amp;' in result, f'Failed to sanitize: {malicious_input}'
115
            assert '<' not in result or result == malicious_input.replace('<', '&lt;'), f'XML tags not escaped: {malicious_input}'
116

117
    def test_command_line_args_are_sanitized_in_xml_output(self):
118
        """
119
        Security Test: Command line arguments must be sanitized before XML output.
120

121
        This test is a conceptual check - in real usage, ensure the XML writing
122
        code uses sanitize_for_xml() on all user-controlled data.
123
        """
124
        # Simulate dangerous command line arguments
125
        dangerous_args = [
126
            '--domain=test.com',
127
            "--source='<script>alert(1)</script>'",
128
            '--output="; rm -rf /',
129
            '--domain=example.com&param=<injection>',
130
        ]
131

132
        for arg in dangerous_args:
133
            sanitized = sanitize_for_xml(arg)
134
            # Verify no unescaped XML special characters remain
135
            assert '<script>' not in sanitized, f'Script tag not escaped in: {arg}'
136
            assert '&param=' not in sanitized or '&amp;' in sanitized, f'Ampersand not escaped in: {arg}'
137

138

139
class TestInformationDisclosure:
    """Test information disclosure prevention."""

    # Common patterns that indicate an internal filesystem path leaked into a response
    _PATH_PATTERNS = [
        r'/home/\w+/',
        r'/usr/local/',
        r'C:\\Users\\',
        r'/var/www/',
        r'site-packages/',
        r'\.py:\d+',  # filename.py:123
    ]

    @pytest.fixture
    def client(self):
        """Create a test client for API testing."""
        from theHarvester.lib.api.api import app

        return TestClient(app)

    @staticmethod
    def _payload(response):
        """
        Return the decoded JSON body of `response`, or its raw text if the body is not JSON.

        Error responses are not guaranteed to be JSON; falling back to the text
        keeps the leak checks running instead of erroring out in `.json()`.
        """
        try:
            return response.json()
        except ValueError:
            return response.text

    def test_api_does_not_expose_traceback_in_error_responses(self, client):
        """
        Security Test: API should never expose stack traces to clients.

        Stack traces can reveal sensitive information about the system.
        """
        # Test the /sources endpoint with a simulated error condition
        response = client.get('/sources')

        # Even if there's an error, traceback should not be in response
        if response.status_code >= 400:
            response_data = self._payload(response)
            assert 'traceback' not in response_data, 'Traceback exposed in error response'
            assert 'Traceback' not in str(response_data), 'Traceback text found in response'
            assert 'File "' not in str(response_data), 'File paths exposed in response'

    def test_error_responses_do_not_leak_internal_paths(self, client):
        """
        Security Test: Error messages should not reveal internal file paths.
        """
        # Try various endpoints
        endpoints = ['/sources', '/dnsbrute?domain=test', '/query?domain=test&source=baidu']

        for endpoint in endpoints:
            response = client.get(endpoint)
            # Only non-200 bodies are inspected; a 200 is treated as an empty payload
            payload = {} if response.status_code == 200 else self._payload(response)
            response_text = str(payload)

            for pattern in self._PATH_PATTERNS:
                matches = re.findall(pattern, response_text)
                assert not matches, f'Internal path leaked in {endpoint}: {matches}'

    def test_debug_mode_does_not_expose_sensitive_info(self, client, monkeypatch):
        """
        Security Test: Even with DEBUG=1, sensitive info should not be exposed to clients.
        """
        # Set DEBUG environment variable
        monkeypatch.setenv('DEBUG', '1')

        # Make request that might trigger an error
        response = client.get('/dnsbrute?domain=')  # Invalid request

        if response.status_code >= 400:
            response_data = self._payload(response)
            # Even with DEBUG=1, traceback should NOT be sent to client
            assert 'traceback' not in response_data, 'DEBUG mode exposes tracebacks to clients'
            assert 'Traceback' not in str(response_data), 'DEBUG mode exposes tracebacks to clients'
204

205

206
class TestPathTraversalPrevention:
    """Test path traversal prevention."""

    # Characters with special meaning to a shell that must never survive sanitization
    _SHELL_SPECIAL_CHARS = ';|&$`(){}[]<>'

    def test_sanitize_filename_removes_path_components(self):
        """
        Security Test: Filenames should not contain path traversal sequences.
        """
        dangerous_filenames = [
            '../../../etc/passwd',
            '..\\..\\..\\windows\\system32\\config\\sam',
            '/etc/passwd',
            'C:\\Windows\\System32\\config\\sam',
            '../../sensitive_file.txt',
            './../hidden_file',
            'subdir/../../../etc/passwd',
        ]

        for dangerous_filename in dangerous_filenames:
            result = sanitize_filename(dangerous_filename)

            # Should not contain any path separators
            assert '/' not in result, f'Path separator found in sanitized filename: {result}'
            assert '\\' not in result, f'Windows path separator found: {result}'

            # Should not start with .. (parent directory reference at the beginning is most dangerous)
            assert not result.startswith('..'), f'Parent directory reference at start: {result}'

            # Should only be the basename
            assert os.path.dirname(result) == '', f'Path component remains: {result}'

    def test_sanitize_filename_removes_dangerous_characters(self):
        """
        Security Test: Filenames should only contain safe characters.
        """
        test_cases = [
            'file; rm -rf /',
            'file`whoami`.txt',
            'file$(malicious).txt',
            'file|cmd.txt',
            'file&background.txt',
            'normal-file_123.txt',
        ]

        for input_filename in test_cases:
            result = sanitize_filename(input_filename)

            # Should not be empty
            assert len(result) > 0, f'Sanitized filename is empty for: {input_filename}'

            # Should not contain shell special characters
            for char in self._SHELL_SPECIAL_CHARS:
                assert char not in result, f'Dangerous character {char} found in: {result}'

            # Should only contain alphanumeric, dash, underscore, and dot.
            # fullmatch is used because '$' in re.match also matches just before
            # a trailing newline, which would let 'name\n' slip through.
            assert re.fullmatch(r'[a-zA-Z0-9._-]+', result), f'Invalid characters in sanitized filename: {result}'

    def test_sanitize_filename_prevents_hidden_files(self):
        """
        Security Test: Prevent creation of hidden files.
        """
        hidden_files = ['.bashrc', '.ssh_config', '.env', '..hidden', '.']

        for hidden_file in hidden_files:
            result = sanitize_filename(hidden_file)

            # Should not start with a dot (an empty result is accepted here)
            if result:
                assert not result.startswith('.'), f'Hidden file not prevented: {result}'

    def test_filename_sanitization_preserves_safe_filenames(self):
        """
        Security Test: Safe filenames should remain mostly unchanged.
        """
        safe_filenames = [
            'report.json',
            'results_2024-01-17.xml',
            'scan-output.txt',
            'data_file_v2.csv',
        ]

        for safe_filename in safe_filenames:
            result = sanitize_filename(safe_filename)

            # Safe filenames should be preserved (possibly with minor changes)
            assert len(result) > 0, 'Safe filename was completely removed'
            if '.' in safe_filename:
                assert '.' in result, 'File extension removed incorrectly'

    def test_path_traversal_in_file_operations(self):
        """
        Integration Test: Verify file operations don't allow path traversal.
        """
        # Simulate user input
        user_input = '../../../etc/passwd'
        sanitized = sanitize_filename(user_input)

        # Build the path a caller would write to, inside a throwaway directory
        with tempfile.TemporaryDirectory() as tmpdir:
            # Resolve both sides first: commonpath() compares components
            # literally and does not collapse '..', so an unresolved
            # '<tmpdir>/../x' would wrongly look like it is inside tmpdir.
            base_dir = os.path.realpath(tmpdir)
            safe_path = os.path.realpath(os.path.join(base_dir, sanitized))

            # Ensure the resolved path is still within tmpdir
            assert os.path.commonpath([base_dir, safe_path]) == base_dir, 'Path traversal detected!'
314

315

316
class TestSecurityBestPractices:
    """Additional security best practices tests."""

    def test_no_hardcoded_secrets_in_code(self):
        """
        Security Test: Ensure no hardcoded secrets in main code files.
        """
        # Check main application files for common secret patterns
        files_to_check = [
            'theHarvester/__main__.py',
            'theHarvester/lib/api/api.py',
            'theHarvester/lib/core.py',
        ]

        # Patterns that might indicate hardcoded secrets
        secret_patterns = [
            r'password\s*=\s*["\'][^"\']+["\']',
            r'api_key\s*=\s*["\'][a-zA-Z0-9]{20,}["\']',
            r'secret\s*=\s*["\'][^"\']+["\']',
            r'token\s*=\s*["\'][a-zA-Z0-9]{20,}["\']',
        ]

        # Substrings that mark a match as an obvious non-secret (example values, empty strings)
        placeholder_markers = ('example', 'your_', '""', "''")

        # Look the files up relative to the repository root first so the scan
        # does not silently check nothing when pytest is started from another
        # directory. Assumes this file lives one level below the root
        # (<repo>/tests/) - TODO confirm if the test layout changes.
        repo_root = Path(__file__).resolve().parent.parent

        for file_path in files_to_check:
            source_file = repo_root / file_path
            if not source_file.is_file():
                # Fall back to the original behaviour: relative to the working directory
                source_file = Path(file_path)
            if not source_file.is_file():
                continue

            content = source_file.read_text(encoding='utf-8')

            for pattern in secret_patterns:
                matches = re.findall(pattern, content, re.IGNORECASE)
                real_matches = [
                    match
                    for match in matches
                    if not any(marker in match.lower() for marker in placeholder_markers)
                ]
                assert not real_matches, f'Potential hardcoded secret in {file_path}: {real_matches}'

    def test_api_has_rate_limiting(self):
        """
        Security Test: Verify API endpoints have rate limiting enabled.
        """
        from theHarvester.lib.api.api import app

        # Check that rate limiting is configured
        assert hasattr(app.state, 'limiter'), 'Rate limiter not configured'
        assert app.state.limiter is not None, 'Rate limiter is None'

    def test_sensitive_endpoints_require_validation(self):
        """
        Security Test: Ensure sensitive endpoints validate input.
        """
        from theHarvester.lib.api.api import app

        client = TestClient(app)

        # Endpoints called with invalid input; each must answer with an error status.
        # Note: The /query endpoint requires 'source' as a list parameter
        invalid_requests = [
            '/dnsbrute?domain=',  # Empty domain should be rejected
        ]

        for endpoint in invalid_requests:
            response = client.get(endpoint)
            assert (
                response.status_code >= 400
            ), f'Endpoint {endpoint} should reject invalid input (got {response.status_code})'

        # Test query endpoint with proper parameter format but invalid domain
        response = client.get('/query?domain=a&source=baidu')  # Too short domain
        # This may or may not fail depending on validation; only the status code is constrained
        assert response.status_code in [200, 400, 422, 500], 'Unexpected status code'
392

393

394
if __name__ == '__main__':
    # Propagate pytest's exit status so running this file directly exits
    # non-zero when a test fails instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, '-v']))
396

397
# Product
#
# Resources
#
# Company