# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Author: Mauro Soria

from unittest import TestCase

from lib.core.settings import DUMMY_URL
from lib.utils.crawl import Crawler


class TestCrawl(TestCase):
    """Checks for the three ``Crawler`` extraction entry points.

    Every test passes ``DUMMY_URL`` as both of the first two arguments and
    compares the returned collection against a set of expected paths.
    """

    def test_text_crawl(self):
        """A URL embedded in plain text yields the path that follows DUMMY_URL."""
        html_doc = f'Link: {DUMMY_URL}foobar'
        self.assertEqual(Crawler.text_crawl(DUMMY_URL, DUMMY_URL, html_doc), {"foobar"})

    def test_html_crawl(self):
        """Anchor ``href`` and script ``src`` values are expected in the result.

        The ``<img src="/bar.png">`` entry is expected to be absent.
        NOTE(review): presumably image/media sources are filtered out by
        ``Crawler.html_crawl`` -- confirm against its implementation.
        """
        html_doc = f'<a href="{DUMMY_URL}foo">link</a><script src="/bar.js"><img src="/bar.png">'
        self.assertEqual(Crawler.html_crawl(DUMMY_URL, DUMMY_URL, html_doc), {"foo", "bar.js"})

    def test_robots_crawl(self):
        """Paths from both ``Disallow`` and ``Allow`` rules are expected.

        The two rules sit under different ``User-agent`` groups, and both
        paths are expected back without their leading slash.
        """
        # The literal's content must stay at column 0: it is the robots.txt
        # payload handed to the crawler, not source-code indentation.
        robots_txt = """
User-agent: Googlebot
Disallow: /path1

User-agent: *
Allow: /path2"""
        self.assertEqual(Crawler.robots_crawl(DUMMY_URL, DUMMY_URL, robots_txt), {"path1", "path2"})