Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
afnan47
GitHub Repository: afnan47/sem7
Path: blob/main/IR/Assignment 4/four_webCrawler.py
418 views
1
import requests
2
import lxml
3
from bs4 import BeautifulSoup
4
from xlwt import *
5
# Crawl the movie listing page at `url`, visit every linked movie page, and
# record each movie's number, URL, URL slug and synopsis text both on stdout
# and in the spreadsheet 'movies_top100.xls' (sheet 'data', one row per movie).

# Seconds to wait for any single HTTP response; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

workbook = Workbook(encoding='utf-8')
table = workbook.add_sheet('data')
# Row 0 is the header row.
table.write(0, 0, 'Number')
table.write(0, 1, 'movie_url')
table.write(0, 2, 'movie_name')
table.write(0, 3, 'movie_introduction')
line = 1  # next free spreadsheet row

url = "https://www.rottentomatoes.com/top/bestofrt/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
}
f = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
movies_lst = []  # every movie URL discovered on the listing page
soup = BeautifulSoup(f.content, 'lxml')
movies = soup.find_all('a', {'class': 'js-tile-link'})
num = 0  # running count of anchors that carried a usable link

try:
    for anchor in movies:
        href = anchor.get('href')
        if href is None:
            # An anchor without an href cannot be followed; skip it instead
            # of relying on a TypeError from the string concatenation below.
            continue

        urls = 'https://www.rottentomatoes.com' + href
        movies_lst.append(urls)
        num += 1
        movie_url = urls
        movie_name = urls.split('/')[-1]  # last path segment of the URL

        try:
            movie_f = requests.get(
                movie_url, headers=headers, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as error:
            # One unreachable movie page should not abort the whole crawl.
            print(num, urls, '\n', 'Skipped, request failed:', error)
            continue

        movie_soup = BeautifulSoup(movie_f.content, 'lxml')
        movie_content = movie_soup.find('div', {
            'class': 'movie_synopsis clamp clamp-6 js-clamp'
        })
        print(num, urls, '\n', 'Movie:' + movie_name)
        if movie_content is None:
            # The page has no synopsis element with the expected classes.
            print('Skipped, synopsis not found')
            continue

        # get_text() also works when the synopsis contains nested tags,
        # where `.string` would be None.
        movie_info = movie_content.get_text().strip()
        print('Movie info:' + movie_info)

        table.write(line, 0, num)
        table.write(line, 1, urls)
        table.write(line, 2, movie_name)
        table.write(line, 3, movie_info)
        line += 1
finally:
    # Write the file once, even if the crawl is interrupted part-way, so the
    # rows collected so far are kept. As before, no file is produced when no
    # movie row was written.
    if line > 1:
        workbook.save('movies_top100.xls')
44
45
46
#SAMPLE OUTPUT
47
48
# 1 https://www.rottentomatoes.com/m/hold_me_tight_2021
49
# Movie:hold_me_tight_2021
50
# Movie info:In Hold Me Tight, Vicky Krieps (Phantom Thread, Bergman Island) gives another riveting
51
# performance as Clarisse, a woman on the run from her family for reasons that aren't immediately clear.
52
# Widely renowned as one of France's great contemporary actors but less well-known in North America for his
53
# equally impressive work behind the camera, Mathieu Amalric's sixth feature as director is his most
54
# ambitious to date. This virtuosic, daringly fluid portrait of a woman in crisis alternates between
55
# Clarisse's adventures on the road and scenes of her abandoned husband Marc (Arieh Worthalter) as he
56
# struggles to take care of their children at home. Amalric's film keeps viewers uncertain as to the
57
# reality of what they're seeing until the final moments of this moving, unpredictable, and richly rewarding family drama.
58
# 2 https://www.rottentomatoes.com/m/my_fathers_dragon_2022
59
# Movie:my_fathers_dragon_2022
60
# Movie info:Struggling to cope after a move to the city with his mother, Elmer runs away in search
61
# of Wild Island and a young dragon who waits to be rescued. Elmer's adventures introduce him to
62
# ferocious beasts, a mysterious island and the friendship of a lifetime.
63
# 3 https://www.rottentomatoes.com/m/fire_of_love
64
# Movie:fire_of_love
65
# Movie info:Fire of Love tells the story of two French lovers, Katia and Maurice Krafft, who died
66
# in a volcanic explosion doing the very thing that brought them together: unraveling the mysteries
67
# of our planet, while simultaneously capturing the most explosive volcano imagery ever recorded.
68
# Along the way, they changed our understanding of the natural world, and saved tens of thousands of lives.
69
# Previously unseen hours of pristine 16-millimeter film and thousands of photographs reveal the birth of
70
# modern volcanology through an unlikely lens -- the love of its two pioneers.
71