Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
chinoogawa
GitHub Repository: chinoogawa/fbht
Path: blob/master/MyParser.py
206 views
1
from HTMLParser import HTMLParser
2
import simplejson as json
3
import database
4
import re
5
6
class MyHTMLParser(HTMLParser):
    """HTML parser that records every text chunk it is given.

    The chunks passed to ``handle_data`` are appended, in order, to the
    ``dataArray`` list. ``dataArray`` is created by the constructor, so the
    parser can be fed right away; ``array()`` remains available to reset the
    list (existing callers invoke it right after construction).
    """

    def __init__(self, *args, **kwargs):
        # Call the base initialiser directly rather than through super():
        # the Python 2 HTMLParser is an old-style class.
        HTMLParser.__init__(self, *args, **kwargs)
        self.dataArray = []

    def array(self):
        """Reset ``dataArray`` to a new empty list."""
        self.dataArray = []

    def handle_data(self, data):
        """Append one text chunk reported by the parser to ``dataArray``."""
        self.dataArray.append(data)
11
12
def htmlFormat(json_dump):
    """Render one profile record as a single HTML paragraph.

    Args:
        json_dump: Mapping that provides the keys 'username', 'name',
            'link', 'gender' and 'locale'. A missing key raises KeyError.

    Returns:
        A ``<p>`` element containing a picture tag built from the username,
        followed by the name, a link to the profile, the gender and the
        locale. The values are interpolated as they are, without escaping.
    """
    template = (
        '<p><img src="https://graph.facebook.com/%s/picture" >'
        ' Name: %s - Link: <a href="%s">facebook profile</a>'
        ' - Gender: %s -Locale: %s</p>'
    )
    field_order = ('username', 'name', 'link', 'gender', 'locale')
    values = tuple(json_dump[field] for field in field_order)
    return template % values
15
16
def parceros(json_dump):
    """Extract test-user names and ids from a response and store them.

    The HTML fragment at ``json_dump['domops'][0][3]['__html']`` is fed to
    ``MyHTMLParser``. Known heading and action labels are dropped from the
    collected text chunks, and the remaining chunks are read in complete
    groups of three: the first chunk of a group is taken as a name, the
    second as a user id, and the third is skipped.

    Args:
        json_dump: Decoded response, indexable as shown above.

    Returns:
        1 after passing the ids and names to
        ``database.insertTestUsersDev``; -1 when no name/id pair was found
        or when the first remaining chunk is
        'This app has no Test Users.'.
    """
    # Text chunks that are table headings / action links, not user data.
    ignored_labels = (
        'Test Users', 'Delete', 'Add', 'Name', 'User ID', 'Email', 'Edit',
        'tfbnw.net',
    )

    parser = MyHTMLParser()
    parser.array()
    try:
        to_parse = str(json_dump['domops'][0][3]['__html'])
        parser.feed(to_parse)
    except Exception:
        # Best effort: a malformed response is reported and whatever was
        # parsed before the failure is still processed below.
        print('Error in json dump or parser.feed')

    cells = [chunk for chunk in parser.dataArray if chunk not in ignored_labels]
    if not cells:
        # Nothing left to read; previously this case raised IndexError.
        return -1

    # Only complete groups of three chunks are consumed.
    group_starts = range(0, len(cells) - 2, 3)
    names = [cells[i] for i in group_starts]
    userIds = [cells[i + 1] for i in group_starts]

    if userIds and names and cells[0] != 'This app has no Test Users.':
        database.insertTestUsersDev(userIds, names)
        return 1
    return -1
49
50
51
def parseData(dataRaw):
    """Extract test-user credentials from raw responses and store them.

    Each non-empty item of ``dataRaw`` has its leading ``for (;;);`` guard
    removed and is decoded as JSON. The HTML fragment at
    ``['jsmods']['markup'][0][1]['__html']`` is fed to one shared
    ``MyHTMLParser``. The collected text chunks are then scanned for the
    labels 'Name', 'User ID', 'Login email' and 'Login password'; the chunk
    following a label is taken as its value. For 'Login email' the two
    following chunks are joined with '@'.

    Args:
        dataRaw: Iterable of response strings; empty strings are skipped.

    Returns:
        1 after passing ids, names, emails and passwords to
        ``database.insertTestUsers``; -1 when any of the four lists is empty.

    Raises:
        Whatever ``json.loads`` raises for an item that is not valid JSON;
        only failures while locating or parsing the HTML are tolerated.
    """
    parser = MyHTMLParser()
    parser.array()

    for data in dataRaw:
        if data == "":
            continue
        # Remove the guard as a prefix. str.strip("for (;;);") treats its
        # argument as a set of characters to trim from both ends, which is
        # not the same thing as removing this prefix.
        text = re.sub(r'\A\s*for\s*\(;;\);', '', data)
        json_dump = json.loads(text)
        try:
            to_parse = str(json_dump['jsmods']['markup'][0][1]['__html'])
            parser.feed(to_parse)
        except Exception:
            # Best effort: report the bad item and carry on with the rest.
            print('Error in json dump or parser.feed')

    names = []
    userIds = []
    emails = []
    passwords = []

    cells = parser.dataArray
    total = len(cells)
    # The last chunk can never be a label with a value after it, so it is
    # excluded from the scan; this keeps cells[i + 1] in range.
    for i, label in enumerate(cells[:-1]):
        value = cells[i + 1]
        if label == 'Name':
            names.append(value)
        elif label == 'User ID':
            userIds.append(value)
        elif label == 'Login email':
            # The address arrives as two chunks, split where the '@' goes.
            if i + 2 < total:
                emails.append(value + '@' + cells[i + 2])
        elif label == 'Login password':
            passwords.append(value)

    if userIds and names and emails and passwords:
        database.insertTestUsers(userIds, names, emails, passwords)
        return 1
    return -1
98
99
def parseOnline(data):
    """Collect the text found between each 'fbid=' and the next '&'.

    Args:
        data: String to scan.

    Returns:
        List of the extracted substrings (each passed through ``str``), in
        order of appearance. An 'fbid=' with no later '&' contributes
        nothing.
    """
    marker = re.compile("fbid=")
    terminator = re.compile("&")

    buddies = []
    cursor = 0
    while True:
        opening = marker.search(data, cursor)
        if opening is None:
            return buddies
        # The value starts right after the marker.
        cursor = opening.end()
        closing = terminator.search(data, cursor)
        if closing is None:
            # Unterminated value: keep scanning from just past the marker.
            continue
        buddies.append(str(data[cursor:closing.start()]))
        cursor = closing.end()
115
116
def parseFriends(data):
    """Extract friend identifiers and the next-page reference from a page.

    Args:
        data: Page markup as a string.

    Returns:
        A ``(friends, next)`` tuple. ``friends`` lists, in order, the path
        segment of every ``href="/<segment>?fref=fr_tab`` occurrence, where
        the segment consists of letters, digits and dots. ``next`` is the
        first match of the ``...?v=friends&amp;mutual&amp;startindex=N``
        pattern or, failing that, of the ``.../friends?key=N...`` pattern,
        with ``&amp;`` replaced by ``&``; it is -1 when neither is present.
    """
    # A single character class accepts exactly the same segments as the
    # former "([a-zA-Z]*[0-9]*[\.]*)+" group, but without nested
    # quantifiers, so a long href that lacks "?fref=fr_tab" can no longer
    # trigger exponential backtracking.
    friends = re.findall(r'href="/([a-zA-Z0-9.]*)\?fref=fr_tab', data)

    next_page_patterns = (
        r"[a-zA-Z]+\?v=friends&amp;mutual&amp;startindex=[0-9]+",
        r"[a-zA-Z]+/friends\?([a-zA-Z]+=[0-9]+)+(&amp;)*([a-zA-Z]+=[0-9]+)*",
    )
    next_page = -1
    # Patterns are tried in priority order; the first one that matches wins.
    for pattern in next_page_patterns:
        found = re.search(pattern, data)
        if found is not None:
            next_page = found.group().replace("&amp;", "&")
            break

    return friends, next_page
144
145
def parsePending():
    """Collect the distinct 15-digit numbers found in 'respuesta.html'.

    The file is read line by line from the current working directory. On
    each line, only the text from '/ajax/reqs.php' to the end of the line is
    examined, and only if at least one character follows that marker. Every
    non-overlapping run of 15 digits in that text is collected.

    Returns:
        List of the 15-digit strings in order of first appearance, without
        duplicates.

    Raises:
        IOError/OSError: if 'respuesta.html' cannot be opened.
    """
    # Same pattern as before; note the '.' before 'php' is an unescaped
    # wildcard.
    request_re = re.compile('/ajax/reqs.php(.+)')
    id_re = re.compile('[0-9]{15}')

    pending = []
    seen = set()
    # The context manager closes the file even if reading fails.
    with open("respuesta.html", "r") as response:
        for linea in response:
            found = request_re.search(linea)
            if found is None:
                continue
            for candidate in id_re.findall(found.group()):
                if candidate not in seen:
                    seen.add(candidate)
                    pending.append(candidate)
    return pending
169