// Source: GitHub repository sundowndev/phoneinfoga
// Path: blob/master/lib/remote/googlecse_scanner.go

package remote
2
3
import (
4
"context"
5
"errors"
6
"fmt"
7
"github.com/sundowndev/dorkgen"
8
"github.com/sundowndev/dorkgen/googlesearch"
9
"github.com/sundowndev/phoneinfoga/v2/lib/number"
10
"google.golang.org/api/customsearch/v1"
11
"google.golang.org/api/googleapi"
12
"google.golang.org/api/option"
13
"net/http"
14
"os"
15
"strconv"
16
)
17
18
// GoogleCSE is the identifier of the Google Custom Search Engine scanner.
const GoogleCSE = "googlecse"
19
20
// googleCSEScanner is the scanner backed by the Google Custom Search JSON API.
type googleCSEScanner struct {
	// MaxResults bounds the result offset that a single query is paged up to.
	MaxResults int64
	// httpClient is handed to the customsearch service when it is created.
	httpClient *http.Client
}
24
25
// ResultItem is a single search hit reported by the Google CSE scanner.
type ResultItem struct {
	// Title is the title of the search result.
	Title string `json:"title,omitempty" console:"Title,omitempty"`
	// URL is the link of the search result.
	URL string `json:"url,omitempty" console:"URL,omitempty"`
}
29
30
type GoogleCSEScannerResponse struct {
31
Homepage string `json:"homepage,omitempty" console:"Homepage,omitempty"`
32
ResultCount int `json:"result_count" console:"Results shown"`
33
TotalResultCount int `json:"total_result_count" console:"Total number of results"`
34
TotalRequestCount int `json:"total_request_count" console:"Requests made"`
35
Items []ResultItem `json:"items,omitempty" console:"Items,omitempty"`
36
}
37
38
func NewGoogleCSEScanner(HTTPclient *http.Client) Scanner {
39
// CSE limits you to 10 pages of results with max 10 results per page
40
// We only fetch the first page of results by default for each request
41
maxResults := 10
42
if v := os.Getenv("GOOGLECSE_MAX_RESULTS"); v != "" {
43
val, err := strconv.Atoi(v)
44
if err == nil {
45
if val > 100 {
46
val = 100
47
}
48
maxResults = val
49
}
50
}
51
52
return &googleCSEScanner{
53
MaxResults: int64(maxResults),
54
httpClient: HTTPclient,
55
}
56
}
57
58
// Name returns the scanner identifier, GoogleCSE.
func (s *googleCSEScanner) Name() string {
	return GoogleCSE
}
61
62
// Description returns a one-sentence, human-readable summary of the scanner.
func (s *googleCSEScanner) Description() string {
	return "Googlecse searches for footprints of a given phone number on the web using Google Custom Search Engine."
}
65
66
// DryRun checks that the scanner is configured before it is run.
//
// It returns an error when GOOGLECSE_CX or GOOGLE_API_KEY resolves to an empty
// string through opts, and nil otherwise. The phone number is not inspected.
func (s *googleCSEScanner) DryRun(_ number.Number, opts ScannerOptions) error {
	if opts.GetStringEnv("GOOGLECSE_CX") == "" || opts.GetStringEnv("GOOGLE_API_KEY") == "" {
		return errors.New("search engine ID and/or API key is not defined")
	}
	return nil
}
72
73
func (s *googleCSEScanner) Run(n number.Number, opts ScannerOptions) (interface{}, error) {
74
var allItems []*customsearch.Result
75
var dorks []*GoogleSearchDork
76
var totalResultCount int
77
var totalRequestCount int
78
var cx = opts.GetStringEnv("GOOGLECSE_CX")
79
var apikey = opts.GetStringEnv("GOOGLE_API_KEY")
80
81
dorks = append(dorks, s.generateDorkQueries(n)...)
82
83
customsearchService, err := customsearch.NewService(
84
context.Background(),
85
option.WithAPIKey(apikey),
86
option.WithHTTPClient(s.httpClient),
87
)
88
if err != nil {
89
return nil, err
90
}
91
92
for _, req := range dorks {
93
n, items, err := s.search(customsearchService, req.Dork, cx)
94
if err != nil {
95
if s.isRateLimit(err) {
96
return nil, errors.New("rate limit exceeded, see https://developers.google.com/custom-search/v1/overview#pricing")
97
}
98
return nil, err
99
}
100
allItems = append(allItems, items...)
101
totalResultCount += n
102
totalRequestCount++
103
}
104
105
var data GoogleCSEScannerResponse
106
for _, item := range allItems {
107
data.Items = append(data.Items, ResultItem{
108
Title: item.Title,
109
URL: item.Link,
110
})
111
}
112
data.Homepage = fmt.Sprintf("https://cse.google.com/cse?cx=%s", cx)
113
data.ResultCount = len(allItems)
114
data.TotalResultCount = totalResultCount
115
data.TotalRequestCount = totalRequestCount
116
117
return data, nil
118
}
119
120
// search runs query q against the custom search engine cx and pages through
// the results.
//
// Paging continues until s.MaxResults items have been requested, the total
// reported by the API has been reached, or a page comes back empty. It returns
// the total result count reported by the last response, the collected results
// (at most s.MaxResults of them when that cap is positive), and the first
// error encountered. On error the other two return values are zero.
func (s *googleCSEScanner) search(service *customsearch.Service, q string, cx string) (int, []*customsearch.Result, error) {
	var results []*customsearch.Result
	var totalResultCount int

	offset := int64(0)
	for offset < s.MaxResults {
		call := service.Cse.List()
		call.Cx(cx)
		call.Q(q)
		// NOTE(review): the CSE API documentation describes `start` as a
		// 1-based index; confirm that 0 for the first page and multiples of
		// the page size afterwards do not repeat or skip a result.
		call.Start(offset)
		page, err := call.Do()
		if err != nil {
			return 0, nil, err
		}
		// Guard against a response without search information instead of
		// dereferencing a nil pointer.
		if page.SearchInformation == nil {
			return 0, nil, errors.New("google custom search response has no search information")
		}
		totalResultCount, err = strconv.Atoi(page.SearchInformation.TotalResults)
		if err != nil {
			return 0, nil, err
		}

		results = append(results, page.Items...)
		offset += int64(len(page.Items))

		// An empty page means nothing more can be fetched; without this check
		// the offset would never advance and the loop would keep issuing
		// requests. Reaching the reported total means everything was fetched.
		if len(page.Items) == 0 || offset >= int64(totalResultCount) {
			break
		}
	}

	// The last page may overshoot the cap (pages are not sized to the
	// remainder), so trim to honour MaxResults.
	if limit := s.MaxResults; limit > 0 && int64(len(results)) > limit {
		results = results[:limit]
	}

	return totalResultCount, results, nil
}
147
148
// isRateLimit reports whether theError is, or wraps, a *googleapi.Error with
// HTTP status 429 (Too Many Requests). A nil error is never a rate limit.
func (s *googleCSEScanner) isRateLimit(theError error) bool {
	var apiErr *googleapi.Error
	// errors.As returns false for a nil error and also finds a wrapped
	// *googleapi.Error. The value it extracts must be used here: a direct type
	// assertion on theError panics when the API error is wrapped.
	if !errors.As(theError, &apiErr) || apiErr == nil {
		return false
	}
	return apiErr.Code == http.StatusTooManyRequests
}
161
162
func (s *googleCSEScanner) generateDorkQueries(number number.Number) (results []*GoogleSearchDork) {
163
var dorks = []*googlesearch.GoogleSearch{
164
dorkgen.NewGoogleSearch().
165
InText(number.International).
166
Or().
167
InText(number.E164).
168
Or().
169
InText(number.RawLocal).
170
Or().
171
InText(number.Local),
172
dorkgen.NewGoogleSearch().
173
Group(dorkgen.NewGoogleSearch().
174
Ext("doc").
175
Or().
176
Ext("docx").
177
Or().
178
Ext("odt").
179
Or().
180
Ext("pdf").
181
Or().
182
Ext("rtf").
183
Or().
184
Ext("sxw").
185
Or().
186
Ext("psw").
187
Or().
188
Ext("ppt").
189
Or().
190
Ext("pptx").
191
Or().
192
Ext("pps").
193
Or().
194
Ext("csv").
195
Or().
196
Ext("txt").
197
Or().
198
Ext("xls")).
199
InText(number.International).
200
Or().
201
InText(number.E164).
202
Or().
203
InText(number.RawLocal).
204
Or().
205
InText(number.Local),
206
}
207
208
for _, dork := range dorks {
209
results = append(results, &GoogleSearchDork{
210
Number: number.E164,
211
Dork: dork.String(),
212
URL: dork.URL(),
213
})
214
}
215
216
return results
217
}
218
219