Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
ulixee
GitHub Repository: ulixee/secret-agent
Path: blob/main/plugins/default-browser-emulator/test/DomExtractor.js
1029 views
1
// copied from double-agent. do not modify manually!
2
3
/**
 * Walks an object graph (normally the global `self`) and records, for every
 * reachable property: descriptor flags, type, a JSON-friendly rendering of the
 * value, accessor sources and, for functions, the result of calling them with
 * no arguments.
 *
 * Must be called with `new`; the instance exposes `run` and `runAndSave`.
 *
 * @param {string} selfName - Label used in generated paths in place of `self`
 *   (e.g. `'window'`).
 * @param {object} [pageMeta] - Optional page details.
 * @param {string} [pageMeta.saveToUrl] - URL `runAndSave` POSTs the result to.
 * @param {string} [pageMeta.pageUrl] - Replaced by `<URL>` in string values.
 * @param {string} [pageMeta.pageHost] - Replaced by `<HOST>` in string values.
 * @param {string} [pageMeta.pageName] - Sent as the `Page-Name` header.
 */
function DomExtractor(selfName, pageMeta = {}) {
  const { saveToUrl, pageUrl, pageHost, pageName } = pageMeta;

  // Rewrites the `self.` prefix of configured paths to the caller's label.
  const withSelfName = paths => paths.map(x => x.replace(/self\./g, `${selfName}.`));

  // Property names that are never extracted.
  const skipProps = [
    'Fingerprint2',
    'pageQueue',
    'runDomExtractor',
    'pageLoaded',
    'axios',
    'justAFunction',
  ];

  // Property names whose value is replaced by 'SKIPPED VALUE'.
  const skipValues = ['innerHTML', 'outerHTML', 'innerText', 'outerText'];

  // Function names / full paths that are recorded but never invoked.
  const doNotInvoke = withSelfName([
    'print',
    'alert',
    'prompt',
    'confirm',
    'open',
    'close',
    'reload',
    'assert',
    'requestPermission',
    'screenshot',
    'pageLoaded',
    'delete',
    'clear',
    'read',

    'start',
    'stop',

    'write',
    'writeln',
    'replaceWith',
    'remove',

    'self.history.back',
    'self.history.forward',
    'self.history.go',
    'self.history.pushState',
    'self.history.replaceState',

    'getUserMedia',
    'requestFullscreen',
    'webkitRequestFullScreen',
    'webkitRequestFullscreen',
    'getDisplayMedia',
  ]);

  // Full paths that are never even read.
  const doNotAccess = withSelfName([
    'self.CSSAnimation.prototype.timeline', // crashes Safari
    'self.Animation.prototype.timeline', // crashes Safari
    'self.CSSTransition.prototype.timeline', // crashes Safari
  ]);

  const excludedInheritedKeys = ['name', 'length', 'constructor'];
  // value -> first path it was reached at; later sightings become 'REF: <path>'.
  const loadedObjects = new Map([[self, selfName]]);
  // prototype path -> extracted description of that prototype.
  const hierarchyNav = new Map();
  // Prototypes whose top-level type name is not a property of `self`.
  const detached = {};

  /**
   * Describes every own, symbol and enumerable key of `obj`.
   *
   * @param {*} obj - Object (or function) to describe.
   * @param {string} parentPath - Dotted path at which `obj` was reached.
   * @returns {Promise<object>} Description keyed by property name, plus
   *   `_$`-prefixed metadata (`_$protos`, `_$skipped`, `_$isFrozen`, ...).
   */
  async function extractPropsFromObject(obj, parentPath) {
    // A Set de-duplicates in O(1) per key and iterates in first-seen order,
    // matching the order own names -> own symbols -> for-in keys.
    const keys = new Set();
    try {
      for (const key of Object.getOwnPropertyNames(obj)) keys.add(key);
    } catch (err) {
      // best effort: not every value can be enumerated
    }
    try {
      for (const key of Object.getOwnPropertySymbols(obj)) keys.add(key);
    } catch (err) {
      // best effort
    }
    try {
      for (const key in obj) keys.add(key);
    } catch (err) {
      // best effort
    }

    const newObj = {
      _$protos: await loadProtoHierarchy(obj, parentPath),
    };

    if (
      parentPath.includes(`${selfName}.document.`) &&
      !parentPath.includes(`${selfName}.document.documentElement`) &&
      newObj._$protos.includes('HTMLElement.prototype')
    ) {
      newObj._$skipped = 'SKIPPED ELEMENT';
      return newObj;
    }

    if (parentPath.includes('new()') && parentPath.endsWith('.ownerElement')) {
      newObj._$skipped = 'SKIPPED ELEMENT';
      return newObj;
    }

    if (parentPath.split('.').length >= 8) {
      newObj._$skipped = 'SKIPPED MAX DEPTH';
      return newObj;
    }

    const isNewObject = parentPath.includes('.new()');
    if (isNewObject && newObj._$protos[0] === 'HTMLDocument.prototype') {
      newObj._$skipped = 'SKIPPED DOCUMENT';
      newObj._$type = 'HTMLDocument.prototype';
      return newObj;
    }
    if (Object.isFrozen(obj)) newObj._$isFrozen = true;
    if (Object.isSealed(obj)) newObj._$isSealed = true;
    if (!newObj._$protos.length) delete newObj._$protos;

    // Names found on the prototype chain of a constructed instance. They are
    // only used below to decide whether a key counts as an own property.
    const inheritedProps = [];
    if (isNewObject) {
      let proto = obj;
      while (proto) {
        proto = Object.getPrototypeOf(proto);
        if (
          !proto ||
          proto === Object ||
          proto === Object.prototype ||
          proto === Function ||
          proto === Function.prototype ||
          // typeof guards: these globals do not exist in every context, and an
          // unguarded reference would throw and abort the whole extraction.
          (typeof HTMLElement !== 'undefined' && proto === HTMLElement.prototype) ||
          (typeof EventTarget !== 'undefined' && proto === EventTarget.prototype)
        ) {
          break;
        }
        for (const key of Object.getOwnPropertyNames(proto)) {
          if (!keys.has(key) && !excludedInheritedKeys.includes(key)) inheritedProps.push(key);
        }
      }
    }
    // TODO: re-enable inherited properties once we are on stable ground with chrome flags
    // keys.push(...inheritedProps)

    for (const key of keys) {
      if (skipProps.includes(key)) continue;
      if (key === 'constructor') continue;

      const path = parentPath + '.' + String(key);
      if (path.endsWith('_GLOBAL_HOOK__')) continue;

      // Symbols are recorded under their string form, e.g. 'Symbol(foo)'.
      const prop = String(key);

      if (
        path.startsWith(`${selfName}.document`) &&
        typeof key === 'string' &&
        (key.startsWith('child') ||
          key.startsWith('first') ||
          key.startsWith('last') ||
          key.startsWith('next') ||
          key.startsWith('prev') ||
          key === 'textContent' ||
          key === 'text')
      ) {
        newObj[prop] = { _$type: 'dom', _$skipped: 'SKIPPED DOM' };
        continue;
      }

      if (path.startsWith(`${selfName}.document`) && path.split('.').length > 5) {
        newObj[prop] = { _$type: 'object', _$skipped: 'SKIPPED DEPTH' };
        continue;
      }

      if (key === 'style' && isNewObject) {
        newObj[prop] = { _$type: 'object', _$skipped: 'SKIPPED STYLE' };
        continue;
      }

      if (hierarchyNav.has(path)) {
        newObj[prop] = hierarchyNav.get(path);
        continue;
      }

      if (doNotAccess.includes(path)) continue;

      try {
        const isOwnProp =
          obj.hasOwnProperty && obj.hasOwnProperty(key) && !inheritedProps.includes(key);
        const value = await extractPropValue(obj, key, path, !isOwnProp);
        const isInheritedRef =
          value && typeof value === 'string' && value.startsWith('REF:') && !isOwnProp;
        // References reached through a non-own property are intentionally dropped.
        if (!isInheritedRef) newObj[prop] = value;
      } catch (err) {
        // String() also copes with `throw undefined` / `throw null`.
        newObj[prop] = String(err);
      }
    }

    if (obj.prototype) {
      // Looks like a constructor: try to build an instance and describe it too.
      let instance;
      let constructorException;
      try {
        instance = await new obj();
      } catch (err) {
        constructorException = String(err);
      }
      if (constructorException) {
        newObj['new()'] = { _$type: 'constructor', _$constructorException: constructorException };
      } else {
        try {
          newObj['new()'] = await extractPropsFromObject(instance, parentPath + '.new()');
          newObj['new()']._$type = 'constructor';
        } catch (err) {
          newObj['new()'] = String(err);
        }
      }
    }
    return newObj;
  }

  /**
   * Lists the names of the prototypes of `obj` and, as a side effect, extracts
   * each prototype once into `hierarchyNav` (and into `detached` when its path
   * does not contain `<selfName>.`).
   *
   * @param {*} obj - Value whose prototype chain is walked.
   * @param {string} parentPath - Unused here; kept for call-site symmetry.
   * @returns {Promise<string[]>} Prototype names, nearest first; empty for functions.
   */
  async function loadProtoHierarchy(obj, parentPath) {
    const hierarchy = [];
    if (typeof obj === 'function') return hierarchy;

    let proto = obj;
    while (proto) {
      proto = Object.getPrototypeOf(proto);
      if (!proto) break;

      try {
        const name = getObjectName(proto);
        if (name && !hierarchy.includes(name)) hierarchy.push(name);

        if (loadedObjects.has(proto)) continue;
        // Without a string name there is no path to file this prototype under.
        if (typeof name !== 'string') continue;

        const topType = name.split('.').shift();
        const path = topType in self ? `${selfName}.${name}` : 'detached.' + name;

        if (!hierarchyNav.has(path)) {
          // Placeholder first so that recursion back to this path stops here.
          hierarchyNav.set(path, {});
          const extracted = await extractPropsFromObject(proto, path);
          hierarchyNav.set(path, extracted);
          if (!path.includes(`${selfName}.`)) {
            detached[name] = extracted;
          }
        }
      } catch (err) {
        // best effort: a prototype that cannot be described is skipped
      }
    }
    return hierarchy;
  }

  /**
   * Reads `obj[key]` and describes it: the nested description (for objects and
   * functions) merged with the descriptor details.
   *
   * @param {*} obj - Owner of the property.
   * @param {string|symbol} key - Property key; falsy keys are ignored.
   * @param {string} path - Dotted path of the property.
   * @param {boolean} isInherited - Currently unused (see TODO below).
   * @returns {Promise<object|string|undefined>} The description, a
   *   `'REF: <path>'` string for values already seen, or undefined.
   */
  async function extractPropValue(obj, key, path, isInherited) {
    if (obj === null || obj === undefined || !key) {
      return undefined;
    }

    let accessException;
    const value = await new Promise(async (resolve, reject) => {
      // if you wait on a promise, it will hang!
      const timer = setTimeout(() => reject('Likely a Promise'), 600);
      try {
        const resolved = await obj[key];
        clearTimeout(timer);
        // No-op if the timeout already rejected: a promise settles only once.
        resolve(resolved);
      } catch (err) {
        clearTimeout(timer);
        reject(err);
      }
    }).catch(err => {
      accessException = err;
    });

    if (
      value &&
      path !== `${selfName}.document` &&
      (typeof value === 'function' || typeof value === 'object' || typeof value === 'symbol')
    ) {
      if (loadedObjects.has(value)) {
        // TODO: re-enable invoking re-used functions once we are on stable ground with chrome flags
        return 'REF: ' + loadedObjects.get(value);
      }
      // safari will end up in an infinite loop since each plugin is a new object as your traverse
      if (path.includes('.navigator') && path.endsWith('.enabledPlugin')) {
        return `REF: ${selfName}.navigator.plugins.X`;
      }
      loadedObjects.set(value, path);
    }

    let details = {};
    if (value && (typeof value === 'object' || typeof value === 'function')) {
      details = await extractPropsFromObject(value, path);
    }
    const descriptor = await getDescriptor(obj, key, accessException, path);

    if (!Object.keys(descriptor).length && !Object.keys(details).length) return undefined;
    const prop = Object.assign(details, descriptor);
    // A value that only points back at its own path carries no information.
    if (prop._$value === 'REF: ' + path) {
      prop._$value = undefined;
    }

    return prop;
  }

  /**
   * Builds the `_$`-prefixed descriptor record for `obj[key]`.
   *
   * @param {*} obj - Owner of the property.
   * @param {string|symbol} key - Property key.
   * @param {*} accessException - Whatever was thrown/rejected while reading
   *   the property in `extractPropValue`, if anything.
   * @param {string} path - Dotted path of the property.
   * @returns {Promise<object>} Descriptor record; `{}` for a non-own property
   *   whose read failed for a reason other than the timeout.
   */
  async function getDescriptor(obj, key, accessException, path) {
    const objDesc = Object.getOwnPropertyDescriptor(obj, key);

    if (objDesc) {
      let value;
      try {
        value = objDesc.value;
        if (!value && !accessException) {
          value = obj[key];
        }
      } catch (err) {
        // best effort: value stays undefined
      }

      let type = typeof value;
      value = getJsonUsableValue(value, key);
      // NOTE(review): `value` is already stringified here, so the
      // `value.prototype` check inside getFunctionDetails sees a string, unlike
      // the non-own branch below. Left as-is to keep recorded output stable.
      const functionDetails = await getFunctionDetails(value, obj, key, type, path);
      type = functionDetails.type;

      const flags = [];
      if (objDesc.configurable) flags.push('c');
      if (objDesc.enumerable) flags.push('e');
      if (objDesc.writable) flags.push('w');

      return {
        _$type: type,
        _$function: functionDetails.func,
        _$invocation: functionDetails.invocation,
        _$flags: flags.join(''),
        _$accessException: accessException ? accessException.toString() : undefined,
        _$value: value,
        _$get: objDesc.get ? objDesc.get.toString() : undefined,
        _$set: objDesc.set ? objDesc.set.toString() : undefined,
        _$getToStringToString: objDesc.get ? objDesc.get.toString.toString() : undefined,
        _$setToStringToString: objDesc.set ? objDesc.set.toString.toString() : undefined,
      };
    }

    const plainObject = {};

    if (accessException && String(accessException).includes('Likely a Promise')) {
      // NOTE(review): this is overwritten by the `_$value` assignment below.
      plainObject._$value = 'Likely a Promise';
    } else if (accessException) {
      return plainObject;
    }

    let value;
    try {
      value = obj[key];
    } catch (err) {
      // best effort: value stays undefined
    }

    let type = typeof value;
    if (value && Array.isArray(value)) type = 'array';

    const functionDetails = await getFunctionDetails(value, obj, key, type, path);
    plainObject._$type = functionDetails.type;
    plainObject._$value = getJsonUsableValue(value, key);
    plainObject._$function = functionDetails.func;
    plainObject._$invocation = functionDetails.invocation;

    return plainObject;
  }

  /**
   * For function-typed properties, records the source text and the outcome of
   * calling `obj[key]()` with no arguments (unless listed in `doNotInvoke`).
   *
   * @param {*} value - The property value (or its string form, see getDescriptor).
   * @param {*} obj - Owner of the property; the call is made as `obj[key]()`.
   * @param {string|symbol} key - Property key.
   * @param {string} type - `typeof` result (or `'array'`); `'undefined'` is
   *   mapped to `undefined`.
   * @param {string} path - Dotted path, also checked against `doNotInvoke`.
   * @returns {Promise<{type: (string|undefined), func: (string|undefined), invocation: *}>}
   */
  async function getFunctionDetails(value, obj, key, type, path) {
    let func;
    let invocation;
    if (type === 'undefined') type = undefined;
    if (type === 'function') {
      try {
        func = String(value);
      } catch (err) {
        func = err.toString();
      }
      try {
        if (!doNotInvoke.includes(key) && !doNotInvoke.includes(path) && !value.prototype) {
          invocation = await new Promise(async (resolve, reject) => {
            // Give up on results that never settle.
            const timer = setTimeout(() => reject('Promise-like'), 650);
            try {
              let answer = obj[key]();
              if (answer && answer.on) {
                answer.on('error', err => {
                  console.log('Error', err, obj, key);
                });
              }
              answer = await answer;
              clearTimeout(timer);
              resolve(answer);
            } catch (err) {
              clearTimeout(timer);
              reject(err);
            }
          });
        }
      } catch (err) {
        invocation = err ? err.toString() : err;
      }
    }

    return {
      type,
      func,
      invocation: func || invocation !== undefined ? getJsonUsableValue(invocation) : undefined,
    };
  }

  /**
   * Converts a value into something that survives `JSON.stringify` and is
   * stable between runs: objects become `{k: v,...}` / `[v,...]` strings or
   * `'REF: <path>'`, functions their source, promises `'Promise'`, and the
   * page URL/host inside strings become `<URL>` / `<HOST>`.
   *
   * @param {*} value - Value to convert.
   * @param {string|symbol} [key] - Property key; keys in `skipValues` yield
   *   `'SKIPPED VALUE'`.
   * @returns {*} The converted value.
   */
  function getJsonUsableValue(value, key) {
    if (key && skipValues.includes(key)) {
      return 'SKIPPED VALUE';
    }

    try {
      if (value && typeof value === 'symbol') {
        value = String(value);
      } else if (value && (value instanceof Promise || typeof value.then === 'function')) {
        value = 'Promise';
      } else if (value && typeof value === 'object') {
        const values = [];

        if (loadedObjects.has(value)) {
          return 'REF: ' + loadedObjects.get(value);
        }

        if (value.join !== undefined) {
          // is array
          for (const prop in value) {
            values.push(getJsonUsableValue(value[prop]));
          }
          return `[${values.join(',')}]`;
        }

        for (const prop in value) {
          if (value.hasOwnProperty(prop)) {
            values.push(prop + ': ' + getJsonUsableValue(value[prop]));
          }
        }
        return `{${values.map(x => x.toString()).join(',')}}`;
      } else if (typeof value === 'function') {
        return value.toString();
      } else if (value && typeof value === 'string') {
        // split/join replaces every occurrence in one pass, so it cannot loop
        // forever the way "replace while it still includes" could.
        if (pageUrl) value = value.split(String(pageUrl)).join('<URL>');
        if (pageHost) value = value.split(String(pageHost)).join('<HOST>');

        // Case-insensitive because the placeholder inserted above is upper-case;
        // the old lower-case-only pattern never matched it.
        value = value.replace(/(<url>):\d+:\d+/gi, '$1:<lines>');
      } else {
        return value;
      }
    } catch (err) {
      value = err.toString();
    }
    return value;
  }

  /**
   * Derives a display name for a prototype or constructor.
   *
   * @param {*} obj - Usually an object taken from a prototype chain.
   * @returns {string|undefined} e.g. `'Object.prototype'`; undefined when no
   *   name can be determined or reading one throws.
   */
  function getObjectName(obj) {
    if (obj === Object) return 'Object';
    if (obj === Object.prototype) return 'Object.prototype';
    try {
      if (typeof obj === 'symbol') {
        return String(obj);
      }
    } catch (err) {
      // fall through to the generic lookup
    }
    try {
      let name = obj[Symbol.toStringTag];
      if (!name) {
        try {
          name = obj.name;
        } catch (err) {
          // some getters throw on prototypes; keep going without a name
        }
      }

      if (obj.constructor) {
        const constructorName = obj.constructor.name;
        if (
          constructorName &&
          constructorName !== Function.name &&
          constructorName !== Object.name
        ) {
          name = constructorName;
        }
      }

      if ('prototype' in obj) {
        name = obj.prototype[Symbol.toStringTag] || obj.prototype.name || name;
        if (name) return name;
      }

      if (typeof obj === 'function') {
        if (name && name !== Function.name) return name;
        return obj.constructor.name;
      }

      if (!name) return undefined;

      return name + '.prototype';
    } catch (err) {
      return undefined;
    }
  }

  /**
   * Extracts `self` and POSTs `{ [selfName]: props, detached }` as JSON to
   * `saveToUrl`, with `pageName` in the `Page-Name` header.
   *
   * @returns {Promise<void>}
   */
  async function runAndSave() {
    self.addEventListener('unhandledrejection', function (promiseRejectionEvent) {
      console.log(promiseRejectionEvent);
    });

    const props = await extractPropsFromObject(self, selfName);

    await fetch(saveToUrl, {
      method: 'POST',
      body: JSON.stringify({
        [selfName]: props,
        detached,
      }),
      headers: {
        'Content-Type': 'application/json',
        'Page-Name': pageName,
      },
    });
  }

  /**
   * Extracts `obj` and returns the result as a JSON string.
   *
   * @param {*} obj - Root object to describe.
   * @param {string} parentPath - Path label for the root.
   * @param {string[]} [extractKeys] - When non-empty, only these keys are
   *   returned (under `window`) together with all key names (`windowKeys`).
   * @returns {Promise<string>} JSON of `{ window, detached }` or
   *   `{ window, windowKeys }`.
   */
  async function run(obj, parentPath, extractKeys = []) {
    const result = await extractPropsFromObject(obj, parentPath);

    if (extractKeys && extractKeys.length) {
      const extracted = {};
      for (const key of extractKeys) {
        extracted[key] = result[key];
      }
      return JSON.stringify({ window: extracted, windowKeys: Object.keys(result) });
    }
    // NOTE: need to stringify to make sure this transfers same as it will from a browser window
    return JSON.stringify({ window: result, detached });
  }

  this.run = run;
  this.runAndSave = runAndSave;

  return this;
}
557
558
// Export for CommonJS consumers. Both assignments are guarded so that
// evaluating this file where `module` / `exports` do not exist (an ES module,
// or a script injected into a page) does not throw a ReferenceError.
if (typeof module !== 'undefined') module.exports = DomExtractor;
if (typeof exports !== 'undefined') exports.default = DomExtractor;
560
561