Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
maurosoria
GitHub Repository: maurosoria/dirsearch
Path: blob/master/lib/utils/common.py
896 views
1
# -*- coding: utf-8 -*-
2
# This program is free software; you can redistribute it and/or modify
3
# it under the terms of the GNU General Public License as published by
4
# the Free Software Foundation; either version 2 of the License, or
5
# (at your option) any later version.
6
#
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
11
#
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
15
# MA 02110-1301, USA.
16
#
17
# Author: Mauro Soria
18
19
import os
20
import sys
21
import re
22
23
from functools import reduce
24
from json import dumps
25
from html import escape
26
from ipaddress import IPv4Network, IPv6Network
27
from urllib.parse import quote, unquote, urljoin
28
29
from lib.core.settings import (
30
INVALID_CHARS_FOR_WINDOWS_FILENAME,
31
INVALID_FILENAME_CHAR_REPLACEMENT,
32
IS_WINDOWS,
33
URL_SAFE_CHARS,
34
SCRIPT_PATH,
35
TEXT_CHARS,
36
)
37
from lib.utils.file import FileUtils
38
39
40
def get_config_file():
    """Return the path of the configuration file to use.

    A non-empty ``DIRSEARCH_CONFIG`` environment variable takes precedence;
    otherwise the path is built from ``SCRIPT_PATH`` and "config.ini" with
    ``FileUtils.build_path``.
    """
    override = os.environ.get("DIRSEARCH_CONFIG")
    if override:
        return override

    return FileUtils.build_path(SCRIPT_PATH, "config.ini")
42
43
44
def safequote(string_: str) -> str:
    """Percent-encode *string_*, leaving the URL_SAFE_CHARS characters untouched."""
    encoded = quote(string_, safe=URL_SAFE_CHARS)
    return encoded
46
47
48
def _strip_and_uniquify_callback(array, item):
    """Reducer step: add the stripped *item* to *array* if it is new and non-empty.

    When the item is added a new list is returned; *array* itself is never
    mutated. Otherwise *array* is returned unchanged.
    """
    stripped = item.strip()
    if stripped and stripped not in array:
        return array + [stripped]

    return array
54
55
56
def strip_and_uniquify(array, type_=list):
    """Strip values and remove empties and duplicates, respecting the order.

    Args:
        array: iterable of items exposing ``.strip()`` (e.g. strings); the
            stripped items must be hashable.
        type_: callable applied to the resulting list to build the return
            value (``list`` by default).

    Returns:
        ``type_`` applied to the list of unique, stripped, non-empty items in
        first-seen order.
    """
    seen = set()
    unique = []

    for item in array:
        item = item.strip()
        # The set keeps the duplicate check O(1); the list preserves the order
        if item and item not in seen:
            seen.add(item)
            unique.append(item)

    return type_(unique)
59
60
61
def lstrip_once(string, pattern):
    """Remove a single leading occurrence of *pattern* from *string*, if any."""
    return string[len(pattern):] if string.startswith(pattern) else string
66
67
68
def rstrip_once(string, pattern):
    """Remove a single trailing occurrence of *pattern* from *string*, if any.

    An empty *pattern* leaves *string* unchanged.
    """
    # Guard against an empty pattern: string[:-0] is string[:0], which would
    # wipe out the whole string instead of removing nothing
    if pattern and string.endswith(pattern):
        return string[:-len(pattern)]

    return string
73
74
75
def get_valid_filename(string):
    """Replace the characters Windows denies in file names.

    Every entry of INVALID_CHARS_FOR_WINDOWS_FILENAME found in *string* is
    replaced, one entry after another, by INVALID_FILENAME_CHAR_REPLACEMENT.
    """
    return reduce(
        lambda name, char: name.replace(char, INVALID_FILENAME_CHAR_REPLACEMENT),
        INVALID_CHARS_FOR_WINDOWS_FILENAME,
        string,
    )
81
82
83
def get_readable_size(num):
    """Format a byte count with the largest fitting binary unit (B up to YB).

    The value is divided by 1024 and rounded at every step, so the result is
    an approximation. Values too large for "ZB" are all expressed in "YB".

    Args:
        num: size in bytes (may be negative).

    Returns:
        The scaled number immediately followed by its unit, e.g. "2KB".
    """
    base = 1024
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    # Scale through every unit except the largest one, so that the value left
    # after the loop is always expressed in the last unit
    for unit in units[:-1]:
        if -base < num < base:
            return f"{num}{unit}"

        num = round(num / base)

    return f"{num}{units[-1]}"
94
95
96
def is_binary(bytes) -> bool:
    """Return True when *bytes* contains at least one byte outside TEXT_CHARS."""
    # Deleting every TEXT_CHARS byte leaves only the bytes not in TEXT_CHARS
    leftover = bytes.translate(None, TEXT_CHARS)
    return len(leftover) > 0
98
99
100
def is_ipv6(ip):
    """Tell whether *ip* looks like IPv6 notation (it has at least two colons)."""
    return ip.count(":") >= 2


def iprange(subnet):
    """Expand a subnet into the list of all the addresses it contains.

    Args:
        subnet: IPv4 or IPv6 network, e.g. "192.168.0.0/30" or "2001:db8::/126".

    Returns:
        Every address of the network, as strings, in ascending order.

    Raises:
        ValueError: (or one of its ``ipaddress`` subclasses) when the network
            constructor rejects *subnet*.
    """
    # Choose the parser before building the network: IPv4Network raises on
    # IPv6 input, so constructing it unconditionally broke every IPv6 subnet
    network_class = IPv6Network if is_ipv6(subnet) else IPv4Network

    return [str(ip) for ip in network_class(subnet)]
110
111
112
def merge_path(url, path):
    """Resolve *path* against *url* the way a browser follows a relative link.

    Clicking <a href="bar">link</a> on https://website.com/folder/foo leads
    to https://website.com/folder/bar: the last segment of *url* is replaced
    by the normalized *path*.
    """
    # Normalize the path like the browser does (dealing with ../ and ./)
    normalized = urljoin("/", path).lstrip("/")
    # Keep everything up to and including the last "/" of the URL
    head, slash, _ = url.rpartition("/")

    return head + slash + normalized
121
122
123
def read_stdin():
    """Read all of standard input, then try to rebind sys.stdin to the terminal.

    Reference: https://stackoverflow.com/questions/46129898/conflict-between-sys-stdin-and-input-eoferror-eof-when-reading-a-line

    Returns:
        Everything that was read from standard input.
    """
    piped = sys.stdin.read()

    try:
        # "CON:" on Windows, otherwise the terminal attached to stdout
        tty = "CON:" if IS_WINDOWS else os.ttyname(sys.stdout.fileno())
        sys.stdin = open(tty)
    except OSError:
        # Best effort: sys.stdin is left as it was when no terminal can be opened
        pass

    return piped
138
139
140
def replace_path(string, path, replace_with):
    """Replace a path in an HTML body, however it happens to be encoded.

    The path might be encoded/decoded in many different ways, so these forms
    of "/" + *path* are replaced in turn: URL-quoted (once and twice),
    URL-unquoted (once and twice), HTML-escaped, JSON-dumped, and verbatim.

    Note:
        - *path* must not start with a "/".
        - An occurrence immediately followed by a word character (letter,
          digit or underscore) is not replaced. For example, "abc" will be
          replaced in "abc def" but not in "abcdef".

    Args:
        string: the body to process.
        path: the path to look for, without the leading "/".
        replace_with: the text inserted, verbatim, in place of each match.

    Returns:
        *string* with every matching form of the path replaced.
    """
    def sub(string, to_replace, replace_with):
        regex = re.escape(to_replace) + "(?=[^\\w]|$)"
        # A callable replacement is inserted as-is; a plain string would have
        # its backslashes parsed as escapes or group references by re.sub
        return re.sub(regex, lambda _match: replace_with, string)

    path = "/" + path
    # The order matters: each form is substituted on the result of the previous one
    variants = (
        quote(path),
        quote(quote(path)),
        unquote(path),
        unquote(unquote(path)),
        escape(path),
        dumps(path),
        path,
    )

    for variant in variants:
        string = sub(string, variant, replace_with)

    return string
161
162