Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hhhrrrttt222111
GitHub Repository: hhhrrrttt222111/Dorkify
Path: blob/master/venv/Lib/site-packages/bs4/formatter.py
811 views
1
from bs4.dammit import EntitySubstitution
2
3
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Per-option defaults used when formatting anything other than XML.
    # Treat these as read-only templates: `_default` hands out copies.
    HTML_DEFAULTS = {
        'cdata_containing_tags': {"script", "style"},
    }

    def _default(self, language, value, kwarg):
        """Resolve the effective value of a constructor option.

        :param language: The language passed to the constructor.
        :param value: The value the caller supplied, or None if omitted.
        :param kwarg: The option name, used as a key into HTML_DEFAULTS.
        :return: `value` itself when it is not None; an empty set when
            `language` is Formatter.XML; otherwise a fresh copy of the
            HTML default for `kwarg`. Any language other than XML
            (including None) gets the HTML default.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # Copy the class-level default so that mutating one formatter's
        # set cannot leak into HTML_DEFAULTS or into other formatters.
        return set(self.HTML_DEFAULTS[kwarg])

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
    ):
        """Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted. When omitted,
           the value is chosen by `_default` based on `language`.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities. `ns` is returned unchanged when no
           entity_substitution function is configured, or when `ns`
           is a NavigableString whose parent tag is one of
           `cdata_containing_tags`.
        """
        if not self.entity_substitution:
            return ns
        # Imported at call time rather than at module level --
        # presumably to avoid a circular import with bs4.element.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Do nothing.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        :param tag: An object whose `attrs` is either None or a mapping
           of attribute names to values.
        :return: A sorted list of (name, value) pairs; an empty list
           when `tag.attrs` is None.
        """
        if tag.attrs is None:
            return []
        return sorted(tag.attrs.items())
114
115
116
class HTMLFormatter(Formatter):
    """A generic Formatter for HTML."""

    # Lookup table of ready-made HTML formatters; it starts out empty
    # and is filled in by module-level code after the class definitions.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Build a Formatter whose language is pinned to Formatter.HTML.

        All positional and keyword arguments are forwarded to
        Formatter.__init__ after the language argument.
        """
        language = self.HTML
        super(HTMLFormatter, self).__init__(language, *args, **kwargs)
121
122
123
class XMLFormatter(Formatter):
    """A generic Formatter for XML."""

    # Lookup table of ready-made XML formatters; it starts out empty
    # and is filled in by module-level code after the class definitions.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Build a Formatter whose language is pinned to Formatter.XML.

        All positional and keyword arguments are forwarded to
        Formatter.__init__ after the language argument.
        """
        language = self.XML
        super(XMLFormatter, self).__init__(language, *args, **kwargs)
128
129
130
# Set up aliases for the default formatters.
#
# Each registry maps a formatter name (or None, meaning "no entity
# substitution") to a shared, pre-built formatter instance.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    # NOTE(review): None rather than '' is used to suppress the '/' in
    # void elements; presumably the serializer treats both the same --
    # confirm against the code that reads void_element_close_prefix.
    void_element_close_prefix=None
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Build this entry like the others. Wrapping one Formatter in another
# would pass the inner formatter as the outer one's `language`, so the
# registered object would not be an XMLFormatter and would not be
# recognised as XML when choosing defaults.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)
153
154