CoCalc -- docstringparser.py

GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/bcdoc/docstringparser.py
1566 views
1
# Copyright 2012-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License"). You
4
# may not use this file except in compliance with the License. A copy of
5
# the License is located at
6
#
7
#     http://aws.amazon.com/apache2.0/
8
#
9
# or in the "license" file accompanying this file. This file is
10
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11
# ANY KIND, either express or implied. See the License for the specific
12
# language governing permissions and limitations under the License.
13
from html.parser import HTMLParser
14

15

16
class DocStringParser(HTMLParser):
    """
    A small HTML parser aimed at the subset of HTML found in the
    documentation strings of the JSON models, which it turns into simple
    ReST output.

    Parser events are accumulated in an ``HTMLTree``.  After every call to
    ``feed`` and on ``close`` the tree is written to ``doc`` and replaced
    by a fresh, empty tree.
    """

    def __init__(self, doc):
        # ``doc`` has to be assigned before the base initializer runs: the
        # base ``__init__`` calls ``reset()``, and ``reset()`` below builds
        # the first tree from ``self.doc``.
        self.doc = doc
        self.tree = None
        super().__init__()

    def reset(self):
        """Reset the underlying parser and start with an empty tree."""
        super().reset()
        self.tree = HTMLTree(self.doc)

    def feed(self, data):
        """Parse ``data``, then write out whatever has been collected."""
        super().feed(data)
        self._flush_tree()

    def close(self):
        """Finish parsing and write out anything still pending."""
        super().close()
        self._flush_tree()

    def handle_starttag(self, tag, attrs):
        """Record an opening tag and its attributes in the tree."""
        self.tree.add_tag(tag, attrs=attrs)

    def handle_endtag(self, tag):
        """Record a closing tag in the tree."""
        self.tree.add_tag(tag, is_start=False)

    def handle_data(self, data):
        """Record a run of text in the tree."""
        self.tree.add_data(data)

    def _flush_tree(self):
        # Emit the collected nodes, then swap in a new tree so that nothing
        # is written a second time on the next feed()/close().
        self.tree.write()
        self.tree = HTMLTree(self.doc)
52

53

54
class HTMLTree:
    """
    A tree which handles HTML nodes. Designed to work with a python HTML
    parser, meaning that the current_node will be the most recently opened
    tag. When a tag is closed, the current_node moves up to the parent node.

    Tags for which ``doc.style`` has no matching ``start_<tag>`` /
    ``end_<tag>`` attribute are not added to the tree; their names are
    collected in ``unhandled_tags`` instead.
    """

    def __init__(self, doc):
        self.doc = doc
        # Root of the tree; it carries no tag of its own.
        self.head = StemNode()
        self.current_node = self.head
        self.unhandled_tags = []

    def add_tag(self, tag, attrs=None, is_start=True):
        """
        Record an opening or closing tag.

        :param tag: The tag name.
        :param attrs: The tag attributes; only used for opening tags.
        :param is_start: True for an opening tag, False for a closing tag.
        """
        if not self._doc_has_handler(tag, is_start):
            self.unhandled_tags.append(tag)
            return

        if is_start:
            if tag == 'li':
                node = LineItemNode(attrs)
            else:
                node = TagNode(tag, attrs)
            self.current_node.add_child(node)
            self.current_node = node
        elif self.current_node.parent is not None:
            self.current_node = self.current_node.parent
        # Otherwise this is a closing tag with nothing left to close (we are
        # already at the root).  Ignore it: moving "up" from the root would
        # set current_node to None and make the next add_tag()/add_data()
        # fail with an AttributeError.

    def _doc_has_handler(self, tag, is_start):
        """Return True if ``doc.style`` has a handler for this tag event."""
        if is_start:
            handler_name = 'start_%s' % tag
        else:
            handler_name = 'end_%s' % tag

        return hasattr(self.doc.style, handler_name)

    def add_data(self, data):
        """Attach a run of text to the most recently opened tag."""
        self.current_node.add_child(DataNode(data))

    def write(self):
        """Write the whole tree to ``doc``, starting from the root."""
        self.head.write(self.doc)
95

96

97
class Node:
    """Base class for everything stored in the tree."""

    def __init__(self, parent=None):
        # The node this one hangs under, or None if it has not been
        # attached to anything.
        self.parent = parent

    def write(self, doc):
        """Write this node to ``doc``; subclasses must override this."""
        raise NotImplementedError()
103

104

105
class StemNode(Node):
    """A node that holds an ordered list of child nodes."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.children = []

    def add_child(self, child):
        """Append ``child`` and make this node its parent."""
        child.parent = self
        self.children.append(child)

    def write(self, doc):
        """Write every child to ``doc``; the stem itself writes nothing."""
        self._write_children(doc)

    def _write_children(self, doc):
        # Children are written in the order in which they were added.
        for node in self.children:
            node.write(doc)
120

121

122
class TagNode(StemNode):
    """
    A generic tag node.  Before writing, it checks that ``doc.style``
    actually has the handler it is about to call.
    """

    def __init__(self, tag, attrs=None, parent=None):
        super().__init__(parent)
        self.tag = tag
        self.attrs = attrs

    def write(self, doc):
        """Write the start handler, then the children, then the end handler."""
        self._write_start(doc)
        self._write_children(doc)
        self._write_end(doc)

    def _write_start(self, doc):
        # The start handler is given the tag attributes.
        self._invoke_handler(doc, 'start_%s' % self.tag, self.attrs)

    def _write_end(self, doc):
        # The end handler is called without arguments.
        self._invoke_handler(doc, 'end_%s' % self.tag)

    @staticmethod
    def _invoke_handler(doc, handler_name, *args):
        # Call ``doc.style.<handler_name>(*args)`` if it exists; otherwise
        # do nothing.
        if hasattr(doc.style, handler_name):
            getattr(doc.style, handler_name)(*args)
146

147

148
class LineItemNode(TagNode):
    """An ``li`` tag node whose leading whitespace is removed on write."""

    def __init__(self, attrs=None, parent=None):
        super().__init__('li', attrs, parent)

    def write(self, doc):
        """Strip leading whitespace from the item, then write it as a tag."""
        self._lstrip(self)
        super().write(doc)

    def _lstrip(self, node):
        """
        Walks the tree depth-first, left-stripping data nodes until one
        still has text left after stripping.

        :param node: The node to strip
        :return: True if non-whitespace data was found, False otherwise
        """
        for child in node.children:
            if not isinstance(child, DataNode):
                # Not text: look for the first text inside this child.
                if self._lstrip(child):
                    return True
                continue

            child.lstrip()
            if child.data:
                # Real text reached; everything after it is left untouched.
                return True

        return False
173

174

175
class DataNode(Node):
    """
    A leaf node holding nothing but a string.
    """

    def __init__(self, data, parent=None):
        super().__init__(parent)
        if not isinstance(data, str):
            raise ValueError("Expecting string type, %s given." % type(data))
        self.data = data

    def lstrip(self):
        """Remove leading whitespace from the stored string, in place."""
        self.data = self.data.lstrip()

    def write(self, doc):
        """
        Write the stored text to ``doc``.

        Empty data writes nothing.  Whitespace-only data is written as a
        single space.  Anything else is split into words, passed through
        ``doc.translate_words`` and re-joined with single spaces; one
        trailing space is kept if the original text ended in whitespace.
        """
        text = self.data
        if not text:
            return

        if text.isspace():
            doc.handle_data(' ')
            return

        keep_trailing_space = text[-1].isspace()
        translated = doc.translate_words(text.split())
        rendered = ' '.join(translated)
        if keep_trailing_space:
            rendered += ' '

        doc.handle_data(rendered)
204

205
Product

Resources

Company