CoCalc -- logicparser.py

GitHub Repository: sagemath/sagesmc
Path: blob/master/src/sage/logic/logicparser.py
⁸⁸¹⁷ views
1
r"""
2
Module that creates and modifies parse trees of well formed boolean formulas.
3

4
A parse tree of a boolean formula is a nested list, where each branch is either
5
a single variable, or a formula composed of either two variables and a binary
6
operator or one variable and a unary operator. The function parse() produces
7
a parse tree that is simplified for the purposes of more efficient truth value
8
evaluation. The function polish_parse() produces the full parse tree of a boolean
9
formula which is used in functions related to proof and inference.  That is,
10
parse() is meant to be used with functions in the logic module that perform
11
semantic operations on a boolean formula, and polish_parse() is to be used with
12
functions that perform syntactic operations on a boolean formula.
13

14
AUTHORS:
15

16
- Chris Gorecki (2007): initial version
17

18
- Paul Scurek (2013-08-01): added polish_parse, cleaned up python code,
19
  updated docstring formatting
20

21
EXAMPLES:
22

23
Find the parse tree and variables of a string representation of a boolean formula::
24

25
    sage: import sage.logic.logicparser as logicparser
26
    sage: s = 'a|b&c'
27
    sage: t = logicparser.parse(s)
28
    sage: t
29
    (['|', 'a', ['&', 'b', 'c']], ['a', 'b', 'c'])
30

31
Find the full syntax parse tree of a string representation of a boolean formula::
32

33
    sage: import sage.logic.logicparser as logicparser
34
    sage: s = '(a&b)->~~c'
35
    sage: logicparser.polish_parse(s)
36
    ['->', ['&', 'a', 'b'], ['~', ['~', 'c']]]
37

38
Find the tokens and distinct variables of a boolean formula::
39

40
    sage: import sage.logic.logicparser as logicparser
41
    sage: s = '~(a|~b)<->(c->c)'
42
    sage: logicparser.tokenize(s)
43
    (['(', '~', '(', 'a', '|', '~', 'b', ')', '<->', '(', 'c', '->', 'c', ')', ')'], ['a', 'b', 'c'])
44

45
Find the parse tree of a boolean formula from a list of the formula's tokens::
46

47
    sage: import sage.logic.logicparser as logicparser
48
    sage: t = ['(', 'a', '->', '~', 'c', ')']
49
    sage: logicparser.tree_parse(t)
50
    ['->', 'a', ['~', 'c', None]]
51
    sage: r = ['(', '~', '~', 'a', '|', 'b', ')']
52
    sage: logicparser.tree_parse(r)
53
    ['|', 'a', 'b']
54

55
Find the full syntax parse tree of a boolean formula from a list of tokens::
56

57
    sage: import sage.logic.logicparser as logicparser
58
    sage: t = ['(', 'a', '->', '~', 'c', ')']
59
    sage: logicparser.tree_parse(t, polish = True)
60
    ['->', 'a', ['~', 'c']]
61
    sage: r = ['(', '~', '~', 'a', '|', 'b', ')']
62
    sage: logicparser.tree_parse(r, polish = True)
63
    ['|', ['~', ['~', 'a']], 'b']
64

65

66
"""
67
#*****************************************************************************
68
#       Copyright (C) 2007 Chris Gorecki <[email protected]>
69
#       Copyright (C) 2013 Paul Scurek <[email protected]>
70
#
71
#  Distributed under the terms of the GNU General Public License (GPL)
72
#  as published by the Free Software Foundation; either version 2 of
73
#  the License, or (at your option) any later version.
74
#                  http://www.gnu.org/licenses/
75
#*****************************************************************************
76

77
from types import *
78
import string
79

80
__symbols = '()&|~<->^'
81
__op_list = ['~', '&', '|', '^', '->', '<->']
82

83
def parse(s):
84
    r"""
85
    Return a parse tree from a boolean formula s.
86

87
    INPUT:
88

89
    - ``s`` -- a string containing a boolean formula.
90

91
    OUTPUT:
92

93
    A list containing the prase tree and a list containing the
94
    variables in a boolean formula in this order:
95

96
    1. the list containing the pase tree
97
    2. the list containing the variables
98

99
    EXAMPLES:
100

101
    This example illustrates how to produce the parse tree of a boolean formula s.
102

103
    ::
104

105
        sage: import sage.logic.logicparser as logicparser
106
        sage: s = 'a|b&c'
107
        sage: t = logicparser.parse(s)
108
        sage: t
109
        (['|', 'a', ['&', 'b', 'c']], ['a', 'b', 'c'])
110
    """
111
    toks, vars_order = tokenize(s)
112
    tree = tree_parse(toks)
113
    # special case of tree == single variable
114
    if isinstance(tree, StringType):
115
        return ['&', tree, tree], vars_order
116
    return tree, vars_order
117

118
def polish_parse(s):
119
    r"""
120
    Return the full syntax parse tree from a boolean formula s.
121

122
    INPUT:
123

124
    - ``s`` -- a string containing a boolean expression
125

126
    OUTPUT:
127

128
    The full syntax parse tree as a nested list
129

130
    EXAMPLES:
131

132
    This example illustrates how to find the full syntax parse tree of a boolean formula.
133

134
    ::
135

136
        sage: import sage.logic.logicparser as logicparser
137
        sage: s = 'a|~~b'
138
        sage: t = logicparser.polish_parse(s)
139
        sage: t
140
        ['|', 'a', ['~', ['~', 'b']]]
141

142
    AUTHORS:
143

144
    - Paul Scurek (2013-08-03)
145
    """
146
    toks, vars_order = tokenize(s)
147
    tree = tree_parse(toks, polish = True)
148
    # special case where the formula s is a single variable
149
    if isinstance(tree, StringType):
150
        return vars_order
151
    return tree
152

153
def tokenize(s):
154
    r"""
155
    Return the tokens and the distinct variables appearing in a boolean formula s.
156

157
    INPUT:
158

159
    - ``s`` -- a string representation of a boolean formula
160

161
    OUTPUT:
162

163
    The tokens and variables as an ordered pair of lists in the following order:
164

165
    1. A list containing the tokens of s, in the order they appear in s
166
    2. A list containing the distinct variables in s, in the order they appearn in s
167

168
    EXAMPLES:
169

170
    This example illustrates how to tokenize a string representation of a boolean formula.
171

172
    ::
173

174
        sage: import sage.logic.logicparser as logicparser
175
        sage: s = 'a|b&c'
176
        sage: t = logicparser.tokenize(s)
177
        sage: t
178
        (['(', 'a', '|', 'b', '&', 'c', ')'], ['a', 'b', 'c'])
179
    """
180
    i = 0
181
    toks = ['(']
182
    vars_order = []
183

184
    while i < len(s):
185
        tok = ""
186
        skip = valid = 1
187
        if s[i] in '()~&|^':
188
            tok = s[i]
189
        elif s[i:i + 2] == '->':
190
            tok = '->'
191
            skip = 2
192
        elif s[i:i + 3] == '<->':
193
            tok = '<->'
194
            skip = 3
195
        # check to see if '-', '<' or '>' are used incorrectly
196
        elif s[i] in '<->':
197
            msg = "'%s' can only be used as part of the operators '<->' or '->'." % (s[i])
198
            raise SyntaxError, msg
199
        if len(tok) > 0:
200
            toks.append(tok)
201
            i += skip
202
            continue
203
        else:
204
            # token is a variable name
205
            if s[i] == ' ':
206
                i += 1
207
                continue
208

209
            while i < len(s) and s[i] not in __symbols and s[i] != ' ':
210
                tok += s[i]
211
                i += 1
212

213
            if len(tok) > 0:
214
                if tok[0] not in string.letters:
215
                    valid = 0
216
                for c in tok:
217
                    if c not in string.letters and c not in string.digits and c != '_':
218
                        valid = 0
219

220
            if valid == 1:
221
                toks.append(tok)
222
                if tok not in vars_order:
223
                    vars_order.append(tok)
224
            else:
225
                msg = 'invalid variable name ' + tok
226
                msg += ": identifiers must begin with a letter and contain only "
227
                msg += "alphanumerics and underscores"
228
                raise NameError, msg
229

230
    toks.append(')')
231
    return toks, vars_order
232

233
def tree_parse(toks, polish = False):
234
    r"""
235
    Return a parse tree from the tokens in toks.
236

237
    INPUT:
238

239
    - ``toks`` -- a list of tokens from a boolean formula
240

241
    - ``polish`` -- (default: False) a boolean.  When true, tree_parse will return
242
      the full syntax parse tree.
243

244
    OUTPUT:
245

246
    A parse tree in the form of a nested list that depends on ``polish`` as follows:
247

248
    polish == False -- Return a simplified parse tree.
249

250
    polish == True -- Return the full syntax parse tree.
251

252
    EXAMPLES:
253

254
    This example illustrates the use of tree_parse when polish == False.
255

256
    ::
257

258
        sage: import sage.logic.logicparser as logicparser
259
        sage: t = ['(', 'a', '|', 'b', '&', 'c', ')']
260
        sage: logicparser.tree_parse(t)
261
        ['|', 'a', ['&', 'b', 'c']]
262

263
    We now demonstrate the use of tree_parse when polish == True.
264

265
    ::
266

267
        sage: t = ['(', 'a', '->', '~', '~', 'b', ')']
268
        sage: logicparser.tree_parse(t)
269
        ['->', 'a', 'b']
270
        sage: t = ['(', 'a', '->', '~', '~', 'b', ')']
271
        sage: logicparser.tree_parse(t, polish = True)
272
        ['->', 'a', ['~', ['~', 'b']]]
273
    """
274
    stack = []
275
    for tok in toks:
276
        stack.append(tok)
277
        if tok == ')':
278
            lrtoks = []
279
            while tok != '(':
280
                tok = stack.pop()
281
                lrtoks.insert(0, tok)
282
            branch = parse_ltor(lrtoks[1:-1], polish = polish)
283
            stack.append(branch)
284
    return stack[0]
285

286
def parse_ltor(toks, n = 0, polish = False):
287
    r"""
288
    Return a parse tree from toks, where each token in toks is atomic.
289

290
    INPUT:
291

292
    - ``toks`` -- a list of tokens. Each token is atomic.
293

294
    - ``n`` -- (default: 0) an integer representing which order of
295
      operations are occurring
296

297
    - ``polish`` -- (default: False) a boolean.  When true, double negations
298
      are not cancelled and negated statements are turned into list of length two.
299

300
    OUTPUT:
301

302
    The parse tree as a nested list that depends on ``polish`` as follows:
303

304
    polish == False - Return a simplified parse tree.
305

306
    polish == True - Return the full syntax parse tree.
307

308
    EXAMPLES:
309

310
    This example illustrates the use of parse_ltor when polish == False.
311

312
    ::
313

314
        sage: import sage.logic.logicparser as logicparser
315
        sage: t = ['a', '|', 'b', '&', 'c']
316
        sage: logicparser.parse_ltor(t)
317
        ['|', 'a', ['&', 'b', 'c']]
318

319
    ::
320

321
        sage: import sage.logic.logicparser as logicparser
322
        sage: t = ['a', '->', '~', '~', 'b']
323
        sage: logicparser.parse_ltor(t)
324
        ['->', 'a', 'b']
325

326
    We now repeat the previous example, but with polish == True.
327

328
    ::
329

330
        sage: import sage.logic.logicparser as logicparser
331
        sage: t = ['a', '->', '~', '~', 'b']
332
        sage: logicparser.parse_ltor(t, polish = True)
333
        ['->', 'a', ['~', ['~', 'b']]]
334

335
    """
336
    i = 0
337
    for tok in toks:
338
        if tok == __op_list[n]:
339
            if tok == '~':
340
                if not polish:
341
                    # cancel double negations
342
                    if toks[i] == '~' and toks[i + 1] == '~':
343
                        del toks[i]
344
                        del toks[i]
345
                        return parse_ltor(toks, n)
346
                    args = [toks[i], toks[i + 1], None]
347
                    toks[i] = args
348
                    del toks[i + 1]
349
                    return parse_ltor(toks, n)
350
                # This executes when creating the full syntax parse tree
351
                else:
352
                    j = i
353
                    while toks[j] == '~':
354
                        j += 1
355
                    while j > i:
356
                        args = [toks[j - 1], toks[j]]
357
                        toks[j - 1] = args
358
                        del toks[j]
359
                        j -= 1
360
                    return parse_ltor(toks, n = n, polish = polish)
361
            else:
362
                args = [toks[i - 1], toks[i], toks[i + 1]]
363
                toks[i - 1] = [args[1], args[0], args[2]]
364
                del toks[i]
365
                del toks[i]
366
                return parse_ltor(toks, n)
367
        i += 1
368
    if n + 1 < len(__op_list):
369
        return parse_ltor(toks, n + 1)
370
    if len(toks) > 1:
371
        raise SyntaxError
372
    return toks[0]
373

374
def apply_func(tree, func):
375
    r"""
376
    Apply func to each node of tree.  Return a new parse tree.
377

378
    INPUT:
379

380
    - ``tree`` -- a parse tree of a boolean formula
381

382
    - ``func`` -- a function to be applied to each node of tree.  This may
383
      be a function that comes from elsewhere in the logic module.
384

385
    OUTPUT:
386

387
    The new parse tree in the form of a nested list
388

389
    EXAMPLES:
390

391
    This example uses :func:`apply_func` where ``func`` switches two entries of tree.
392

393
    ::
394

395
        sage: import sage.logic.logicparser as logicparser
396
        sage: t = ['|', ['&', 'a', 'b'], ['&', 'a', 'c']]
397
        sage: f = lambda t: [t[0], t[2], t[1]]
398
        sage: logicparser.apply_func(t, f)
399
        ['|', ['&', 'c', 'a'], ['&', 'b', 'a']]
400
    """
401
    if type(tree[1]) is ListType and type(tree[2]) is ListType:
402
        lval = apply_func(tree[1], func)
403
        rval = apply_func(tree[2], func)
404
    elif type(tree[1]) is ListType:
405
        lval = apply_func(tree[1], func)
406
        rval = tree[2]
407
    elif type(tree[2]) is ListType:
408
        lval = tree[1]
409
        rval = apply_func(tree[2], func)
410
    else:
411
        lval = tree[1]
412
        rval = tree[2]
413
    return func([tree[0], lval, rval])
414

415

416

417
Product

Resources

Company