Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sage
Path: blob/develop/build/sage_bootstrap/levenshtein.py
4052 views
1
# -*- coding: utf-8 -*-
2
u"""
3
Levenshtein Distance
4
5
The Levenshtein distance between two words is the minimal number of
6
edits that turn one word into the other. Here, "edit" means a
7
single-letter addition, single-letter deletion, or exchange of a
8
letter with another letter.
9
10
https://en.wikipedia.org/wiki/Levenshtein_distance
11
12
EXAMPLES::
13
14
>>> from sage_bootstrap.levenshtein import Levenshtein
15
>>> Levenshtein(5)(u'Queensryche', u'Queensrÿche')
16
1
17
"""
18
19
# ****************************************************************************
20
# Copyright (C) 2015 Volker Braun <[email protected]>
21
#
22
# This program is free software: you can redistribute it and/or modify
23
# it under the terms of the GNU General Public License as published by
24
# the Free Software Foundation, either version 2 of the License, or
25
# (at your option) any later version.
26
# https://www.gnu.org/licenses/
27
# ****************************************************************************
28
29
30
class DistanceExceeded(Exception):
31
pass
32
33
34
class Levenshtein(object):
35
36
def __init__(self, limit):
37
"""
38
Levenshtein Distance with Maximum Distance Cutoff
39
40
Args:
41
limit (int): if the distance exceeds the limit, a
42
:class:`DistanceExceeded` is raised and the
43
computation is aborted.
44
45
EXAMPLES::
46
47
>>> from sage_bootstrap.levenshtein import Levenshtein
48
>>> lev3 = Levenshtein(3)
49
>>> lev3(u'saturday', u'sunday')
50
3
51
>>> lev3(u'kitten', u'sitting')
52
3
53
>>> lev2 = Levenshtein(2)
54
>>> lev2(u'kitten', u'sitting')
55
Traceback (most recent call last):
56
...
57
DistanceExceeded
58
"""
59
self._limit = limit
60
61
def __call__(self, a, b):
62
"""
63
calculate the levenshtein distance
64
65
args:
66
a,b (str): the two strings to compare
67
68
returns:
69
int: the Levenshtein distance if it is less or equal to
70
the distance limit.
71
72
Example::
73
74
>>> from app.scoring.levenshtein import Levenshtein
75
>>> lev3 = Levenshtein(3)
76
>>> lev3(u'Saturday', u'Sunday')
77
3
78
"""
79
n, m = len(a), len(b)
80
if n > m:
81
# Optimization to use O(min(n,m)) space
82
a, b, n, m = b, a, m, n
83
curr = range(n + 1)
84
for i in range(1, m + 1):
85
prev, curr = curr, [i] + [0] * n
86
for j in range(1, n + 1):
87
cost_add, cost_del = prev[j] + 1, curr[j - 1] + 1
88
cost_change = prev[j - 1]
89
if a[j - 1] != b[i - 1]:
90
cost_change += 1
91
curr[j] = min(cost_add, cost_del, cost_change)
92
if min(curr) > self._limit:
93
raise DistanceExceeded
94
if curr[n] > self._limit:
95
raise DistanceExceeded
96
return curr[n]
97
98