Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
marvel
GitHub Repository: marvel/qnf
Path: blob/master/elisp/fuzzy.el
987 views
1
;;; fuzzy.el --- Fuzzy matching utilities
2
3
;; Copyright (C) 2010 Tomohiro Matsuyama
4
5
;; Author: Tomohiro Matsuyama <[email protected]>
6
;; Keywords: convenience
7
8
;; This program is free software; you can redistribute it and/or modify
9
;; it under the terms of the GNU General Public License as published by
10
;; the Free Software Foundation, either version 3 of the License, or
11
;; (at your option) any later version.
12
13
;; This program is distributed in the hope that it will be useful,
14
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
;; GNU General Public License for more details.
17
18
;; You should have received a copy of the GNU General Public License
19
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21
;;; Commentary:
22
23
;;
24
25
;;; Code:
26
27
(eval-when-compile
28
(require 'cl))
29
(require 'regexp-opt)
30
31
(defgroup fuzzy nil
32
"Fuzzy matching utilities."
33
:group 'convenience
34
:prefix "fuzzy-")
35
36
(defcustom fuzzy-accept-error-rate 0.10
37
"Error threshold."
38
:group 'fuzzy)
39
40
(defvar fuzzy-accept-length-difference 2)
41
42
(defvar fuzzy-regexp-some-char (format "\\w\\{0,%s\\}" fuzzy-accept-length-difference))
43
44
45
46
;;; Functions
47
48
(defun fuzzy-reverse-string (string)
49
(apply 'string (nreverse (append string nil))))
50
51
(defun fuzzy-regexp-compile (string)
52
(labels ((oddp (n) (eq (logand n 1) 1))
53
(evenp (n) (eq (logand n 1) 0))
54
(opt (n) (regexp-opt-charset (append (substring string
55
(max 0 (- n 1))
56
(min (length string) (+ n 2))) nil))))
57
(concat
58
"\\("
59
(loop for i below (length string)
60
for c = (if (evenp i) (opt i) fuzzy-regexp-some-char)
61
concat c)
62
"\\|"
63
(loop for i below (length string)
64
for c = (if (oddp i) (opt i) fuzzy-regexp-some-char)
65
concat c)
66
"\\)")))
67
68
(defalias 'fuzzy-edit-distance 'fuzzy-jaro-winkler-distance)
69
70
(defun fuzzy-jaro-winkler-distance (s1 s2)
71
"http://en.wikipedia.org/wiki/Jaro-Winkler_distance"
72
(let* ((l1 (length s1))
73
(l2 (length s2))
74
(r (max 1 (1- (/ (max l1 l2) 2))))
75
(m 0)
76
(tr 0)
77
(p 0)
78
cs1 cs2)
79
(loop with seen = (make-vector l2 nil)
80
for i below l1
81
for c1 = (aref s1 i) do
82
(loop for j from (max 0 (- i r)) below (min l2 (+ i r))
83
for c2 = (aref s2 j)
84
if (and (char-equal c1 c2)
85
(null (aref seen j))) do
86
(push c1 cs1)
87
(aset seen j c2)
88
(incf m)
89
and return nil)
90
finally
91
(setq cs1 (nreverse cs1)
92
cs2 (loop for i below l2
93
for c = (aref seen i)
94
if c collect c)))
95
(loop for c1 in cs1
96
for c2 in cs2
97
if (not (char-equal c1 c2)) do
98
(incf tr))
99
(loop for i below (min m 5)
100
for c1 across s1
101
for c2 across s2
102
while (char-equal c1 c2) do
103
(incf p))
104
(if (eq m 0)
105
0.0
106
(setq m (float m))
107
(let* ((dj (/ (+ (/ m l1) (/ m l2) (/ (- m (/ tr 2)) m)) 3))
108
(dw (+ dj (* p 0.1 (- 1 dj)))))
109
dw))))
110
111
;; this function should be compiled
112
(byte-compile 'fuzzy-jaro-winkler-distance)
113
114
(defun fuzzy-match (s1 s2 &optional function)
115
(or function (setq function 'fuzzy-edit-distance))
116
(and (<= (abs (- (length s1) (length s2)))
117
fuzzy-accept-length-difference)
118
(>= (funcall function s1 s2)
119
(- 1 fuzzy-accept-error-rate))))
120
121
(defun fuzzy-all-completions (string collection)
122
"all-completions family with fuzzy matching."
123
(loop with length = (length string)
124
for str in collection
125
for s = (substring str 0 (min (length str)
126
(+ length fuzzy-accept-length-difference)))
127
if (fuzzy-match string s)
128
collect str))
129
130
131
132
;;; Search and Incremental Search
133
134
(defvar fuzzy-search-cache nil)
135
(defvar fuzzy-search-cache-string nil)
136
137
(defun fuzzy-search-cache-activate ()
138
(setq fuzzy-search-cache (make-hash-table))
139
(setq fuzzy-search-cache-string nil))
140
141
(defun fuzzy-search-cache-deactive ()
142
(setq fuzzy-search-cache nil)
143
(setq fuzzy-search-cache-string nil))
144
145
(defun fuzzy-search-edit-distance (s1 s2)
146
(or (and fuzzy-search-cache
147
(cond
148
((null fuzzy-search-cache-string)
149
(setq fuzzy-search-cache-string s1)
150
nil)
151
((not (equal fuzzy-search-cache-string s1))
152
(setq fuzzy-search-cache-string s1)
153
(clrhash fuzzy-search-cache)
154
nil)
155
(t))
156
(gethash s2 fuzzy-search-cache))
157
(let ((d (fuzzy-edit-distance s1 s2)))
158
(if fuzzy-search-cache
159
(puthash s2 d fuzzy-search-cache))
160
d)))
161
162
(defun fuzzy-search-match (s1 s2)
163
(fuzzy-match s1 s2 'fuzzy-search-edit-distance))
164
165
(defun fuzzy-search-forward (string &optional bound noerror count)
166
(let* ((regexp (fuzzy-regexp-compile string))
167
match-data)
168
(save-excursion
169
(while (and (null match-data)
170
(re-search-forward regexp bound t))
171
(if (fuzzy-search-match string (match-string 1))
172
(setq match-data (match-data))
173
(goto-char (1+ (match-beginning 1))))))
174
(when match-data
175
(store-match-data match-data)
176
(goto-char (match-end 1)))))
177
178
(defun fuzzy-search-backward (string &optional bound noerror count)
179
(let* ((regexp (fuzzy-regexp-compile string))
180
match-data begin end)
181
(save-excursion
182
(while (and (null match-data)
183
(re-search-backward regexp bound t))
184
(setq begin (match-beginning 1)
185
end (match-end 1))
186
(store-match-data nil)
187
(goto-char (max (point-min) (- begin (* (length string) 2))))
188
(while (re-search-forward regexp end t)
189
(if (fuzzy-search-match string (match-string 1))
190
(setq match-data (match-data))
191
(goto-char (1+ (match-beginning 1)))))
192
(unless match-data
193
(goto-char begin)))
194
(if match-data
195
(progn
196
(store-match-data match-data)
197
(goto-char (match-beginning 1)))
198
(store-match-data nil)))))
199
200
(defvar fuzzy-isearch nil)
201
(defvar fuzzy-isearch-failed-count 0)
202
(defvar fuzzy-isearch-enabled 'on-failed)
203
(defvar fuzzy-isearch-original-search-fun nil)
204
(defvar fuzzy-isearch-prefix "[FUZZY] ")
205
206
(defun fuzzy-isearch-activate ()
207
(setq fuzzy-isearch t)
208
(setq fuzzy-isearch-failed-count 0)
209
(fuzzy-search-cache-activate))
210
211
(defun fuzzy-isearch-deactivate ()
212
(setq fuzzy-isearch nil)
213
(setq fuzzy-isearch-failed-count 0)
214
(fuzzy-search-cache-deactive))
215
216
(defun fuzzy-isearch ()
217
(cond (isearch-word
218
(if isearch-forward 'word-search-forward 'word-search-backward))
219
(isearch-regexp
220
(if isearch-forward 're-search-forward 're-search-backward))
221
((or fuzzy-isearch
222
(eq fuzzy-isearch-enabled 'always)
223
(and (eq fuzzy-isearch-enabled 'on-failed)
224
(null isearch-success)
225
isearch-wrapped
226
(> (setq fuzzy-isearch-failed-count (1+ fuzzy-isearch-failed-count))
227
1)))
228
(unless fuzzy-isearch
229
;(goto-char isearch-opoint)
230
(fuzzy-isearch-activate))
231
(if isearch-forward 'fuzzy-search-forward 'fuzzy-search-backward))
232
(t
233
(if isearch-forward 'search-forward 'search-backward))))
234
235
(defun fuzzy-isearch-end-hook ()
236
(fuzzy-isearch-deactivate))
237
238
(defun turn-on-fuzzy-isearch ()
239
(interactive)
240
(setq fuzzy-isearch-original-search-fun isearch-search-fun-function)
241
(setq isearch-search-fun-function 'fuzzy-isearch)
242
(add-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
243
244
(defun turn-off-fuzzy-isearch ()
245
(interactive)
246
(setq isearch-search-fun-function fuzzy-isearch-original-search-fun)
247
(remove-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
248
249
(defadvice isearch-message-prefix (after fuzzy-isearch-message-prefix activate)
250
(if fuzzy-isearch
251
(setq ad-return-value (concat fuzzy-isearch-prefix ad-return-value))
252
ad-return-value))
253
254
(provide 'fuzzy)
255
;;; fuzzy.el ends here
256
257