Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ethen8181
GitHub Repository: ethen8181/machine-learning
Path: blob/master/python/cython/pairwise1.pyx
2574 views
1
cimport cython
2
import numpy as np
3
from libc.math cimport sqrt
4
5
# don't use np.sqrt - the sqrt function from the
6
# C standard library is much faster
7
8
# a trick to improve performance is to turn off some of the checking that cython does
9
# wraparound False disables negative (count-from-the-end) indexing
10
# boundscheck False will not check for IndexError
11
# http://docs.cython.org/en/latest/src/reference/compilation.html#compiler-directives
12
@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline double euclidean_distance(double[:] x1, double[:] x2):
    """
    Return the Euclidean (L2) distance between two 1-d double vectors.

    Parameters
    ----------
    x1, x2 : double[:]
        1-d typed memoryviews. Only the length of ``x1`` is read; ``x2``
        is assumed to hold at least as many elements.

    Returns
    -------
    double
        sqrt(sum((x1[i] - x2[i]) ** 2)) over the length of ``x1``.

    Notes
    -----
    Bounds checking is switched off for this function, so a shorter
    ``x2`` is NOT detected and would read past the end of its buffer.
    Callers are responsible for passing equally sized vectors.
    """
    # memoryview shapes and indices are Py_ssize_t; a plain C int could
    # truncate the length of a very long vector
    cdef Py_ssize_t i
    cdef Py_ssize_t n_features = x1.shape[0]
    cdef double diff, total = 0.0

    # typed memoryviews offer no broadcast arithmetic, so the squared
    # differences are accumulated with an explicit loop
    for i in range(n_features):
        diff = x1[i] - x2[i]
        total += diff * diff

    return sqrt(total)
30
31
32
@cython.wraparound(False)
@cython.boundscheck(False)
def pairwise1(double[:, :] X, metric='euclidean'):
    """
    Compute the matrix of pairwise distances between the rows of ``X``.

    Parameters
    ----------
    X : double[:, :]
        2-d buffer of float64 values; each row is treated as one sample.

    metric : str, default 'euclidean'
        Name of the distance to use. Only 'euclidean' is recognized.

    Returns
    -------
    D : double[:, :]
        Typed memoryview of shape (n_samples, n_samples) where
        ``D[i, j]`` is the distance between row i and row j. The matrix
        is symmetric and its diagonal is left at zero. Wrap the result
        with ``np.asarray`` if an ndarray is needed.

    Raises
    ------
    ValueError
        If ``metric`` is anything other than 'euclidean'.
    """
    # C variables do not need a value at declaration time.
    # Py_ssize_t matches the type of memoryview shapes and indices; a plain
    # C int could truncate for a very large number of samples
    cdef double dist
    cdef Py_ssize_t i, j
    cdef Py_ssize_t n_samples = X.shape[0]
    cdef double[:] row_i
    cdef double[:, :] D

    if metric != 'euclidean':
        raise ValueError("unrecognized metric")

    D = np.zeros((n_samples, n_samples), dtype=np.float64)

    # only the upper triangle is computed; each value is mirrored into the
    # lower triangle since the distance is symmetric
    for i in range(n_samples):
        # the slice of row i does not change inside the inner loop
        row_i = X[i]
        for j in range(i + 1, n_samples):
            # call the C-level function directly rather than through an
            # untyped local variable
            dist = euclidean_distance(row_i, X[j])
            D[i, j] = dist
            D[j, i] = dist

    return D
55
56
57