// Scraped from the CoCalc share viewer (site navigation and view counter omitted).
// GitHub Repository: pola-rs/polars
// Path: blob/main/pyo3-polars/example/derive_expression/expression_lib/src/distances.rs
1
use std::hash::Hash;
2
3
use arrow::array::PrimitiveArray;
4
use num::Float;
5
use polars::prelude::*;
6
use pyo3_polars::export::polars_core::utils::arrow::types::NativeType;
7
use pyo3_polars::export::polars_core::with_match_physical_integer_type;
8
9
#[allow(clippy::all)]
10
pub(super) fn naive_hamming_dist(a: &str, b: &str) -> u32 {
11
let x = a.as_bytes();
12
let y = b.as_bytes();
13
x.iter()
14
.zip(y)
15
.fold(0, |a, (b, c)| a + (*b ^ *c).count_ones() as u32)
16
}
17
18
/// Jaccard similarity of the distinct values held by two primitive arrays.
///
/// Each array is collected into a hash set of its iterator items (optional
/// values, so a null slot counts as a set member). The result is
/// `|intersection| / |union|`; when both sets are empty this is `0.0 / 0.0`,
/// i.e. NaN.
fn jacc_helper<T: NativeType + Hash + Eq>(a: &PrimitiveArray<T>, b: &PrimitiveArray<T>) -> f64 {
    let left: PlHashSet<_> = a.into_iter().collect();
    let right: PlHashSet<_> = b.into_iter().collect();

    let shared = left.intersection(&right).count();
    // Inclusion-exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|.
    let union = left.len() + right.len() - shared;

    shared as f64 / union as f64
}
28
29
#[allow(unexpected_cfgs)]
30
pub(super) fn naive_jaccard_sim(a: &ListChunked, b: &ListChunked) -> PolarsResult<Float64Chunked> {
31
polars_ensure!(
32
a.inner_dtype() == b.inner_dtype(),
33
ComputeError: "inner data types don't match"
34
);
35
polars_ensure!(
36
a.inner_dtype().is_integer(),
37
ComputeError: "inner data types must be integer"
38
);
39
Ok(with_match_physical_integer_type!(a.inner_dtype(), |$T| {
40
polars::prelude::arity::binary_elementwise(a, b, |a, b| {
41
match (a, b) {
42
(Some(a), Some(b)) => {
43
let a = a.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
44
let b = b.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
45
Some(jacc_helper(a, b))
46
},
47
_ => None
48
}
49
})
50
}))
51
}
52
53
fn haversine_elementwise<T: Float>(start_lat: T, start_long: T, end_lat: T, end_long: T) -> T {
54
let r_in_km = T::from(6371.0).unwrap();
55
let two = T::from(2.0).unwrap();
56
let one = T::one();
57
58
let d_lat = (end_lat - start_lat).to_radians();
59
let d_lon = (end_long - start_long).to_radians();
60
let lat1 = (start_lat).to_radians();
61
let lat2 = (end_lat).to_radians();
62
63
let a = ((d_lat / two).sin()) * ((d_lat / two).sin())
64
+ ((d_lon / two).sin()) * ((d_lon / two).sin()) * (lat1.cos()) * (lat2.cos());
65
let c = two * ((a.sqrt()).atan2((one - a).sqrt()));
66
r_in_km * c
67
}
68
69
pub(super) fn naive_haversine<T>(
70
start_lat: &ChunkedArray<T>,
71
start_long: &ChunkedArray<T>,
72
end_lat: &ChunkedArray<T>,
73
end_long: &ChunkedArray<T>,
74
) -> PolarsResult<ChunkedArray<T>>
75
where
76
T: PolarsFloatType,
77
T::Native: Float,
78
{
79
let out: ChunkedArray<T> = start_lat
80
.iter()
81
.zip(start_long.iter())
82
.zip(end_lat.iter())
83
.zip(end_long.iter())
84
.map(|(((start_lat, start_long), end_lat), end_long)| {
85
let start_lat = start_lat?;
86
let start_long = start_long?;
87
let end_lat = end_lat?;
88
let end_long = end_long?;
89
Some(haversine_elementwise(
90
start_lat, start_long, end_lat, end_long,
91
))
92
})
93
.collect();
94
95
Ok(out.with_name(start_lat.name().clone()))
96
}
97
98