Path: blob/main/crates/polars-compute/src/unique/distinct.rs
7884 views
/// Implementations for {n,arg}-unique on [`Array`] that can be amortized over several invocations.1use arrow::array::{Array, BinaryViewArray, BooleanArray, PrimitiveArray, StaticArray};2use arrow::bitmap::bitmask::BitMask;3use arrow::datatypes::ArrowDataType;4use arrow::legacy::prelude::LargeBinaryArray;5use arrow::types::{NativeType, PrimitiveType};6use polars_utils::aliases::PlHashSet;7use polars_utils::float16::pf16;8use polars_utils::total_ord::{TotalEq, TotalHash, TotalOrdWrap};9use polars_utils::{IdxSize, UnitVec};1011pub trait AmortizedUnique: Send + Sync + 'static {12fn new_empty(&self) -> Box<dyn AmortizedUnique>;1314/// Retain indices of items that are unique.15///16/// This is always stable.17///18/// # Safety19///20/// All indices i should be 0 <= i < values.len()21unsafe fn retain_unique(&mut self, values: &dyn Array, idxs: &mut UnitVec<IdxSize>);2223/// Get the indices of unique items in an array slice.24///25/// This is always stable.26fn arg_unique(27&mut self,28values: &dyn Array,29idxs: &mut UnitVec<IdxSize>,30start: IdxSize,31length: IdxSize,32);3334/// Get the number of unique items in an array at `idxs`.35///36/// # Safety37///38/// All indices i should be 0 <= i < values.len()39unsafe fn n_unique_idx(&mut self, values: &dyn Array, idxs: &[IdxSize]) -> IdxSize;4041/// Get the number of unique items in an array slice.42fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize;43}4445pub fn amortized_unique_from_dtype(dtype: &ArrowDataType) -> Box<dyn AmortizedUnique> {46use arrow::datatypes::PhysicalType as P;47match dtype.to_physical_type() {48P::Null => Box::new(NullUnique) as _,49P::Boolean => Box::new(BooleanUnique) as _,50P::Primitive(pt) => match pt {51PrimitiveType::Int8 => Box::new(PrimitiveArgUnique::<i8>::default()) as _,52PrimitiveType::Int16 => Box::new(PrimitiveArgUnique::<i16>::default()) as _,53PrimitiveType::Int32 => Box::new(PrimitiveArgUnique::<i32>::default()) as _,54PrimitiveType::Int64 => Box::new(PrimitiveArgUnique::<i64>::default()) as _,55PrimitiveType::Int128 => Box::new(PrimitiveArgUnique::<i128>::default()) as _,56PrimitiveType::UInt8 => Box::new(PrimitiveArgUnique::<u8>::default()) as _,57PrimitiveType::UInt16 => Box::new(PrimitiveArgUnique::<u16>::default()) as _,58PrimitiveType::UInt32 => Box::new(PrimitiveArgUnique::<u32>::default()) as _,59PrimitiveType::UInt64 => Box::new(PrimitiveArgUnique::<u64>::default()) as _,60PrimitiveType::UInt128 => Box::new(PrimitiveArgUnique::<u128>::default()) as _,61PrimitiveType::Float16 => Box::new(PrimitiveArgUnique::<pf16>::default()) as _,62PrimitiveType::Float32 => Box::new(PrimitiveArgUnique::<f32>::default()) as _,63PrimitiveType::Float64 => Box::new(PrimitiveArgUnique::<f64>::default()) as _,64PrimitiveType::Int256 => unreachable!(),65PrimitiveType::DaysMs => unreachable!(),66PrimitiveType::MonthDayNano => unreachable!(),67PrimitiveType::MonthDayMillis => unreachable!(),68},69P::BinaryView => Box::new(BinaryViewUnique::default()) as _,70P::LargeBinary => Box::new(BinaryUnique::default()) as _,7172P::Dictionary(_) => unreachable!(),73P::Binary => unreachable!(),74P::FixedSizeBinary => unreachable!(),75P::Utf8 => unreachable!(),76P::LargeUtf8 => unreachable!(),77P::List => unreachable!(),78P::Union => unreachable!(),79P::Map => unreachable!(),8081// Should be handled through BinaryView.82P::Utf8View => unreachable!(),8384// Should be handled through row encoding.85P::FixedSizeList => unreachable!(),86P::LargeList => unreachable!(),87P::Struct => unreachable!(),88}89}9091struct NullUnique;92struct BooleanUnique;93#[derive(Default)]94struct PrimitiveArgUnique<T>(95PlHashSet<TotalOrdWrap<T>>,96PlHashSet<Option<TotalOrdWrap<T>>>,97);98#[derive(Default)]99struct BinaryViewUnique(PlHashSet<&'static [u8]>, PlHashSet<Option<&'static [u8]>>);100#[derive(Default)]101struct BinaryUnique(PlHashSet<&'static [u8]>, PlHashSet<Option<&'static [u8]>>);102103impl AmortizedUnique for NullUnique {104fn new_empty(&self) -> Box<dyn AmortizedUnique> {105Box::new(NullUnique)106}107108unsafe fn retain_unique(&mut self, _values: &dyn Array, idxs: &mut UnitVec<IdxSize>) {109if !idxs.is_empty() {110*idxs = UnitVec::from_slice(&[idxs[0]]);111}112}113114fn arg_unique(115&mut self,116values: &dyn Array,117idxs: &mut UnitVec<IdxSize>,118start: IdxSize,119length: IdxSize,120) {121assert!(start.saturating_add(length) as usize <= values.len());122if length > 0 {123idxs.push(start);124}125}126127unsafe fn n_unique_idx(&mut self, _values: &dyn Array, idxs: &[IdxSize]) -> IdxSize {128IdxSize::from(!idxs.is_empty())129}130131fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize {132assert!(start.saturating_add(length) as usize <= values.len());133IdxSize::from(length > 0)134}135}136137impl AmortizedUnique for BooleanUnique {138fn new_empty(&self) -> Box<dyn AmortizedUnique> {139Box::new(BooleanUnique)140}141142unsafe fn retain_unique(&mut self, values: &dyn Array, idxs: &mut UnitVec<IdxSize>) {143if idxs.len() <= 1 {144return;145}146147let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();148149if values.has_nulls() {150let mut seen = 0u8;151idxs.retain(|i| {152if seen == 0b111 {153return false;154}155156// SAFETY: function invariant.157let v = match unsafe { values.get_unchecked(i as usize) } {158None => 1 << 0,159Some(false) => 1 << 1,160Some(true) => 1 << 2,161};162163let keep = seen & v == 0;164seen |= v;165keep166});167} else {168let values = values.values();169if values.set_bits() == 0 || values.unset_bits() == 0 {170*idxs = UnitVec::from_slice(&[idxs[0]]);171return;172}173174// SAFETY: function invariant.175let fst = unsafe { values.get_bit_unchecked(idxs[0] as usize) };176*idxs = match idxs[1..]177.iter()178// SAFETY: function invariant.179.position(|&i| fst != unsafe { values.get_bit_unchecked(i as usize) })180{181None => UnitVec::from_slice(&[idxs[0]]),182Some(i) => UnitVec::from_slice(&[idxs[0], idxs[1 + i]]),183};184}185}186187fn arg_unique(188&mut self,189values: &dyn Array,190idxs: &mut UnitVec<IdxSize>,191start: IdxSize,192length: IdxSize,193) {194if length <= 1 {195if length == 1 {196idxs.push(start);197}198return;199}200201assert!(start.saturating_add(length) as usize <= values.len());202let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();203204if values.has_nulls() {205let mut seen = 0u8;206idxs.extend((start..start + length).filter(|i| {207if seen == 0b111 {208return false;209}210211// SAFETY: asserted before.212let v = match unsafe { values.get_unchecked(*i as usize) } {213None => 1 << 0,214Some(false) => 1 << 1,215Some(true) => 1 << 2,216};217218let keep = seen & v == 0;219seen |= v;220keep221}));222} else {223let values = values.values();224if values.set_bits() == 0 || values.unset_bits() == 0 {225*idxs = UnitVec::from_slice(&[start]);226return;227}228229let values = BitMask::from_bitmap(values);230let values = values.sliced(start as usize, length as usize);231232let leading_zeros = values.leading_zeros();233if leading_zeros == values.len() {234*idxs = UnitVec::from_slice(&[start]);235} else if leading_zeros == 0 {236let leading_ones = values.leading_ones();237if leading_ones == values.len() {238*idxs = UnitVec::from_slice(&[start]);239} else {240*idxs = UnitVec::from_slice(&[start, start + leading_ones as IdxSize]);241}242} else {243*idxs = UnitVec::from_slice(&[start, start + leading_zeros as IdxSize]);244}245}246}247248unsafe fn n_unique_idx(&mut self, values: &dyn Array, idxs: &[IdxSize]) -> IdxSize {249if idxs.len() <= 1 {250return idxs.len() as IdxSize;251}252253let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();254255if values.has_nulls() {256let mut seen = 0u8;257for &i in idxs {258if seen == 0b111 {259break;260}261// SAFETY: function invariant.262seen |= match unsafe { values.get_unchecked(i as usize) } {263None => 1 << 0,264Some(false) => 1 << 1,265Some(true) => 1 << 2,266};267}268IdxSize::from(seen.count_ones())269} else {270let values = values.values();271if values.set_bits() == 0 || values.unset_bits() == 0 {272return 1;273}274275// SAFETY: function invariant.276let fst = unsafe { values.get_bit_unchecked(idxs[0] as usize) };277for &i in &idxs[1..] {278// SAFETY: function invariant.279if fst != unsafe { values.get_bit_unchecked(i as usize) } {280return 2;281}282}2831284}285}286287fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize {288if length <= 1 {289return length;290}291292let values = values.as_any().downcast_ref::<BooleanArray>().unwrap();293assert!(start.saturating_add(length) as usize <= values.len());294295if values.has_nulls() {296let validity = BitMask::from_bitmap(values.validity().unwrap());297let values = BitMask::from_bitmap(values.values());298299let validity = validity.sliced(start as usize, length as usize);300let values = values.sliced(start as usize, length as usize);301302let num_valid = validity.set_bits();303if num_valid == 0 {304return 1;305}306307if num_valid as IdxSize == length {308let num_trues = values.set_bits() as IdxSize;3091 + IdxSize::from(num_trues != length && num_trues != 0)310} else {311let num_trues = values.num_intersections_with(validity);3122 + IdxSize::from(num_trues != num_valid && num_trues != 0)313}314} else {315let values = values.values();316if values.set_bits() == 0 || values.unset_bits() == 0 {317return 1;318}319320let values = BitMask::from_bitmap(values);321let values = values.sliced(start as usize, length as usize);322let num_trues = values.set_bits();3231 + IdxSize::from(num_trues != 0 && num_trues != values.len())324}325}326}327328impl<T: NativeType + TotalHash + TotalEq> AmortizedUnique for PrimitiveArgUnique<T> {329fn new_empty(&self) -> Box<dyn AmortizedUnique> {330Box::new(PrimitiveArgUnique::<T>::default())331}332333unsafe fn retain_unique(&mut self, values: &dyn Array, idxs: &mut UnitVec<IdxSize>) {334if idxs.len() <= 1 {335return;336}337338let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();339340if values.has_nulls() {341self.1.clear();342idxs.retain(|i| {343// SAFETY: function invariant.344let value = unsafe { values.get_unchecked(i as usize) };345let value = value.map(TotalOrdWrap);346self.1.insert(value)347});348} else {349self.0.clear();350let values = values.values().as_slice();351idxs.retain(|i| {352// SAFETY: function invariant.353let value = *unsafe { values.get_unchecked(i as usize) };354let value = TotalOrdWrap(value);355self.0.insert(value)356});357}358}359360fn arg_unique(361&mut self,362values: &dyn Array,363idxs: &mut UnitVec<IdxSize>,364start: IdxSize,365length: IdxSize,366) {367if length <= 1 {368if length == 1 {369idxs.push(start);370}371return;372}373374let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();375assert!(start.saturating_add(length) as usize <= values.len());376377if values.has_nulls() {378self.1.clear();379idxs.extend((start..start + length).filter(|i| {380// SAFETY: asserted before.381let value = unsafe { values.get_unchecked(*i as usize) };382let value = value.map(TotalOrdWrap);383self.1.insert(value)384}));385} else {386self.0.clear();387let values = values.values().as_slice();388idxs.extend(389values[start as usize..][..length as usize]390.iter()391.enumerate()392.filter_map(|(i, value)| {393let value = TotalOrdWrap(*value);394self.0.insert(value).then_some(i as IdxSize + start)395}),396);397}398}399400unsafe fn n_unique_idx(&mut self, values: &dyn Array, idxs: &[IdxSize]) -> IdxSize {401if idxs.len() <= 1 {402return idxs.len() as IdxSize;403}404405let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();406407if values.has_nulls() {408self.1.clear();409self.1.extend(idxs.iter().map(|&i| {410// SAFETY: function invariant.411let value = unsafe { values.get_unchecked(i as usize) };412value.map(TotalOrdWrap)413}));414self.1.len() as IdxSize415} else {416let values = values.values();417self.0.clear();418self.0.extend(idxs.iter().map(|&i| {419// SAFETY: function invariant.420let value = *unsafe { values.get_unchecked(i as usize) };421TotalOrdWrap(value)422}));423self.0.len() as IdxSize424}425}426427fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize {428if length <= 1 {429return length;430}431432let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();433assert!(start.saturating_add(length) as usize <= values.len());434435if values.has_nulls() {436self.1.clear();437self.1.extend((start..start + length).map(|i| {438// SAFETY: asserted before.439let value = unsafe { values.get_unchecked(i as usize) };440value.map(TotalOrdWrap)441}));442self.1.len() as IdxSize443} else {444let values = values.values();445self.0.clear();446self.0.extend(447values[start as usize..][..length as usize]448.iter()449.map(|&v| TotalOrdWrap(v)),450);451self.0.len() as IdxSize452}453}454}455456impl AmortizedUnique for BinaryViewUnique {457fn new_empty(&self) -> Box<dyn AmortizedUnique> {458Box::new(BinaryViewUnique::default())459}460461fn arg_unique(462&mut self,463values: &dyn Array,464idxs: &mut UnitVec<IdxSize>,465start: IdxSize,466length: IdxSize,467) {468if length <= 1 {469if length == 1 {470idxs.push(start);471}472return;473}474475let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();476assert!(start.saturating_add(length) as usize <= values.len());477478if values.has_nulls() {479self.1.reserve(length as usize);480idxs.extend((start..start + length).filter(|i| {481// SAFETY: asserted before.482let value = unsafe { values.get_unchecked(*i as usize) };483// SAFETY: Gets cleared at end of the scope.484let value =485value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) });486self.1.insert(value)487}));488self.1.clear();489} else {490self.0.reserve(length as usize);491if values.total_buffer_len() == 0 {492let views = values.views().as_slice();493idxs.extend(494views[start as usize..][..length as usize]495.iter()496.enumerate()497.filter_map(|(i, value)| {498debug_assert!(value.is_inline());499500// SAFETY: buffer length == 0.501let value = unsafe { value.get_inlined_slice_unchecked() };502// SAFETY: Gets cleared at end of the scope.503let value =504unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };505self.0.insert(value).then_some(i as IdxSize + start)506}),507);508} else {509idxs.extend((start..start + length).filter(|i| {510// SAFETY: asserted before.511let value = unsafe { values.value_unchecked(*i as usize) };512// SAFETY: Gets cleared at end of the scope.513let value = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };514self.0.insert(value)515}));516}517self.0.clear();518}519}520521unsafe fn retain_unique(&mut self, values: &dyn Array, idxs: &mut UnitVec<IdxSize>) {522if idxs.len() <= 1 {523return;524}525526let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();527if values.has_nulls() {528self.1.reserve(idxs.len());529idxs.retain(|i| {530// SAFETY: asserted before.531let value = unsafe { values.get_unchecked(i as usize) };532// SAFETY: Gets cleared at end of the scope.533let value =534value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) });535self.1.insert(value)536});537self.1.clear();538} else {539self.0.reserve(idxs.len());540if values.total_buffer_len() == 0 {541let views = values.views().as_slice();542idxs.retain(|i| {543let value = unsafe { views.get_unchecked(i as usize) };544debug_assert!(value.is_inline());545546// SAFETY: buffer length == 0.547let value = unsafe { value.get_inlined_slice_unchecked() };548// SAFETY: Gets cleared at end of the scope.549let value = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };550self.0.insert(value)551});552} else {553idxs.retain(|i| {554// SAFETY: asserted before.555let value = unsafe { values.value_unchecked(i as usize) };556// SAFETY: Gets cleared at end of the scope.557let value = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };558self.0.insert(value)559});560}561self.0.clear();562}563}564565unsafe fn n_unique_idx(&mut self, values: &dyn Array, idxs: &[IdxSize]) -> IdxSize {566if idxs.len() <= 1 {567return idxs.len() as IdxSize;568}569570let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();571572if values.has_nulls() {573self.1.reserve(idxs.len());574self.1.extend(idxs.iter().map(|&i| {575// SAFETY: function invariant.576let value = unsafe { values.get_unchecked(i as usize) };577// SAFETY: Gets cleared at end of the scope.578value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) })579}));580let out = self.1.len() as IdxSize;581self.1.clear();582out583} else {584self.0.reserve(idxs.len());585if values.total_buffer_len() == 0 {586let views = values.views().as_slice();587self.0.extend(idxs.iter().map(|&i| {588let value = unsafe { views.get_unchecked(i as usize) };589debug_assert!(value.is_inline());590591// SAFETY: buffer length == 0.592let value = unsafe { value.get_inlined_slice_unchecked() };593// SAFETY: Gets cleared at end of the scope.594unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }595}));596} else {597self.0.extend(idxs.iter().map(|&i| {598// SAFETY: function invariant.599let value = unsafe { values.value_unchecked(i as usize) };600// SAFETY: Gets cleared at end of the scope.601unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }602}));603}604let out = self.0.len() as IdxSize;605self.0.clear();606out607}608}609610fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize {611if length <= 1 {612return length;613}614615let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();616assert!(start.saturating_add(length) as usize <= values.len());617618if values.has_nulls() {619self.1.reserve(length as usize);620self.1.extend((start..start + length).map(|i| {621// SAFETY: asserted before.622let value = unsafe { values.get_unchecked(i as usize) };623// SAFETY: Gets cleared at end of the scope.624value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) })625}));626let out = self.1.len() as IdxSize;627self.1.clear();628out629} else {630self.0.reserve(length as usize);631if values.total_buffer_len() == 0 {632let views = values.views().as_slice();633self.0.extend(634views[start as usize..][..length as usize]635.iter()636.map(|value| {637debug_assert!(value.is_inline());638639// SAFETY: buffer length == 0.640let value = unsafe { value.get_inlined_slice_unchecked() };641// SAFETY: Gets cleared at end of the scope.642unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }643}),644);645} else {646self.0.extend((start..start + length).map(|i| {647// SAFETY: asserted before.648let value = unsafe { values.value_unchecked(i as usize) };649// SAFETY: Gets cleared at end of the scope.650unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }651}));652}653let out = self.0.len() as IdxSize;654self.0.clear();655out656}657}658}659660impl AmortizedUnique for BinaryUnique {661fn new_empty(&self) -> Box<dyn AmortizedUnique> {662Box::new(BinaryUnique::default())663}664665fn arg_unique(666&mut self,667values: &dyn Array,668idxs: &mut UnitVec<IdxSize>,669start: IdxSize,670length: IdxSize,671) {672if length <= 1 {673if length == 1 {674idxs.push(start);675}676return;677}678679let values = values.as_any().downcast_ref::<LargeBinaryArray>().unwrap();680assert!(start.saturating_add(length) as usize <= values.len());681682if values.has_nulls() {683self.1.reserve(length as usize);684idxs.extend((start..start + length).filter(|i| {685// SAFETY: asserted before.686let value = unsafe { values.get_unchecked(*i as usize) };687// SAFETY: Gets cleared at end of the scope.688let value =689value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) });690self.1.insert(value)691}));692self.1.clear();693} else {694self.0.reserve(length as usize);695idxs.extend((start..start + length).filter(|i| {696// SAFETY: asserted before.697let value = unsafe { values.value_unchecked(*i as usize) };698let value = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };699self.0.insert(value)700}));701self.0.clear();702}703}704705unsafe fn retain_unique(&mut self, values: &dyn Array, idxs: &mut UnitVec<IdxSize>) {706if idxs.len() <= 1 {707return;708}709710let values = values.as_any().downcast_ref::<LargeBinaryArray>().unwrap();711712if values.has_nulls() {713self.1.reserve(idxs.len());714idxs.retain(|i| {715// SAFETY: function invariant.716let value = unsafe { values.get_unchecked(i as usize) };717// SAFETY: Gets cleared at end of the scope.718let value =719value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) });720self.1.insert(value)721});722self.1.clear();723} else {724self.0.reserve(idxs.len());725idxs.retain(|i| {726// SAFETY: function invariant.727let value = unsafe { values.value_unchecked(i as usize) };728let value = unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) };729self.0.insert(value)730});731self.0.clear();732}733}734735unsafe fn n_unique_idx(&mut self, values: &dyn Array, idxs: &[IdxSize]) -> IdxSize {736if idxs.len() <= 1 {737return idxs.len() as IdxSize;738}739740let values = values.as_any().downcast_ref::<LargeBinaryArray>().unwrap();741742if values.has_nulls() {743self.1.reserve(idxs.len());744self.1.extend(idxs.iter().map(|&i| {745// SAFETY: function invariant.746let value = unsafe { values.get_unchecked(i as usize) };747// SAFETY: Gets cleared at end of the scope.748value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) })749}));750let out = self.1.len() as IdxSize;751self.1.clear();752out753} else {754self.0.reserve(idxs.len());755self.0.extend(idxs.iter().map(|&i| {756// SAFETY: function invariant.757let value = unsafe { values.value_unchecked(i as usize) };758// SAFETY: Gets cleared at end of the scope.759unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }760}));761let out = self.0.len() as IdxSize;762self.0.clear();763out764}765}766767fn n_unique_slice(&mut self, values: &dyn Array, start: IdxSize, length: IdxSize) -> IdxSize {768if length <= 1 {769return length;770}771772let values = values.as_any().downcast_ref::<LargeBinaryArray>().unwrap();773assert!(start.saturating_add(length) as usize <= values.len());774775if values.has_nulls() {776self.1.reserve(length as usize);777self.1.extend((start..start + length).map(|i| {778// SAFETY: asserted before.779let value = unsafe { values.get_unchecked(i as usize) };780// SAFETY: Gets cleared at end of the scope.781value.map(|v| unsafe { std::mem::transmute::<&[u8], &'static [u8]>(v) })782}));783let out = self.1.len() as IdxSize;784self.1.clear();785out786} else {787self.0.reserve(length as usize);788self.0.extend((start..start + length).map(|i| {789// SAFETY: asserted before.790let value = unsafe { values.value_unchecked(i as usize) };791// SAFETY: Gets cleared at end of the scope.792unsafe { std::mem::transmute::<&[u8], &'static [u8]>(value) }793}));794let out = self.0.len() as IdxSize;795self.0.clear();796out797}798}799}800801802