Path: blob/main/crates/polars-io/src/csv/write/write_impl/serializer.rs
8409 views
//! This file is complicated because we have complicated escape handling. We want to avoid having1//! to write down each combination of type & escaping, but we also want the compiler to optimize them2//! to efficient machine code - so no dynamic dispatch. That means a lot of generics and macros.3//!4//! We need to differentiate between several kinds of types, and several kinds of escaping we support:5//!6//! - The simplest escaping mechanism are [`QuoteStyle::Always`] and [`QuoteStyle::Never`].7//! For `Never` we just never quote. For `Always` we pass any serializer that never quotes8//! to [`quote_serializer()`] then it becomes quoted properly.9//! - [`QuoteStyle::Necessary`] (the default) is only relevant for strings and floats with decimal_comma,10//! as these are the only types that can have newlines (row separators), commas (default column separators)11//! or quotes. String escaping is complicated anyway, and it is all inside [`string_serializer()`].12//! - The real complication is [`QuoteStyle::NonNumeric`], that doesn't quote numbers (unless necessary)13//! and nulls, and quotes any other thing. The problem is that nulls can be within any type, so we14//! need to handle two possibilities of quoting everywhere.15//!16//! So in case the chosen style is anything but `NonNumeric`, we statically know for each column except strings17//! whether it should be quoted (and for strings too when not `Necessary`). There we use18//! `quote_serializer()` or nothing.19//!20//! But to help with `NonNumeric`, each serializer carry the potential to distinguish between nulls and non-nulls,21//! and quote the latter and not the former. But in order to not have the branch when we statically know the answer,22//! we have an option to statically disable it with a const generic flag `QUOTE_NON_NULL`. Numbers (that should never23//! be quoted with `NonNumeric`) just always disable this flag.24//!25//! So we have three possibilities:26//!27//! 1. A serializer that never quotes. This is a bare serializer with `QUOTE_NON_NULL = false`.28//! 2. A serializer that always quotes. This is a serializer wrapped with `quote_serializer()`,29//! but also with `QUOTE_NON_NULL = false`.30//! 3. A serializer that quotes only non-nulls. This is a bare serializer with `QUOTE_NON_NULL = true`.3132use std::fmt::LowerExp;33use std::io::Write;3435use arrow::array::{Array, BooleanArray, Float16Array, NullArray, PrimitiveArray, Utf8ViewArray};36use arrow::legacy::time_zone::Tz;37use arrow::types::NativeType;38#[cfg(feature = "timezones")]39use chrono::TimeZone;40use memchr::{memchr_iter, memchr3};41use num_traits::NumCast;42use polars_core::prelude::*;43use polars_utils::float16::pf16;4445use crate::csv::write::{QuoteStyle, SerializeOptions};4647const TOO_MANY_MSG: &str = "too many items requested from CSV serializer";48const ARRAY_MISMATCH_MSG: &str = "wrong array type";4950#[allow(dead_code)]51struct IgnoreFmt;52impl std::fmt::Write for IgnoreFmt {53fn write_str(&mut self, _s: &str) -> std::fmt::Result {54Ok(())55}56}5758pub(super) trait Serializer<'a> {59fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions);60}6162fn make_serializer<'a, T, I: Iterator<Item = Option<T>>, const QUOTE_NON_NULL: bool>(63f: impl FnMut(T, &mut Vec<u8>, &SerializeOptions),64iter: I,65) -> impl Serializer<'a> {66struct SerializerImpl<F, I, const QUOTE_NON_NULL: bool> {67f: F,68iter: I,69}7071impl<'a, T, F, I, const QUOTE_NON_NULL: bool> Serializer<'a>72for SerializerImpl<F, I, QUOTE_NON_NULL>73where74F: FnMut(T, &mut Vec<u8>, &SerializeOptions),75I: Iterator<Item = Option<T>>,76{77fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {78let item = self.iter.next().expect(TOO_MANY_MSG);79match item {80Some(item) => {81if QUOTE_NON_NULL {82buf.push(options.quote_char);83}84(self.f)(item, buf, options);85if QUOTE_NON_NULL {86buf.push(options.quote_char);87}88},89None => buf.extend_from_slice(options.null.as_bytes()),90}91}92}9394SerializerImpl::<_, _, QUOTE_NON_NULL> { f, iter }95}9697fn integer_serializer<I: NativeType + itoa::Integer>(98array: &PrimitiveArray<I>,99) -> impl Serializer<'_> {100let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {101let mut buffer = itoa::Buffer::new();102let value = buffer.format(item);103buf.extend_from_slice(value.as_bytes());104};105106make_serializer::<_, _, false>(f, array.iter())107}108109fn float_serializer_no_precision_autoformat_f16(array: &Float16Array) -> impl Serializer<'_> {110let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {111let mut buffer = zmij::Buffer::new();112let cast: f32 = NumCast::from(item).unwrap();113let value = buffer.format(cast);114buf.extend_from_slice(value.as_bytes());115};116float_serializer_no_precision_autoformat_(array, f)117}118119fn float_serializer_no_precision_autoformat<I: NativeType + zmij::Float>(120array: &PrimitiveArray<I>,121) -> impl Serializer<'_> {122let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {123let mut buffer = zmij::Buffer::new();124let value = buffer.format(item);125buf.extend_from_slice(value.as_bytes());126};127float_serializer_no_precision_autoformat_(array, f)128}129130fn float_serializer_no_precision_autoformat_<131'a,132I: NativeType,133F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),134>(135array: &'a PrimitiveArray<I>,136f: F,137) -> impl Serializer<'a> {138make_serializer::<_, _, false>(f, array.iter())139}140141fn float_serializer_no_precision_autoformat_decimal_comma_f16(142array: &Float16Array,143) -> impl Serializer<'_> {144let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {145let mut buffer = zmij::Buffer::new();146let cast: f32 = NumCast::from(item).unwrap();147let value = buffer.format(cast);148149for ch in value.as_bytes() {150buf.push(if *ch == b'.' { b',' } else { *ch });151}152};153float_serializer_no_precision_autoformat_decimal_comma_(array, f)154}155156fn float_serializer_no_precision_autoformat_decimal_comma<I: NativeType + zmij::Float>(157array: &PrimitiveArray<I>,158) -> impl Serializer<'_> {159let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {160let mut buffer = zmij::Buffer::new();161let value = buffer.format(item).as_bytes();162163for ch in value {164buf.push(if *ch == b'.' { b',' } else { *ch });165}166};167float_serializer_no_precision_autoformat_decimal_comma_(array, f)168}169170fn float_serializer_no_precision_autoformat_decimal_comma_<171'a,172I: NativeType,173F: Fn(&'a I, &mut Vec<u8>, &SerializeOptions),174>(175array: &'a PrimitiveArray<I>,176f: F,177) -> impl Serializer<'a> {178make_serializer::<_, _, false>(f, array.iter())179}180181fn float_serializer_no_precision_scientific<I: NativeType + LowerExp>(182array: &PrimitiveArray<I>,183) -> impl Serializer<'_> {184let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {185// Float writing into a buffer of `Vec<u8>` cannot fail.186let _ = write!(buf, "{item:.e}");187};188189make_serializer::<_, _, false>(f, array.iter())190}191192fn float_serializer_no_precision_scientific_decimal_comma<I: NativeType + LowerExp>(193array: &PrimitiveArray<I>,194) -> impl Serializer<'_> {195let mut scratch = Vec::new();196197let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {198// Float writing into a buffer of `Vec<u8>` cannot fail.199let _ = write!(&mut scratch, "{item:.e}");200for c in &mut scratch {201if *c == b'.' {202*c = b',';203break;204}205}206buf.extend_from_slice(&scratch);207};208209make_serializer::<_, _, false>(f, array.iter())210}211212fn float_serializer_no_precision_positional<I: NativeType + NumCast>(213array: &PrimitiveArray<I>,214) -> impl Serializer<'_> {215let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {216let v: f64 = NumCast::from(item).unwrap();217let _ = write!(buf, "{v}");218};219220make_serializer::<_, _, false>(f, array.iter())221}222223fn float_serializer_no_precision_positional_decimal_comma<I: NativeType + NumCast>(224array: &PrimitiveArray<I>,225) -> impl Serializer<'_> {226let mut scratch = Vec::new();227228let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {229scratch.clear();230let v: f64 = NumCast::from(item).unwrap();231let _ = write!(&mut scratch, "{v}");232for c in &mut scratch {233if *c == b'.' {234*c = b',';235break;236}237}238buf.extend_from_slice(&scratch);239};240241make_serializer::<_, _, false>(f, array.iter())242}243244fn float_serializer_with_precision_scientific<I: NativeType + LowerExp>(245array: &PrimitiveArray<I>,246precision: usize,247) -> impl Serializer<'_> {248let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {249// Float writing into a buffer of `Vec<u8>` cannot fail.250let _ = write!(buf, "{item:.precision$e}");251};252253make_serializer::<_, _, false>(f, array.iter())254}255256fn float_serializer_with_precision_scientific_decimal_comma<I: NativeType + LowerExp>(257array: &PrimitiveArray<I>,258precision: usize,259) -> impl Serializer<'_> {260let mut scratch = Vec::new();261262let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {263scratch.clear();264// Float writing into a buffer of `Vec<u8>` cannot fail.265let _ = write!(&mut scratch, "{item:.precision$e}");266for c in &mut scratch {267if *c == b'.' {268*c = b',';269break;270}271}272buf.extend_from_slice(&scratch);273};274275make_serializer::<_, _, false>(f, array.iter())276}277278fn float_serializer_with_precision_positional<I: NativeType>(279array: &PrimitiveArray<I>,280precision: usize,281) -> impl Serializer<'_> {282let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {283// Float writing into a buffer of `Vec<u8>` cannot fail.284let _ = write!(buf, "{item:.precision$}");285};286287make_serializer::<_, _, false>(f, array.iter())288}289290fn float_serializer_with_precision_positional_decimal_comma<I: NativeType>(291array: &PrimitiveArray<I>,292precision: usize,293) -> impl Serializer<'_> {294let mut scratch = Vec::new();295296let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {297scratch.clear();298let _ = write!(&mut scratch, "{item:.precision$}");299for c in &mut scratch {300if *c == b'.' {301*c = b',';302break;303}304}305buf.extend_from_slice(&scratch);306};307308make_serializer::<_, _, false>(f, array.iter())309}310311fn null_serializer(_array: &NullArray) -> impl Serializer<'_> {312struct NullSerializer;313impl<'a> Serializer<'a> for NullSerializer {314fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {315buf.extend_from_slice(options.null.as_bytes());316}317}318NullSerializer319}320321fn bool_serializer<const QUOTE_NON_NULL: bool>(array: &BooleanArray) -> impl Serializer<'_> {322let f = move |item, buf: &mut Vec<u8>, _options: &SerializeOptions| {323let s = if item { "true" } else { "false" };324buf.extend_from_slice(s.as_bytes());325};326327make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter())328}329330#[cfg(feature = "dtype-decimal")]331fn decimal_serializer(array: &PrimitiveArray<i128>, scale: usize) -> impl Serializer<'_> {332let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros();333334let mut fmt_buf = polars_compute::decimal::DecimalFmtBuffer::new();335let f = move |&item, buf: &mut Vec<u8>, options: &SerializeOptions| {336buf.extend_from_slice(337fmt_buf338.format_dec128(item, scale, trim_zeros, options.decimal_comma)339.as_bytes(),340);341};342343make_serializer::<_, _, false>(f, array.iter())344}345346#[cfg(any(347feature = "dtype-date",348feature = "dtype-time",349feature = "dtype-datetime"350))]351fn callback_serializer<'a, T: NativeType, const QUOTE_NON_NULL: bool>(352array: &'a PrimitiveArray<T>,353mut callback: impl FnMut(T, &mut Vec<u8>) + 'a,354) -> impl Serializer<'a> {355let f = move |&item, buf: &mut Vec<u8>, _options: &SerializeOptions| {356callback(item, buf);357};358359make_serializer::<_, _, QUOTE_NON_NULL>(f, array.iter())360}361362#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]363type ChronoFormatIter<'a, 'b> = std::slice::Iter<'a, chrono::format::Item<'b>>;364365#[cfg(any(feature = "dtype-date", feature = "dtype-time"))]366fn date_and_time_serializer<'a, Underlying: NativeType, T: std::fmt::Display>(367format_str: Option<&'a str>,368description: &str,369array: &'a dyn Array,370sample_value: T,371mut convert: impl FnMut(Underlying) -> T + Send + 'a,372mut format_fn: impl for<'b> FnMut(373&T,374ChronoFormatIter<'b, 'a>,375) -> chrono::format::DelayedFormat<ChronoFormatIter<'b, 'a>>376+ Send377+ 'a,378options: &SerializeOptions,379) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {380let array = array.as_any().downcast_ref().unwrap();381let serializer = match format_str {382Some(format_str) => {383let format = chrono::format::StrftimeItems::new(format_str).parse().map_err(384|_| polars_err!(ComputeError: "cannot format {description} with format '{format_str}'"),385)?;386use std::fmt::Write;387// Fail fast for invalid format. This return error faster to the user, and allows us to not return388// `Result` from `serialize()`.389write!(IgnoreFmt, "{}", format_fn(&sample_value, format.iter())).map_err(390|_| polars_err!(ComputeError: "cannot format {description} with format '{format_str}'"),391)?;392let callback = move |item, buf: &mut Vec<u8>| {393let item = convert(item);394// We checked the format is valid above.395let _ = write!(buf, "{}", format_fn(&item, format.iter()));396};397date_and_time_final_serializer(array, callback, options)398},399None => {400let callback = move |item, buf: &mut Vec<u8>| {401let item = convert(item);402// Formatting dates into `Vec<u8>` cannot fail.403let _ = write!(buf, "{item}");404};405date_and_time_final_serializer(array, callback, options)406},407};408Ok(serializer)409}410411#[cfg(any(412feature = "dtype-date",413feature = "dtype-time",414feature = "dtype-datetime"415))]416fn date_and_time_final_serializer<'a, T: NativeType>(417array: &'a PrimitiveArray<T>,418callback: impl FnMut(T, &mut Vec<u8>) + Send + 'a,419options: &SerializeOptions,420) -> Box<dyn Serializer<'a> + Send + 'a> {421match options.quote_style {422QuoteStyle::Always => Box::new(quote_serializer(callback_serializer::<T, false>(423array, callback,424))) as Box<dyn Serializer + Send>,425QuoteStyle::NonNumeric => Box::new(callback_serializer::<T, true>(array, callback)),426_ => Box::new(callback_serializer::<T, false>(array, callback)),427}428}429430pub(super) fn string_serializer<'a, Iter: Send + 'a>(431mut f: impl FnMut(&mut Iter) -> Option<&str> + Send + 'a,432options: &SerializeOptions,433mut update: impl FnMut(&'a dyn Array) -> Iter + Send + 'a,434array: &'a dyn Array,435) -> Box<dyn Serializer<'a> + 'a + Send> {436const LF: u8 = b'\n';437const CR: u8 = b'\r';438439struct StringSerializer<F, Iter> {440serialize: F,441iter: Iter,442}443444impl<'a, F, Iter> Serializer<'a> for StringSerializer<F, Iter>445where446F: FnMut(&mut Iter, &mut Vec<u8>, &SerializeOptions),447{448fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {449(self.serialize)(&mut self.iter, buf, options);450}451}452453fn serialize_str_escaped(buf: &mut Vec<u8>, s: &[u8], quote_char: u8, quoted: bool) {454let mut iter = memchr_iter(quote_char, s);455let first_quote = iter.next();456match first_quote {457None => buf.extend_from_slice(s),458Some(mut quote_pos) => {459if !quoted {460buf.push(quote_char);461}462let mut start_pos = 0;463loop {464buf.extend_from_slice(&s[start_pos..quote_pos]);465buf.extend_from_slice(&[quote_char, quote_char]);466match iter.next() {467Some(quote) => {468start_pos = quote_pos + 1;469quote_pos = quote;470},471None => {472buf.extend_from_slice(&s[quote_pos + 1..]);473break;474},475}476}477if !quoted {478buf.push(quote_char);479}480},481}482}483484let iter = update(array);485match options.quote_style {486QuoteStyle::Always => {487let serialize =488move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {489let quote_char = options.quote_char;490buf.push(quote_char);491let Some(s) = f(iter) else {492buf.extend_from_slice(options.null.as_bytes());493buf.push(quote_char);494return;495};496serialize_str_escaped(buf, s.as_bytes(), quote_char, true);497buf.push(quote_char);498};499Box::new(StringSerializer { serialize, iter })500},501QuoteStyle::NonNumeric => {502let serialize =503move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {504let Some(s) = f(iter) else {505buf.extend_from_slice(options.null.as_bytes());506return;507};508let quote_char = options.quote_char;509buf.push(quote_char);510serialize_str_escaped(buf, s.as_bytes(), quote_char, true);511buf.push(quote_char);512};513Box::new(StringSerializer { serialize, iter })514},515QuoteStyle::Necessary => {516let serialize =517move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {518let Some(s) = f(iter) else {519buf.extend_from_slice(options.null.as_bytes());520return;521};522let quote_char = options.quote_char;523// An empty string conflicts with null, so it is necessary to quote.524if s.is_empty() {525buf.extend_from_slice(&[quote_char, quote_char]);526return;527}528let needs_quote = memchr3(options.separator, LF, CR, s.as_bytes()).is_some();529if needs_quote {530buf.push(quote_char);531}532serialize_str_escaped(buf, s.as_bytes(), quote_char, needs_quote);533if needs_quote {534buf.push(quote_char);535}536};537Box::new(StringSerializer { serialize, iter })538},539QuoteStyle::Never => {540let serialize =541move |iter: &mut Iter, buf: &mut Vec<u8>, options: &SerializeOptions| {542let Some(s) = f(iter) else {543buf.extend_from_slice(options.null.as_bytes());544return;545};546buf.extend_from_slice(s.as_bytes());547};548Box::new(StringSerializer { serialize, iter })549},550}551}552553fn quote_serializer<'a>(serializer: impl Serializer<'a>) -> impl Serializer<'a> {554struct QuoteSerializer<S>(S);555impl<'a, S: Serializer<'a>> Serializer<'a> for QuoteSerializer<S> {556fn serialize(&mut self, buf: &mut Vec<u8>, options: &SerializeOptions) {557buf.push(options.quote_char);558self.0.serialize(buf, options);559buf.push(options.quote_char);560}561}562QuoteSerializer(serializer)563}564565pub(super) fn serializer_for<'a>(566array: &'a dyn Array,567options: &'a SerializeOptions,568dtype: &'a DataType,569_datetime_format: &'a str,570_time_zone: Option<Tz>,571) -> PolarsResult<Box<dyn Serializer<'a> + Send + 'a>> {572// The needs_quotes flag captures the quote logic for the quote_wrapper! macro573// It is targeted at numerical types primarily; other types may required additional logic574let needs_quotes = match dtype {575DataType::Float16 | DataType::Float32 | DataType::Float64 => {576// When comma is used as both the field separator and decimal separator, quoting577// may be required. Specifically, when:578// - quote_style is Always, or579// - quote_style is Necessary or Non-Numeric, the field separator is also a comma,580// and the float string field contains a comma character (no precision or precision > 0)581//582// In some rare cases, a field may get quoted when it is not strictly necessary583// (e.g., in scientific notation when only the first digit is non-zero such as '1e12',584// or null values in 'non_numeric' quote_style).585586let mut should_quote = options.decimal_comma && options.separator == b',';587if let Some(precision) = options.float_precision {588should_quote &= precision > 0;589}590591match options.quote_style {592QuoteStyle::Always => true,593QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,594QuoteStyle::Never => false,595}596},597#[cfg(feature = "dtype-decimal")]598DataType::Decimal(_, scale) => {599// Similar to logic for float data-types, but need to consider scale rather than precision600let should_quote = options.decimal_comma && options.separator == b',' && *scale > 0;601602match options.quote_style {603QuoteStyle::Always => true,604QuoteStyle::Necessary | QuoteStyle::NonNumeric => should_quote,605QuoteStyle::Never => false,606}607},608_ => options.quote_style == QuoteStyle::Always,609};610611macro_rules! quote_wrapper {612($make_serializer:path, $($arg:tt)*) => {{613let serializer = $make_serializer(array.as_any().downcast_ref().unwrap(), $($arg)*);614if needs_quotes {615Box::new(quote_serializer(serializer)) as Box<dyn Serializer + Send>616} else {617Box::new(serializer)618}619}};620($make_serializer:path) => { quote_wrapper!($make_serializer,) };621}622623let serializer = match dtype {624DataType::Int8 => quote_wrapper!(integer_serializer::<i8>),625DataType::UInt8 => quote_wrapper!(integer_serializer::<u8>),626DataType::Int16 => quote_wrapper!(integer_serializer::<i16>),627DataType::UInt16 => quote_wrapper!(integer_serializer::<u16>),628DataType::Int32 => quote_wrapper!(integer_serializer::<i32>),629DataType::UInt32 => quote_wrapper!(integer_serializer::<u32>),630DataType::Int64 => quote_wrapper!(integer_serializer::<i64>),631DataType::UInt64 => quote_wrapper!(integer_serializer::<u64>),632DataType::Int128 => quote_wrapper!(integer_serializer::<i128>),633DataType::UInt128 => quote_wrapper!(integer_serializer::<u128>),634DataType::Float16 => {635match (636options.decimal_comma,637options.float_precision,638options.float_scientific,639) {640// standard decimal separator (period)641(false, Some(precision), Some(true)) => {642quote_wrapper!(643float_serializer_with_precision_scientific::<pf16>,644precision645)646},647(false, Some(precision), _) => {648quote_wrapper!(649float_serializer_with_precision_positional::<pf16>,650precision651)652},653(false, None, Some(true)) => {654quote_wrapper!(float_serializer_no_precision_scientific::<pf16>)655},656(false, None, Some(false)) => {657quote_wrapper!(float_serializer_no_precision_positional::<pf16>)658},659(false, None, None) => {660quote_wrapper!(float_serializer_no_precision_autoformat_f16)661},662663// comma as the decimal separator664(true, Some(precision), Some(true)) => quote_wrapper!(665float_serializer_with_precision_scientific_decimal_comma::<pf16>,666precision667),668(true, Some(precision), _) => quote_wrapper!(669float_serializer_with_precision_positional_decimal_comma::<pf16>,670precision671),672(true, None, Some(true)) => {673quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<pf16>)674},675(true, None, Some(false)) => {676quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<pf16>)677},678(true, None, None) => {679quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma_f16)680},681}682},683DataType::Float32 => {684match (685options.decimal_comma,686options.float_precision,687options.float_scientific,688) {689// standard decimal separator (period)690(false, Some(precision), Some(true)) => {691quote_wrapper!(float_serializer_with_precision_scientific::<f32>, precision)692},693(false, Some(precision), _) => {694quote_wrapper!(float_serializer_with_precision_positional::<f32>, precision)695},696(false, None, Some(true)) => {697quote_wrapper!(float_serializer_no_precision_scientific::<f32>)698},699(false, None, Some(false)) => {700quote_wrapper!(float_serializer_no_precision_positional::<f32>)701},702(false, None, None) => {703quote_wrapper!(float_serializer_no_precision_autoformat::<f32>)704},705706// comma as the decimal separator707(true, Some(precision), Some(true)) => quote_wrapper!(708float_serializer_with_precision_scientific_decimal_comma::<f32>,709precision710),711(true, Some(precision), _) => quote_wrapper!(712float_serializer_with_precision_positional_decimal_comma::<f32>,713precision714),715(true, None, Some(true)) => {716quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f32>)717},718(true, None, Some(false)) => {719quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f32>)720},721(true, None, None) => {722quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f32>)723},724}725},726DataType::Float64 => {727match (728options.decimal_comma,729options.float_precision,730options.float_scientific,731) {732// standard decimal separator (period)733(false, Some(precision), Some(true)) => {734quote_wrapper!(float_serializer_with_precision_scientific::<f64>, precision)735},736(false, Some(precision), _) => {737quote_wrapper!(float_serializer_with_precision_positional::<f64>, precision)738},739(false, None, Some(true)) => {740quote_wrapper!(float_serializer_no_precision_scientific::<f64>)741},742(false, None, Some(false)) => {743quote_wrapper!(float_serializer_no_precision_positional::<f64>)744},745(false, None, None) => {746quote_wrapper!(float_serializer_no_precision_autoformat::<f64>)747},748749// comma as the decimal separator750(true, Some(precision), Some(true)) => quote_wrapper!(751float_serializer_with_precision_scientific_decimal_comma::<f64>,752precision753),754(true, Some(precision), _) => quote_wrapper!(755float_serializer_with_precision_positional_decimal_comma::<f64>,756precision757),758(true, None, Some(true)) => {759quote_wrapper!(float_serializer_no_precision_scientific_decimal_comma::<f64>)760},761(true, None, Some(false)) => {762quote_wrapper!(float_serializer_no_precision_positional_decimal_comma::<f64>)763},764(true, None, None) => {765quote_wrapper!(float_serializer_no_precision_autoformat_decimal_comma::<f64>)766},767}768},769DataType::Null => quote_wrapper!(null_serializer),770DataType::Boolean => {771let array = array.as_any().downcast_ref().unwrap();772match options.quote_style {773QuoteStyle::Always => Box::new(quote_serializer(bool_serializer::<false>(array)))774as Box<dyn Serializer + Send>,775QuoteStyle::NonNumeric => Box::new(bool_serializer::<true>(array)),776_ => Box::new(bool_serializer::<false>(array)),777}778},779#[cfg(feature = "dtype-date")]780DataType::Date => date_and_time_serializer(781options.date_format.as_deref(),782"NaiveDate",783array,784chrono::NaiveDate::MAX,785arrow::temporal_conversions::date32_to_date,786|date, items| date.format_with_items(items),787options,788)?,789#[cfg(feature = "dtype-time")]790DataType::Time => date_and_time_serializer(791Some(options.time_format.as_deref().unwrap_or("%T%.9f")),792"NaiveTime",793array,794chrono::NaiveTime::MIN,795arrow::temporal_conversions::time64ns_to_time,796|time, items| time.format_with_items(items),797options,798)?,799#[cfg(feature = "dtype-datetime")]800DataType::Datetime(time_unit, _) => {801let format = chrono::format::StrftimeItems::new(_datetime_format)802.parse()803.map_err(|_| {804polars_err!(805ComputeError: "cannot format {} with format '{_datetime_format}'",806if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },807)808})?;809use std::fmt::Write;810let sample_datetime = match _time_zone {811#[cfg(feature = "timezones")]812Some(time_zone) => time_zone813.from_utc_datetime(&chrono::NaiveDateTime::MAX)814.format_with_items(format.iter()),815#[cfg(not(feature = "timezones"))]816Some(_) => panic!("activate 'timezones' feature"),817None => chrono::NaiveDateTime::MAX.format_with_items(format.iter()),818};819// Fail fast for invalid format. This return error faster to the user, and allows us to not return820// `Result` from `serialize()`.821write!(IgnoreFmt, "{sample_datetime}").map_err(|_| {822polars_err!(823ComputeError: "cannot format {} with format '{_datetime_format}'",824if _time_zone.is_some() { "DateTime" } else { "NaiveDateTime" },825)826})?;827828let array = array.as_any().downcast_ref().unwrap();829830macro_rules! time_unit_serializer {831($convert:ident) => {832match _time_zone {833#[cfg(feature = "timezones")]834Some(time_zone) => {835let callback = move |item, buf: &mut Vec<u8>| {836let item = arrow::temporal_conversions::$convert(item);837let item = time_zone.from_utc_datetime(&item);838// We checked the format is valid above.839let _ = write!(buf, "{}", item.format_with_items(format.iter()));840};841date_and_time_final_serializer(array, callback, options)842},843#[cfg(not(feature = "timezones"))]844Some(_) => panic!("activate 'timezones' feature"),845None => {846let callback = move |item, buf: &mut Vec<u8>| {847let item = arrow::temporal_conversions::$convert(item);848// We checked the format is valid above.849let _ = write!(buf, "{}", item.format_with_items(format.iter()));850};851date_and_time_final_serializer(array, callback, options)852},853}854};855}856857match time_unit {858TimeUnit::Nanoseconds => time_unit_serializer!(timestamp_ns_to_datetime),859TimeUnit::Microseconds => time_unit_serializer!(timestamp_us_to_datetime),860TimeUnit::Milliseconds => time_unit_serializer!(timestamp_ms_to_datetime),861}862},863DataType::String => string_serializer(864|iter| Iterator::next(iter).expect(TOO_MANY_MSG),865options,866|arr| {867arr.as_any()868.downcast_ref::<Utf8ViewArray>()869.expect(ARRAY_MISMATCH_MSG)870.iter()871},872array,873),874#[cfg(feature = "dtype-categorical")]875DataType::Categorical(_, mapping) | DataType::Enum(_, mapping) => {876polars_core::with_match_categorical_physical_type!(dtype.cat_physical().unwrap(), |$C| {877string_serializer(878|iter| {879let &idx: &<$C as PolarsCategoricalType>::Native = Iterator::next(iter).expect(TOO_MANY_MSG)?;880Some(unsafe { mapping.cat_to_str_unchecked(idx.as_cat()) })881},882options,883|arr| {884arr.as_any()885.downcast_ref::<PrimitiveArray<<$C as PolarsCategoricalType>::Native>>()886.expect(ARRAY_MISMATCH_MSG)887.iter()888},889array,890)891})892},893#[cfg(feature = "dtype-decimal")]894DataType::Decimal(_, scale) => {895quote_wrapper!(decimal_serializer, *scale)896},897_ => {898polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.")899},900};901Ok(serializer)902}903904#[cfg(test)]905mod test {906use arrow::array::NullArray;907use polars_core::prelude::ArrowDataType;908909use super::string_serializer;910use crate::csv::write::options::{QuoteStyle, SerializeOptions};911912// It is the most complex serializer with most edge cases, it definitely needs a comprehensive test.913#[test]914fn test_string_serializer() {915#[track_caller]916fn check_string_serialization(options: &SerializeOptions, s: Option<&str>, expected: &str) {917let fake_array = NullArray::new(ArrowDataType::Null, 0);918let mut serializer = string_serializer(|s| *s, options, |_| s, &fake_array);919let mut buf = Vec::new();920serializer.serialize(&mut buf, options);921let serialized = std::str::from_utf8(&buf).unwrap();922// Don't use `assert_eq!()` because it prints debug format and it's hard to read with all the escapes.923if serialized != expected {924panic!(925"CSV string {s:?} wasn't serialized correctly: expected: `{expected}`, got: `{serialized}`"926);927}928}929930let always_quote = SerializeOptions {931quote_style: QuoteStyle::Always,932..SerializeOptions::default()933};934check_string_serialization(&always_quote, None, r#""""#);935check_string_serialization(&always_quote, Some(""), r#""""#);936check_string_serialization(&always_quote, Some("a"), r#""a""#);937check_string_serialization(&always_quote, Some("\""), r#""""""#);938check_string_serialization(&always_quote, Some("a\"\"b"), r#""a""""b""#);939940let necessary_quote = SerializeOptions {941quote_style: QuoteStyle::Necessary,942..SerializeOptions::default()943};944check_string_serialization(&necessary_quote, None, r#""#);945check_string_serialization(&necessary_quote, Some(""), r#""""#);946check_string_serialization(&necessary_quote, Some("a"), r#"a"#);947check_string_serialization(&necessary_quote, Some("\""), r#""""""#);948check_string_serialization(&necessary_quote, Some("a\"\"b"), r#""a""""b""#);949check_string_serialization(&necessary_quote, Some("a b"), r#"a b"#);950check_string_serialization(&necessary_quote, Some("a,b"), r#""a,b""#);951check_string_serialization(&necessary_quote, Some("a\nb"), "\"a\nb\"");952check_string_serialization(&necessary_quote, Some("a\rb"), "\"a\rb\"");953954let never_quote = SerializeOptions {955quote_style: QuoteStyle::Never,956..SerializeOptions::default()957};958check_string_serialization(&never_quote, None, "");959check_string_serialization(&never_quote, Some(""), "");960check_string_serialization(&never_quote, Some("a"), "a");961check_string_serialization(&never_quote, Some("\""), "\"");962check_string_serialization(&never_quote, Some("a\"\"b"), "a\"\"b");963check_string_serialization(&never_quote, Some("a b"), "a b");964check_string_serialization(&never_quote, Some("a,b"), "a,b");965check_string_serialization(&never_quote, Some("a\nb"), "a\nb");966check_string_serialization(&never_quote, Some("a\rb"), "a\rb");967968let non_numeric_quote = SerializeOptions {969quote_style: QuoteStyle::NonNumeric,970..SerializeOptions::default()971};972check_string_serialization(&non_numeric_quote, None, "");973check_string_serialization(&non_numeric_quote, Some(""), r#""""#);974check_string_serialization(&non_numeric_quote, Some("a"), r#""a""#);975check_string_serialization(&non_numeric_quote, Some("\""), r#""""""#);976check_string_serialization(&non_numeric_quote, Some("a\"\"b"), r#""a""""b""#);977check_string_serialization(&non_numeric_quote, Some("a b"), r#""a b""#);978check_string_serialization(&non_numeric_quote, Some("a,b"), r#""a,b""#);979check_string_serialization(&non_numeric_quote, Some("a\nb"), "\"a\nb\"");980check_string_serialization(&non_numeric_quote, Some("a\rb"), "\"a\rb\"");981}982}983984985