/*-
 * Copyright (c) 2019 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse CSV object serialization format (RFC-4180, RFC-7111)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_csv.c,v 1.15 2024/05/18 15:16:13 christos Exp $")
#endif

#include <string.h>
#include "magic.h"
#else
#define CAST(a, b) ((a)(b))
#include <sys/types.h>
#endif


#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
#else
#define DPRINTF(fmt, ...)
#endif

/*
 * if CSV_LINES == 0:
 *	check all the lines in the buffer
 * otherwise:
 *	check only up-to the number of lines specified
 *
 * Only lines terminated by a newline character are counted, so a
 * trailing partial line at the end of the buffer is always ignored.
 */
#ifndef CSV_LINES
#define CSV_LINES 10
#endif

static int csv_parse(const unsigned char *, const unsigned char *);

/*
 * Skip the remainder of a quoted field.
 *
 * uc points just past the opening double quote, ue is the end of the
 * buffer.  A pair of consecutive double quotes inside the field is the
 * escape for a literal quote and does not terminate it.
 *
 * Returns a pointer to the first character following the closing quote,
 * or ue when the buffer ends first (unterminated field, or the closing
 * quote is the last byte of the buffer).
 */
static const unsigned char *
eatquote(const unsigned char *uc, const unsigned char *ue)
{
	int quote = 0;

	while (uc < ue) {
		unsigned char c = *uc++;
		if (c != '"') {
			// We already got one, done.
			if (quote) {
				// Un-read c so the caller sees the delimiter
				return --uc;
			}
			continue;
		}
		if (quote) {
			// quote-quote escapes
			quote = 0;
			continue;
		}
		// first quote
		quote = 1;
	}
	return ue;
}

/*
 * Decide whether the buffer [uc, ue) looks like CSV.
 *
 * Commas outside of quoted fields are counted per line.  The count of
 * the first line becomes the reference; a first line without any comma,
 * or a later newline-terminated line with a different count, rejects
 * the buffer.  When CSV_LINES is non-zero the verdict is delivered at
 * the CSV_LINES-th newline without looking at the rest of the buffer.
 *
 * Returns 1 when the reference count is greater than one (i.e. at least
 * three columns) and either the CSV_LINES-th line matched it or the
 * buffer ended after at least two complete lines; returns 0 otherwise.
 */
static int
csv_parse(const unsigned char *uc, const unsigned char *ue)
{
	// nf: commas on the current line, tf: commas on the first line,
	// nl: newline-terminated lines seen so far
	size_t nf = 0, tf = 0, nl = 0;

	while (uc < ue) {
		switch (*uc++) {
		case '"':
			// Eat until the matching quote
			uc = eatquote(uc, ue);
			break;
		case ',':
			nf++;
			break;
		case '\n':
			DPRINTF("%zu %zu %zu\n", nl, nf, tf);
			nl++;
#if CSV_LINES
			if (nl == CSV_LINES)
				return tf > 1 && tf == nf;
#endif
			if (tf == 0) {
				// First time and no fields, give up
				if (nf == 0)
					return 0;
				// First time, set the number of fields
				tf = nf;
			} else if (tf != nf) {
				// Field number mismatch, we are done.
				return 0;
			}
			nf = 0;
			break;
		default:
			break;
		}
	}
	return tf > 1 && nl >= 2;
}

#ifndef TEST
/*
 * Report the buffer as CSV text if csv_parse() accepts it.
 *
 * Returns 0 when the buffer is not text, when MAGIC_APPLE or
 * MAGIC_EXTENSION output was requested, or when the contents do not
 * parse as CSV.  Returns 1 on a match: nothing is printed when only the
 * MIME encoding was requested, "text/csv" is printed for the other MIME
 * modes, and "CSV <code> text" otherwise (code is omitted when NULL).
 * Returns -1 when file_printf() fails.
 */
int
file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text,
    const char *code)
{
	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
	const unsigned char *ue = uc + b->flen;
	int mime = ms->flags & MAGIC_MIME;

	if (!looks_text)
		return 0;

	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
		return 0;

	if (!csv_parse(uc, ue))
		return 0;

	if (mime == MAGIC_MIME_ENCODING)
		return 1;

	if (mime) {
		if (file_printf(ms, "text/csv") == -1)
			return -1;
		return 1;
	}

	if (file_printf(ms, "CSV %s%stext", code ? code : "",
	    code ? " " : "") == -1)
		return -1;

	return 1;
}

#else

/*
 * Stand-alone test driver.  Define CSV_TEST_NO_MAIN in addition to TEST
 * to compile only the parser, e.g. when this file is included from a
 * unit-test harness that provides its own main().
 */
#ifndef CSV_TEST_NO_MAIN

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <err.h>

/*
 * Read the file named by argv[1] into memory and print the verdict of
 * csv_parse() on its contents.
 */
int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	size_t len;

	// Without this check a missing argument passes NULL to open()
	if (argc != 2)
		errx(EXIT_FAILURE, "Usage: %s <file>",
		    argc > 0 && argv[0] != NULL ? argv[0] : "is_csv");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	len = CAST(size_t, st.st_size);

	// malloc(0) is allowed to return NULL; never ask for zero bytes so
	// that an empty file is not reported as an allocation failure
	if ((p = CAST(unsigned char *, malloc(len ? len : 1))) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);
	if (read(fd, p, len) != CAST(ssize_t, len))
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	printf("is csv %d\n", csv_parse(p, p + len));

	free(p);
	(void)close(fd);
	return 0;
}

#endif /* !CSV_TEST_NO_MAIN */
#endif