Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/file/src/is_csv.c
39478 views
1
/*-
2
* Copyright (c) 2019 Christos Zoulas
3
* All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
7
* are met:
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
*
14
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24
* POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
/*
28
* Parse CSV object serialization format (RFC-4180, RFC-7111)
29
*/
30
31
#ifndef TEST
32
#include "file.h"
33
34
#ifndef lint
35
FILE_RCSID("@(#)$File: is_csv.c,v 1.15 2024/05/18 15:16:13 christos Exp $")
36
#endif
37
38
#include <string.h>
39
#include "magic.h"
40
#else
41
#define CAST(a, b) ((a)(b))
42
#include <sys/types.h>
43
#endif
44
45
46
#ifdef DEBUG
47
#include <stdio.h>
48
#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
49
#else
50
#define DPRINTF(fmt, ...)
51
#endif
52
53
/*
 * if CSV_LINES == 0:
 *	check all the lines in the buffer
 * otherwise:
 *	check only up to the number of lines specified
 *
 * a trailing line that is not terminated by a newline ('\n') is never
 * counted as a line, and its field count is never checked
 */
61
#ifndef CSV_LINES
62
#define CSV_LINES 10
63
#endif
64
65
/* Forward declaration; csv_parse() is defined further down in this file. */
static int csv_parse(const unsigned char *, const unsigned char *);
66
67
/*
 * Skip the remainder of a quoted field.
 *
 * uc points just past the opening quote and ue is one past the last byte
 * of the buffer.  A doubled quote ("") inside the field is an escaped
 * quote and does not terminate it.
 *
 * Returns a pointer to the first byte following the closing quote, or ue
 * if the buffer is exhausted before such a byte is found.
 */
static const unsigned char *
eatquote(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;
	int pending = 0;	/* previous byte was a not-yet-paired quote */

	for (p = uc; p < ue; p++) {
		if (*p == '"') {
			/*
			 * A lone quote may be the closing one; a second
			 * quote right after it turns the pair into an escape.
			 */
			pending = !pending;
		} else if (pending) {
			/* The unpaired quote before this byte closed the field. */
			return p;
		}
	}
	return ue;
}
91
92
/*
 * Decide whether the bytes in [uc, ue) look like CSV.
 *
 * Commas outside of quoted fields are counted per line.  The count of the
 * first line becomes the reference; every later line that ends in '\n'
 * must have the same count.  Returns 0 as soon as the first line turns
 * out to have no commas or a later line disagrees with the reference.
 *
 * When CSV_LINES is non-zero, parsing stops at the CSV_LINES-th newline
 * and the result is non-zero only if the reference count is greater than
 * one and that line matches it.  Otherwise the whole buffer is scanned
 * and the result is non-zero only if the reference count is greater than
 * one and at least two newline-terminated lines were seen.
 */
static int
csv_parse(const unsigned char *uc, const unsigned char *ue)
{
	size_t cur_commas = 0;	/* commas seen on the line being scanned */
	size_t ref_commas = 0;	/* commas on the first line; 0 = not set yet */
	size_t lines = 0;	/* newline-terminated lines seen so far */

	while (uc < ue) {
		const unsigned char ch = *uc++;

		if (ch == '"') {
			// Jump past the quoted field so that its commas and
			// newlines are not interpreted.
			uc = eatquote(uc, ue);
			continue;
		}
		if (ch == ',') {
			cur_commas++;
			continue;
		}
		if (ch != '\n')
			continue;

		// End of a line: compare it against the reference.
		DPRINTF("%zu %zu %zu\n", lines, cur_commas, ref_commas);
		lines++;
#if CSV_LINES
		if (lines == CSV_LINES)
			return ref_commas > 1 && ref_commas == cur_commas;
#endif
		if (ref_commas != 0) {
			// A later line disagrees with the first one.
			if (ref_commas != cur_commas)
				return 0;
		} else {
			// No reference yet; a line without commas ends it.
			if (cur_commas == 0)
				return 0;
			ref_commas = cur_commas;
		}
		cur_commas = 0;
	}
	return ref_commas > 1 && lines >= 2;
}
131
132
#ifndef TEST
133
int
134
file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text,
135
const char *code)
136
{
137
const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
138
const unsigned char *ue = uc + b->flen;
139
int mime = ms->flags & MAGIC_MIME;
140
141
if (!looks_text)
142
return 0;
143
144
if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
145
return 0;
146
147
if (!csv_parse(uc, ue))
148
return 0;
149
150
if (mime == MAGIC_MIME_ENCODING)
151
return 1;
152
153
if (mime) {
154
if (file_printf(ms, "text/csv") == -1)
155
return -1;
156
return 1;
157
}
158
159
if (file_printf(ms, "CSV %s%stext", code ? code : "",
160
code ? " " : "") == -1)
161
return -1;
162
163
return 1;
164
}
165
166
#else
167
168
#include <sys/types.h>
169
#include <sys/stat.h>
170
#include <stdio.h>
171
#include <fcntl.h>
172
#include <unistd.h>
173
#include <stdlib.h>
174
#include <stdint.h>
175
#include <err.h>
176
177
int
178
main(int argc, char *argv[])
179
{
180
int fd;
181
struct stat st;
182
unsigned char *p;
183
184
if ((fd = open(argv[1], O_RDONLY)) == -1)
185
err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
186
187
if (fstat(fd, &st) == -1)
188
err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
189
190
if ((p = CAST(unsigned char *, malloc(st.st_size))) == NULL)
191
err(EXIT_FAILURE, "Can't allocate %jd bytes",
192
(intmax_t)st.st_size);
193
if (read(fd, p, st.st_size) != st.st_size)
194
err(EXIT_FAILURE, "Can't read %jd bytes",
195
(intmax_t)st.st_size);
196
printf("is csv %d\n", csv_parse(p, p + st.st_size));
197
return 0;
198
}
199
#endif
200
201