Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libast/misc/recfmt.c
1810 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1985-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* David Korn <[email protected]> *
19
* Phong Vo <[email protected]> *
20
* *
21
***********************************************************************/
22
#pragma prototyped
23
24
/*
 * determine record format by sampling data in <buf,size>
 * total is the total file size, <=0 if not available
 * return r:
 *	-1				could not determine
 *	RECTYPE(r)==REC_fixed		fixed length REC_F_SIZE(r)
 *	RECTYPE(r)==REC_delimited	variable length delimiter=REC_D_DELIMITER(r)
 *	RECTYPE(r)==REC_variable	variable length
 */
33
34
#include <recfmt.h>
35
36
/*
 * scratch tables for the fixed length record scan in recfmt()
 */

typedef struct
{
	unsigned	rep[4096];		/* rep[d]: times a byte recurred d positions after its previous occurrence */
	unsigned	hit[1 + UCHAR_MAX];	/* hit[c]: sample index where byte value c was last seen */
} Sample_t;
41
42
Recfmt_t
43
recfmt(const void* buf, size_t size, off_t total)
44
{
45
register unsigned char* s;
46
register unsigned char* t;
47
register Sample_t* q;
48
register unsigned int* h;
49
register unsigned int i;
50
unsigned int j;
51
unsigned int k;
52
unsigned int n;
53
unsigned int m;
54
unsigned int x;
55
unsigned long f;
56
unsigned long g;
57
58
static unsigned char terminators[] = { '\n', 0x15, 0x25 };
59
60
/*
61
* check for V format
62
*/
63
64
s = (unsigned char*)buf;
65
t = s + size;
66
while ((k = (t - s)) >= 4 && !s[2] && !s[3])
67
{
68
if ((i = (s[0]<<8)|s[1]) > k)
69
break;
70
s += i;
71
}
72
if (!k || size > 2 * k)
73
return REC_V_TYPE(4, 0, 2, 0, 1);
74
s = (unsigned char*)buf;
75
76
/*
77
* check for terminated records
78
*/
79
80
for (i = 0; i < elementsof(terminators); i++)
81
if ((t = (unsigned char*)memchr((void*)s, k = terminators[i], size / 2)) && (n = t - s + 1) > 1 && (total <= 0 || !(total % n)))
82
{
83
for (j = n - 1; j < size; j += n)
84
if (s[j] != k)
85
{
86
n = 0;
87
break;
88
}
89
if (n)
90
return REC_D_TYPE(terminators[i]);
91
}
92
93
/*
94
* check fixed length record frequencies
95
*/
96
97
if (!(q = newof(0, Sample_t, 1, 0)))
98
return REC_N_TYPE();
99
x = 0;
100
for (i = 0; i < size; i++)
101
{
102
h = q->hit + s[i];
103
m = i - *h;
104
*h = i;
105
if (m < elementsof(q->rep))
106
{
107
if (m > x)
108
x = m;
109
q->rep[m]++;
110
}
111
}
112
n = 0;
113
m = 0;
114
f = ~0;
115
for (i = x; i > 1; i--)
116
{
117
if ((total <= 0 || !(total % i)) && q->rep[i] > q->rep[n])
118
{
119
m++;
120
g = 0;
121
for (j = i; j < size - i; j += i)
122
for (k = 0; k < i; k++)
123
if (s[j + k] != s[j + k - i])
124
g++;
125
g = (((g * 100) / i) * 100) / q->rep[i];
126
if (g <= f)
127
{
128
f = g;
129
n = i;
130
}
131
}
132
}
133
if (m <= 1 && n <= 2 && total > 1 && total < 256)
134
{
135
n = 0;
136
for (i = 0; i < size; i++)
137
for (j = 0; j < elementsof(terminators); j++)
138
if (s[i] == terminators[j])
139
n++;
140
n = n ? 0 : total;
141
}
142
free(q);
143
return n ? REC_F_TYPE(n) : REC_N_TYPE();
144
}
145
146
#if MAIN
147
148
main()
149
{
150
void* s;
151
size_t size;
152
off_t total;
153
154
if (!(s = sfreserve(sfstdin, SF_UNBOUND, 0)))
155
{
156
sfprintf(sfstderr, "read error\n");
157
return 1;
158
}
159
size = sfvalue(sfstdin);
160
total = sfsize(sfstdin);
161
sfprintf(sfstdout, "%d\n", recfmt(s, size, total));
162
return 0;
163
}
164
165
#endif
166
167