Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/lib/libpz/pzfixed.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1998-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
22
/*
 * determine fixed record size by sampling data
 */
25
26
#include "pzlib.h"
27
28
/*
 * sampling state used by pzfixed()
 */

typedef struct
{
	/* rep[d]: number of sampled bytes whose previous occurrence was d bytes earlier */
	unsigned int	rep[4 * 1024];
	/* hit[c]: sample offset at which byte value c was last seen (0 until first seen) */
	unsigned int	hit[UCHAR_MAX + 1];
} Fixed_t;
33
34
/*
 * determine fixed record size by sampling data
 * if buf!=0 then <buf,z> is used
 * otherwise data is peeked from io
 * return:
 *	>0 best guess from sample
 *	 0 could not determine
 *	<0 error
 */
43
44
ssize_t
45
pzfixed(Pz_t* pz, register Sfio_t* io, void* buf, size_t z)
46
{
47
register unsigned char* s;
48
register Fixed_t* xp;
49
register unsigned int* cp;
50
register unsigned int i;
51
unsigned char* t;
52
unsigned int j;
53
unsigned int k;
54
unsigned int n;
55
unsigned int m;
56
unsigned int max;
57
unsigned long f;
58
unsigned long g;
59
Sfoff_t siz;
60
Error_f trace;
61
62
trace = pz && error_info.trace <= -2 ? pz->disc->errorf : 0;
63
siz = pz && (pz->flags & PZ_POP) ? (Sfoff_t)0 : sfsize(io);
64
if (buf)
65
s = (unsigned char*)buf;
66
else if (!(s = sfreserve(io, 8 * elementsof(xp->rep), 1)) && !(s = sfreserve(io, SF_UNBOUND, 1)))
67
return -1;
68
else
69
z = sfvalue(io);
70
if (trace)
71
(*trace)(pz, pz->disc, -2, "pzfixed: siz=%I*d buf=%p z=%I*u", sizeof(siz), siz, buf, sizeof(z), z);
72
73
/*
74
* first check for newline terminated
75
*/
76
77
if ((t = (unsigned char*)memchr((void*)s, '\n', z / 2)) && (n = t - s + 1) > 1)
78
{
79
if (siz > 0 && siz % n)
80
n = 0;
81
else
82
for (i = n - 1; i < z; i += n)
83
if (s[i] != '\n')
84
{
85
n = 0;
86
break;
87
}
88
if (n && trace)
89
(*trace)(pz, pz->disc, -2, "pzfixed: newline terminated %u byte records", n);
90
}
91
else
92
n = 0;
93
if (!n && (xp = newof(0, Fixed_t, 1, 0)))
94
{
95
if (trace)
96
(*trace)(pz, pz->disc, -2, "pzfixed: LEN REP BEST FREQ");
97
max = 0;
98
for (i = 0; i < z; i++)
99
{
100
cp = xp->hit + s[i];
101
m = i - *cp;
102
*cp = i;
103
if (m < elementsof(xp->rep))
104
{
105
if (m > max)
106
max = m;
107
xp->rep[m]++;
108
}
109
}
110
n = 0;
111
m = 0;
112
f = ~0;
113
for (i = max; i > 1; i--)
114
{
115
if ((siz <= 0 || !(siz % i)) && xp->rep[i] > xp->rep[n])
116
{
117
m++;
118
g = 0;
119
for (j = i; j < z - i; j += i)
120
for (k = 0; k < i; k++)
121
if (s[j + k] != s[j + k - i])
122
g++;
123
g = (((g * 100) / i) * 100) / xp->rep[i];
124
if (trace)
125
(*trace)(pz, pz->disc, -2, "pzfixed: %5d %8d %8ld %8ld%s", i, xp->rep[i], f, g, (g <= f) ? " *" : "");
126
if (g <= f)
127
{
128
f = g;
129
n = i;
130
}
131
}
132
}
133
if (m <= 1 && n <= 2 && siz > 0 && siz < 256)
134
n = siz;
135
free(xp);
136
}
137
138
/*
139
* release the peek data
140
*/
141
142
if (!buf)
143
sfread(io, s, 0);
144
return n;
145
}
146
147