Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/zstd/programs/lorem.c
289023 views
1
/*
2
* Copyright (c) Meta Platforms, Inc. and affiliates.
3
* All rights reserved.
4
*
5
* This source code is licensed under both the BSD-style license (found in the
6
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
* in the COPYING file in the root directory of this source tree).
8
* You may select, at your option, one of the above-listed licenses.
9
*/
10
11
/* Implementation notes:
12
*
13
* This is a very simple lorem ipsum generator
14
* which features a static list of words
15
* and print them one after another randomly
16
* with a fake sentence / paragraph structure.
17
*
18
* The goal is to generate a printable text
19
* that can be used to fake a text compression scenario.
20
* The resulting compression / ratio curve of the lorem ipsum generator
21
* is more satisfying than the previous statistical generator,
22
* which was initially designed for entropy compression,
23
* and lacks a regularity more representative of text.
24
*
25
* The compression ratio achievable on the generated lorem ipsum
26
* is still a bit too good, presumably because the dictionary is a bit too
27
* small. It would be possible to create some more complex scheme, notably by
28
* enlarging the dictionary with a word generator, and adding grammatical rules
29
* (composition) and syntax rules. But that's probably overkill for the intended
30
* goal.
31
*/
32
33
#include "lorem.h"
34
#include <assert.h>
35
#include <limits.h> /* INT_MAX */
36
#include <string.h> /* memcpy */
37
38
#define WORD_MAX_SIZE 20
39
40
/* Define the word pool */
41
static const char* kWords[] = {
42
"lorem", "ipsum", "dolor", "sit", "amet",
43
"consectetur", "adipiscing", "elit", "sed", "do",
44
"eiusmod", "tempor", "incididunt", "ut", "labore",
45
"et", "dolore", "magna", "aliqua", "dis",
46
"lectus", "vestibulum", "mattis", "ullamcorper", "velit",
47
"commodo", "a", "lacus", "arcu", "magnis",
48
"parturient", "montes", "nascetur", "ridiculus", "mus",
49
"mauris", "nulla", "malesuada", "pellentesque", "eget",
50
"gravida", "in", "dictum", "non", "erat",
51
"nam", "voluptat", "maecenas", "blandit", "aliquam",
52
"etiam", "enim", "lobortis", "scelerisque", "fermentum",
53
"dui", "faucibus", "ornare", "at", "elementum",
54
"eu", "facilisis", "odio", "morbi", "quis",
55
"eros", "donec", "ac", "orci", "purus",
56
"turpis", "cursus", "leo", "vel", "porta",
57
"consequat", "interdum", "varius", "vulputate", "aliquet",
58
"pharetra", "nunc", "auctor", "urna", "id",
59
"metus", "viverra", "nibh", "cras", "mi",
60
"unde", "omnis", "iste", "natus", "error",
61
"perspiciatis", "voluptatem", "accusantium", "doloremque", "laudantium",
62
"totam", "rem", "aperiam", "eaque", "ipsa",
63
"quae", "ab", "illo", "inventore", "veritatis",
64
"quasi", "architecto", "beatae", "vitae", "dicta",
65
"sunt", "explicabo", "nemo", "ipsam", "quia",
66
"voluptas", "aspernatur", "aut", "odit", "fugit",
67
"consequuntur", "magni", "dolores", "eos", "qui",
68
"ratione", "sequi", "nesciunt", "neque", "porro",
69
"quisquam", "est", "dolorem", "adipisci", "numquam",
70
"eius", "modi", "tempora", "incidunt", "magnam",
71
"quaerat", "ad", "minima", "veniam", "nostrum",
72
"ullam", "corporis", "suscipit", "laboriosam", "nisi",
73
"aliquid", "ex", "ea", "commodi", "consequatur",
74
"autem", "eum", "iure", "voluptate", "esse",
75
"quam", "nihil", "molestiae", "illum", "fugiat",
76
"quo", "pariatur", "vero", "accusamus", "iusto",
77
"dignissimos", "ducimus", "blanditiis", "praesentium", "voluptatum",
78
"deleniti", "atque", "corrupti", "quos", "quas",
79
"molestias", "excepturi", "sint", "occaecati", "cupiditate",
80
"provident", "similique", "culpa", "officia", "deserunt",
81
"mollitia", "animi", "laborum", "dolorum", "fuga",
82
"harum", "quidem", "rerum", "facilis", "expedita",
83
"distinctio", "libero", "tempore", "cum", "soluta",
84
"nobis", "eligendi", "optio", "cumque", "impedit",
85
"minus", "quod", "maxime", "placeat", "facere",
86
"possimus", "assumenda", "repellendus", "temporibus", "quibusdam",
87
"officiis", "debitis", "saepe", "eveniet", "voluptates",
88
"repudiandae", "recusandae", "itaque", "earum", "hic",
89
"tenetur", "sapiente", "delectus", "reiciendis", "cillum",
90
"maiores", "alias", "perferendis", "doloribus", "asperiores",
91
"repellat", "minim", "nostrud", "exercitation", "ullamco",
92
"laboris", "aliquip", "duis", "aute", "irure",
93
};
94
static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
95
96
/* simple 1-dimension distribution, based on word's length, favors small words
97
*/
98
static const int kWeights[] = { 0, 8, 6, 4, 3, 2 };
99
static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
100
101
#define DISTRIB_SIZE_MAX 650
102
static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
103
static unsigned g_distribCount = 0;
104
105
static void countFreqs(
106
const char* words[],
107
size_t nbWords,
108
const int* weights,
109
size_t nbWeights)
110
{
111
unsigned total = 0;
112
size_t w;
113
for (w = 0; w < nbWords; w++) {
114
size_t len = strlen(words[w]);
115
int lmax;
116
if (len >= nbWeights)
117
len = nbWeights - 1;
118
lmax = weights[len];
119
total += (unsigned)lmax;
120
}
121
g_distribCount = total;
122
assert(g_distribCount <= DISTRIB_SIZE_MAX);
123
}
124
125
static void init_word_distrib(
126
const char* words[],
127
size_t nbWords,
128
const int* weights,
129
size_t nbWeights)
130
{
131
size_t w, d = 0;
132
countFreqs(words, nbWords, weights, nbWeights);
133
for (w = 0; w < nbWords; w++) {
134
size_t len = strlen(words[w]);
135
int l, lmax;
136
if (len >= nbWeights)
137
len = nbWeights - 1;
138
lmax = weights[len];
139
for (l = 0; l < lmax; l++) {
140
g_distrib[d++] = (int)w;
141
}
142
}
143
}
144
145
/* Note: this unit only works when invoked sequentially.
146
* No concurrent access is allowed */
147
static char* g_ptr = NULL;
148
static size_t g_nbChars = 0;
149
static size_t g_maxChars = 10000000;
150
static unsigned g_randRoot = 0;
151
152
#define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))
153
static unsigned LOREM_rand(unsigned range)
154
{
155
static const unsigned prime1 = 2654435761U;
156
static const unsigned prime2 = 2246822519U;
157
unsigned rand32 = g_randRoot;
158
rand32 *= prime1;
159
rand32 ^= prime2;
160
rand32 = RDG_rotl32(rand32, 13);
161
g_randRoot = rand32;
162
return (unsigned)(((unsigned long long)rand32 * range) >> 32);
163
}
164
165
static void writeLastCharacters(void)
166
{
167
size_t lastChars = g_maxChars - g_nbChars;
168
assert(g_maxChars >= g_nbChars);
169
if (lastChars == 0)
170
return;
171
g_ptr[g_nbChars++] = '.';
172
if (lastChars > 2) {
173
memset(g_ptr + g_nbChars, ' ', lastChars - 2);
174
}
175
if (lastChars > 1) {
176
g_ptr[g_maxChars - 1] = '\n';
177
}
178
g_nbChars = g_maxChars;
179
}
180
181
static void generateWord(const char* word, const char* separator, int upCase)
182
{
183
size_t const len = strlen(word) + strlen(separator);
184
if (g_nbChars + len > g_maxChars) {
185
writeLastCharacters();
186
return;
187
}
188
memcpy(g_ptr + g_nbChars, word, strlen(word));
189
if (upCase) {
190
static const char toUp = 'A' - 'a';
191
g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
192
}
193
g_nbChars += strlen(word);
194
memcpy(g_ptr + g_nbChars, separator, strlen(separator));
195
g_nbChars += strlen(separator);
196
}
197
198
static int about(unsigned target)
199
{
200
return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
201
}
202
203
/* Function to generate a random sentence */
204
static void generateSentence(int nbWords)
205
{
206
int commaPos = about(9);
207
int comma2 = commaPos + about(7);
208
int qmark = (LOREM_rand(11) == 7);
209
const char* endSep = qmark ? "? " : ". ";
210
int i;
211
for (i = 0; i < nbWords; i++) {
212
int const wordID = g_distrib[LOREM_rand(g_distribCount)];
213
const char* const word = kWords[wordID];
214
const char* sep = " ";
215
if (i == commaPos)
216
sep = ", ";
217
if (i == comma2)
218
sep = ", ";
219
if (i == nbWords - 1)
220
sep = endSep;
221
generateWord(word, sep, i == 0);
222
}
223
}
224
225
static void generateParagraph(int nbSentences)
226
{
227
int i;
228
for (i = 0; i < nbSentences; i++) {
229
int wordsPerSentence = about(11);
230
generateSentence(wordsPerSentence);
231
}
232
if (g_nbChars < g_maxChars) {
233
g_ptr[g_nbChars++] = '\n';
234
}
235
if (g_nbChars < g_maxChars) {
236
g_ptr[g_nbChars++] = '\n';
237
}
238
}
239
240
/* It's "common" for lorem ipsum generators to start with the same first
241
* pre-defined sentence */
242
static void generateFirstSentence(void)
243
{
244
int i;
245
for (i = 0; i < 18; i++) {
246
const char* word = kWords[i];
247
const char* separator = " ";
248
if (i == 4)
249
separator = ", ";
250
if (i == 7)
251
separator = ", ";
252
generateWord(word, separator, i == 0);
253
}
254
generateWord(kWords[18], ". ", 0);
255
}
256
257
size_t
258
LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
259
{
260
g_ptr = (char*)buffer;
261
assert(size < INT_MAX);
262
g_maxChars = size;
263
g_nbChars = 0;
264
g_randRoot = seed;
265
if (g_distribCount == 0) {
266
init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
267
}
268
269
if (first) {
270
generateFirstSentence();
271
}
272
while (g_nbChars < g_maxChars) {
273
int sentencePerParagraph = about(7);
274
generateParagraph(sentencePerParagraph);
275
if (!fill)
276
break; /* only generate one paragraph in not-fill mode */
277
}
278
g_ptr = NULL;
279
return g_nbChars;
280
}
281
282
void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
283
{
284
LOREM_genBlock(buffer, size, seed, 1, 1);
285
}
286
287