/* Path: blob/main/sys/contrib/zstd/programs/lorem.c -- 289023 views */
/*1* Copyright (c) Meta Platforms, Inc. and affiliates.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/910/* Implementation notes:11*12* This is a very simple lorem ipsum generator13* which features a static list of words14* and print them one after another randomly15* with a fake sentence / paragraph structure.16*17* The goal is to generate a printable text18* that can be used to fake a text compression scenario.19* The resulting compression / ratio curve of the lorem ipsum generator20* is more satisfying than the previous statistical generator,21* which was initially designed for entropy compression,22* and lacks a regularity more representative of text.23*24* The compression ratio achievable on the generated lorem ipsum25* is still a bit too good, presumably because the dictionary is a bit too26* small. It would be possible to create some more complex scheme, notably by27* enlarging the dictionary with a word generator, and adding grammatical rules28* (composition) and syntax rules. 
But that's probably overkill for the intended29* goal.30*/3132#include "lorem.h"33#include <assert.h>34#include <limits.h> /* INT_MAX */35#include <string.h> /* memcpy */3637#define WORD_MAX_SIZE 203839/* Define the word pool */40static const char* kWords[] = {41"lorem", "ipsum", "dolor", "sit", "amet",42"consectetur", "adipiscing", "elit", "sed", "do",43"eiusmod", "tempor", "incididunt", "ut", "labore",44"et", "dolore", "magna", "aliqua", "dis",45"lectus", "vestibulum", "mattis", "ullamcorper", "velit",46"commodo", "a", "lacus", "arcu", "magnis",47"parturient", "montes", "nascetur", "ridiculus", "mus",48"mauris", "nulla", "malesuada", "pellentesque", "eget",49"gravida", "in", "dictum", "non", "erat",50"nam", "voluptat", "maecenas", "blandit", "aliquam",51"etiam", "enim", "lobortis", "scelerisque", "fermentum",52"dui", "faucibus", "ornare", "at", "elementum",53"eu", "facilisis", "odio", "morbi", "quis",54"eros", "donec", "ac", "orci", "purus",55"turpis", "cursus", "leo", "vel", "porta",56"consequat", "interdum", "varius", "vulputate", "aliquet",57"pharetra", "nunc", "auctor", "urna", "id",58"metus", "viverra", "nibh", "cras", "mi",59"unde", "omnis", "iste", "natus", "error",60"perspiciatis", "voluptatem", "accusantium", "doloremque", "laudantium",61"totam", "rem", "aperiam", "eaque", "ipsa",62"quae", "ab", "illo", "inventore", "veritatis",63"quasi", "architecto", "beatae", "vitae", "dicta",64"sunt", "explicabo", "nemo", "ipsam", "quia",65"voluptas", "aspernatur", "aut", "odit", "fugit",66"consequuntur", "magni", "dolores", "eos", "qui",67"ratione", "sequi", "nesciunt", "neque", "porro",68"quisquam", "est", "dolorem", "adipisci", "numquam",69"eius", "modi", "tempora", "incidunt", "magnam",70"quaerat", "ad", "minima", "veniam", "nostrum",71"ullam", "corporis", "suscipit", "laboriosam", "nisi",72"aliquid", "ex", "ea", "commodi", "consequatur",73"autem", "eum", "iure", "voluptate", "esse",74"quam", "nihil", "molestiae", "illum", "fugiat",75"quo", "pariatur", "vero", 
"accusamus", "iusto",76"dignissimos", "ducimus", "blanditiis", "praesentium", "voluptatum",77"deleniti", "atque", "corrupti", "quos", "quas",78"molestias", "excepturi", "sint", "occaecati", "cupiditate",79"provident", "similique", "culpa", "officia", "deserunt",80"mollitia", "animi", "laborum", "dolorum", "fuga",81"harum", "quidem", "rerum", "facilis", "expedita",82"distinctio", "libero", "tempore", "cum", "soluta",83"nobis", "eligendi", "optio", "cumque", "impedit",84"minus", "quod", "maxime", "placeat", "facere",85"possimus", "assumenda", "repellendus", "temporibus", "quibusdam",86"officiis", "debitis", "saepe", "eveniet", "voluptates",87"repudiandae", "recusandae", "itaque", "earum", "hic",88"tenetur", "sapiente", "delectus", "reiciendis", "cillum",89"maiores", "alias", "perferendis", "doloribus", "asperiores",90"repellat", "minim", "nostrud", "exercitation", "ullamco",91"laboris", "aliquip", "duis", "aute", "irure",92};93static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);9495/* simple 1-dimension distribution, based on word's length, favors small words96*/97static const int kWeights[] = { 0, 8, 6, 4, 3, 2 };98static const size_t kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);99100#define DISTRIB_SIZE_MAX 650101static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };102static unsigned g_distribCount = 0;103104static void countFreqs(105const char* words[],106size_t nbWords,107const int* weights,108size_t nbWeights)109{110unsigned total = 0;111size_t w;112for (w = 0; w < nbWords; w++) {113size_t len = strlen(words[w]);114int lmax;115if (len >= nbWeights)116len = nbWeights - 1;117lmax = weights[len];118total += (unsigned)lmax;119}120g_distribCount = total;121assert(g_distribCount <= DISTRIB_SIZE_MAX);122}123124static void init_word_distrib(125const char* words[],126size_t nbWords,127const int* weights,128size_t nbWeights)129{130size_t w, d = 0;131countFreqs(words, nbWords, weights, nbWeights);132for (w = 0; w < nbWords; w++) {133size_t len = 
strlen(words[w]);134int l, lmax;135if (len >= nbWeights)136len = nbWeights - 1;137lmax = weights[len];138for (l = 0; l < lmax; l++) {139g_distrib[d++] = (int)w;140}141}142}143144/* Note: this unit only works when invoked sequentially.145* No concurrent access is allowed */146static char* g_ptr = NULL;147static size_t g_nbChars = 0;148static size_t g_maxChars = 10000000;149static unsigned g_randRoot = 0;150151#define RDG_rotl32(x, r) ((x << r) | (x >> (32 - r)))152static unsigned LOREM_rand(unsigned range)153{154static const unsigned prime1 = 2654435761U;155static const unsigned prime2 = 2246822519U;156unsigned rand32 = g_randRoot;157rand32 *= prime1;158rand32 ^= prime2;159rand32 = RDG_rotl32(rand32, 13);160g_randRoot = rand32;161return (unsigned)(((unsigned long long)rand32 * range) >> 32);162}163164static void writeLastCharacters(void)165{166size_t lastChars = g_maxChars - g_nbChars;167assert(g_maxChars >= g_nbChars);168if (lastChars == 0)169return;170g_ptr[g_nbChars++] = '.';171if (lastChars > 2) {172memset(g_ptr + g_nbChars, ' ', lastChars - 2);173}174if (lastChars > 1) {175g_ptr[g_maxChars - 1] = '\n';176}177g_nbChars = g_maxChars;178}179180static void generateWord(const char* word, const char* separator, int upCase)181{182size_t const len = strlen(word) + strlen(separator);183if (g_nbChars + len > g_maxChars) {184writeLastCharacters();185return;186}187memcpy(g_ptr + g_nbChars, word, strlen(word));188if (upCase) {189static const char toUp = 'A' - 'a';190g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);191}192g_nbChars += strlen(word);193memcpy(g_ptr + g_nbChars, separator, strlen(separator));194g_nbChars += strlen(separator);195}196197static int about(unsigned target)198{199return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);200}201202/* Function to generate a random sentence */203static void generateSentence(int nbWords)204{205int commaPos = about(9);206int comma2 = commaPos + about(7);207int qmark = (LOREM_rand(11) == 7);208const char* endSep = qmark 
? "? " : ". ";209int i;210for (i = 0; i < nbWords; i++) {211int const wordID = g_distrib[LOREM_rand(g_distribCount)];212const char* const word = kWords[wordID];213const char* sep = " ";214if (i == commaPos)215sep = ", ";216if (i == comma2)217sep = ", ";218if (i == nbWords - 1)219sep = endSep;220generateWord(word, sep, i == 0);221}222}223224static void generateParagraph(int nbSentences)225{226int i;227for (i = 0; i < nbSentences; i++) {228int wordsPerSentence = about(11);229generateSentence(wordsPerSentence);230}231if (g_nbChars < g_maxChars) {232g_ptr[g_nbChars++] = '\n';233}234if (g_nbChars < g_maxChars) {235g_ptr[g_nbChars++] = '\n';236}237}238239/* It's "common" for lorem ipsum generators to start with the same first240* pre-defined sentence */241static void generateFirstSentence(void)242{243int i;244for (i = 0; i < 18; i++) {245const char* word = kWords[i];246const char* separator = " ";247if (i == 4)248separator = ", ";249if (i == 7)250separator = ", ";251generateWord(word, separator, i == 0);252}253generateWord(kWords[18], ". ", 0);254}255256size_t257LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)258{259g_ptr = (char*)buffer;260assert(size < INT_MAX);261g_maxChars = size;262g_nbChars = 0;263g_randRoot = seed;264if (g_distribCount == 0) {265init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);266}267268if (first) {269generateFirstSentence();270}271while (g_nbChars < g_maxChars) {272int sentencePerParagraph = about(7);273generateParagraph(sentencePerParagraph);274if (!fill)275break; /* only generate one paragraph in not-fill mode */276}277g_ptr = NULL;278return g_nbChars;279}280281void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)282{283LOREM_genBlock(buffer, size, seed, 1, 1);284}285286287