Path: blob/main/sys/contrib/zstd/programs/zstdcli.c
48254 views
/*1* Copyright (c) Yann Collet, Facebook, Inc.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/91011/*-************************************12* Tuning parameters13**************************************/14#ifndef ZSTDCLI_CLEVEL_DEFAULT15# define ZSTDCLI_CLEVEL_DEFAULT 316#endif1718#ifndef ZSTDCLI_CLEVEL_MAX19# define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */20#endif2122#ifndef ZSTDCLI_NBTHREADS_DEFAULT23# define ZSTDCLI_NBTHREADS_DEFAULT 124#endif2526/*-************************************27* Dependencies28**************************************/29#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */30#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */31#include <stdlib.h> /* getenv */32#include <string.h> /* strcmp, strlen */33#include <stdio.h> /* fprintf(), stdin, stdout, stderr */34#include <errno.h> /* errno */35#include <assert.h> /* assert */3637#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */38#ifndef ZSTD_NOBENCH39# include "benchzstd.h" /* BMK_benchFiles */40#endif41#ifndef ZSTD_NODICT42# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */43#endif44#ifndef ZSTD_NOTRACE45# include "zstdcli_trace.h"46#endif47#include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */484950/*-************************************51* Constants52**************************************/53#define COMPRESSOR_NAME "zstd command line interface"54#ifndef ZSTD_VERSION55# define ZSTD_VERSION "v" ZSTD_VERSION_STRING56#endif57#define AUTHOR "Yann Collet"58#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR5960#define ZSTD_ZSTDMT "zstdmt"61#define ZSTD_UNZSTD 
"unzstd"62#define ZSTD_CAT "zstdcat"63#define ZSTD_ZCAT "zcat"64#define ZSTD_GZ "gzip"65#define ZSTD_GUNZIP "gunzip"66#define ZSTD_GZCAT "gzcat"67#define ZSTD_LZMA "lzma"68#define ZSTD_UNLZMA "unlzma"69#define ZSTD_XZ "xz"70#define ZSTD_UNXZ "unxz"71#define ZSTD_LZ4 "lz4"72#define ZSTD_UNLZ4 "unlz4"7374#define KB *(1 <<10)75#define MB *(1 <<20)76#define GB *(1U<<30)7778#define DISPLAY_LEVEL_DEFAULT 27980static const char* g_defaultDictName = "dictionary";81static const unsigned g_defaultMaxDictSize = 110 KB;82static const int g_defaultDictCLevel = 3;83static const unsigned g_defaultSelectivityLevel = 9;84static const unsigned g_defaultMaxWindowLog = 27;85#define OVERLAP_LOG_DEFAULT 999986#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */87static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;88static U32 g_ldmHashLog = 0;89static U32 g_ldmMinMatch = 0;90static U32 g_ldmHashRateLog = LDM_PARAM_DEFAULT;91static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;929394#define DEFAULT_ACCEL 19596typedef enum { cover, fastCover, legacy } dictType;9798/*-************************************99* Display Macros100**************************************/101#define DISPLAY_F(f, ...) fprintf((f), __VA_ARGS__)102#define DISPLAYOUT(...) DISPLAY_F(stdout, __VA_ARGS__)103#define DISPLAY(...) DISPLAY_F(stderr, __VA_ARGS__)104#define DISPLAYLEVEL(l, ...) 
{ if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }105static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */106107108/*-************************************109* Check Version (when CLI linked to dynamic library)110**************************************/111112/* Due to usage of experimental symbols and capabilities by the CLI,113* the CLI must be linked against a dynamic library of same version */114static void checkLibVersion(void)115{116if (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString())) {117DISPLAYLEVEL(1, "Error : incorrect library version (expecting : %s ; actual : %s ) \n",118ZSTD_VERSION_STRING, ZSTD_versionString());119DISPLAYLEVEL(1, "Please update library to version %s, or use stand-alone zstd binary \n",120ZSTD_VERSION_STRING);121exit(1);122}123}124125126/*-************************************127* Command Line128**************************************/129/* print help either in `stderr` or `stdout` depending on originating request130* error (badusage) => stderr131* help (usage_advanced) => stdout132*/133static void usage(FILE* f, const char* programName)134{135DISPLAY_F(f, "Usage : \n");136DISPLAY_F(f, " %s [args] [FILE(s)] [-o file] \n", programName);137DISPLAY_F(f, "\n");138DISPLAY_F(f, "FILE : a filename \n");139DISPLAY_F(f, " with no FILE, or when FILE is - , read standard input\n");140DISPLAY_F(f, "Arguments : \n");141#ifndef ZSTD_NOCOMPRESS142DISPLAY_F(f, " -# : # compression level (1-%d, default: %d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT);143#endif144#ifndef ZSTD_NODECOMPRESS145DISPLAY_F(f, " -d : decompression \n");146#endif147DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n");148DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n");149DISPLAY_F(f, " -f : disable input and output checks. 
Allows overwriting existing files,\n");150DISPLAY_F(f, " input from console, output to stdout, operating on links,\n");151DISPLAY_F(f, " block devices, etc.\n");152DISPLAY_F(f, "--rm : remove source file(s) after successful de/compression \n");153DISPLAY_F(f, " -k : preserve source file(s) (default) \n");154DISPLAY_F(f, " -h/-H : display help/long help and exit \n");155}156157static void usage_advanced(const char* programName)158{159DISPLAYOUT(WELCOME_MESSAGE);160usage(stdout, programName);161DISPLAYOUT( "\n");162DISPLAYOUT( "Advanced arguments : \n");163DISPLAYOUT( " -V : display Version number and exit \n");164165DISPLAYOUT( " -c : write to standard output (even if it is the console) \n");166167DISPLAYOUT( " -v : verbose mode; specify multiple times to increase verbosity \n");168DISPLAYOUT( " -q : suppress warnings; specify twice to suppress errors too \n");169DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n");170DISPLAYOUT( " note: any (de)compressed output to terminal will mix with progress counter text. \n");171172#ifdef UTIL_HAS_CREATEFILELIST173DISPLAYOUT( " -r : operate recursively on directories \n");174DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n");175DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n");176#endif177178#ifdef UTIL_HAS_MIRRORFILELIST179DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n");180#endif181182183#ifndef ZSTD_NOCOMPRESS184DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)");185#ifndef ZSTD_NODECOMPRESS186DISPLAYOUT( ". 
If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).");187#endif188#else189#ifdef ZSTD_NOCOMPRESS190DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate).");191#endif192#endif /* ZSTD_NOCOMPRESS */193194#ifndef ZSTD_NOTRACE195DISPLAYOUT( "\n");196DISPLAYOUT( "--trace FILE : log tracing information to FILE.");197#endif198DISPLAYOUT( "\n");199200DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n");201202#ifndef ZSTD_NOCOMPRESS203DISPLAYOUT( "\n");204DISPLAYOUT( "Advanced compression arguments : \n");205DISPLAYOUT( "--ultra : enable levels beyond %i, up to %i (requires more memory) \n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());206DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);207DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);208DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");209DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n");210DISPLAYOUT( "--patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. 
\n");211# ifdef ZSTD_MULTITHREAD212DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");213DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n");214DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n");215DISPLAYOUT( "--auto-threads={physical,logical} (default: physical} : use either physical cores or logical cores as default when specifying -T0 \n");216DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n");217# endif218DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n");219DISPLAYOUT( "--stream-size=# : specify size of streaming input from `stdin` \n");220DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n");221DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n");222DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n");223DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n");224225DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n");226#ifdef ZSTD_GZCOMPRESS227DISPLAYOUT( "--format=gzip : compress files to the .gz format \n");228#endif229#ifdef ZSTD_LZMACOMPRESS230DISPLAYOUT( "--format=xz : compress files to the .xz format \n");231DISPLAYOUT( "--format=lzma : compress files to the .lzma format \n");232#endif233#ifdef ZSTD_LZ4COMPRESS234DISPLAYOUT( "--format=lz4 : compress files to the .lz4 format \n");235#endif236#endif /* !ZSTD_NOCOMPRESS */237238#ifndef ZSTD_NODECOMPRESS239DISPLAYOUT( "\n");240DISPLAYOUT( "Advanced decompression arguments : \n");241DISPLAYOUT( " -l : print information about zstd compressed files \n");242DISPLAYOUT( "--test : test compressed file integrity \n");243DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n");244# if 
ZSTD_SPARSE_DEFAULT245DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n");246# else247DISPLAYOUT( "--[no-]sparse : sparse mode (default: disabled) \n");248# endif249#endif /* ZSTD_NODECOMPRESS */250251#ifndef ZSTD_NODICT252DISPLAYOUT( "\n");253DISPLAYOUT( "Dictionary builder : \n");254DISPLAYOUT( "--train ## : create a dictionary from a training set of files \n");255DISPLAYOUT( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args \n");256DISPLAYOUT( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args \n");257DISPLAYOUT( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u) \n", g_defaultSelectivityLevel);258DISPLAYOUT( " -o DICT : DICT is dictionary name (default: %s) \n", g_defaultDictName);259DISPLAYOUT( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);260DISPLAYOUT( "--dictID=# : force dictionary ID to specified value (default: random) \n");261#endif262263#ifndef ZSTD_NOBENCH264DISPLAYOUT( "\n");265DISPLAYOUT( "Benchmark arguments : \n");266DISPLAYOUT( " -b# : benchmark file(s), using # compression level (default: %d) \n", ZSTDCLI_CLEVEL_DEFAULT);267DISPLAYOUT( " -e# : test all compression levels successively from -b# to -e# (default: 1) \n");268DISPLAYOUT( " -i# : minimum evaluation time in seconds (default: 3s) \n");269DISPLAYOUT( " -B# : cut file into independent blocks of size # (default: no block) \n");270DISPLAYOUT( " -S : output one benchmark result per input file (default: consolidated result) \n");271DISPLAYOUT( "--priority=rt : set process priority to real-time \n");272#endif273274}275276static void badusage(const char* programName)277{278DISPLAYLEVEL(1, "Incorrect parameters \n");279if (g_displayLevel >= 2) usage(stderr, programName);280}281282static void waitEnter(void)283{284int unused;285DISPLAY("Press enter to continue... 
\n");286unused = getchar();287(void)unused;288}289290static const char* lastNameFromPath(const char* path)291{292const char* name = path;293if (strrchr(name, '/')) name = strrchr(name, '/') + 1;294if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */295return name;296}297298/*! exeNameMatch() :299@return : a non-zero value if exeName matches test, excluding the extension300*/301static int exeNameMatch(const char* exeName, const char* test)302{303return !strncmp(exeName, test, strlen(test)) &&304(exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.');305}306307static void errorOut(const char* msg)308{309DISPLAY("%s \n", msg); exit(1);310}311312/*! readU32FromCharChecked() :313* @return 0 if success, and store the result in *value.314* allows and interprets K, KB, KiB, M, MB and MiB suffix.315* Will also modify `*stringPtr`, advancing it to position where it stopped reading.316* @return 1 if an overflow error occurs */317static int readU32FromCharChecked(const char** stringPtr, unsigned* value)318{319unsigned result = 0;320while ((**stringPtr >='0') && (**stringPtr <='9')) {321unsigned const max = ((unsigned)(-1)) / 10;322unsigned last = result;323if (result > max) return 1; /* overflow error */324result *= 10;325result += (unsigned)(**stringPtr - '0');326if (result < last) return 1; /* overflow error */327(*stringPtr)++ ;328}329if ((**stringPtr=='K') || (**stringPtr=='M')) {330unsigned const maxK = ((unsigned)(-1)) >> 10;331if (result > maxK) return 1; /* overflow error */332result <<= 10;333if (**stringPtr=='M') {334if (result > maxK) return 1; /* overflow error */335result <<= 10;336}337(*stringPtr)++; /* skip `K` or `M` */338if (**stringPtr=='i') (*stringPtr)++;339if (**stringPtr=='B') (*stringPtr)++;340}341*value = result;342return 0;343}344345/*! 
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Note : function will exit() program if digit sequence overflows */
static unsigned readU32FromChar(const char** stringPtr) {
    static const char errorMsg[] = "error: numeric value overflows 32-bit unsigned int";
    unsigned result;
    if (readU32FromCharChecked(stringPtr, &result)) { errorOut(errorMsg); }
    return result;
}

/*! readIntFromChar() :
 * @return : signed integer value read from input in `char` format.
 *  An optional leading '-' selects a negative value.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Note : function will exit() program if digit sequence overflows,
 *         which includes any magnitude that does not fit in an `int`. */
static int readIntFromChar(const char** stringPtr) {
    static const char errorMsg[] = "error: numeric value overflows 32-bit int";
    /* largest magnitude accepted; equals INT_MAX on the usual two's-complement targets */
    unsigned const maxMagnitude = ((unsigned)(-1)) >> 1;
    int sign = 1;
    unsigned result;
    if (**stringPtr=='-') {
        (*stringPtr)++;
        sign = -1;
    }
    if (readU32FromCharChecked(stringPtr, &result)) { errorOut(errorMsg); }
    /* Converting a value above INT_MAX to int is implementation-defined and
     * used to yield a silently wrapped level : reject it instead. */
    if (result > maxMagnitude) { errorOut(errorMsg); }
    return (int) result * sign;
}
/*! readSizeTFromCharChecked() :
 * @return 0 if success, and store the result in *value.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  When no digit is present, succeeds with a value of 0.
 * @return 1 if an overflow error occurs; *value is then left untouched */
static int readSizeTFromCharChecked(const char** stringPtr, size_t* value)
{
    size_t const sizeMax = (size_t)(-1);
    size_t result = 0;

    /* decimal digits */
    while ((**stringPtr >= '0') && (**stringPtr <= '9')) {
        size_t const digit = (size_t)(**stringPtr - '0');
        /* result*10 + digit must not exceed sizeMax */
        if (result > (sizeMax - digit) / 10) return 1;   /* overflow error */
        result = result * 10 + digit;
        (*stringPtr)++;
    }

    /* optional binary multiplier : K[i][B] or M[i][B] */
    if ((**stringPtr == 'K') || (**stringPtr == 'M')) {
        size_t const maxK = sizeMax >> 10;
        if (result > maxK) return 1;   /* overflow error */
        result <<= 10;
        if (**stringPtr == 'M') {
            if (result > maxK) return 1;   /* overflow error */
            result <<= 10;
        }
        (*stringPtr)++;   /* skip `K` or `M` */
        if (**stringPtr == 'i') (*stringPtr)++;
        if (**stringPtr == 'B') (*stringPtr)++;
    }

    *value = result;
    return 0;
}
readSizeTFromChar() :408* @return : size_t value read from input in `char` format.409* allows and interprets K, KB, KiB, M, MB and MiB suffix.410* Will also modify `*stringPtr`, advancing it to position where it stopped reading.411* Note : function will exit() program if digit sequence overflows */412static size_t readSizeTFromChar(const char** stringPtr) {413static const char errorMsg[] = "error: numeric value overflows size_t";414size_t result;415if (readSizeTFromCharChecked(stringPtr, &result)) { errorOut(errorMsg); }416return result;417}418419/** longCommandWArg() :420* check if *stringPtr is the same as longCommand.421* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.422* @return 0 and doesn't modify *stringPtr otherwise.423*/424static int longCommandWArg(const char** stringPtr, const char* longCommand)425{426size_t const comSize = strlen(longCommand);427int const result = !strncmp(*stringPtr, longCommand, comSize);428if (result) *stringPtr += comSize;429return result;430}431432433#ifndef ZSTD_NODICT434435static const unsigned kDefaultRegression = 1;436/**437* parseCoverParameters() :438* reads cover parameters from *stringPtr (e.g. 
"--train-cover=k=48,d=8,steps=32") into *params439* @return 1 means that cover parameters were correct440* @return 0 in case of malformed parameters441*/442static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)443{444memset(params, 0, sizeof(*params));445for (; ;) {446if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }447if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }448if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }449if (longCommandWArg(&stringPtr, "split=")) {450unsigned splitPercentage = readU32FromChar(&stringPtr);451params->splitPoint = (double)splitPercentage / 100.0;452if (stringPtr[0]==',') { stringPtr++; continue; } else break;453}454if (longCommandWArg(&stringPtr, "shrink")) {455params->shrinkDictMaxRegression = kDefaultRegression;456params->shrinkDict = 1;457if (stringPtr[0]=='=') {458stringPtr++;459params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);460}461if (stringPtr[0]==',') {462stringPtr++;463continue;464}465else break;466}467return 0;468}469if (stringPtr[0] != 0) return 0;470DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);471return 1;472}473474/**475* parseFastCoverParameters() :476* reads fastcover parameters from *stringPtr (e.g. 
"--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params477* @return 1 means that fastcover parameters were correct478* @return 0 in case of malformed parameters479*/480static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)481{482memset(params, 0, sizeof(*params));483for (; ;) {484if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }485if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }486if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }487if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }488if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }489if (longCommandWArg(&stringPtr, "split=")) {490unsigned splitPercentage = readU32FromChar(&stringPtr);491params->splitPoint = (double)splitPercentage / 100.0;492if (stringPtr[0]==',') { stringPtr++; continue; } else break;493}494if (longCommandWArg(&stringPtr, "shrink")) {495params->shrinkDictMaxRegression = kDefaultRegression;496params->shrinkDict = 1;497if (stringPtr[0]=='=') {498stringPtr++;499params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);500}501if (stringPtr[0]==',') {502stringPtr++;503continue;504}505else break;506}507return 0;508}509if (stringPtr[0] != 0) return 0;510DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);511return 1;512}513514/**515* parseLegacyParameters() :516* reads legacy dictionary builder 
parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity517* @return 1 means that legacy dictionary builder parameters were correct518* @return 0 in case of malformed parameters519*/520static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity)521{522if (!longCommandWArg(&stringPtr, "s=") && !longCommandWArg(&stringPtr, "selectivity=")) { return 0; }523*selectivity = readU32FromChar(&stringPtr);524if (stringPtr[0] != 0) return 0;525DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity);526return 1;527}528529static ZDICT_cover_params_t defaultCoverParams(void)530{531ZDICT_cover_params_t params;532memset(¶ms, 0, sizeof(params));533params.d = 8;534params.steps = 4;535params.splitPoint = 1.0;536params.shrinkDict = 0;537params.shrinkDictMaxRegression = kDefaultRegression;538return params;539}540541static ZDICT_fastCover_params_t defaultFastCoverParams(void)542{543ZDICT_fastCover_params_t params;544memset(¶ms, 0, sizeof(params));545params.d = 8;546params.f = 20;547params.steps = 4;548params.splitPoint = 0.75; /* different from default splitPoint of cover */549params.accel = DEFAULT_ACCEL;550params.shrinkDict = 0;551params.shrinkDictMaxRegression = kDefaultRegression;552return params;553}554#endif555556557/** parseAdaptParameters() :558* reads adapt parameters from *stringPtr (e.g. 
"--zstd=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr.559* Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized.560* There is no guarantee that any of these values will be updated.561* @return 1 means that parsing was successful,562* @return 0 in case of malformed parameters563*/564static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)565{566for ( ; ;) {567if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readIntFromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }568if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readIntFromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }569DISPLAYLEVEL(4, "invalid compression parameter \n");570return 0;571}572if (stringPtr[0] != 0) return 0; /* check the end of string */573if (*adaptMinPtr > *adaptMaxPtr) {574DISPLAYLEVEL(4, "incoherent adaptation limits \n");575return 0;576}577return 1;578}579580581/** parseCompressionParameters() :582* reads compression parameters from *stringPtr (e.g. 
"--zstd=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6") into *params583* @return 1 means that compression parameters were correct584* @return 0 in case of malformed parameters585*/586static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params)587{588for ( ; ;) {589if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }590if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }591if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }592if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }593if (longCommandWArg(&stringPtr, "minMatch=") || longCommandWArg(&stringPtr, "mml=")) { params->minMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }594if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }595if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }596if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }597if (longCommandWArg(&stringPtr, 
"ldmHashLog=") || longCommandWArg(&stringPtr, "lhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }598if (longCommandWArg(&stringPtr, "ldmMinMatch=") || longCommandWArg(&stringPtr, "lmml=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }599if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "lblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }600if (longCommandWArg(&stringPtr, "ldmHashRateLog=") || longCommandWArg(&stringPtr, "lhrlog=")) { g_ldmHashRateLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }601DISPLAYLEVEL(4, "invalid compression parameter \n");602return 0;603}604605DISPLAYLEVEL(4, "windowLog=%d, chainLog=%d, hashLog=%d, searchLog=%d \n", params->windowLog, params->chainLog, params->hashLog, params->searchLog);606DISPLAYLEVEL(4, "minMatch=%d, targetLength=%d, strategy=%d \n", params->minMatch, params->targetLength, params->strategy);607if (stringPtr[0] != 0) return 0; /* check the end of string */608return 1;609}610611static void printVersion(void)612{613if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) {614DISPLAYOUT("%s\n", ZSTD_VERSION_STRING);615return;616}617618DISPLAYOUT(WELCOME_MESSAGE);619if (g_displayLevel >= 3) {620/* format support */621DISPLAYOUT("*** supports: zstd");622#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8)623DISPLAYOUT(", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT);624#endif625#ifdef ZSTD_GZCOMPRESS626DISPLAYOUT(", gzip");627#endif628#ifdef ZSTD_LZ4COMPRESS629DISPLAYOUT(", lz4");630#endif631#ifdef ZSTD_LZMACOMPRESS632DISPLAYOUT(", lzma, xz ");633#endif634DISPLAYOUT("\n");635if (g_displayLevel >= 4) {636/* posix support */637#ifdef _POSIX_C_SOURCE638DISPLAYOUT("_POSIX_C_SOURCE defined: %ldL\n", (long) 
_POSIX_C_SOURCE);639#endif640#ifdef _POSIX_VERSION641DISPLAYOUT("_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION);642#endif643#ifdef PLATFORM_POSIX_VERSION644DISPLAYOUT("PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);645#endif646} }647}648649#define ZSTD_NB_STRATEGIES 9650static const char* ZSTD_strategyMap[ZSTD_NB_STRATEGIES + 1] = { "", "ZSTD_fast",651"ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2", "ZSTD_btlazy2",652"ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2"};653654#ifndef ZSTD_NOCOMPRESS655656static void printDefaultCParams(const char* filename, const char* dictFileName, int cLevel) {657unsigned long long fileSize = UTIL_getFileSize(filename);658const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0;659const ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, fileSize, dictSize);660if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAY("%s (%u bytes)\n", filename, (unsigned)fileSize);661else DISPLAY("%s (src size unknown)\n", filename);662DISPLAY(" - windowLog : %u\n", cParams.windowLog);663DISPLAY(" - chainLog : %u\n", cParams.chainLog);664DISPLAY(" - hashLog : %u\n", cParams.hashLog);665DISPLAY(" - searchLog : %u\n", cParams.searchLog);666DISPLAY(" - minMatch : %u\n", cParams.minMatch);667DISPLAY(" - targetLength : %u\n", cParams.targetLength);668assert(cParams.strategy < ZSTD_NB_STRATEGIES + 1);669DISPLAY(" - strategy : %s (%u)\n", ZSTD_strategyMap[(int)cParams.strategy], (unsigned)cParams.strategy);670}671672static void printActualCParams(const char* filename, const char* dictFileName, int cLevel, const ZSTD_compressionParameters* cParams) {673unsigned long long fileSize = UTIL_getFileSize(filename);674const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0;675ZSTD_compressionParameters actualCParams = ZSTD_getCParams(cLevel, fileSize, dictSize);676assert(g_displayLevel >= 4);677actualCParams.windowLog = cParams->windowLog == 0 ? 
actualCParams.windowLog : cParams->windowLog;678actualCParams.chainLog = cParams->chainLog == 0 ? actualCParams.chainLog : cParams->chainLog;679actualCParams.hashLog = cParams->hashLog == 0 ? actualCParams.hashLog : cParams->hashLog;680actualCParams.searchLog = cParams->searchLog == 0 ? actualCParams.searchLog : cParams->searchLog;681actualCParams.minMatch = cParams->minMatch == 0 ? actualCParams.minMatch : cParams->minMatch;682actualCParams.targetLength = cParams->targetLength == 0 ? actualCParams.targetLength : cParams->targetLength;683actualCParams.strategy = cParams->strategy == 0 ? actualCParams.strategy : cParams->strategy;684DISPLAY("--zstd=wlog=%d,clog=%d,hlog=%d,slog=%d,mml=%d,tlen=%d,strat=%d\n",685actualCParams.windowLog, actualCParams.chainLog, actualCParams.hashLog, actualCParams.searchLog,686actualCParams.minMatch, actualCParams.targetLength, actualCParams.strategy);687}688689#endif690691/* Environment variables for parameter setting */692#define ENV_CLEVEL "ZSTD_CLEVEL"693#define ENV_NBTHREADS "ZSTD_NBTHREADS" /* takes lower precedence than directly specifying -T# in the CLI */694695/* pick up environment variable */696static int init_cLevel(void) {697const char* const env = getenv(ENV_CLEVEL);698if (env != NULL) {699const char* ptr = env;700int sign = 1;701if (*ptr == '-') {702sign = -1;703ptr++;704} else if (*ptr == '+') {705ptr++;706}707708if ((*ptr>='0') && (*ptr<='9')) {709unsigned absLevel;710if (readU32FromCharChecked(&ptr, &absLevel)) {711DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_CLEVEL, env);712return ZSTDCLI_CLEVEL_DEFAULT;713} else if (*ptr == 0) {714return sign * (int)absLevel;715} }716717DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid integer value \n", ENV_CLEVEL, env);718}719720return ZSTDCLI_CLEVEL_DEFAULT;721}722723#ifdef ZSTD_MULTITHREAD724static unsigned init_nbThreads(void) {725const char* const env = getenv(ENV_NBTHREADS);726if (env != NULL) {727const 
char* ptr = env;728if ((*ptr>='0') && (*ptr<='9')) {729unsigned nbThreads;730if (readU32FromCharChecked(&ptr, &nbThreads)) {731DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);732return ZSTDCLI_NBTHREADS_DEFAULT;733} else if (*ptr == 0) {734return nbThreads;735}736}737DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);738}739740return ZSTDCLI_NBTHREADS_DEFAULT;741}742#endif743744#define NEXT_FIELD(ptr) { \745if (*argument == '=') { \746ptr = ++argument; \747argument += strlen(ptr); \748} else { \749argNb++; \750if (argNb >= argCount) { \751DISPLAY("error: missing command argument \n"); \752CLEAN_RETURN(1); \753} \754ptr = argv[argNb]; \755assert(ptr != NULL); \756if (ptr[0]=='-') { \757DISPLAY("error: command cannot be separated from its argument by another command \n"); \758CLEAN_RETURN(1); \759} } }760761#define NEXT_UINT32(val32) { \762const char* __nb; \763NEXT_FIELD(__nb); \764val32 = readU32FromChar(&__nb); \765}766767typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;768769#define CLEAN_RETURN(i) { operationResult = (i); goto _end; }770771#ifdef ZSTD_NOCOMPRESS772/* symbols from compression library are not defined and should not be invoked */773# define MINCLEVEL -99774# define MAXCLEVEL 22775#else776# define MINCLEVEL ZSTD_minCLevel()777# define MAXCLEVEL ZSTD_maxCLevel()778#endif779780int main(int argCount, const char* argv[])781{782int argNb,783followLinks = 0,784allowBlockDevices = 0,785forceStdin = 0,786forceStdout = 0,787hasStdout = 0,788ldmFlag = 0,789main_pause = 0,790nbWorkers = 0,791adapt = 0,792useRowMatchFinder = 0,793adaptMin = MINCLEVEL,794adaptMax = MAXCLEVEL,795rsyncable = 0,796nextArgumentsAreFiles = 0,797operationResult = 0,798separateFiles = 0,799setRealTimePrio = 0,800singleThread = 0,801#ifdef ZSTD_MULTITHREAD802defaultLogicalCores = 
0,803#endif804showDefaultCParams = 0,805ultra=0,806contentSize=1;807double compressibility = 0.5;808unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */809size_t blockSize = 0;810811FIO_prefs_t* const prefs = FIO_createPreferences();812FIO_ctx_t* const fCtx = FIO_createContext();813zstd_operation_mode operation = zom_compress;814ZSTD_compressionParameters compressionParams;815int cLevel = init_cLevel();816int cLevelLast = MINCLEVEL - 1; /* lower than minimum */817unsigned recursive = 0;818unsigned memLimit = 0;819FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */820FileNamesTable* file_of_names = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */821const char* programName = argv[0];822const char* outFileName = NULL;823const char* outDirName = NULL;824const char* outMirroredDirName = NULL;825const char* dictFileName = NULL;826const char* patchFromDictFileName = NULL;827const char* suffix = ZSTD_EXTENSION;828unsigned maxDictSize = g_defaultMaxDictSize;829unsigned dictID = 0;830size_t streamSrcSize = 0;831size_t targetCBlockSize = 0;832size_t srcSizeHint = 0;833int dictCLevel = g_defaultDictCLevel;834unsigned dictSelect = g_defaultSelectivityLevel;835#ifndef ZSTD_NODICT836ZDICT_cover_params_t coverParams = defaultCoverParams();837ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();838dictType dict = fastCover;839#endif840#ifndef ZSTD_NOBENCH841BMK_advancedParams_t benchParams = BMK_initAdvancedParams();842#endif843ZSTD_paramSwitch_e literalCompressionMode = ZSTD_ps_auto;844845846/* init */847checkLibVersion();848(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */849(void)memLimit;850assert(argCount >= 1);851if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }852programName = lastNameFromPath(programName);853#ifdef ZSTD_MULTITHREAD854nbWorkers = init_nbThreads();855#endif856857/* preset 
behaviors */858if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;859if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;860if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */861if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */862if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like gzip */863if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); } /* behave like gunzip, also supports multiple formats */864if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */865if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like lzma */866if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */867if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like xz */868if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unxz, also supports multiple formats */869if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = 
LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like lz4 */870if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */871memset(&compressionParams, 0, sizeof(compressionParams));872873/* init crash handler */874FIO_addAbortHandler();875876/* command switches */877for (argNb=1; argNb<argCount; argNb++) {878const char* argument = argv[argNb];879if (!argument) continue; /* Protection if argument empty */880881if (nextArgumentsAreFiles) {882UTIL_refFilename(filenames, argument);883continue;884}885886/* "-" means stdin/stdout */887if (!strcmp(argument, "-")){888UTIL_refFilename(filenames, stdinmark);889continue;890}891892/* Decode commands (note : aggregated commands are allowed) */893if (argument[0]=='-') {894895if (argument[1]=='-') {896/* long commands (--long-word) */897if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */898if (!strcmp(argument, "--list")) { operation=zom_list; continue; }899if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }900if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }901if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }902if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; }903if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }904if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }905if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }906if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }907if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }908if (!strcmp(argument, "--ultra")) { ultra=1; continue; }909if (!strcmp(argument, 
"--check")) { FIO_setChecksumFlag(prefs, 2); continue; }910if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }911if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }912if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }913if (!strcmp(argument, "--test")) { operation=zom_test; continue; }914if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }915if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }916if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }917if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }918if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }919if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; }920if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }921if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }922if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }923if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; }924if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; }925if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }926if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }927if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }928#ifdef ZSTD_GZCOMPRESS929if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }930#endif931#ifdef ZSTD_LZMACOMPRESS932if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, 
FIO_lzmaCompression); continue; }933if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }934#endif935#ifdef ZSTD_LZ4COMPRESS936if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }937#endif938if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }939if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_ps_enable; continue; }940if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_ps_disable; continue; }941if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; }942if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; }943if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }944945/* long commands with arguments */946#ifndef ZSTD_NODICT947if (longCommandWArg(&argument, "--train-cover")) {948operation = zom_train;949if (outFileName == NULL)950outFileName = g_defaultDictName;951dict = cover;952/* Allow optional arguments following an = */953if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }954else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }955else if (!parseCoverParameters(argument, &coverParams)) { badusage(programName); CLEAN_RETURN(1); }956continue;957}958if (longCommandWArg(&argument, "--train-fastcover")) {959operation = zom_train;960if (outFileName == NULL)961outFileName = g_defaultDictName;962dict = fastCover;963/* Allow optional arguments following an = */964if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }965else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }966else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badusage(programName); CLEAN_RETURN(1); }967continue;968}969if (longCommandWArg(&argument, "--train-legacy")) {970operation = 
zom_train;971if (outFileName == NULL)972outFileName = g_defaultDictName;973dict = legacy;974/* Allow optional arguments following an = */975if (*argument == 0) { continue; }976else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }977else if (!parseLegacyParameters(argument, &dictSelect)) { badusage(programName); CLEAN_RETURN(1); }978continue;979}980#endif981if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }982if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }983if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }984if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }985if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; }986if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }987if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }988if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; }989if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; }990if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }991if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }992if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }993#ifdef ZSTD_MULTITHREAD994if (longCommandWArg(&argument, "--auto-threads")) {995const char* threadDefault = NULL;996NEXT_FIELD(threadDefault);997if (strcmp(threadDefault, "logical") == 0)998defaultLogicalCores = 1;999continue;1000}1001#endif1002#ifdef UTIL_HAS_MIRRORFILELIST1003if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; 
}1004#endif1005#ifndef ZSTD_NOTRACE1006if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; }1007#endif1008if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; }1009if (longCommandWArg(&argument, "--long")) {1010unsigned ldmWindowLog = 0;1011ldmFlag = 1;1012/* Parse optional window log */1013if (*argument == '=') {1014++argument;1015ldmWindowLog = readU32FromChar(&argument);1016} else if (*argument != 0) {1017/* Invalid character following --long */1018badusage(programName);1019CLEAN_RETURN(1);1020}1021/* Only set windowLog if not already set by --zstd */1022if (compressionParams.windowLog == 0)1023compressionParams.windowLog = ldmWindowLog;1024continue;1025}1026#ifndef ZSTD_NOCOMPRESS /* linking ZSTD_minCLevel() requires compression support */1027if (longCommandWArg(&argument, "--fast")) {1028/* Parse optional acceleration factor */1029if (*argument == '=') {1030U32 const maxFast = (U32)-ZSTD_minCLevel();1031U32 fastLevel;1032++argument;1033fastLevel = readU32FromChar(&argument);1034if (fastLevel > maxFast) fastLevel = maxFast;1035if (fastLevel) {1036dictCLevel = cLevel = -(int)fastLevel;1037} else {1038badusage(programName);1039CLEAN_RETURN(1);1040}1041} else if (*argument != 0) {1042/* Invalid character following --fast */1043badusage(programName);1044CLEAN_RETURN(1);1045} else {1046cLevel = -1; /* default for --fast */1047}1048continue;1049}1050#endif10511052if (longCommandWArg(&argument, "--filelist")) {1053const char* listName;1054NEXT_FIELD(listName);1055UTIL_refFilename(file_of_names, listName);1056continue;1057}10581059/* fall-through, will trigger bad_usage() later on */1060}10611062argument++;1063while (argument[0]!=0) {10641065#ifndef ZSTD_NOCOMPRESS1066/* compression Level */1067if ((*argument>='0') && (*argument<='9')) {1068dictCLevel = cLevel = 
(int)readU32FromChar(&argument);1069continue;1070}1071#endif10721073switch(argument[0])1074{1075/* Display help */1076case 'V': printVersion(); CLEAN_RETURN(0); /* Version Only */1077case 'H':1078case 'h': usage_advanced(programName); CLEAN_RETURN(0);10791080/* Compress */1081case 'z': operation=zom_compress; argument++; break;10821083/* Decoding */1084case 'd':1085#ifndef ZSTD_NOBENCH1086benchParams.mode = BMK_decodeOnly;1087if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */1088#endif1089operation=zom_decompress; argument++; break;10901091/* Force stdout, even if stdout==console */1092case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;10931094/* Use file content as dictionary */1095case 'D': argument++; NEXT_FIELD(dictFileName); break;10961097/* Overwrite */1098case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; argument++; break;10991100/* Verbose mode */1101case 'v': g_displayLevel++; argument++; break;11021103/* Quiet mode */1104case 'q': g_displayLevel--; argument++; break;11051106/* keep source file (default) */1107case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;11081109/* Checksum */1110case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;11111112/* test compressed file */1113case 't': operation=zom_test; argument++; break;11141115/* destination file name */1116case 'o': argument++; NEXT_FIELD(outFileName); break;11171118/* limit memory */1119case 'M':1120argument++;1121memLimit = readU32FromChar(&argument);1122break;1123case 'l': operation=zom_list; argument++; break;1124#ifdef UTIL_HAS_CREATEFILELIST1125/* recursive */1126case 'r': recursive=1; argument++; break;1127#endif11281129#ifndef ZSTD_NOBENCH1130/* Benchmark */1131case 'b':1132operation=zom_bench;1133argument++;1134break;11351136/* range bench (benchmark only) */1137case 'e':1138/* compression Level */1139argument++;1140cLevelLast = 
(int)readU32FromChar(&argument);1141break;11421143/* Modify Nb Iterations (benchmark only) */1144case 'i':1145argument++;1146bench_nbSeconds = readU32FromChar(&argument);1147break;11481149/* cut input into blocks (benchmark only) */1150case 'B':1151argument++;1152blockSize = readU32FromChar(&argument);1153break;11541155/* benchmark files separately (hidden option) */1156case 'S':1157argument++;1158separateFiles = 1;1159break;11601161#endif /* ZSTD_NOBENCH */11621163/* nb of threads (hidden option) */1164case 'T':1165argument++;1166nbWorkers = (int)readU32FromChar(&argument);1167break;11681169/* Dictionary Selection level */1170case 's':1171argument++;1172dictSelect = readU32FromChar(&argument);1173break;11741175/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */1176case 'p': argument++;1177#ifndef ZSTD_NOBENCH1178if ((*argument>='0') && (*argument<='9')) {1179benchParams.additionalParam = (int)readU32FromChar(&argument);1180} else1181#endif1182main_pause=1;1183break;11841185/* Select compressibility of synthetic sample */1186case 'P':1187argument++;1188compressibility = (double)readU32FromChar(&argument) / 100;1189break;11901191/* unknown command */1192default : badusage(programName); CLEAN_RETURN(1);1193}1194}1195continue;1196} /* if (argument[0]=='-') */11971198/* none of the above : add filename to list */1199UTIL_refFilename(filenames, argument);1200}12011202/* Welcome message (if verbose) */1203DISPLAYLEVEL(3, WELCOME_MESSAGE);12041205#ifdef ZSTD_MULTITHREAD1206if ((nbWorkers==0) && (!singleThread)) {1207/* automatically set # workers based on # of reported cpus */1208if (defaultLogicalCores) {1209nbWorkers = UTIL_countLogicalCores();1210DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);1211} else {1212nbWorkers = UTIL_countPhysicalCores();1213DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);1214}1215}1216#else1217(void)singleThread; (void)nbWorkers;1218#endif12191220g_utilDisplayLevel = 
g_displayLevel;12211222#ifdef UTIL_HAS_CREATEFILELIST1223if (!followLinks) {1224unsigned u, fileNamesNb;1225unsigned const nbFilenames = (unsigned)filenames->tableSize;1226for (u=0, fileNamesNb=0; u<nbFilenames; u++) {1227if ( UTIL_isLink(filenames->fileNames[u])1228&& !UTIL_isFIFO(filenames->fileNames[u])1229) {1230DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);1231} else {1232filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];1233} }1234if (fileNamesNb == 0 && nbFilenames > 0) /* all names are eliminated */1235CLEAN_RETURN(1);1236filenames->tableSize = fileNamesNb;1237} /* if (!followLinks) */12381239/* read names from a file */1240if (file_of_names->tableSize) {1241size_t const nbFileLists = file_of_names->tableSize;1242size_t flNb;1243for (flNb=0; flNb < nbFileLists; flNb++) {1244FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);1245if (fnt==NULL) {1246DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);1247CLEAN_RETURN(1);1248}1249filenames = UTIL_mergeFileNamesTable(filenames, fnt);1250}1251}12521253if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */1254UTIL_expandFNT(&filenames, followLinks);1255}1256#else1257(void)followLinks;1258#endif12591260if (operation == zom_list) {1261#ifndef ZSTD_NODECOMPRESS1262int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel);1263CLEAN_RETURN(ret);1264#else1265DISPLAY("file information is not supported \n");1266CLEAN_RETURN(1);1267#endif1268}12691270/* Check if benchmark is selected */1271if (operation==zom_bench) {1272#ifndef ZSTD_NOBENCH1273benchParams.blockSize = blockSize;1274benchParams.nbWorkers = nbWorkers;1275benchParams.realTime = (unsigned)setRealTimePrio;1276benchParams.nbSeconds = bench_nbSeconds;1277benchParams.ldmFlag = ldmFlag;1278benchParams.ldmMinMatch = 
(int)g_ldmMinMatch;1279benchParams.ldmHashLog = (int)g_ldmHashLog;1280benchParams.useRowMatchFinder = useRowMatchFinder;1281if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {1282benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;1283}1284if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) {1285benchParams.ldmHashRateLog = (int)g_ldmHashRateLog;1286}1287benchParams.literalCompressionMode = literalCompressionMode;12881289if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();1290if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();1291if (cLevelLast < cLevel) cLevelLast = cLevel;1292if (cLevelLast > cLevel)1293DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);1294if (filenames->tableSize > 0) {1295if(separateFiles) {1296unsigned i;1297for(i = 0; i < filenames->tableSize; i++) {1298int c;1299DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);1300for(c = cLevel; c <= cLevelLast; c++) {1301BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);1302} }1303} else {1304for(; cLevel <= cLevelLast; cLevel++) {1305BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);1306} }1307} else {1308for(; cLevel <= cLevelLast; cLevel++) {1309BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);1310} }13111312#else1313(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility;1314#endif1315goto _end;1316}13171318/* Check if dictionary builder is selected */1319if (operation==zom_train) {1320#ifndef ZSTD_NODICT1321ZDICT_params_t zParams;1322zParams.compressionLevel = dictCLevel;1323zParams.notificationLevel = (unsigned)g_displayLevel;1324zParams.dictID = dictID;1325if (dict == cover) {1326int const optimize = !coverParams.k || !coverParams.d;1327coverParams.nbThreads = 
(unsigned)nbWorkers;1328coverParams.zParams = zParams;1329operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);1330} else if (dict == fastCover) {1331int const optimize = !fastCoverParams.k || !fastCoverParams.d;1332fastCoverParams.nbThreads = (unsigned)nbWorkers;1333fastCoverParams.zParams = zParams;1334operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);1335} else {1336ZDICT_legacy_params_t dictParams;1337memset(&dictParams, 0, sizeof(dictParams));1338dictParams.selectivityLevel = dictSelect;1339dictParams.zParams = zParams;1340operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);1341}1342#else1343(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */1344DISPLAYLEVEL(1, "training mode not available \n");1345operationResult = 1;1346#endif1347goto _end;1348}13491350#ifndef ZSTD_NODECOMPRESS1351if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */1352#endif13531354/* No input filename ==> use stdin and stdout */1355if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark);1356if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName)1357outFileName = stdoutmark; /* when input is stdin, default output is stdout */13581359/* Check if input/output defined as console; trigger an error in this case */1360if (!forceStdin1361&& !strcmp(filenames->fileNames[0], stdinmark)1362&& IS_CONSOLE(stdin) ) {1363DISPLAYLEVEL(1, "stdin is a console, aborting\n");1364CLEAN_RETURN(1);1365}1366if ( outFileName && !strcmp(outFileName, stdoutmark)1367&& IS_CONSOLE(stdout)1368&& !strcmp(filenames->fileNames[0], 
stdinmark)1369&& !forceStdout1370&& operation!=zom_decompress ) {1371DISPLAYLEVEL(1, "stdout is a console, aborting\n");1372CLEAN_RETURN(1);1373}13741375#ifndef ZSTD_NOCOMPRESS1376/* check compression level limits */1377{ int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX;1378if (cLevel > maxCLevel) {1379DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel);1380cLevel = maxCLevel;1381} }1382#endif13831384if (showDefaultCParams) {1385if (operation == zom_decompress) {1386DISPLAY("error : can't use --show-default-cparams in decomrpession mode \n");1387CLEAN_RETURN(1);1388}1389}13901391if (dictFileName != NULL && patchFromDictFileName != NULL) {1392DISPLAY("error : can't use -D and --patch-from=# at the same time \n");1393CLEAN_RETURN(1);1394}13951396if (patchFromDictFileName != NULL && filenames->tableSize > 1) {1397DISPLAY("error : can't use --patch-from=# on multiple files \n");1398CLEAN_RETURN(1);1399}14001401/* No status message in pipe mode (stdin - stdout) */1402hasStdout = outFileName && !strcmp(outFileName,stdoutmark);14031404if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1;14051406/* IO Stream/File */1407FIO_setHasStdoutOutput(fCtx, hasStdout);1408FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);1409FIO_determineHasStdinInput(fCtx, filenames);1410FIO_setNotificationLevel(g_displayLevel);1411FIO_setAllowBlockDevices(prefs, allowBlockDevices);1412FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);1413if (memLimit == 0) {1414if (compressionParams.windowLog == 0) {1415memLimit = (U32)1 << g_defaultMaxWindowLog;1416} else {1417memLimit = (U32)1 << (compressionParams.windowLog & 31);1418} }1419if (patchFromDictFileName != NULL)1420dictFileName = patchFromDictFileName;1421FIO_setMemLimit(prefs, memLimit);1422if (operation==zom_compress) {1423#ifndef ZSTD_NOCOMPRESS1424FIO_setContentSize(prefs, contentSize);1425FIO_setNbWorkers(prefs, 
nbWorkers);1426FIO_setBlockSize(prefs, (int)blockSize);1427if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);1428FIO_setLdmFlag(prefs, (unsigned)ldmFlag);1429FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);1430FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch);1431if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);1432if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);1433FIO_setAdaptiveMode(prefs, (unsigned)adapt);1434FIO_setUseRowMatchFinder(prefs, useRowMatchFinder);1435FIO_setAdaptMin(prefs, adaptMin);1436FIO_setAdaptMax(prefs, adaptMax);1437FIO_setRsyncable(prefs, rsyncable);1438FIO_setStreamSrcSize(prefs, streamSrcSize);1439FIO_setTargetCBlockSize(prefs, targetCBlockSize);1440FIO_setSrcSizeHint(prefs, srcSizeHint);1441FIO_setLiteralCompressionMode(prefs, literalCompressionMode);1442if (adaptMin > cLevel) cLevel = adaptMin;1443if (adaptMax < cLevel) cLevel = adaptMax;14441445/* Compare strategies constant with the ground truth */1446{ ZSTD_bounds strategyBounds = ZSTD_cParam_getBounds(ZSTD_c_strategy);1447assert(ZSTD_NB_STRATEGIES == strategyBounds.upperBound);1448(void)strategyBounds; }14491450if (showDefaultCParams || g_displayLevel >= 4) {1451size_t fileNb;1452for (fileNb = 0; fileNb < (size_t)filenames->tableSize; fileNb++) {1453if (showDefaultCParams)1454printDefaultCParams(filenames->fileNames[fileNb], dictFileName, cLevel);1455if (g_displayLevel >= 4)1456printActualCParams(filenames->fileNames[fileNb], dictFileName, cLevel, &compressionParams);1457}1458}14591460if (g_displayLevel >= 4)1461FIO_displayCompressionParameters(prefs);1462if ((filenames->tableSize==1) && outFileName)1463operationResult = FIO_compressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams);1464else1465operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, 
outFileName, suffix, dictFileName, cLevel, compressionParams);1466#else1467(void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */1468DISPLAY("Compression not supported \n");1469#endif1470} else { /* decompression or test */1471#ifndef ZSTD_NODECOMPRESS1472if (filenames->tableSize == 1 && outFileName) {1473operationResult = FIO_decompressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName);1474} else {1475operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName);1476}1477#else1478DISPLAY("Decompression not supported \n");1479#endif1480}14811482_end:1483FIO_freePreferences(prefs);1484FIO_freeContext(fCtx);1485if (main_pause) waitEnter();1486UTIL_freeFileNamesTable(filenames);1487UTIL_freeFileNamesTable(file_of_names);1488#ifndef ZSTD_NOTRACE1489TRACE_finish();1490#endif14911492return operationResult;1493}149414951496