#pragma prototyped /* * pack [-] [-f] file ... * * pack files using Huffman coding * * David Korn * AT&T Research * */ static const char usage[] = "[-?\n@(#)$Id: pack (AT&T Research) 2003-04-28 $\n]" USAGE_LICENSE "[+NAME?pack - pack files using Huffman coding]" "[+DESCRIPTION?\bpack\b attempts to store the specified files in a compressed " "form using static Huffman coding. Wherever possible each \afile\a " "is replaced by a packed file named \afile\a\b.z\b with the same " "access modes, access and modified dates, and owner as those of " "\afile\a. The \b-f\b option forces packing of \afile\a even when " "there is no space benefit for packing the file.]" "[+?If \bpack\b is successful, \afile\a will be removed. Packed files " "can be restored to their original form using \bunpack\b or \bpcat\b.]" "[+?\bpack\b uses Huffman (minimum redundancy) codes on a byte-by-byte basis. " "Ordinarily, for each file that is packed, a line is written to " "standard output containing \afile\a\b.z\b and the percent " "compression. If the \b-v\b options is specified, or if the \b-\b " "argument is specified, an internal flag is set that causes the " "number of times each byte is used, its relative frequency, and the " "code for the byte to be written to the standard output. Additional " "occurrences of \b-\b in place of name cause the internal flag to be " "set and reset.]" "[+?No packing occurs if:]{" "[+-?\afile\a appears to be already packed.]" "[+-?\afile\a has links.]" "[+-?\afile\a is a directory.]" "[+-?\afile\a cannot be opened.]" "[+-?No disk storage blocks will be saved by packing unless \b-f\b " "is specified.]" "[+-?A file called \afile\a\b.z\b already exists.]" "[+-?The \b.z\b file cannot be created.]" "[+-?An I/O error occurred during processing.]" "}" "[f:force?Pack the file even if the packed size is larger than the original.]" "[v:verbose?Causes additional information to be written to standard ouput.]" "\n" "\nfile ...\n" "\n" "[+EXIT STATUS]{" "[+0?All files packed successfully.]" "[+\an\a?\an\a files failed to pack, where \an\a is less than 125.]" "[+125?125 or more files failed to pack.]" "}" "[+SEE ALSO?\bunpack\b(1), \bpcat\b(1), \bcompress\b(1), \bgzip\b(1)]" ; #include "huffman.h" #include <error.h> #include <ls.h> #define BLKSIZE 512 #define block(size) (((size) + BLKSIZE-1) & ~(BLKSIZE-1)) #define PERM(m) ((m)&(S_IRWXU|S_IRWXG|S_IRWXO)) static void vprint(Huff_t*, int); static char *outname(char*); static const char suffix[] = ".z"; int main(int argc, register char *argv[]) { static char command[] = "pack"; register Huff_t *hp; register char *infile,*outfile; Sfio_t *fpin,*fpout; int nfile=0, npack=0, force=0, verbose=0; int out, deleted, dsize, n; struct stat statb; NOT_USED(argc); error_info.id = command; while(n = optget(argv,usage)) switch(n) { case 'f': force++; break; case 'v': verbose = !verbose; break; case ':': error(2, opt_info.arg); break; case '?': error(ERROR_usage(2), "%s", opt_info.arg); break; } argv += opt_info.index; if(error_info.errors || !*argv) error(ERROR_usage(2), "%s", optusage((char*)0)); while (infile = *argv++) { if(*infile == '-') { /* awful way to handle options, but preserves SVID */ switch(infile[1]) { case 'f': force++; continue; case 0: verbose = !verbose; continue; } } nfile++; fpin = fpout = (Sfio_t*)0; hp = (Huff_t*)0; deleted = 0; if(!(outfile = outname(infile))) continue; if (!(fpin=sfopen((Sfio_t*)0,infile,"r"))) error(ERROR_system(0), "%s: cannot open", infile); else if(fstat(sffileno(fpin),&statb) < 0) error(ERROR_system(0), "%s: cannot stat", infile); else if(S_ISDIR(statb.st_mode)) error(2, "%s: cannot pack a directory", infile); else if(statb.st_nlink > 1) error(2, "%s: has links", infile); else if(statb.st_size ==0) error(2, "%s: cannot pack a zero length file", infile); else if(access(outfile,F_OK) ==0) error(ERROR_system(0), "%s: already exists", outfile); else if(((out=open(outfile,O_WRONLY|O_CREAT|O_TRUNC|O_BINARY,PERM(statb.st_mode))) < 0) || !(fpout = sfnew((Sfio_t*)0,(char*)0,SF_UNBOUND,out,SF_WRITE))) error(ERROR_system(0), "%s: cannot create", outfile); else if((deleted++,chmod(outfile,statb.st_mode)) < 0) error(ERROR_system(0), "%s: cannot change mode to %o",outfile,statb.st_mode); else { chown(outfile,statb.st_uid,statb.st_gid); if(!(hp = huffinit(fpin,(Sfoff_t)-1))) error(2, "%s: read error", infile); else if(sfseek(fpin,(Sfoff_t)0,0) < 0) error(ERROR_system(0),"%s: seek error", infile); else if((dsize = huffputhdr(hp,fpout)) < 0) error(2, "%s: write error", infile); else if(!force && block(huffisize(hp)) <= block(huffosize(hp)+dsize)) error(2, "%s:no savings - file unchanged", infile); else if(huffencode(hp,fpin,fpout,SF_UNBOUND)<0) error(2, "%s: read error", infile); else { double diff; if(remove(infile) < 0) error(ERROR_system(0), "%s: cannot remove", infile); diff = huffisize(hp) - (dsize+huffosize(hp)); sfprintf(sfstdout,"%s: %s : %.1f%% Compression\n",command, infile,(100*diff)/((double)huffisize(hp))); if(verbose) vprint(hp,dsize); npack++; deleted = 0; } } if(hp) huffend(hp); if(fpin) sfclose(fpin); if(fpout) sfclose(fpout); if(deleted) remove(outfile); if(outfile) free(outfile); } nfile -= npack; if(nfile > 125) nfile = 125; exit(nfile); } static char *outname(char *infile) { register int n = strlen(infile); register int sufflen = strlen(suffix); register char *cp; if(streq(suffix,infile+n-sufflen)) { error(ERROR_exit(1), "%s: already packed", infile); return((char*)0); } if(cp = (char*)malloc(n+sufflen+1)) { strcpy(cp,infile); strcat(cp+n,suffix); } return(cp); } static void vprint(Huff_t *hp,int dsize) { sfprintf(sfstdout," from %lld to %lld bytes\n", huffisize(hp), huffosize(hp)); sfprintf(sfstdout," Huffman tree has %d levels below root\n", hp->maxlev); sfprintf(sfstdout," %d distinct bytes in input\n", hp->nchars); sfprintf(sfstdout," dictionary overhead = %ld bytes\n", dsize); sfprintf(sfstdout," effective entropy = %.2f bits/byte\n", ((double)(huffosize(hp))/(double)huffisize(hp))*CHAR_BIT); sfprintf(sfstdout," asymptotic entropy = %.2f bits/byte\n", ((double)(huffosize(hp)-dsize)/(double)huffisize(hp))*CHAR_BIT); }