/*-1* SPDX-License-Identifier: BSD-3-Clause2*3* Copyright (c) 1992 Keith Muller.4* Copyright (c) 1992, 19935* The Regents of the University of California. All rights reserved.6*7* This code is derived from software contributed to Berkeley by8* Keith Muller of the University of California, San Diego.9*10* Redistribution and use in source and binary forms, with or without11* modification, are permitted provided that the following conditions12* are met:13* 1. Redistributions of source code must retain the above copyright14* notice, this list of conditions and the following disclaimer.15* 2. Redistributions in binary form must reproduce the above copyright16* notice, this list of conditions and the following disclaimer in the17* documentation and/or other materials provided with the distribution.18* 3. Neither the name of the University nor the names of its contributors19* may be used to endorse or promote products derived from this software20* without specific prior written permission.21*22* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND23* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE24* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE25* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE26* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL27* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS28* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)29* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY31* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF32* SUCH DAMAGE.33*/3435#include <sys/types.h>36#include <sys/time.h>37#include <sys/stat.h>38#include <sys/fcntl.h>39#include <errno.h>40#include <stdio.h>41#include <stdlib.h>42#include <string.h>43#include <unistd.h>44#include "pax.h"45#include "tables.h"46#include "extern.h"4748/*49* Routines for controlling the contents of all the different databases pax50* keeps. Tables are dynamically created only when they are needed. The51* goal was speed and the ability to work with HUGE archives. The databases52* were kept simple, but do have complex rules for when the contents change.53* As of this writing, the POSIX library functions were more complex than54* needed for this application (pax databases have very short lifetimes and55* do not survive after pax is finished). Pax is required to handle very56* large archives. These database routines carefully combine memory usage and57* temporary file storage in ways which will not significantly impact runtime58* performance while allowing the largest possible archives to be handled.59* Trying to force the fit to the POSIX database routines was not considered60* time well spent.61*/6263static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */64static FTM **ftab = NULL; /* file time table for updating arch */65static NAMT **ntab = NULL; /* interactive rename storage table */66static DEVT **dtab = NULL; /* device/inode mapping tables */67static ATDIR **atab = NULL; /* file tree directory time reset table */68static int dirfd = -1; /* storage for setting created dir time/mode */69static u_long dircnt; /* entries in dir time/mode storage */70static int ffd = -1; /* tmp file for file time table name storage */7172static DEVT *chk_dev(dev_t, int);7374/*75* hard link table routines76*77* The hard link table tries to detect hard links to files using the device and78* inode values. We do this when writing an archive, so we can tell the format79* write routine that this file is a hard link to another file. The format80* write routine then can store this file in whatever way it wants (as a hard81* link if the format supports that like tar, or ignore this info like cpio).82* (Actually a field in the format driver table tells us if the format wants83* hard link info. if not, we do not waste time looking for them). We also use84* the same table when reading an archive. In that situation, this table is85* used by the format read routine to detect hard links from stored dev and86* inode numbers (like cpio). This will allow pax to create a link when one87* can be detected by the archive format.88*/8990/*91* lnk_start92* Creates the hard link table.93* Return:94* 0 if created, -1 if failure95*/9697int98lnk_start(void)99{100if (ltab != NULL)101return(0);102if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {103paxwarn(1, "Cannot allocate memory for hard link table");104return(-1);105}106return(0);107}108109/*110* chk_lnk()111* Looks up entry in hard link hash table. If found, it copies the name112* of the file it is linked to (we already saw that file) into ln_name.113* lnkcnt is decremented and if goes to 1 the node is deleted from the114* database. (We have seen all the links to this file). If not found,115* we add the file to the database if it has the potential for having116* hard links to other files we may process (it has a link count > 1)117* Return:118* if found returns 1; if not found returns 0; -1 on error119*/120121int122chk_lnk(ARCHD *arcn)123{124HRDLNK *pt;125HRDLNK **ppt;126u_int indx;127128if (ltab == NULL)129return(-1);130/*131* ignore those nodes that cannot have hard links132*/133if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))134return(0);135136/*137* hash inode number and look for this file138*/139indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;140if ((pt = ltab[indx]) != NULL) {141/*142* it's hash chain in not empty, walk down looking for it143*/144ppt = &(ltab[indx]);145while (pt != NULL) {146if ((pt->ino == arcn->sb.st_ino) &&147(pt->dev == arcn->sb.st_dev))148break;149ppt = &(pt->fow);150pt = pt->fow;151}152153if (pt != NULL) {154/*155* found a link. set the node type and copy in the156* name of the file it is to link to. we need to157* handle hardlinks to regular files differently than158* other links.159*/160arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name,161sizeof(arcn->ln_name) - 1);162arcn->ln_name[arcn->ln_nlen] = '\0';163if (arcn->type == PAX_REG)164arcn->type = PAX_HRG;165else166arcn->type = PAX_HLK;167168/*169* if we have found all the links to this file, remove170* it from the database171*/172if (--pt->nlink <= 1) {173*ppt = pt->fow;174free(pt->name);175free(pt);176}177return(1);178}179}180181/*182* we never saw this file before. It has links so we add it to the183* front of this hash chain184*/185if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {186if ((pt->name = strdup(arcn->name)) != NULL) {187pt->dev = arcn->sb.st_dev;188pt->ino = arcn->sb.st_ino;189pt->nlink = arcn->sb.st_nlink;190pt->fow = ltab[indx];191ltab[indx] = pt;192return(0);193}194free(pt);195}196197paxwarn(1, "Hard link table out of memory");198return(-1);199}200201/*202* purg_lnk203* remove reference for a file that we may have added to the data base as204* a potential source for hard links. We ended up not using the file, so205* we do not want to accidentally point another file at it later on.206*/207208void209purg_lnk(ARCHD *arcn)210{211HRDLNK *pt;212HRDLNK **ppt;213u_int indx;214215if (ltab == NULL)216return;217/*218* do not bother to look if it could not be in the database219*/220if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||221(arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))222return;223224/*225* find the hash chain for this inode value, if empty return226*/227indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;228if ((pt = ltab[indx]) == NULL)229return;230231/*232* walk down the list looking for the inode/dev pair, unlink and233* free if found234*/235ppt = &(ltab[indx]);236while (pt != NULL) {237if ((pt->ino == arcn->sb.st_ino) &&238(pt->dev == arcn->sb.st_dev))239break;240ppt = &(pt->fow);241pt = pt->fow;242}243if (pt == NULL)244return;245246/*247* remove and free it248*/249*ppt = pt->fow;250free(pt->name);251free(pt);252}253254/*255* lnk_end()256* Pull apart an existing link table so we can reuse it. We do this between257* read and write phases of append with update. (The format may have258* used the link table, and we need to start with a fresh table for the259* write phase).260*/261262void263lnk_end(void)264{265int i;266HRDLNK *pt;267HRDLNK *ppt;268269if (ltab == NULL)270return;271272for (i = 0; i < L_TAB_SZ; ++i) {273if (ltab[i] == NULL)274continue;275pt = ltab[i];276ltab[i] = NULL;277278/*279* free up each entry on this chain280*/281while (pt != NULL) {282ppt = pt;283pt = ppt->fow;284free(ppt->name);285free(ppt);286}287}288return;289}290291/*292* modification time table routines293*294* The modification time table keeps track of last modification times for all295* files stored in an archive during a write phase when -u is set. We only296* add a file to the archive if it is newer than a file with the same name297* already stored on the archive (if there is no other file with the same298* name on the archive it is added). This applies to writes and appends.299* An append with an -u must read the archive and store the modification time300* for every file on that archive before starting the write phase. It is clear301* that this is one HUGE database. To save memory space, the actual file names302* are stored in a scratch file and indexed by an in memory hash table. The303* hash table is indexed by hashing the file path. The nodes in the table store304* the length of the filename and the lseek offset within the scratch file305* where the actual name is stored. Since there are never any deletions from306* this table, fragmentation of the scratch file is never an issue. Lookups307* seem to not exhibit any locality at all (files in the database are rarely308* looked up more than once...). So caching is just a waste of memory. The309* only limitation is the amount of scratch file space available to store the310* path names.311*/312313/*314* ftime_start()315* create the file time hash table and open for read/write the scratch316* file. (after created it is unlinked, so when we exit we leave317* no witnesses).318* Return:319* 0 if the table and file was created ok, -1 otherwise320*/321322int323ftime_start(void)324{325326if (ftab != NULL)327return(0);328if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {329paxwarn(1, "Cannot allocate memory for file time table");330return(-1);331}332333/*334* get random name and create temporary scratch file, unlink name335* so it will get removed on exit336*/337memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));338if ((ffd = mkstemp(tempfile)) < 0) {339syswarn(1, errno, "Unable to create temporary file: %s",340tempfile);341return(-1);342}343(void)unlink(tempfile);344345return(0);346}347348/*349* chk_ftime()350* looks up entry in file time hash table. If not found, the file is351* added to the hash table and the file named stored in the scratch file.352* If a file with the same name is found, the file times are compared and353* the most recent file time is retained. If the new file was younger (or354* was not in the database) the new file is selected for storage.355* Return:356* 0 if file should be added to the archive, 1 if it should be skipped,357* -1 on error358*/359360int361chk_ftime(ARCHD *arcn)362{363FTM *pt;364int namelen;365u_int indx;366char ckname[PAXPATHLEN+1];367368/*369* no info, go ahead and add to archive370*/371if (ftab == NULL)372return(0);373374/*375* hash the pathname and look up in table376*/377namelen = arcn->nlen;378indx = st_hash(arcn->name, namelen, F_TAB_SZ);379if ((pt = ftab[indx]) != NULL) {380/*381* the hash chain is not empty, walk down looking for match382* only read up the path names if the lengths match, speeds383* up the search a lot384*/385while (pt != NULL) {386if (pt->namelen == namelen) {387/*388* potential match, have to read the name389* from the scratch file.390*/391if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {392syswarn(1, errno,393"Failed ftime table seek");394return(-1);395}396if (read(ffd, ckname, namelen) != namelen) {397syswarn(1, errno,398"Failed ftime table read");399return(-1);400}401402/*403* if the names match, we are done404*/405if (!strncmp(ckname, arcn->name, namelen))406break;407}408409/*410* try the next entry on the chain411*/412pt = pt->fow;413}414415if (pt != NULL) {416/*417* found the file, compare the times, save the newer418*/419if (arcn->sb.st_mtime > pt->mtime) {420/*421* file is newer422*/423pt->mtime = arcn->sb.st_mtime;424return(0);425}426/*427* file is older428*/429return(1);430}431}432433/*434* not in table, add it435*/436if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {437/*438* add the name at the end of the scratch file, saving the439* offset. add the file to the head of the hash chain440*/441if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {442if (write(ffd, arcn->name, namelen) == namelen) {443pt->mtime = arcn->sb.st_mtime;444pt->namelen = namelen;445pt->fow = ftab[indx];446ftab[indx] = pt;447return(0);448}449syswarn(1, errno, "Failed write to file time table");450} else451syswarn(1, errno, "Failed seek on file time table");452} else453paxwarn(1, "File time table ran out of memory");454455if (pt != NULL)456free(pt);457return(-1);458}459460/*461* Interactive rename table routines462*463* The interactive rename table keeps track of the new names that the user464* assigns to files from tty input. Since this map is unique for each file465* we must store it in case there is a reference to the file later in archive466* (a link). Otherwise we will be unable to find the file we know was467* extracted. The remapping of these files is stored in a memory based hash468* table (it is assumed since input must come from /dev/tty, it is unlikely to469* be a very large table).470*/471472/*473* name_start()474* create the interactive rename table475* Return:476* 0 if successful, -1 otherwise477*/478479int480name_start(void)481{482if (ntab != NULL)483return(0);484if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {485paxwarn(1, "Cannot allocate memory for interactive rename table");486return(-1);487}488return(0);489}490491/*492* add_name()493* add the new name to old name mapping just created by the user.494* If an old name mapping is found (there may be duplicate names on an495* archive) only the most recent is kept.496* Return:497* 0 if added, -1 otherwise498*/499500int501add_name(char *oname, int onamelen, char *nname)502{503NAMT *pt;504u_int indx;505506if (ntab == NULL) {507/*508* should never happen509*/510paxwarn(0, "No interactive rename table, links may fail\n");511return(0);512}513514/*515* look to see if we have already mapped this file, if so we516* will update it517*/518indx = st_hash(oname, onamelen, N_TAB_SZ);519if ((pt = ntab[indx]) != NULL) {520/*521* look down the has chain for the file522*/523while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))524pt = pt->fow;525526if (pt != NULL) {527/*528* found an old mapping, replace it with the new one529* the user just input (if it is different)530*/531if (strcmp(nname, pt->nname) == 0)532return(0);533534free(pt->nname);535if ((pt->nname = strdup(nname)) == NULL) {536paxwarn(1, "Cannot update rename table");537return(-1);538}539return(0);540}541}542543/*544* this is a new mapping, add it to the table545*/546if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) {547if ((pt->oname = strdup(oname)) != NULL) {548if ((pt->nname = strdup(nname)) != NULL) {549pt->fow = ntab[indx];550ntab[indx] = pt;551return(0);552}553free(pt->oname);554}555free(pt);556}557paxwarn(1, "Interactive rename table out of memory");558return(-1);559}560561/*562* sub_name()563* look up a link name to see if it points at a file that has been564* remapped by the user. If found, the link is adjusted to contain the565* new name (oname is the link to name)566*/567568void569sub_name(char *oname, int *onamelen, size_t onamesize)570{571NAMT *pt;572u_int indx;573574if (ntab == NULL)575return;576/*577* look the name up in the hash table578*/579indx = st_hash(oname, *onamelen, N_TAB_SZ);580if ((pt = ntab[indx]) == NULL)581return;582583while (pt != NULL) {584/*585* walk down the hash chain looking for a match586*/587if (strcmp(oname, pt->oname) == 0) {588/*589* found it, replace it with the new name590* and return (we know that oname has enough space)591*/592*onamelen = l_strncpy(oname, pt->nname, onamesize - 1);593oname[*onamelen] = '\0';594return;595}596pt = pt->fow;597}598599/*600* no match, just return601*/602return;603}604605/*606* device/inode mapping table routines607* (used with formats that store device and inodes fields)608*609* device/inode mapping tables remap the device field in an archive header. The610* device/inode fields are used to determine when files are hard links to each611* other. However these values have very little meaning outside of that. This612* database is used to solve one of two different problems.613*614* 1) when files are appended to an archive, while the new files may have hard615* links to each other, you cannot determine if they have hard links to any616* file already stored on the archive from a prior run of pax. We must assume617* that these inode/device pairs are unique only within a SINGLE run of pax618* (which adds a set of files to an archive). So we have to make sure the619* inode/dev pairs we add each time are always unique. We do this by observing620* while the inode field is very dense, the use of the dev field is fairly621* sparse. Within each run of pax, we remap any device number of a new archive622* member that has a device number used in a prior run and already stored in a623* file on the archive. During the read phase of the append, we store the624* device numbers used and mark them to not be used by any file during the625* write phase. If during write we go to use one of those old device numbers,626* we remap it to a new value.627*628* 2) Often the fields in the archive header used to store these values are629* too small to store the entire value. The result is an inode or device value630* which can be truncated. This really can foul up an archive. With truncation631* we end up creating links between files that are really not links (after632* truncation the inodes are the same value). We address that by detecting633* truncation and forcing a remap of the device field to split truncated634* inodes away from each other. Each truncation creates a pattern of bits that635* are removed. We use this pattern of truncated bits to partition the inodes636* on a single device to many different devices (each one represented by the637* truncated bit pattern). All inodes on the same device that have the same638* truncation pattern are mapped to the same new device. Two inodes that639* truncate to the same value clearly will always have different truncation640* bit patterns, so they will be split from away each other. When we spot641* device truncation we remap the device number to a non truncated value.642* (for more info see table.h for the data structures involved).643*/644645/*646* dev_start()647* create the device mapping table648* Return:649* 0 if successful, -1 otherwise650*/651652int653dev_start(void)654{655if (dtab != NULL)656return(0);657if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {658paxwarn(1, "Cannot allocate memory for device mapping table");659return(-1);660}661return(0);662}663664/*665* add_dev()666* add a device number to the table. this will force the device to be667* remapped to a new value if it be used during a write phase. This668* function is called during the read phase of an append to prohibit the669* use of any device number already in the archive.670* Return:671* 0 if added ok, -1 otherwise672*/673674int675add_dev(ARCHD *arcn)676{677if (chk_dev(arcn->sb.st_dev, 1) == NULL)678return(-1);679return(0);680}681682/*683* chk_dev()684* check for a device value in the device table. If not found and the add685* flag is set, it is added. This does NOT assign any mapping values, just686* adds the device number as one that need to be remapped. If this device687* is already mapped, just return with a pointer to that entry.688* Return:689* pointer to the entry for this device in the device map table. Null690* if the add flag is not set and the device is not in the table (it is691* not been seen yet). If add is set and the device cannot be added, null692* is returned (indicates an error).693*/694695static DEVT *696chk_dev(dev_t dev, int add)697{698DEVT *pt;699u_int indx;700701if (dtab == NULL)702return(NULL);703/*704* look to see if this device is already in the table705*/706indx = ((unsigned)dev) % D_TAB_SZ;707if ((pt = dtab[indx]) != NULL) {708while ((pt != NULL) && (pt->dev != dev))709pt = pt->fow;710711/*712* found it, return a pointer to it713*/714if (pt != NULL)715return(pt);716}717718/*719* not in table, we add it only if told to as this may just be a check720* to see if a device number is being used.721*/722if (add == 0)723return(NULL);724725/*726* allocate a node for this device and add it to the front of the hash727* chain. Note we do not assign remaps values here, so the pt->list728* list must be NULL.729*/730if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) {731paxwarn(1, "Device map table out of memory");732return(NULL);733}734pt->dev = dev;735pt->list = NULL;736pt->fow = dtab[indx];737dtab[indx] = pt;738return(pt);739}740/*741* map_dev()742* given an inode and device storage mask (the mask has a 1 for each bit743* the archive format is able to store in a header), we check for inode744* and device truncation and remap the device as required. Device mapping745* can also occur when during the read phase of append a device number was746* seen (and was marked as do not use during the write phase). WE ASSUME747* that unsigned longs are the same size or bigger than the fields used748* for ino_t and dev_t. If not the types will have to be changed.749* Return:750* 0 if all ok, -1 otherwise.751*/752753int754map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)755{756DEVT *pt;757DLIST *dpt;758static dev_t lastdev = 0; /* next device number to try */759int trc_ino = 0;760int trc_dev = 0;761ino_t trunc_bits = 0;762ino_t nino;763764if (dtab == NULL)765return(0);766/*767* check for device and inode truncation, and extract the truncated768* bit pattern.769*/770if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)771++trc_dev;772if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {773++trc_ino;774trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);775}776777/*778* see if this device is already being mapped, look up the device779* then find the truncation bit pattern which applies780*/781if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {782/*783* this device is already marked to be remapped784*/785for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)786if (dpt->trunc_bits == trunc_bits)787break;788789if (dpt != NULL) {790/*791* we are being remapped for this device and pattern792* change the device number to be stored and return793*/794arcn->sb.st_dev = dpt->dev;795arcn->sb.st_ino = nino;796return(0);797}798} else {799/*800* this device is not being remapped YET. if we do not have any801* form of truncation, we do not need a remap802*/803if (!trc_ino && !trc_dev)804return(0);805806/*807* we have truncation, have to add this as a device to remap808*/809if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)810goto bad;811812/*813* if we just have a truncated inode, we have to make sure that814* all future inodes that do not truncate (they have the815* truncation pattern of all 0's) continue to map to the same816* device number. We probably have already written inodes with817* this device number to the archive with the truncation818* pattern of all 0's. So we add the mapping for all 0's to the819* same device number.820*/821if (!trc_dev && (trunc_bits != 0)) {822if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)823goto bad;824dpt->trunc_bits = 0;825dpt->dev = arcn->sb.st_dev;826dpt->fow = pt->list;827pt->list = dpt;828}829}830831/*832* look for a device number not being used. We must watch for wrap833* around on lastdev (so we do not get stuck looking forever!)834*/835while (++lastdev > 0) {836if (chk_dev(lastdev, 0) != NULL)837continue;838/*839* found an unused value. If we have reached truncation point840* for this format we are hosed, so we give up. Otherwise we841* mark it as being used.842*/843if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||844(chk_dev(lastdev, 1) == NULL))845goto bad;846break;847}848849if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))850goto bad;851852/*853* got a new device number, store it under this truncation pattern.854* change the device number this file is being stored with.855*/856dpt->trunc_bits = trunc_bits;857dpt->dev = lastdev;858dpt->fow = pt->list;859pt->list = dpt;860arcn->sb.st_dev = lastdev;861arcn->sb.st_ino = nino;862return(0);863864bad:865paxwarn(1, "Unable to fix truncated inode/device field when storing %s",866arcn->name);867paxwarn(0, "Archive may create improper hard links when extracted");868return(0);869}870871/*872* directory access/mod time reset table routines (for directories READ by pax)873*874* The pax -t flag requires that access times of archive files be the same875* before being read by pax. For regular files, access time is restored after876* the file has been copied. This database provides the same functionality for877* directories read during file tree traversal. Restoring directory access time878* is more complex than files since directories may be read several times until879* all the descendants in their subtree are visited by fts. Directory access880* and modification times are stored during the fts pre-order visit (done881* before any descendants in the subtree are visited) and restored after the882* fts post-order visit (after all the descendants have been visited). In the883* case of premature exit from a subtree (like from the effects of -n), any884* directory entries left in this database are reset during final cleanup885* operations of pax. Entries are hashed by inode number for fast lookup.886*/887888/*889* atdir_start()890* create the directory access time database for directories READ by pax.891* Return:892* 0 is created ok, -1 otherwise.893*/894895int896atdir_start(void)897{898if (atab != NULL)899return(0);900if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {901paxwarn(1,"Cannot allocate space for directory access time table");902return(-1);903}904return(0);905}906907908/*909* atdir_end()910* walk through the directory access time table and reset the access time911* of any directory who still has an entry left in the database. These912* entries are for directories READ by pax913*/914915void916atdir_end(void)917{918ATDIR *pt;919int i;920921if (atab == NULL)922return;923/*924* for each non-empty hash table entry reset all the directories925* chained there.926*/927for (i = 0; i < A_TAB_SZ; ++i) {928if ((pt = atab[i]) == NULL)929continue;930/*931* remember to force the times, set_ftime() looks at pmtime932* and patime, which only applies to things CREATED by pax,933* not read by pax. Read time reset is controlled by -t.934*/935for (; pt != NULL; pt = pt->fow)936set_ftime(pt->name, pt->mtime, pt->atime, 1);937}938}939940/*941* add_atdir()942* add a directory to the directory access time table. Table is hashed943* and chained by inode number. This is for directories READ by pax944*/945946void947add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime)948{949ATDIR *pt;950u_int indx;951952if (atab == NULL)953return;954955/*956* make sure this directory is not already in the table, if so just957* return (the older entry always has the correct time). The only958* way this will happen is when the same subtree can be traversed by959* different args to pax and the -n option is aborting fts out of a960* subtree before all the post-order visits have been made.961*/962indx = ((unsigned)ino) % A_TAB_SZ;963if ((pt = atab[indx]) != NULL) {964while (pt != NULL) {965if ((pt->ino == ino) && (pt->dev == dev))966break;967pt = pt->fow;968}969970/*971* oops, already there. Leave it alone.972*/973if (pt != NULL)974return;975}976977/*978* add it to the front of the hash chain979*/980if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) {981if ((pt->name = strdup(fname)) != NULL) {982pt->dev = dev;983pt->ino = ino;984pt->mtime = mtime;985pt->atime = atime;986pt->fow = atab[indx];987atab[indx] = pt;988return;989}990free(pt);991}992993paxwarn(1, "Directory access time reset table ran out of memory");994return;995}996997/*998* get_atdir()999* look up a directory by inode and device number to obtain the access1000* and modification time you want to set to. If found, the modification1001* and access time parameters are set and the entry is removed from the1002* table (as it is no longer needed). These are for directories READ by1003* pax1004* Return:1005* 0 if found, -1 if not found.1006*/10071008int1009get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime)1010{1011ATDIR *pt;1012ATDIR **ppt;1013u_int indx;10141015if (atab == NULL)1016return(-1);1017/*1018* hash by inode and search the chain for an inode and device match1019*/1020indx = ((unsigned)ino) % A_TAB_SZ;1021if ((pt = atab[indx]) == NULL)1022return(-1);10231024ppt = &(atab[indx]);1025while (pt != NULL) {1026if ((pt->ino == ino) && (pt->dev == dev))1027break;1028/*1029* no match, go to next one1030*/1031ppt = &(pt->fow);1032pt = pt->fow;1033}10341035/*1036* return if we did not find it.1037*/1038if (pt == NULL)1039return(-1);10401041/*1042* found it. return the times and remove the entry from the table.1043*/1044*ppt = pt->fow;1045*mtime = pt->mtime;1046*atime = pt->atime;1047free(pt->name);1048free(pt);1049return(0);1050}10511052/*1053* directory access mode and time storage routines (for directories CREATED1054* by pax).1055*1056* Pax requires that extracted directories, by default, have their access/mod1057* times and permissions set to the values specified in the archive. During the1058* actions of extracting (and creating the destination subtree during -rw copy)1059* directories extracted may be modified after being created. Even worse is1060* that these directories may have been created with file permissions which1061* prohibits any descendants of these directories from being extracted. When1062* directories are created by pax, access rights may be added to permit the1063* creation of files in their subtree. Every time pax creates a directory, the1064* times and file permissions specified by the archive are stored. After all1065* files have been extracted (or copied), these directories have their times1066* and file modes reset to the stored values. The directory info is restored in1067* reverse order as entries were added to the data file from root to leaf. To1068* restore atime properly, we must go backwards. The data file consists of1069* records with two parts, the file name followed by a DIRDATA trailer. The1070* fixed sized trailer contains the size of the name plus the off_t location in1071* the file. To restore we work backwards through the file reading the trailer1072* then the file name.1073*/10741075/*1076* dir_start()1077* set up the directory time and file mode storage for directories CREATED1078* by pax.1079* Return:1080* 0 if ok, -1 otherwise1081*/10821083int1084dir_start(void)1085{10861087if (dirfd != -1)1088return(0);10891090/*1091* unlink the file so it goes away at termination by itself1092*/1093memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));1094if ((dirfd = mkstemp(tempfile)) >= 0) {1095(void)unlink(tempfile);1096return(0);1097}1098paxwarn(1, "Unable to create temporary file for directory times: %s",1099tempfile);1100return(-1);1101}11021103/*1104* add_dir()1105* add the mode and times for a newly CREATED directory1106* name is name of the directory, psb the stat buffer with the data in it,1107* frc_mode is a flag that says whether to force the setting of the mode1108* (ignoring the user set values for preserving file mode). Frc_mode is1109* for the case where we created a file and found that the resulting1110* directory was not writeable and the user asked for file modes to NOT1111* be preserved. (we have to preserve what was created by default, so we1112* have to force the setting at the end. this is stated explicitly in the1113* pax spec)1114*/11151116void1117add_dir(char *name, int nlen, struct stat *psb, int frc_mode)1118{1119DIRDATA dblk;11201121if (dirfd < 0)1122return;11231124/*1125* get current position (where file name will start) so we can store it1126* in the trailer1127*/1128if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) {1129paxwarn(1,"Unable to store mode and times for directory: %s",name);1130return;1131}11321133/*1134* write the file name followed by the trailer1135*/1136dblk.nlen = nlen + 1;1137dblk.mode = psb->st_mode & 0xffff;1138dblk.mtime = psb->st_mtime;1139dblk.atime = psb->st_atime;1140dblk.frc_mode = frc_mode;1141if ((write(dirfd, name, dblk.nlen) == dblk.nlen) &&1142(write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) {1143++dircnt;1144return;1145}11461147paxwarn(1,"Unable to store mode and times for created directory: %s",name);1148return;1149}11501151/*1152* proc_dir()1153* process all file modes and times stored for directories CREATED1154* by pax1155*/11561157void1158proc_dir(void)1159{1160char name[PAXPATHLEN+1];1161DIRDATA dblk;1162u_long cnt;11631164if (dirfd < 0)1165return;1166/*1167* read backwards through the file and process each directory1168*/1169for (cnt = 0; cnt < dircnt; ++cnt) {1170/*1171* read the trailer, then the file name, if this fails1172* just give up.1173*/1174if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0)1175break;1176if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk))1177break;1178if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)1179break;1180if (read(dirfd, name, dblk.nlen) != dblk.nlen)1181break;1182if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)1183break;11841185/*1186* frc_mode set, make sure we set the file modes even if1187* the user didn't ask for it (see file_subs.c for more info)1188*/1189if (pmode || dblk.frc_mode)1190set_pmode(name, dblk.mode);1191if (patime || pmtime)1192set_ftime(name, dblk.mtime, dblk.atime, 0);1193}11941195(void)close(dirfd);1196dirfd = -1;1197if (cnt != dircnt)1198paxwarn(1,"Unable to set mode and times for created directories");1199return;1200}12011202/*1203* database independent routines1204*/12051206/*1207* st_hash()1208* hashes filenames to a u_int for hashing into a table. Looks at the tail1209* end of file, as this provides far better distribution than any other1210* part of the name. For performance reasons we only care about the last1211* MAXKEYLEN chars (should be at LEAST large enough to pick off the file1212* name). Was tested on 500,000 name file tree traversal from the root1213* and gave almost a perfectly uniform distribution of keys when used with1214* prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)1215* chars at a time and pads with 0 for last addition.1216* Return:1217* the hash value of the string MOD (%) the table size.1218*/12191220u_int1221st_hash(char *name, int len, int tabsz)1222{1223char *pt;1224char *dest;1225char *end;1226int i;1227u_int key = 0;1228int steps;1229int res;1230u_int val;12311232/*1233* only look at the tail up to MAXKEYLEN, we do not need to waste1234* time here (remember these are pathnames, the tail is what will1235* spread out the keys)1236*/1237if (len > MAXKEYLEN) {1238pt = &(name[len - MAXKEYLEN]);1239len = MAXKEYLEN;1240} else1241pt = name;12421243/*1244* calculate the number of u_int size steps in the string and if1245* there is a runt to deal with1246*/1247steps = len/sizeof(u_int);1248res = len % sizeof(u_int);12491250/*1251* add up the value of the string in unsigned integer sized pieces1252* too bad we cannot have unsigned int aligned strings, then we1253* could avoid the expensive copy.1254*/1255for (i = 0; i < steps; ++i) {1256end = pt + sizeof(u_int);1257dest = (char *)&val;1258while (pt < end)1259*dest++ = *pt++;1260key += val;1261}12621263/*1264* add in the runt padded with zero to the right1265*/1266if (res) {1267val = 0;1268end = pt + res;1269dest = (char *)&val;1270while (pt < end)1271*dest++ = *pt++;1272key += val;1273}12741275/*1276* return the result mod the table size1277*/1278return(key % tabsz);1279}128012811282