Path: blob/master/Utilities/cmlibarchive/libarchive/archive_entry_link_resolver.c
5045 views
/*-1* Copyright (c) 2003-2007 Tim Kientzle2* All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12*13* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR14* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES15* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.16* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,17* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT18* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,19* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY20* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT21* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF22* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.23*/2425#include "archive_platform.h"2627#ifdef HAVE_SYS_STAT_H28#include <sys/stat.h>29#endif30#ifdef HAVE_ERRNO_H31#include <errno.h>32#endif33#include <stdio.h>34#ifdef HAVE_STDLIB_H35#include <stdlib.h>36#endif37#ifdef HAVE_STRING_H38#include <string.h>39#endif4041#include "archive.h"42#include "archive_entry.h"4344/*45* This is mostly a pretty straightforward hash table implementation.46* The only interesting bit is the different strategies used to47* match up links. These strategies match those used by various48* archiving formats:49* tar - content stored with first link, remainder refer back to it.50* This requires us to match each subsequent link up with the51* first appearance.52* cpio - Old cpio just stored body with each link, match-ups were53* implicit. This is trivial.54* new cpio - New cpio only stores body with last link, match-ups55* are implicit. This is actually quite tricky; see the notes56* below.57*/5859/* Users pass us a format code, we translate that into a strategy here. */60#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 061#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 162#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 263#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 36465/* Initial size of link cache. */66#define links_cache_initial_size 10246768struct links_entry {69struct links_entry *next;70struct links_entry *previous;71struct archive_entry *canonical;72struct archive_entry *entry;73size_t hash;74unsigned int links; /* # links not yet seen */75};7677struct archive_entry_linkresolver {78struct links_entry **buckets;79struct links_entry *spare;80unsigned long number_entries;81size_t number_buckets;82int strategy;83};8485#define NEXT_ENTRY_DEFERRED 186#define NEXT_ENTRY_PARTIAL 287#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)8889static struct links_entry *find_entry(struct archive_entry_linkresolver *,90struct archive_entry *);91static void grow_hash(struct archive_entry_linkresolver *);92static struct links_entry *insert_entry(struct archive_entry_linkresolver *,93struct archive_entry *);94static struct links_entry *next_entry(struct archive_entry_linkresolver *,95int);9697struct archive_entry_linkresolver *98archive_entry_linkresolver_new(void)99{100struct archive_entry_linkresolver *res;101102/* Check for positive power-of-two */103if (links_cache_initial_size == 0 ||104(links_cache_initial_size & (links_cache_initial_size - 1)) != 0)105return (NULL);106107res = calloc(1, sizeof(struct archive_entry_linkresolver));108if (res == NULL)109return (NULL);110res->number_buckets = links_cache_initial_size;111res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));112if (res->buckets == NULL) {113free(res);114return (NULL);115}116return (res);117}118119void120archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,121int fmt)122{123int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;124125switch (fmtbase) {126case ARCHIVE_FORMAT_7ZIP:127case ARCHIVE_FORMAT_AR:128case ARCHIVE_FORMAT_ZIP:129res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;130break;131case ARCHIVE_FORMAT_CPIO:132switch (fmt) {133case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:134case ARCHIVE_FORMAT_CPIO_SVR4_CRC:135res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;136break;137default:138res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;139break;140}141break;142case ARCHIVE_FORMAT_MTREE:143res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;144break;145case ARCHIVE_FORMAT_ISO9660:146case ARCHIVE_FORMAT_SHAR:147case ARCHIVE_FORMAT_TAR:148case ARCHIVE_FORMAT_XAR:149res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;150break;151default:152res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;153break;154}155}156157void158archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)159{160struct links_entry *le;161162if (res == NULL)163return;164165while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)166archive_entry_free(le->entry);167free(res->buckets);168free(res);169}170171void172archive_entry_linkify(struct archive_entry_linkresolver *res,173struct archive_entry **e, struct archive_entry **f)174{175struct links_entry *le;176struct archive_entry *t;177178*f = NULL; /* Default: Don't return a second entry. */179180if (*e == NULL) {181le = next_entry(res, NEXT_ENTRY_DEFERRED);182if (le != NULL) {183*e = le->entry;184le->entry = NULL;185}186return;187}188189/* If it has only one link, then we're done. */190if (archive_entry_nlink(*e) == 1)191return;192/* Directories, devices never have hardlinks. */193if (archive_entry_filetype(*e) == AE_IFDIR194|| archive_entry_filetype(*e) == AE_IFBLK195|| archive_entry_filetype(*e) == AE_IFCHR)196return;197198switch (res->strategy) {199case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:200le = find_entry(res, *e);201if (le != NULL) {202archive_entry_unset_size(*e);203#if defined(_WIN32) && !defined(__CYGWIN__)204archive_entry_copy_hardlink_w(*e,205archive_entry_pathname_w(le->canonical));206#else207archive_entry_copy_hardlink(*e,208archive_entry_pathname(le->canonical));209#endif210} else211insert_entry(res, *e);212return;213case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:214le = find_entry(res, *e);215if (le != NULL) {216#if defined(_WIN32) && !defined(__CYGWIN__)217archive_entry_copy_hardlink_w(*e,218archive_entry_pathname_w(le->canonical));219#else220archive_entry_copy_hardlink(*e,221archive_entry_pathname(le->canonical));222#endif223} else224insert_entry(res, *e);225return;226case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:227/* This one is trivial. */228return;229case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:230le = find_entry(res, *e);231if (le != NULL) {232/*233* Put the new entry in le, return the234* old entry from le.235*/236t = *e;237*e = le->entry;238le->entry = t;239/* Make the old entry into a hardlink. */240archive_entry_unset_size(*e);241#if defined(_WIN32) && !defined(__CYGWIN__)242archive_entry_copy_hardlink_w(*e,243archive_entry_pathname_w(le->canonical));244#else245archive_entry_copy_hardlink(*e,246archive_entry_pathname(le->canonical));247#endif248/* If we ran out of links, return the249* final entry as well. */250if (le->links == 0) {251*f = le->entry;252le->entry = NULL;253}254} else {255/*256* If we haven't seen it, tuck it away257* for future use.258*/259le = insert_entry(res, *e);260if (le == NULL)261/* XXX We should return an error code XXX */262return;263le->entry = *e;264*e = NULL;265}266return;267default:268break;269}270return;271}272273static struct links_entry *274find_entry(struct archive_entry_linkresolver *res,275struct archive_entry *entry)276{277struct links_entry *le;278size_t hash, bucket;279dev_t dev;280int64_t ino;281282if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {283return (NULL);284}285286/* Free a held entry. */287if (res->spare != NULL) {288archive_entry_free(res->spare->canonical);289archive_entry_free(res->spare->entry);290free(res->spare);291res->spare = NULL;292}293294dev = archive_entry_dev(entry);295ino = archive_entry_ino64(entry);296hash = (size_t)(dev ^ ino);297298/* Try to locate this entry in the links cache. */299bucket = hash & (res->number_buckets - 1);300for (le = res->buckets[bucket]; le != NULL; le = le->next) {301if (le->hash == hash302&& dev == archive_entry_dev(le->canonical)303&& ino == archive_entry_ino64(le->canonical)) {304/*305* Decrement link count each time and release306* the entry if it hits zero. This saves307* memory and is necessary for detecting308* missed links.309*/310--le->links;311if (le->links > 0)312return (le);313/* Remove it from this hash bucket. */314if (le->previous != NULL)315le->previous->next = le->next;316if (le->next != NULL)317le->next->previous = le->previous;318if (res->buckets[bucket] == le)319res->buckets[bucket] = le->next;320res->number_entries--;321/* Defer freeing this entry. */322res->spare = le;323return (le);324}325}326return (NULL);327}328329static struct links_entry *330next_entry(struct archive_entry_linkresolver *res, int mode)331{332struct links_entry *le;333size_t bucket;334335/* Free a held entry. */336if (res->spare != NULL) {337archive_entry_free(res->spare->canonical);338archive_entry_free(res->spare->entry);339free(res->spare);340res->spare = NULL;341}342343/* Look for next non-empty bucket in the links cache. */344for (bucket = 0; bucket < res->number_buckets; bucket++) {345for (le = res->buckets[bucket]; le != NULL; le = le->next) {346if (le->entry != NULL &&347(mode & NEXT_ENTRY_DEFERRED) == 0)348continue;349if (le->entry == NULL &&350(mode & NEXT_ENTRY_PARTIAL) == 0)351continue;352/* Remove it from this hash bucket. */353if (le->next != NULL)354le->next->previous = le->previous;355if (le->previous != NULL)356le->previous->next = le->next;357else358res->buckets[bucket] = le->next;359res->number_entries--;360/* Defer freeing this entry. */361res->spare = le;362return (le);363}364}365return (NULL);366}367368static struct links_entry *369insert_entry(struct archive_entry_linkresolver *res,370struct archive_entry *entry)371{372struct links_entry *le;373size_t hash, bucket;374375if (!archive_entry_ino_is_set(entry) || !archive_entry_dev_is_set(entry)) {376return (NULL);377}378379/* Add this entry to the links cache. */380le = calloc(1, sizeof(struct links_entry));381if (le == NULL)382return (NULL);383le->canonical = archive_entry_clone(entry);384385/* If the links cache is getting too full, enlarge the hash table. */386if (res->number_entries > res->number_buckets * 2)387grow_hash(res);388389hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));390bucket = hash & (res->number_buckets - 1);391392/* If we could allocate the entry, record it. */393if (res->buckets[bucket] != NULL)394res->buckets[bucket]->previous = le;395res->number_entries++;396le->next = res->buckets[bucket];397le->previous = NULL;398res->buckets[bucket] = le;399le->hash = hash;400le->links = archive_entry_nlink(entry) - 1;401return (le);402}403404static void405grow_hash(struct archive_entry_linkresolver *res)406{407struct links_entry *le, **new_buckets;408size_t new_size;409size_t i, bucket;410411/* Try to enlarge the bucket list. */412new_size = res->number_buckets * 2;413if (new_size < res->number_buckets)414return;415new_buckets = calloc(new_size, sizeof(struct links_entry *));416417if (new_buckets == NULL)418return;419420for (i = 0; i < res->number_buckets; i++) {421while (res->buckets[i] != NULL) {422/* Remove entry from old bucket. */423le = res->buckets[i];424res->buckets[i] = le->next;425426/* Add entry to new bucket. */427bucket = le->hash & (new_size - 1);428429if (new_buckets[bucket] != NULL)430new_buckets[bucket]->previous = le;431le->next = new_buckets[bucket];432le->previous = NULL;433new_buckets[bucket] = le;434}435}436free(res->buckets);437res->buckets = new_buckets;438res->number_buckets = new_size;439}440441struct archive_entry *442archive_entry_partial_links(struct archive_entry_linkresolver *res,443unsigned int *links)444{445struct archive_entry *e;446struct links_entry *le;447448/* Free a held entry. */449if (res->spare != NULL) {450archive_entry_free(res->spare->canonical);451archive_entry_free(res->spare->entry);452free(res->spare);453res->spare = NULL;454}455456le = next_entry(res, NEXT_ENTRY_PARTIAL);457if (le != NULL) {458e = le->canonical;459if (links != NULL)460*links = le->links;461le->canonical = NULL;462} else {463e = NULL;464if (links != NULL)465*links = 0;466}467return (e);468}469470471