Path: blob/master/Utilities/cmlibarchive/libarchive/archive_entry_link_resolver.c
3153 views
/*-1* Copyright (c) 2003-2007 Tim Kientzle2* All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12*13* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR14* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES15* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.16* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,17* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT18* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,19* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY20* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT21* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF22* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.23*/2425#include "archive_platform.h"2627#ifdef HAVE_SYS_STAT_H28#include <sys/stat.h>29#endif30#ifdef HAVE_ERRNO_H31#include <errno.h>32#endif33#include <stdio.h>34#ifdef HAVE_STDLIB_H35#include <stdlib.h>36#endif37#ifdef HAVE_STRING_H38#include <string.h>39#endif4041#include "archive.h"42#include "archive_entry.h"4344/*45* This is mostly a pretty straightforward hash table implementation.46* The only interesting bit is the different strategies used to47* match up links. These strategies match those used by various48* archiving formats:49* tar - content stored with first link, remainder refer back to it.50* This requires us to match each subsequent link up with the51* first appearance.52* cpio - Old cpio just stored body with each link, match-ups were53* implicit. This is trivial.54* new cpio - New cpio only stores body with last link, match-ups55* are implicit. This is actually quite tricky; see the notes56* below.57*/5859/* Users pass us a format code, we translate that into a strategy here. */60#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 061#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 162#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 263#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 36465/* Initial size of link cache. */66#define links_cache_initial_size 10246768struct links_entry {69struct links_entry *next;70struct links_entry *previous;71struct archive_entry *canonical;72struct archive_entry *entry;73size_t hash;74unsigned int links; /* # links not yet seen */75};7677struct archive_entry_linkresolver {78struct links_entry **buckets;79struct links_entry *spare;80unsigned long number_entries;81size_t number_buckets;82int strategy;83};8485#define NEXT_ENTRY_DEFERRED 186#define NEXT_ENTRY_PARTIAL 287#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)8889static struct links_entry *find_entry(struct archive_entry_linkresolver *,90struct archive_entry *);91static void grow_hash(struct archive_entry_linkresolver *);92static struct links_entry *insert_entry(struct archive_entry_linkresolver *,93struct archive_entry *);94static struct links_entry *next_entry(struct archive_entry_linkresolver *,95int);9697struct archive_entry_linkresolver *98archive_entry_linkresolver_new(void)99{100struct archive_entry_linkresolver *res;101102/* Check for positive power-of-two */103if (links_cache_initial_size == 0 ||104(links_cache_initial_size & (links_cache_initial_size - 1)) != 0)105return (NULL);106107res = calloc(1, sizeof(struct archive_entry_linkresolver));108if (res == NULL)109return (NULL);110res->number_buckets = links_cache_initial_size;111res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));112if (res->buckets == NULL) {113free(res);114return (NULL);115}116return (res);117}118119void120archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,121int fmt)122{123int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;124125switch (fmtbase) {126case ARCHIVE_FORMAT_7ZIP:127case ARCHIVE_FORMAT_AR:128case ARCHIVE_FORMAT_ZIP:129res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;130break;131case ARCHIVE_FORMAT_CPIO:132switch (fmt) {133case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:134case ARCHIVE_FORMAT_CPIO_SVR4_CRC:135res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;136break;137default:138res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;139break;140}141break;142case ARCHIVE_FORMAT_MTREE:143res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;144break;145case ARCHIVE_FORMAT_ISO9660:146case ARCHIVE_FORMAT_SHAR:147case ARCHIVE_FORMAT_TAR:148case ARCHIVE_FORMAT_XAR:149res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;150break;151default:152res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;153break;154}155}156157void158archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)159{160struct links_entry *le;161162if (res == NULL)163return;164165while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)166archive_entry_free(le->entry);167free(res->buckets);168free(res);169}170171void172archive_entry_linkify(struct archive_entry_linkresolver *res,173struct archive_entry **e, struct archive_entry **f)174{175struct links_entry *le;176struct archive_entry *t;177178*f = NULL; /* Default: Don't return a second entry. */179180if (*e == NULL) {181le = next_entry(res, NEXT_ENTRY_DEFERRED);182if (le != NULL) {183*e = le->entry;184le->entry = NULL;185}186return;187}188189/* If it has only one link, then we're done. */190if (archive_entry_nlink(*e) == 1)191return;192/* Directories, devices never have hardlinks. */193if (archive_entry_filetype(*e) == AE_IFDIR194|| archive_entry_filetype(*e) == AE_IFBLK195|| archive_entry_filetype(*e) == AE_IFCHR)196return;197198switch (res->strategy) {199case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:200le = find_entry(res, *e);201if (le != NULL) {202archive_entry_unset_size(*e);203#if defined(_WIN32) && !defined(__CYGWIN__)204archive_entry_copy_hardlink_w(*e,205archive_entry_pathname_w(le->canonical));206#else207archive_entry_copy_hardlink(*e,208archive_entry_pathname(le->canonical));209#endif210} else211insert_entry(res, *e);212return;213case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:214le = find_entry(res, *e);215if (le != NULL) {216#if defined(_WIN32) && !defined(__CYGWIN__)217archive_entry_copy_hardlink_w(*e,218archive_entry_pathname_w(le->canonical));219#else220archive_entry_copy_hardlink(*e,221archive_entry_pathname(le->canonical));222#endif223} else224insert_entry(res, *e);225return;226case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:227/* This one is trivial. */228return;229case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:230le = find_entry(res, *e);231if (le != NULL) {232/*233* Put the new entry in le, return the234* old entry from le.235*/236t = *e;237*e = le->entry;238le->entry = t;239/* Make the old entry into a hardlink. */240archive_entry_unset_size(*e);241#if defined(_WIN32) && !defined(__CYGWIN__)242archive_entry_copy_hardlink_w(*e,243archive_entry_pathname_w(le->canonical));244#else245archive_entry_copy_hardlink(*e,246archive_entry_pathname(le->canonical));247#endif248/* If we ran out of links, return the249* final entry as well. */250if (le->links == 0) {251*f = le->entry;252le->entry = NULL;253}254} else {255/*256* If we haven't seen it, tuck it away257* for future use.258*/259le = insert_entry(res, *e);260if (le == NULL)261/* XXX We should return an error code XXX */262return;263le->entry = *e;264*e = NULL;265}266return;267default:268break;269}270return;271}272273static struct links_entry *274find_entry(struct archive_entry_linkresolver *res,275struct archive_entry *entry)276{277struct links_entry *le;278size_t hash, bucket;279dev_t dev;280int64_t ino;281282/* Free a held entry. */283if (res->spare != NULL) {284archive_entry_free(res->spare->canonical);285archive_entry_free(res->spare->entry);286free(res->spare);287res->spare = NULL;288}289290dev = archive_entry_dev(entry);291ino = archive_entry_ino64(entry);292hash = (size_t)(dev ^ ino);293294/* Try to locate this entry in the links cache. */295bucket = hash & (res->number_buckets - 1);296for (le = res->buckets[bucket]; le != NULL; le = le->next) {297if (le->hash == hash298&& dev == archive_entry_dev(le->canonical)299&& ino == archive_entry_ino64(le->canonical)) {300/*301* Decrement link count each time and release302* the entry if it hits zero. This saves303* memory and is necessary for detecting304* missed links.305*/306--le->links;307if (le->links > 0)308return (le);309/* Remove it from this hash bucket. */310if (le->previous != NULL)311le->previous->next = le->next;312if (le->next != NULL)313le->next->previous = le->previous;314if (res->buckets[bucket] == le)315res->buckets[bucket] = le->next;316res->number_entries--;317/* Defer freeing this entry. */318res->spare = le;319return (le);320}321}322return (NULL);323}324325static struct links_entry *326next_entry(struct archive_entry_linkresolver *res, int mode)327{328struct links_entry *le;329size_t bucket;330331/* Free a held entry. */332if (res->spare != NULL) {333archive_entry_free(res->spare->canonical);334archive_entry_free(res->spare->entry);335free(res->spare);336res->spare = NULL;337}338339/* Look for next non-empty bucket in the links cache. */340for (bucket = 0; bucket < res->number_buckets; bucket++) {341for (le = res->buckets[bucket]; le != NULL; le = le->next) {342if (le->entry != NULL &&343(mode & NEXT_ENTRY_DEFERRED) == 0)344continue;345if (le->entry == NULL &&346(mode & NEXT_ENTRY_PARTIAL) == 0)347continue;348/* Remove it from this hash bucket. */349if (le->next != NULL)350le->next->previous = le->previous;351if (le->previous != NULL)352le->previous->next = le->next;353else354res->buckets[bucket] = le->next;355res->number_entries--;356/* Defer freeing this entry. */357res->spare = le;358return (le);359}360}361return (NULL);362}363364static struct links_entry *365insert_entry(struct archive_entry_linkresolver *res,366struct archive_entry *entry)367{368struct links_entry *le;369size_t hash, bucket;370371/* Add this entry to the links cache. */372le = calloc(1, sizeof(struct links_entry));373if (le == NULL)374return (NULL);375le->canonical = archive_entry_clone(entry);376377/* If the links cache is getting too full, enlarge the hash table. */378if (res->number_entries > res->number_buckets * 2)379grow_hash(res);380381hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));382bucket = hash & (res->number_buckets - 1);383384/* If we could allocate the entry, record it. */385if (res->buckets[bucket] != NULL)386res->buckets[bucket]->previous = le;387res->number_entries++;388le->next = res->buckets[bucket];389le->previous = NULL;390res->buckets[bucket] = le;391le->hash = hash;392le->links = archive_entry_nlink(entry) - 1;393return (le);394}395396static void397grow_hash(struct archive_entry_linkresolver *res)398{399struct links_entry *le, **new_buckets;400size_t new_size;401size_t i, bucket;402403/* Try to enlarge the bucket list. */404new_size = res->number_buckets * 2;405if (new_size < res->number_buckets)406return;407new_buckets = calloc(new_size, sizeof(struct links_entry *));408409if (new_buckets == NULL)410return;411412for (i = 0; i < res->number_buckets; i++) {413while (res->buckets[i] != NULL) {414/* Remove entry from old bucket. */415le = res->buckets[i];416res->buckets[i] = le->next;417418/* Add entry to new bucket. */419bucket = le->hash & (new_size - 1);420421if (new_buckets[bucket] != NULL)422new_buckets[bucket]->previous = le;423le->next = new_buckets[bucket];424le->previous = NULL;425new_buckets[bucket] = le;426}427}428free(res->buckets);429res->buckets = new_buckets;430res->number_buckets = new_size;431}432433struct archive_entry *434archive_entry_partial_links(struct archive_entry_linkresolver *res,435unsigned int *links)436{437struct archive_entry *e;438struct links_entry *le;439440/* Free a held entry. */441if (res->spare != NULL) {442archive_entry_free(res->spare->canonical);443archive_entry_free(res->spare->entry);444free(res->spare);445res->spare = NULL;446}447448le = next_entry(res, NEXT_ENTRY_PARTIAL);449if (le != NULL) {450e = le->canonical;451if (links != NULL)452*links = le->links;453le->canonical = NULL;454} else {455e = NULL;456if (links != NULL)457*links = 0;458}459return (e);460}461462463