CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/File/VFS/ZipFileReader.cpp
Views: 1401
#include <algorithm>1#include <ctype.h>2#include <set>3#include <cstdio>4#include <cstring>56#ifdef SHARED_LIBZIP7#include <zip.h>8#else9#include "ext/libzip/zip.h"10#endif1112#include "Common/Common.h"13#include "Common/Log.h"14#include "Common/File/VFS/ZipFileReader.h"15#include "Common/StringUtils.h"1617ZipFileReader *ZipFileReader::Create(const Path &zipFile, const char *inZipPath, bool logErrors) {18int error = 0;19zip *zip_file;20if (zipFile.Type() == PathType::CONTENT_URI) {21int fd = File::OpenFD(zipFile, File::OPEN_READ);22if (!fd) {23if (logErrors) {24ERROR_LOG(Log::IO, "Failed to open FD for '%s' as zip file", zipFile.c_str());25}26return nullptr;27}28zip_file = zip_fdopen(fd, 0, &error);29} else {30zip_file = zip_open(zipFile.c_str(), 0, &error);31}3233if (!zip_file) {34if (logErrors) {35ERROR_LOG(Log::IO, "Failed to open %s as a zip file", zipFile.c_str());36}37return nullptr;38}3940// The inZipPath is supposed to be a folder, and internally in this class, we suffix41// folder paths with '/', matching how the zip library works.42std::string path = inZipPath;43if (!path.empty() && path.back() != '/') {44path.push_back('/');45}46return new ZipFileReader(zip_file, path);47}4849ZipFileReader::~ZipFileReader() {50std::lock_guard<std::mutex> guard(lock_);51zip_close(zip_file_);52}5354uint8_t *ZipFileReader::ReadFile(const char *path, size_t *size) {55std::string temp_path = inZipPath_ + path;5657std::lock_guard<std::mutex> guard(lock_);58// Figure out the file size first.59struct zip_stat zstat;60zip_stat(zip_file_, temp_path.c_str(), ZIP_FL_NOCASE | ZIP_FL_UNCHANGED, &zstat);61zip_file *file = zip_fopen(zip_file_, temp_path.c_str(), ZIP_FL_NOCASE | ZIP_FL_UNCHANGED);62if (!file) {63ERROR_LOG(Log::IO, "Error opening %s from ZIP", temp_path.c_str());64return 0;65}66uint8_t *contents = new uint8_t[zstat.size + 1];67zip_fread(file, contents, zstat.size);68zip_fclose(file);69contents[zstat.size] = 0;7071*size = zstat.size;72return contents;73}7475bool ZipFileReader::GetFileListing(const char *orig_path, std::vector<File::FileInfo> *listing, const char *filter = 0) {76std::string path = std::string(inZipPath_) + orig_path;77if (!path.empty() && path.back() != '/') {78path.push_back('/');79}8081std::set<std::string> filters;82std::string tmp;83if (filter) {84while (*filter) {85if (*filter == ':') {86filters.emplace("." + tmp);87tmp.clear();88} else {89tmp.push_back(*filter);90}91filter++;92}93}9495if (tmp.size())96filters.emplace("." + tmp);9798// We just loop through the whole ZIP file and deduce what files are in this directory, and what subdirectories there are.99std::set<std::string> files;100std::set<std::string> directories;101bool success = GetZipListings(path, files, directories);102if (!success) {103// This means that no file prefix matched the path.104return false;105}106107listing->clear();108109// INFO_LOG(Log::System, "Zip: Listing '%s'", orig_path);110111listing->reserve(directories.size() + files.size());112for (const auto &dir : directories) {113File::FileInfo info;114info.name = dir;115116// Remove the "inzip" part of the fullname.117std::string relativePath = std::string(path).substr(inZipPath_.size());118info.fullName = Path(relativePath + dir);119info.exists = true;120info.isWritable = false;121info.isDirectory = true;122// INFO_LOG(Log::System, "Found file: %s (%s)", info.name.c_str(), info.fullName.c_str());123listing->push_back(info);124}125126for (const auto &fiter : files) {127std::string fpath = path;128File::FileInfo info;129info.name = fiter;130std::string relativePath = std::string(path).substr(inZipPath_.size());131info.fullName = Path(relativePath + fiter);132info.exists = true;133info.isWritable = false;134info.isDirectory = false;135std::string ext = info.fullName.GetFileExtension();136if (filter) {137if (filters.find(ext) == filters.end()) {138continue;139}140}141// INFO_LOG(Log::System, "Found dir: %s (%s)", info.name.c_str(), info.fullName.c_str());142listing->push_back(info);143}144145std::sort(listing->begin(), listing->end());146return true;147}148149// path here is from the root, so inZipPath needs to already be added.150bool ZipFileReader::GetZipListings(const std::string &path, std::set<std::string> &files, std::set<std::string> &directories) {151_dbg_assert_(path.empty() || path.back() == '/');152153std::lock_guard<std::mutex> guard(lock_);154int numFiles = zip_get_num_files(zip_file_);155bool anyPrefixMatched = false;156for (int i = 0; i < numFiles; i++) {157const char* name = zip_get_name(zip_file_, i, 0);158if (!name)159continue; // shouldn't happen, I think160if (startsWith(name, path)) {161if (strlen(name) == path.size()) {162// Don't want to return the same folder.163continue;164}165const char *slashPos = strchr(name + path.size(), '/');166if (slashPos != 0) {167anyPrefixMatched = true;168// A directory. Let's pick off the only part we care about.169size_t offset = path.size();170std::string dirName = std::string(name + offset, slashPos - (name + offset));171// We might get a lot of these if the tree is deep. The std::set deduplicates.172directories.insert(dirName);173} else {174anyPrefixMatched = true;175// It's a file.176const char *fn = name + path.size();177files.emplace(fn);178}179}180}181return anyPrefixMatched;182}183184bool ZipFileReader::GetFileInfo(const char *path, File::FileInfo *info) {185struct zip_stat zstat;186std::string temp_path = inZipPath_ + path;187188// Clear some things to start.189info->isDirectory = false;190info->isWritable = false;191info->size = 0;192193{194std::lock_guard<std::mutex> guard(lock_);195if (0 != zip_stat(zip_file_, temp_path.c_str(), ZIP_FL_NOCASE | ZIP_FL_UNCHANGED, &zstat)) {196// ZIP files do not have real directories, so we'll end up here if we197// try to stat one. For now that's fine.198info->exists = false;199return false;200}201}202203// Zips usually don't contain directory entries, but they may.204if ((zstat.valid & ZIP_STAT_NAME) != 0 && zstat.name) {205info->isDirectory = zstat.name[strlen(zstat.name) - 1] == '/';206}207if ((zstat.valid & ZIP_STAT_SIZE) != 0) {208info->size = zstat.size;209}210211info->fullName = Path(path);212info->exists = true;213return true;214}215216class ZipFileReaderFileReference : public VFSFileReference {217public:218int zi;219};220221class ZipFileReaderOpenFile : public VFSOpenFile {222public:223~ZipFileReaderOpenFile() {224// Needs to be closed properly and unlocked.225_dbg_assert_(zf == nullptr);226}227ZipFileReaderFileReference *reference;228zip_file_t *zf = nullptr;229};230231VFSFileReference *ZipFileReader::GetFile(const char *path) {232std::lock_guard<std::mutex> guard(lock_);233int zi = zip_name_locate(zip_file_, path, ZIP_FL_NOCASE);234if (zi < 0) {235// Not found.236return nullptr;237}238ZipFileReaderFileReference *ref = new ZipFileReaderFileReference();239ref->zi = zi;240return ref;241}242243bool ZipFileReader::GetFileInfo(VFSFileReference *vfsReference, File::FileInfo *fileInfo) {244ZipFileReaderFileReference *reference = (ZipFileReaderFileReference *)vfsReference;245// If you crash here, you called this while having the lock held by having the file open.246// Don't do that, check the info before you open the file.247std::lock_guard<std::mutex> guard(lock_);248zip_stat_t zstat;249if (zip_stat_index(zip_file_, reference->zi, 0, &zstat) != 0)250return false;251*fileInfo = File::FileInfo{};252fileInfo->size = 0;253if (zstat.valid & ZIP_STAT_SIZE)254fileInfo->size = zstat.size;255return zstat.size;256}257258void ZipFileReader::ReleaseFile(VFSFileReference *vfsReference) {259ZipFileReaderFileReference *reference = (ZipFileReaderFileReference *)vfsReference;260// Don't do anything other than deleting it.261delete reference;262}263264VFSOpenFile *ZipFileReader::OpenFileForRead(VFSFileReference *vfsReference, size_t *size) {265ZipFileReaderFileReference *reference = (ZipFileReaderFileReference *)vfsReference;266ZipFileReaderOpenFile *openFile = new ZipFileReaderOpenFile();267openFile->reference = reference;268*size = 0;269// We only allow one file to be open for read concurrently. It's possible that this can be improved,270// especially if we only access by index like this.271lock_.lock();272zip_stat_t zstat;273if (zip_stat_index(zip_file_, reference->zi, 0, &zstat) != 0) {274lock_.unlock();275delete openFile;276return nullptr;277}278279openFile->zf = zip_fopen_index(zip_file_, reference->zi, 0);280if (!openFile->zf) {281WARN_LOG(Log::G3D, "File with index %d not found in zip", reference->zi);282lock_.unlock();283delete openFile;284return nullptr;285}286287*size = zstat.size;288// Intentionally leaving the mutex locked, will be closed in CloseFile.289return openFile;290}291292void ZipFileReader::Rewind(VFSOpenFile *vfsOpenFile) {293ZipFileReaderOpenFile *openFile = (ZipFileReaderOpenFile *)vfsOpenFile;294// Close and re-open.295zip_fclose(openFile->zf);296openFile->zf = zip_fopen_index(zip_file_, openFile->reference->zi, 0);297}298299size_t ZipFileReader::Read(VFSOpenFile *vfsOpenFile, void *buffer, size_t length) {300ZipFileReaderOpenFile *file = (ZipFileReaderOpenFile *)vfsOpenFile;301return zip_fread(file->zf, buffer, length);302}303304void ZipFileReader::CloseFile(VFSOpenFile *vfsOpenFile) {305ZipFileReaderOpenFile *file = (ZipFileReaderOpenFile *)vfsOpenFile;306_dbg_assert_(file->zf != nullptr);307zip_fclose(file->zf);308file->zf = nullptr;309lock_.unlock();310delete file;311}312313314