Path: blob/master/src/java.base/share/native/libjli/parse_manifest.c
67707 views
/*1* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425#include <sys/types.h>26#include <sys/stat.h>27#include <fcntl.h>28#include <stdio.h>29#include <stdlib.h>30#include <string.h>31#include "jni.h"32#include "jli_util.h"3334#include <zlib.h>35#include "manifest_info.h"3637static char *manifest;3839static const char *manifest_name = "META-INF/MANIFEST.MF";4041/*42* Inflate the manifest file (or any file for that matter).43*44* fd: File descriptor of the jar file.45* entry: Contains the information necessary to perform the inflation46* (the compressed and uncompressed sizes and the offset in47* the file where the compressed data is located).48* size_out: Returns the size of the inflated file.49*50* Upon success, it returns a pointer to a NUL-terminated malloc'd buffer51* containing the inflated manifest file. When the caller is done with it,52* this buffer should be released by a call to free(). Upon failure,53* returns NULL.54*/55static char *56inflate_file(int fd, zentry *entry, int *size_out)57{58char *in;59char *out;60z_stream zs;6162if (entry->csize == (size_t) -1 || entry->isize == (size_t) -1 )63return (NULL);64if (JLI_Lseek(fd, entry->offset, SEEK_SET) < (jlong)0)65return (NULL);66if ((in = malloc(entry->csize + 1)) == NULL)67return (NULL);68if ((size_t)(read(fd, in, (unsigned int)entry->csize)) != entry->csize) {69free(in);70return (NULL);71}72if (entry->how == STORED) {73*(char *)((size_t)in + entry->csize) = '\0';74if (size_out) {75*size_out = (int)entry->csize;76}77return (in);78} else if (entry->how == DEFLATED) {79zs.zalloc = (alloc_func)Z_NULL;80zs.zfree = (free_func)Z_NULL;81zs.opaque = (voidpf)Z_NULL;82zs.next_in = (Byte*)in;83zs.avail_in = (uInt)entry->csize;84if (inflateInit2(&zs, -MAX_WBITS) < 0) {85free(in);86return (NULL);87}88if ((out = malloc(entry->isize + 1)) == NULL) {89free(in);90return (NULL);91}92zs.next_out = (Byte*)out;93zs.avail_out = (uInt)entry->isize;94if (inflate(&zs, Z_PARTIAL_FLUSH) < 0) {95free(in);96free(out);97return (NULL);98}99*(char *)((size_t)out + entry->isize) = '\0';100free(in);101if (inflateEnd(&zs) < 0) {102free(out);103return (NULL);104}105if (size_out) {106*size_out = (int)entry->isize;107}108return (out);109}110free(in);111return (NULL);112}113114/*115* Implementation notes:116*117* This is a zip format reader for seekable files, that tolerates118* leading and trailing garbage, and tolerates having had internal119* offsets adjusted for leading garbage (as with Info-Zip's zip -A).120*121* We find the end header by scanning backwards from the end of the122* file for the end signature. This may fail in the presence of123* trailing garbage or a ZIP file comment that contains binary data.124* Similarly, the ZIP64 end header may need to be located by scanning125* backwards from the end header. It may be misidentified, but this126* is very unlikely to happen in practice without adversarial input.127*128* The zip file format is documented at:129* https://www.pkware.com/documents/casestudies/APPNOTE.TXT130*131* TODO: more informative error messages132*/133134/** Reads count bytes from fd at position pos into given buffer. */135static jboolean136readAt(int fd, jlong pos, unsigned int count, void *buf) {137return (pos >= 0138&& JLI_Lseek(fd, pos, SEEK_SET) == pos139&& read(fd, buf, count) == (jlong) count);140}141142143/*144* Tells whether given header values (obtained from either ZIP64 or145* non-ZIP64 header) appear to be correct, by checking the first LOC146* and CEN headers.147*/148static jboolean149is_valid_end_header(int fd, jlong endpos,150jlong censiz, jlong cenoff, jlong entries) {151Byte cenhdr[CENHDR];152Byte lochdr[LOCHDR];153// Expected offset of the first central directory header154jlong censtart = endpos - censiz;155// Expected position within the file that offsets are relative to156jlong base_offset = endpos - (censiz + cenoff);157return censtart >= 0 && cenoff >= 0 &&158(censiz == 0 ||159// Validate first CEN and LOC header signatures.160// Central directory must come directly before the end header.161(readAt(fd, censtart, CENHDR, cenhdr)162&& CENSIG_AT(cenhdr)163&& readAt(fd, base_offset + CENOFF(cenhdr), LOCHDR, lochdr)164&& LOCSIG_AT(lochdr)165&& CENNAM(cenhdr) == LOCNAM(lochdr)));166}167168/*169* Tells whether p appears to be pointing at a valid ZIP64 end header.170* Values censiz, cenoff, and entries are the corresponding values171* from the non-ZIP64 end header. We perform extra checks to avoid172* misidentifying data from the last entry as a ZIP64 end header.173*/174static jboolean175is_zip64_endhdr(int fd, const Byte *p, jlong end64pos,176jlong censiz, jlong cenoff, jlong entries) {177if (ZIP64_ENDSIG_AT(p)) {178jlong censiz64 = ZIP64_ENDSIZ(p);179jlong cenoff64 = ZIP64_ENDOFF(p);180jlong entries64 = ZIP64_ENDTOT(p);181return (censiz64 == censiz || censiz == ZIP64_MAGICVAL)182&& (cenoff64 == cenoff || cenoff == ZIP64_MAGICVAL)183&& (entries64 == entries || entries == ZIP64_MAGICCOUNT)184&& is_valid_end_header(fd, end64pos, censiz64, cenoff64, entries64);185}186return JNI_FALSE;187}188189/*190* Given a non-ZIP64 end header located at endhdr and endpos, look for191* an adjacent ZIP64 end header, finding the base offset and censtart192* from the ZIP64 header if available, else from the non-ZIP64 header.193* @return 0 if successful, -1 in case of failure194*/195static int196find_positions64(int fd, const Byte * const endhdr, const jlong endpos,197jlong* base_offset, jlong* censtart)198{199jlong censiz = ENDSIZ(endhdr);200jlong cenoff = ENDOFF(endhdr);201jlong entries = ENDTOT(endhdr);202jlong end64pos;203Byte buf[ZIP64_ENDHDR + ZIP64_LOCHDR];204if (censiz + cenoff != endpos205&& (end64pos = endpos - sizeof(buf)) >= (jlong)0206&& readAt(fd, end64pos, sizeof(buf), buf)207&& ZIP64_LOCSIG_AT(buf + ZIP64_ENDHDR)208&& (jlong) ZIP64_LOCDSK(buf + ZIP64_ENDHDR) == ENDDSK(endhdr)209&& (is_zip64_endhdr(fd, buf, end64pos, censiz, cenoff, entries)210|| // A variable sized "zip64 extensible data sector" ?211((end64pos = ZIP64_LOCOFF(buf + ZIP64_ENDHDR)) >= (jlong)0212&& readAt(fd, end64pos, ZIP64_ENDHDR, buf)213&& is_zip64_endhdr(fd, buf, end64pos, censiz, cenoff, entries)))214) {215*censtart = end64pos - ZIP64_ENDSIZ(buf);216*base_offset = *censtart - ZIP64_ENDOFF(buf);217} else {218if (!is_valid_end_header(fd, endpos, censiz, cenoff, entries))219return -1;220*censtart = endpos - censiz;221*base_offset = *censtart - cenoff;222}223return 0;224}225226/*227* Finds the base offset and censtart of the zip file.228*229* @param fd file descriptor of the jar file230* @param eb scratch buffer231* @return 0 if successful, -1 in case of failure232*/233static int234find_positions(int fd, Byte *eb, jlong* base_offset, jlong* censtart)235{236jlong len;237jlong pos;238jlong flen;239int bytes;240Byte *cp;241Byte *endpos;242Byte *buffer;243244/*245* 99.44% (or more) of the time, there will be no comment at the246* end of the zip file. Try reading just enough to read the END247* record from the end of the file, at this time we should also248* check to see if we have a ZIP64 archive.249*/250if ((pos = JLI_Lseek(fd, -ENDHDR, SEEK_END)) < (jlong)0)251return (-1);252if (read(fd, eb, ENDHDR) < 0)253return (-1);254if (ENDSIG_AT(eb)) {255return find_positions64(fd, eb, pos, base_offset, censtart);256}257258/*259* Shucky-Darn,... There is a comment at the end of the zip file.260*261* Allocate and fill a buffer with enough of the zip file262* to meet the specification for a maximal comment length.263*/264if ((flen = JLI_Lseek(fd, 0, SEEK_END)) < (jlong)0)265return (-1);266len = (flen < END_MAXLEN) ? flen : END_MAXLEN;267if (JLI_Lseek(fd, -len, SEEK_END) < (jlong)0)268return (-1);269if ((buffer = malloc(END_MAXLEN)) == NULL)270return (-1);271272/*273* read() on windows takes an unsigned int for count. Casting len274* to an unsigned int here is safe since it is guaranteed to be275* less than END_MAXLEN.276*/277if ((bytes = read(fd, buffer, (unsigned int)len)) < 0) {278free(buffer);279return (-1);280}281282/*283* Search backwards from the end of file stopping when the END header284* signature is found.285*/286endpos = &buffer[bytes];287for (cp = &buffer[bytes - ENDHDR]; cp >= &buffer[0]; cp--)288if (ENDSIG_AT(cp) && (cp + ENDHDR + ENDCOM(cp) == endpos)) {289(void) memcpy(eb, cp, ENDHDR);290free(buffer);291pos = flen - (endpos - cp);292return find_positions64(fd, eb, pos, base_offset, censtart);293}294free(buffer);295return (-1);296}297298#define BUFSIZE (3 * 65536 + CENHDR + SIGSIZ)299#define MINREAD 1024300301/*302* Locate the manifest file with the zip/jar file.303*304* fd: File descriptor of the jar file.305* entry: To be populated with the information necessary to perform306* the inflation (the compressed and uncompressed sizes and307* the offset in the file where the compressed data is located).308*309* Returns zero upon success. Returns a negative value upon failure.310*311* The buffer for reading the Central Directory if the zip/jar file needs312* to be large enough to accommodate the largest possible single record313* and the signature of the next record which is:314*315* 3*2**16 + CENHDR + SIGSIZ316*317* Each of the three variable sized fields (name, comment and extension)318* has a maximum possible size of 64k.319*320* Typically, only a small bit of this buffer is used with bytes shuffled321* down to the beginning of the buffer. It is one thing to allocate such322* a large buffer and another thing to actually start faulting it in.323*324* In most cases, all that needs to be read are the first two entries in325* a typical jar file (META-INF and META-INF/MANIFEST.MF). Keep this factoid326* in mind when optimizing this code.327*/328static int329find_file(int fd, zentry *entry, const char *file_name)330{331int bytes;332int res;333int entry_size;334int read_size;335336/*337* The (imaginary) position within the file relative to which338* offsets within the zip file refer. This is usually the339* location of the first local header (the start of the zip data)340* (which in turn is usually 0), but if the zip file has content341* prepended, then it will be either 0 or the length of the342* prepended content, depending on whether or not internal offsets343* have been adjusted (via e.g. zip -A). May be negative if344* content is prepended, zip -A is run, then the prefix is345* detached!346*/347jlong base_offset;348349/** The position within the file of the start of the central directory. */350jlong censtart;351352Byte *p;353Byte *bp;354Byte *buffer;355Byte locbuf[LOCHDR];356357if ((buffer = (Byte*)malloc(BUFSIZE)) == NULL) {358return(-1);359}360361bp = buffer;362363if (find_positions(fd, bp, &base_offset, &censtart) == -1) {364free(buffer);365return -1;366}367if (JLI_Lseek(fd, censtart, SEEK_SET) < (jlong) 0) {368free(buffer);369return -1;370}371372if ((bytes = read(fd, bp, MINREAD)) < 0) {373free(buffer);374return (-1);375}376p = bp;377/*378* Loop through the Central Directory Headers. Note that a valid zip/jar379* must have an ENDHDR (with ENDSIG) after the Central Directory.380*/381while (CENSIG_AT(p)) {382383/*384* If a complete header isn't in the buffer, shift the contents385* of the buffer down and refill the buffer. Note that the check386* for "bytes < CENHDR" must be made before the test for the entire387* size of the header, because if bytes is less than CENHDR, the388* actual size of the header can't be determined. The addition of389* SIGSIZ guarantees that the next signature is also in the buffer390* for proper loop termination.391*/392if (bytes < CENHDR) {393p = memmove(bp, p, bytes);394if ((res = read(fd, bp + bytes, MINREAD)) <= 0) {395free(buffer);396return (-1);397}398bytes += res;399}400entry_size = CENHDR + CENNAM(p) + CENEXT(p) + CENCOM(p);401if (bytes < entry_size + SIGSIZ) {402if (p != bp)403p = memmove(bp, p, bytes);404read_size = entry_size - bytes + SIGSIZ;405read_size = (read_size < MINREAD) ? MINREAD : read_size;406if ((res = read(fd, bp + bytes, read_size)) <= 0) {407free(buffer);408return (-1);409}410bytes += res;411}412413/*414* Check if the name is the droid we are looking for; the jar file415* manifest. If so, build the entry record from the data found in416* the header located and return success.417*/418if ((size_t)CENNAM(p) == JLI_StrLen(file_name) &&419memcmp((p + CENHDR), file_name, JLI_StrLen(file_name)) == 0) {420if (JLI_Lseek(fd, base_offset + CENOFF(p), SEEK_SET) < (jlong)0) {421free(buffer);422return (-1);423}424if (read(fd, locbuf, LOCHDR) < 0) {425free(buffer);426return (-1);427}428if (!LOCSIG_AT(locbuf)) {429free(buffer);430return (-1);431}432entry->isize = CENLEN(p);433entry->csize = CENSIZ(p);434entry->offset = base_offset + CENOFF(p) + LOCHDR +435LOCNAM(locbuf) + LOCEXT(locbuf);436entry->how = CENHOW(p);437free(buffer);438return (0);439}440441/*442* Point to the next entry and decrement the count of valid remaining443* bytes.444*/445bytes -= entry_size;446p += entry_size;447}448free(buffer);449return (-1); /* Fell off the end the loop without a Manifest */450}451452/*453* Parse a Manifest file header entry into a distinct "name" and "value".454* Continuation lines are joined into a single "value". The documented455* syntax for a header entry is:456*457* header: name ":" value458*459* name: alphanum *headerchar460*461* value: SPACE *otherchar newline *continuation462*463* continuation: SPACE *otherchar newline464*465* newline: CR LF | LF | CR (not followed by LF)466*467* alphanum: {"A"-"Z"} | {"a"-"z"} | {"0"-"9"}468*469* headerchar: alphanum | "-" | "_"470*471* otherchar: any UTF-8 character except NUL, CR and LF472*473* Note that a manifest file may be composed of multiple sections,474* each of which may contain multiple headers.475*476* section: *header +newline477*478* nonempty-section: +header +newline479*480* (Note that the point of "nonempty-section" is unclear, because it isn't481* referenced elsewhere in the full specification for the Manifest file.)482*483* Arguments:484* lp pointer to a character pointer which points to the start485* of a valid header.486* name pointer to a character pointer which will be set to point487* to the name portion of the header (nul terminated).488* value pointer to a character pointer which will be set to point489* to the value portion of the header (nul terminated).490*491* Returns:492* 1 Successful parsing of an NV pair. lp is updated to point to the493* next character after the terminating newline in the string494* representing the Manifest file. name and value are updated to495* point to the strings parsed.496* 0 A valid end of section indicator was encountered. lp, name, and497* value are not modified.498* -1 lp does not point to a valid header. Upon return, the values of499* lp, name, and value are undefined.500*/501static int502parse_nv_pair(char **lp, char **name, char **value)503{504char *nl;505char *cp;506507/*508* End of the section - return 0. The end of section condition is509* indicated by either encountering a blank line or the end of the510* Manifest "string" (EOF).511*/512if (**lp == '\0' || **lp == '\n' || **lp == '\r')513return (0);514515/*516* Getting to here, indicates that *lp points to an "otherchar".517* Turn the "header" into a string on its own.518*/519nl = JLI_StrPBrk(*lp, "\n\r");520if (nl == NULL) {521nl = JLI_StrChr(*lp, (int)'\0');522} else {523cp = nl; /* For merging continuation lines */524if (*nl == '\r' && *(nl+1) == '\n')525*nl++ = '\0';526*nl++ = '\0';527528/*529* Process any "continuation" line(s), by making them part of the530* "header" line. Yes, I know that we are "undoing" the NULs we531* just placed here, but continuation lines are the fairly rare532* case, so we shouldn't unnecessarily complicate the code above.533*534* Note that an entire continuation line is processed each iteration535* through the outer while loop.536*/537while (*nl == ' ') {538nl++; /* First character to be moved */539while (*nl != '\n' && *nl != '\r' && *nl != '\0')540*cp++ = *nl++; /* Shift string */541if (*nl == '\0')542return (-1); /* Error: newline required */543*cp = '\0';544if (*nl == '\r' && *(nl+1) == '\n')545*nl++ = '\0';546*nl++ = '\0';547}548}549550/*551* Separate the name from the value;552*/553cp = JLI_StrChr(*lp, (int)':');554if (cp == NULL)555return (-1);556*cp++ = '\0'; /* The colon terminates the name */557if (*cp != ' ')558return (-1);559*cp++ = '\0'; /* Eat the required space */560*name = *lp;561*value = cp;562*lp = nl;563return (1);564}565566/*567* Read the manifest from the specified jar file and fill in the manifest_info568* structure with the information found within.569*570* Error returns are as follows:571* 0 Success572* -1 Unable to open jarfile573* -2 Error accessing the manifest from within the jarfile (most likely574* a manifest is not present, or this isn't a valid zip/jar file).575*/576int577JLI_ParseManifest(char *jarfile, manifest_info *info)578{579int fd;580zentry entry;581char *lp;582char *name;583char *value;584int rc;585char *splashscreen_name = NULL;586587if ((fd = JLI_Open(jarfile, O_RDONLY588#ifdef O_LARGEFILE589| O_LARGEFILE /* large file mode */590#endif591#ifdef O_BINARY592| O_BINARY /* use binary mode on windows */593#endif594)) == -1) {595return (-1);596}597info->manifest_version = NULL;598info->main_class = NULL;599info->jre_version = NULL;600info->jre_restrict_search = 0;601info->splashscreen_image_file_name = NULL;602if ((rc = find_file(fd, &entry, manifest_name)) != 0) {603close(fd);604return (-2);605}606manifest = inflate_file(fd, &entry, NULL);607if (manifest == NULL) {608close(fd);609return (-2);610}611lp = manifest;612while ((rc = parse_nv_pair(&lp, &name, &value)) > 0) {613if (JLI_StrCaseCmp(name, "Manifest-Version") == 0) {614info->manifest_version = value;615} else if (JLI_StrCaseCmp(name, "Main-Class") == 0) {616info->main_class = value;617} else if (JLI_StrCaseCmp(name, "JRE-Version") == 0) {618/*619* Manifest specification overridden by command line option620* so we will silently override there with no specification.621*/622info->jre_version = 0;623} else if (JLI_StrCaseCmp(name, "Splashscreen-Image") == 0) {624info->splashscreen_image_file_name = value;625}626}627close(fd);628if (rc == 0)629return (0);630else631return (-2);632}633634/*635* Opens the jar file and unpacks the specified file from its contents.636* Returns NULL on failure.637*/638void *639JLI_JarUnpackFile(const char *jarfile, const char *filename, int *size) {640int fd;641zentry entry;642void *data = NULL;643644if ((fd = JLI_Open(jarfile, O_RDONLY645#ifdef O_LARGEFILE646| O_LARGEFILE /* large file mode */647#endif648#ifdef O_BINARY649| O_BINARY /* use binary mode on windows */650#endif651)) == -1) {652return NULL;653}654if (find_file(fd, &entry, filename) == 0) {655data = inflate_file(fd, &entry, size);656}657close(fd);658return (data);659}660661/*662* Specialized "free" function.663*/664void665JLI_FreeManifest()666{667if (manifest)668free(manifest);669}670671/*672* Iterate over the manifest of the specified jar file and invoke the provided673* closure function for each attribute encountered.674*675* Error returns are as follows:676* 0 Success677* -1 Unable to open jarfile678* -2 Error accessing the manifest from within the jarfile (most likely679* this means a manifest is not present, or it isn't a valid zip/jar file).680*/681JNIEXPORT int JNICALL682JLI_ManifestIterate(const char *jarfile, attribute_closure ac, void *user_data)683{684int fd;685zentry entry;686char *mp; /* manifest pointer */687char *lp; /* pointer into manifest, updated during iteration */688char *name;689char *value;690int rc;691692if ((fd = JLI_Open(jarfile, O_RDONLY693#ifdef O_LARGEFILE694| O_LARGEFILE /* large file mode */695#endif696#ifdef O_BINARY697| O_BINARY /* use binary mode on windows */698#endif699)) == -1) {700return (-1);701}702703if ((rc = find_file(fd, &entry, manifest_name)) != 0) {704close(fd);705return (-2);706}707708mp = inflate_file(fd, &entry, NULL);709if (mp == NULL) {710close(fd);711return (-2);712}713714lp = mp;715while ((rc = parse_nv_pair(&lp, &name, &value)) > 0) {716(*ac)(name, value, user_data);717}718free(mp);719close(fd);720return (rc == 0) ? 0 : -2;721}722723724