/*1* Various routines from the OSTA 2.01 specs. Copyrights are included with2* each code segment. Slight whitespace modifications have been made for3* formatting purposes. Typos/bugs have been fixed.4*/56#include <fs/udf/osta.h>78/*****************************************************************************/9/*-10**********************************************************************11* OSTA compliant Unicode compression, uncompression routines.12* Copyright 1995 Micro Design International, Inc.13* Written by Jason M. Rinn.14* Micro Design International gives permission for the free use of the15* following source code.16*/1718/***********************************************************************19* Takes an OSTA CS0 compressed unicode name, and converts20* it to Unicode.21* The Unicode output will be in the byte order22* that the local compiler uses for 16-bit values.23* NOTE: This routine only performs error checking on the compID.24* It is up to the user to ensure that the unicode buffer is large25* enough, and that the compressed unicode name is correct.26*27* RETURN VALUE28*29* The number of unicode characters which were uncompressed.30* A -1 is returned if the compression ID is invalid.31*/32int33udf_UncompressUnicode(34int numberOfBytes, /* (Input) number of bytes read from media. */35byte *UDFCompressed, /* (Input) bytes read from media. */36unicode_t *unicode) /* (Output) uncompressed unicode characters. */37{38unsigned int compID;39int returnValue, unicodeIndex, byteIndex;4041/* Use UDFCompressed to store current byte being read. */42compID = UDFCompressed[0];4344/* First check for valid compID. */45if (compID != 8 && compID != 16) {46returnValue = -1;47} else {48unicodeIndex = 0;49byteIndex = 1;5051/* Loop through all the bytes. */52while (byteIndex < numberOfBytes) {53if (compID == 16) {54/* Move the first byte to the high bits of the55* unicode char.56*/57unicode[unicodeIndex] =58UDFCompressed[byteIndex++] << 8;59} else {60unicode[unicodeIndex] = 0;61}62if (byteIndex < numberOfBytes) {63/*Then the next byte to the low bits. */64unicode[unicodeIndex] |=65UDFCompressed[byteIndex++];66}67unicodeIndex++;68}69returnValue = unicodeIndex;70}71return(returnValue);72}7374/*75* Almost same as udf_UncompressUnicode(). The difference is that76* it keeps byte order of unicode string.77*/78int79udf_UncompressUnicodeByte(80int numberOfBytes, /* (Input) number of bytes read from media. */81byte *UDFCompressed, /* (Input) bytes read from media. */82byte *unicode) /* (Output) uncompressed unicode characters. */83{84unsigned int compID;85int returnValue, unicodeIndex, byteIndex;8687/* Use UDFCompressed to store current byte being read. */88compID = UDFCompressed[0];8990/* First check for valid compID. */91if (compID != 8 && compID != 16) {92returnValue = -1;93} else {94unicodeIndex = 0;95byteIndex = 1;9697/* Loop through all the bytes. */98while (byteIndex < numberOfBytes) {99if (compID == 16) {100/* Move the first byte to the high bits of the101* unicode char.102*/103unicode[unicodeIndex++] =104UDFCompressed[byteIndex++];105} else {106unicode[unicodeIndex++] = 0;107}108if (byteIndex < numberOfBytes) {109/*Then the next byte to the low bits. */110unicode[unicodeIndex++] =111UDFCompressed[byteIndex++];112}113}114returnValue = unicodeIndex;115}116return(returnValue);117}118119/***********************************************************************120* DESCRIPTION:121* Takes a string of unicode wide characters and returns an OSTA CS0122* compressed unicode string. The unicode MUST be in the byte order of123* the compiler in order to obtain correct results. Returns an error124* if the compression ID is invalid.125*126* NOTE: This routine assumes the implementation already knows, by127* the local environment, how many bits are appropriate and128* therefore does no checking to test if the input characters fit129* into that number of bits or not.130*131* RETURN VALUE132*133* The total number of bytes in the compressed OSTA CS0 string,134* including the compression ID.135* A -1 is returned if the compression ID is invalid.136*/137int138udf_CompressUnicode(139int numberOfChars, /* (Input) number of unicode characters. */140int compID, /* (Input) compression ID to be used. */141unicode_t *unicode, /* (Input) unicode characters to compress. */142byte *UDFCompressed) /* (Output) compressed string, as bytes. */143{144int byteIndex, unicodeIndex;145146if (compID != 8 && compID != 16) {147byteIndex = -1; /* Unsupported compression ID ! */148} else {149/* Place compression code in first byte. */150UDFCompressed[0] = compID;151152byteIndex = 1;153unicodeIndex = 0;154while (unicodeIndex < numberOfChars) {155if (compID == 16) {156/* First, place the high bits of the char157* into the byte stream.158*/159UDFCompressed[byteIndex++] =160(unicode[unicodeIndex] & 0xFF00) >> 8;161}162/*Then place the low bits into the stream. */163UDFCompressed[byteIndex++] =164unicode[unicodeIndex] & 0x00FF;165unicodeIndex++;166}167}168return(byteIndex);169}170171/*****************************************************************************/172/*173* CRC 010041174*/175static unsigned short crc_table[256] = {1760x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,1770x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,1780x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,1790x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,1800x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,1810xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,1820x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,1830xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,1840x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,1850xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,1860x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,1870xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,1880x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,1890xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,1900x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,1910xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,1920x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,1930x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,1940x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,1950x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,1960xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,1970x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,1980xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,1990x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,2000xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,2010x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,2020xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,2030x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,2040xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,2050x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,2060xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,2070x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0208};209210unsigned short211udf_cksum(unsigned char *s, int n)212{213unsigned short crc=0;214215while (n-- > 0)216crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);217return crc;218}219220/* UNICODE Checksum */221unsigned short222udf_unicode_cksum(unsigned short *s, int n)223{224unsigned short crc=0;225226while (n-- > 0) {227/* Take high order byte first--corresponds to a big endian228* byte stream.229*/230crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);231crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);232}233return crc;234}235236#ifdef MAIN237unsigned char bytes[] = { 0x70, 0x6A, 0x77 };238239main()240{241unsigned short x;242x = cksum(bytes, sizeof bytes);243printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299);244exit(0);245}246#endif247248/*****************************************************************************/249#ifdef NEEDS_ISPRINT250/*-251**********************************************************************252* OSTA UDF compliant file name translation routine for OS/2,253* Windows 95, Windows NT, Macintosh and UNIX.254* Copyright 1995 Micro Design International, Inc.255* Written by Jason M. Rinn.256* Micro Design International gives permission for the free use of the257* following source code.258*/259260/***********************************************************************261* To use these routines with different operating systems.262*263* OS/2264* Define OS2265* Define MAXLEN = 254266*267* Windows 95268* Define WIN_95269* Define MAXLEN = 255270*271* Windows NT272* Define WIN_NT273* Define MAXLEN = 255274*275* Macintosh:276* Define APPLE_MAC.277* Define MAXLEN = 31.278*279* UNIX280* Define UNIX.281* Define MAXLEN as specified by unix version.282*/283284#define ILLEGAL_CHAR_MARK 0x005F285#define CRC_MARK 0x0023286#define EXT_SIZE 5287#define TRUE 1288#define FALSE 0289#define PERIOD 0x002E290#define SPACE 0x0020291292/*** PROTOTYPES ***/293int IsIllegal(unicode_t ch);294295/* Define a function or macro which determines if a Unicode character is296* printable under your implementation.297*/298int UnicodeIsPrint(unicode_t);299300/***********************************************************************301* Translates a long file name to one using a MAXLEN and an illegal302* char set in accord with the OSTA requirements. Assumes the name has303* already been translated to Unicode.304*305* RETURN VALUE306*307* Number of unicode characters in translated name.308*/309int UDFTransName(310unicode_t *newName, /* (Output)Translated name. Must be of length311* MAXLEN */312unicode_t *udfName, /* (Input) Name from UDF volume.*/313int udfLen) /* (Input) Length of UDF Name. */314{315int index, newIndex = 0, needsCRC = FALSE;316int extIndex = 0, newExtIndex = 0, hasExt = FALSE;317#if defined OS2 || defined WIN_95 || defined WIN_NT318int trailIndex = 0;319#endif320unsigned short valueCRC;321unicode_t current;322const char hexChar[] = "0123456789ABCDEF";323324for (index = 0; index < udfLen; index++) {325current = udfName[index];326327if (IsIllegal(current) || !UnicodeIsPrint(current)) {328needsCRC = TRUE;329/* Replace Illegal and non-displayable chars with330* underscore.331*/332current = ILLEGAL_CHAR_MARK;333/* Skip any other illegal or non-displayable334* characters.335*/336while(index+1 < udfLen && (IsIllegal(udfName[index+1])337|| !UnicodeIsPrint(udfName[index+1]))) {338index++;339}340}341342/* Record position of extension, if one is found. */343if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) {344if (udfLen == index + 1) {345/* A trailing period is NOT an extension. */346hasExt = FALSE;347} else {348hasExt = TRUE;349extIndex = index;350newExtIndex = newIndex;351}352}353354#if defined OS2 || defined WIN_95 || defined WIN_NT355/* Record position of last char which is NOT period or space. */356else if (current != PERIOD && current != SPACE) {357trailIndex = newIndex;358}359#endif360361if (newIndex < MAXLEN) {362newName[newIndex++] = current;363} else {364needsCRC = TRUE;365}366}367368#if defined OS2 || defined WIN_95 || defined WIN_NT369/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */370if (trailIndex != newIndex - 1) {371newIndex = trailIndex + 1;372needsCRC = TRUE;373hasExt = FALSE; /* Trailing period does not make an374* extension. */375}376#endif377378if (needsCRC) {379unicode_t ext[EXT_SIZE];380int localExtIndex = 0;381if (hasExt) {382int maxFilenameLen;383/* Translate extension, and store it in ext. */384for(index = 0; index<EXT_SIZE &&385extIndex + index +1 < udfLen; index++ ) {386current = udfName[extIndex + index + 1];387if (IsIllegal(current) ||388!UnicodeIsPrint(current)) {389needsCRC = 1;390/* Replace Illegal and non-displayable391* chars with underscore.392*/393current = ILLEGAL_CHAR_MARK;394/* Skip any other illegal or395* non-displayable characters.396*/397while(index + 1 < EXT_SIZE398&& (IsIllegal(udfName[extIndex +399index + 2]) ||400!isprint(udfName[extIndex +401index + 2]))) {402index++;403}404}405ext[localExtIndex++] = current;406}407408/* Truncate filename to leave room for extension and409* CRC.410*/411maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);412if (newIndex > maxFilenameLen) {413newIndex = maxFilenameLen;414} else {415newIndex = newExtIndex;416}417} else if (newIndex > MAXLEN - 5) {418/*If no extension, make sure to leave room for CRC. */419newIndex = MAXLEN - 5;420}421newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */422423/*Calculate CRC from original filename from FileIdentifier. */424valueCRC = udf_unicode_cksum(udfName, udfLen);425/* Convert 16-bits of CRC to hex characters. */426newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];427newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];428newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];429newName[newIndex++] = hexChar[(valueCRC & 0x000f)];430431/* Place a translated extension at end, if found. */432if (hasExt) {433newName[newIndex++] = PERIOD;434for (index = 0;index < localExtIndex ;index++ ) {435newName[newIndex++] = ext[index];436}437}438}439return(newIndex);440}441442#if defined OS2 || defined WIN_95 || defined WIN_NT443/***********************************************************************444* Decides if a Unicode character matches one of a list445* of ASCII characters.446* Used by OS2 version of IsIllegal for readability, since all of the447* illegal characters above 0x0020 are in the ASCII subset of Unicode.448* Works very similarly to the standard C function strchr().449*450* RETURN VALUE451*452* Non-zero if the Unicode character is in the given ASCII string.453*/454int UnicodeInString(455unsigned char *string, /* (Input) String to search through. */456unicode_t ch) /* (Input) Unicode char to search for. */457{458int found = FALSE;459while (*string != '\0' && found == FALSE) {460/* These types should compare, since both are unsigned461* numbers. */462if (*string == ch) {463found = TRUE;464}465string++;466}467return(found);468}469#endif /* OS2 */470471/***********************************************************************472* Decides whether the given character is illegal for a given OS.473*474* RETURN VALUE475*476* Non-zero if char is illegal.477*/478int IsIllegal(unicode_t ch)479{480#ifdef APPLE_MAC481/* Only illegal character on the MAC is the colon. */482if (ch == 0x003A) {483return(1);484} else {485return(0);486}487488#elif defined UNIX489/* Illegal UNIX characters are NULL and slash. */490if (ch == 0x0000 || ch == 0x002F) {491return(1);492} else {493return(0);494}495496#elif defined OS2 || defined WIN_95 || defined WIN_NT497/* Illegal char's for OS/2 according to WARP toolkit. */498if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {499return(1);500} else {501return(0);502}503#endif504}505#endif506507508