CoCalc -- osta.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/fs/udf/osta.c
104882 views
1
/*
2
 * Various routines from the OSTA 2.01 specs.  Copyrights are included with
3
 * each code segment.  Slight whitespace modifications have been made for
4
 * formatting purposes.  Typos/bugs have been fixed.
5
 */
6

7
#include <fs/udf/osta.h>
8

9
/*****************************************************************************/
10
/*-
11
 **********************************************************************
12
 * OSTA compliant Unicode compression, uncompression routines.
13
 * Copyright 1995 Micro Design International, Inc.
14
 * Written by Jason M. Rinn.
15
 * Micro Design International gives permission for the free use of the
16
 * following source code.
17
 */
18

19
/***********************************************************************
20
 * Takes an OSTA CS0 compressed unicode name, and converts
21
 * it to Unicode.
22
 * The Unicode output will be in the byte order
23
 * that the local compiler uses for 16-bit values.
24
 * NOTE: This routine only performs error checking on the compID.
25
 * It is up to the user to ensure that the unicode buffer is large
26
 * enough, and that the compressed unicode name is correct.
27
 *
28
 * RETURN VALUE
29
 *
30
 * The number of unicode characters which were uncompressed.
31
 * A -1 is returned if the compression ID is invalid.
32
 */
33
int
34
udf_UncompressUnicode(
35
	int numberOfBytes,	/* (Input) number of bytes read from media. */
36
	byte *UDFCompressed,	/* (Input) bytes read from media. */
37
	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
38
{
39
	unsigned int compID;
40
	int returnValue, unicodeIndex, byteIndex;
41

42
	/* Use UDFCompressed to store current byte being read. */
43
	compID = UDFCompressed[0];
44

45
	/* First check for valid compID. */
46
	if (compID != 8 && compID != 16) {
47
		returnValue = -1;
48
	} else {
49
		unicodeIndex = 0;
50
		byteIndex = 1;
51

52
		/* Loop through all the bytes. */
53
		while (byteIndex < numberOfBytes) {
54
			if (compID == 16) {
55
				/* Move the first byte to the high bits of the
56
				 * unicode char.
57
				 */
58
				unicode[unicodeIndex] =
59
				    UDFCompressed[byteIndex++] << 8;
60
			} else {
61
				unicode[unicodeIndex] = 0;
62
			}
63
			if (byteIndex < numberOfBytes) {
64
				/*Then the next byte to the low bits. */
65
				unicode[unicodeIndex] |=
66
				    UDFCompressed[byteIndex++];
67
			}
68
			unicodeIndex++;
69
		}
70
		returnValue = unicodeIndex;
71
	}
72
	return(returnValue);
73
}
74

75
/*
76
 * Almost same as udf_UncompressUnicode(). The difference is that
77
 * it keeps byte order of unicode string.
78
 */
79
int
80
udf_UncompressUnicodeByte(
81
	int numberOfBytes,	/* (Input) number of bytes read from media. */
82
	byte *UDFCompressed,	/* (Input) bytes read from media. */
83
	byte *unicode)		/* (Output) uncompressed unicode characters. */
84
{
85
	unsigned int compID;
86
	int returnValue, unicodeIndex, byteIndex;
87

88
	/* Use UDFCompressed to store current byte being read. */
89
	compID = UDFCompressed[0];
90

91
	/* First check for valid compID. */
92
	if (compID != 8 && compID != 16) {
93
		returnValue = -1;
94
	} else {
95
		unicodeIndex = 0;
96
		byteIndex = 1;
97

98
		/* Loop through all the bytes. */
99
		while (byteIndex < numberOfBytes) {
100
			if (compID == 16) {
101
				/* Move the first byte to the high bits of the
102
				 * unicode char.
103
				 */
104
				unicode[unicodeIndex++] =
105
				    UDFCompressed[byteIndex++];
106
			} else {
107
				unicode[unicodeIndex++] = 0;
108
			}
109
			if (byteIndex < numberOfBytes) {
110
				/*Then the next byte to the low bits. */
111
				unicode[unicodeIndex++] =
112
				    UDFCompressed[byteIndex++];
113
			}
114
		}
115
		returnValue = unicodeIndex;
116
	}
117
	return(returnValue);
118
}
119

120
/***********************************************************************
121
 * DESCRIPTION:
122
 * Takes a string of unicode wide characters and returns an OSTA CS0
123
 * compressed unicode string. The unicode MUST be in the byte order of
124
 * the compiler in order to obtain correct results. Returns an error
125
 * if the compression ID is invalid.
126
 *
127
 * NOTE: This routine assumes the implementation already knows, by
128
 * the local environment, how many bits are appropriate and
129
 * therefore does no checking to test if the input characters fit
130
 * into that number of bits or not.
131
 *
132
 * RETURN VALUE
133
 *
134
 * The total number of bytes in the compressed OSTA CS0 string,
135
 * including the compression ID.
136
 * A -1 is returned if the compression ID is invalid.
137
 */
138
int
139
udf_CompressUnicode(
140
	int numberOfChars,	/* (Input) number of unicode characters. */
141
	int compID,		/* (Input) compression ID to be used. */
142
	unicode_t *unicode,	/* (Input) unicode characters to compress. */
143
	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
144
{
145
	int byteIndex, unicodeIndex;
146

147
	if (compID != 8 && compID != 16) {
148
		byteIndex = -1; /* Unsupported compression ID ! */
149
	} else {
150
		/* Place compression code in first byte. */
151
		UDFCompressed[0] = compID;
152

153
		byteIndex = 1;
154
		unicodeIndex = 0;
155
		while (unicodeIndex < numberOfChars) {
156
			if (compID == 16) {
157
				/* First, place the high bits of the char
158
				 * into the byte stream.
159
				 */
160
				UDFCompressed[byteIndex++] =
161
				    (unicode[unicodeIndex] & 0xFF00) >> 8;
162
			}
163
			/*Then place the low bits into the stream. */
164
			UDFCompressed[byteIndex++] =
165
			    unicode[unicodeIndex] & 0x00FF;
166
			unicodeIndex++;
167
		}
168
	}
169
	return(byteIndex);
170
}
171

172
/*****************************************************************************/
173
/*
 * CRC 010041
 *
 * Lookup table for a 16-bit CRC with generator polynomial 0x1021 (010041
 * in octal, x^16 + x^12 + x^5 + 1), processed most significant bit first:
 * entry i is the CRC register after shifting the single byte i through an
 * initially zero register.  The table is read-only, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
210

211
unsigned short
212
udf_cksum(unsigned char *s, int n)
213
{
214
	unsigned short crc=0;
215

216
	while (n-- > 0)
217
		crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
218
	return crc;
219
}
220

221
/* UNICODE Checksum */
222
unsigned short
223
udf_unicode_cksum(unsigned short *s, int n)
224
{
225
	unsigned short crc=0;
226

227
	while (n-- > 0) {
228
		/* Take high order byte first--corresponds to a big endian
229
		 * byte stream.
230
		 */
231
		crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
232
		crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
233
	}
234
	return crc;
235
}
236

237
#ifdef MAIN
/*
 * Standalone self-test, built only when MAIN is defined: runs udf_cksum()
 * over a three byte test vector and prints the result next to the expected
 * value 0x3299.
 */
#include <stdio.h>
#include <stdlib.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n", x, 0x3299);
	exit(0);
}
#endif
248

249
/*****************************************************************************/
250
#ifdef NEEDS_ISPRINT
251
/*-
252
 **********************************************************************
253
 * OSTA UDF compliant file name translation routine for OS/2,
254
 * Windows 95, Windows NT, Macintosh and UNIX.
255
 * Copyright 1995 Micro Design International, Inc.
256
 * Written by Jason M. Rinn.
257
 * Micro Design International gives permission for the free use of the
258
 * following source code.
259
 */
260

261
/***********************************************************************
262
 * To use these routines with different operating systems.
263
 *
264
 * OS/2
265
 * Define OS2
266
 * Define MAXLEN = 254
267
 *
268
 * Windows 95
269
 * Define WIN_95
270
 * Define MAXLEN = 255
271
 *
272
 * Windows NT
273
 * Define WIN_NT
274
 * Define MAXLEN = 255
275
 *
276
 * Macintosh:
277
 * Define APPLE_MAC.
278
 * Define MAXLEN = 31.
279
 *
280
 * UNIX
281
 * Define UNIX.
282
 * Define MAXLEN as specified by unix version.
283
 */
284

285
/* Unicode code points used by the name translation. */
#define	ILLEGAL_CHAR_MARK	0x005F	/* '_' replaces illegal characters */
#define	CRC_MARK	0x0023	/* '#' introduces the CRC suffix */
#define	PERIOD	0x002E	/* '.' */
#define	SPACE	0x0020	/* ' ' */

/* Longest extension (excluding the period) recognised as such. */
#define	EXT_SIZE	5

/* Boolean values. */
#define	TRUE	1
#define	FALSE	0
292

293
/*** PROTOTYPES ***/
294
int IsIllegal(unicode_t ch);
295

296
/* Define a function or macro which determines if a Unicode character is
297
 * printable under your implementation.
298
 */
299
int UnicodeIsPrint(unicode_t);
300

301
/***********************************************************************
302
 * Translates a long file name to one using a MAXLEN and an illegal
303
 * char set in accord with the OSTA requirements. Assumes the name has
304
 * already been translated to Unicode.
305
 *
306
 * RETURN VALUE
307
 *
308
 * Number of unicode characters in translated name.
309
 */
310
int UDFTransName(
311
	unicode_t *newName,	/* (Output)Translated name. Must be of length
312
				 * MAXLEN */
313
	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
314
	int udfLen)		/* (Input) Length of UDF Name. */
315
{
316
	int index, newIndex = 0, needsCRC = FALSE;
317
	int extIndex = 0, newExtIndex = 0, hasExt = FALSE;
318
#if defined OS2 || defined WIN_95 || defined WIN_NT
319
	int trailIndex = 0;
320
#endif
321
	unsigned short valueCRC;
322
	unicode_t current;
323
	const char hexChar[] = "0123456789ABCDEF";
324

325
	for (index = 0; index < udfLen; index++) {
326
		current = udfName[index];
327

328
		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
329
			needsCRC = TRUE;
330
			/* Replace Illegal and non-displayable chars with
331
			 * underscore.
332
			 */
333
			current = ILLEGAL_CHAR_MARK;
334
			/* Skip any other illegal or non-displayable
335
			 * characters.
336
			 */
337
			while(index+1 < udfLen && (IsIllegal(udfName[index+1])
338
			    || !UnicodeIsPrint(udfName[index+1]))) {
339
				index++;
340
			}
341
		}
342

343
		/* Record position of extension, if one is found. */
344
		if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) {
345
			if (udfLen == index + 1) {
346
				/* A trailing period is NOT an extension. */
347
				hasExt = FALSE;
348
			} else {
349
				hasExt = TRUE;
350
				extIndex = index;
351
				newExtIndex = newIndex;
352
			}
353
		}
354

355
#if defined OS2 || defined WIN_95 || defined WIN_NT
356
		/* Record position of last char which is NOT period or space. */
357
		else if (current != PERIOD && current != SPACE) {
358
			trailIndex = newIndex;
359
		}
360
#endif
361

362
		if (newIndex < MAXLEN) {
363
			newName[newIndex++] = current;
364
		} else {
365
			needsCRC = TRUE;
366
		}
367
	}
368

369
#if defined OS2 || defined WIN_95 || defined WIN_NT
370
	/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
371
	if (trailIndex != newIndex - 1) {
372
		newIndex = trailIndex + 1;
373
		needsCRC = TRUE;
374
		hasExt = FALSE; /* Trailing period does not make an
375
				 * extension. */
376
	}
377
#endif
378

379
	if (needsCRC) {
380
		unicode_t ext[EXT_SIZE];
381
		int localExtIndex = 0;
382
		if (hasExt) {
383
			int maxFilenameLen;
384
			/* Translate extension, and store it in ext. */
385
			for(index = 0; index<EXT_SIZE &&
386
			    extIndex + index +1 < udfLen; index++) {
387
				current = udfName[extIndex + index + 1];
388
				if (IsIllegal(current) ||
389
				    !UnicodeIsPrint(current)) {
390
					needsCRC = 1;
391
					/* Replace Illegal and non-displayable
392
					 * chars with underscore.
393
					 */
394
					current = ILLEGAL_CHAR_MARK;
395
					/* Skip any other illegal or
396
					 * non-displayable characters.
397
					 */
398
					while(index + 1 < EXT_SIZE
399
					    && (IsIllegal(udfName[extIndex +
400
					    index + 2]) ||
401
					    !isprint(udfName[extIndex +
402
					    index + 2]))) {
403
						index++;
404
					}
405
				}
406
				ext[localExtIndex++] = current;
407
			}
408

409
			/* Truncate filename to leave room for extension and
410
			 * CRC.
411
			 */
412
			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
413
			if (newIndex > maxFilenameLen) {
414
				newIndex = maxFilenameLen;
415
			} else {
416
				newIndex = newExtIndex;
417
			}
418
		} else if (newIndex > MAXLEN - 5) {
419
			/*If no extension, make sure to leave room for CRC. */
420
			newIndex = MAXLEN - 5;
421
		}
422
		newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
423

424
		/*Calculate CRC from original filename from FileIdentifier. */
425
		valueCRC = udf_unicode_cksum(udfName, udfLen);
426
		/* Convert 16-bits of CRC to hex characters. */
427
		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
428
		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
429
		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
430
		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
431

432
		/* Place a translated extension at end, if found. */
433
		if (hasExt) {
434
			newName[newIndex++] = PERIOD;
435
			for (index = 0; index < localExtIndex; index++) {
436
				newName[newIndex++] = ext[index];
437
			}
438
		}
439
	}
440
	return(newIndex);
441
}
442

443
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Tests whether a Unicode character occurs in a NUL-terminated string
 * of ASCII characters, much like the standard C function strchr().
 * Only compiled for OS/2 and Windows, where IsIllegal() uses it to keep
 * the list of illegal characters readable.
 *
 * RETURN VALUE
 *
 * TRUE if the Unicode character is in the given ASCII string,
 * FALSE otherwise.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	/* Both operands are unsigned, so the comparison is well behaved. */
	for (; *string != '\0'; string++) {
		if (*string == ch) {
			return(TRUE);
		}
	}
	return(FALSE);
}
#endif /* OS2 */
471

472
/***********************************************************************
473
 * Decides whether the given character is illegal for a given OS.
474
 *
475
 * RETURN VALUE
476
 *
477
 * Non-zero if char is illegal.
478
 */
479
int IsIllegal(unicode_t ch)
480
{
481
#ifdef APPLE_MAC
482
	/* Only illegal character on the MAC is the colon. */
483
	if (ch == 0x003A) {
484
		return(1);
485
	} else {
486
		return(0);
487
	}
488

489
#elif defined UNIX
490
	/* Illegal UNIX characters are NULL and slash. */
491
	if (ch == 0x0000 || ch == 0x002F) {
492
		return(1);
493
	} else {
494
		return(0);
495
	}
496

497
#elif defined OS2 || defined WIN_95 || defined WIN_NT
498
	/* Illegal char's for OS/2 according to WARP toolkit. */
499
	if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
500
		return(1);
501
	} else {
502
		return(0);
503
	}
504
#endif
505
}
506
#endif
507

508
Product

Resources

Company