/* Retrieved from the GitHub repository freebsd/freebsd-src, path sys/fs/udf/osta.c (main branch). */
/*
 * Various routines from the OSTA 2.01 specs.  Copyrights are included with
 * each code segment.  Slight whitespace modifications have been made for
 * formatting purposes.  Typos/bugs have been fixed.
 */

#include <fs/udf/osta.h>

/*****************************************************************************/
/*-
 **********************************************************************
 * OSTA compliant Unicode compression, uncompression routines.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */

/***********************************************************************
20
* Takes an OSTA CS0 compressed unicode name, and converts
21
* it to Unicode.
22
* The Unicode output will be in the byte order
23
* that the local compiler uses for 16-bit values.
24
* NOTE: This routine only performs error checking on the compID.
25
* It is up to the user to ensure that the unicode buffer is large
26
* enough, and that the compressed unicode name is correct.
27
*
28
* RETURN VALUE
29
*
30
* The number of unicode characters which were uncompressed.
31
* A -1 is returned if the compression ID is invalid.
32
*/
33
int
34
udf_UncompressUnicode(
35
int numberOfBytes, /* (Input) number of bytes read from media. */
36
byte *UDFCompressed, /* (Input) bytes read from media. */
37
unicode_t *unicode) /* (Output) uncompressed unicode characters. */
38
{
39
unsigned int compID;
40
int returnValue, unicodeIndex, byteIndex;
41
42
/* Use UDFCompressed to store current byte being read. */
43
compID = UDFCompressed[0];
44
45
/* First check for valid compID. */
46
if (compID != 8 && compID != 16) {
47
returnValue = -1;
48
} else {
49
unicodeIndex = 0;
50
byteIndex = 1;
51
52
/* Loop through all the bytes. */
53
while (byteIndex < numberOfBytes) {
54
if (compID == 16) {
55
/* Move the first byte to the high bits of the
56
* unicode char.
57
*/
58
unicode[unicodeIndex] =
59
UDFCompressed[byteIndex++] << 8;
60
} else {
61
unicode[unicodeIndex] = 0;
62
}
63
if (byteIndex < numberOfBytes) {
64
/*Then the next byte to the low bits. */
65
unicode[unicodeIndex] |=
66
UDFCompressed[byteIndex++];
67
}
68
unicodeIndex++;
69
}
70
returnValue = unicodeIndex;
71
}
72
return(returnValue);
73
}
/*
76
* Almost same as udf_UncompressUnicode(). The difference is that
77
* it keeps byte order of unicode string.
78
*/
79
int
80
udf_UncompressUnicodeByte(
81
int numberOfBytes, /* (Input) number of bytes read from media. */
82
byte *UDFCompressed, /* (Input) bytes read from media. */
83
byte *unicode) /* (Output) uncompressed unicode characters. */
84
{
85
unsigned int compID;
86
int returnValue, unicodeIndex, byteIndex;
87
88
/* Use UDFCompressed to store current byte being read. */
89
compID = UDFCompressed[0];
90
91
/* First check for valid compID. */
92
if (compID != 8 && compID != 16) {
93
returnValue = -1;
94
} else {
95
unicodeIndex = 0;
96
byteIndex = 1;
97
98
/* Loop through all the bytes. */
99
while (byteIndex < numberOfBytes) {
100
if (compID == 16) {
101
/* Move the first byte to the high bits of the
102
* unicode char.
103
*/
104
unicode[unicodeIndex++] =
105
UDFCompressed[byteIndex++];
106
} else {
107
unicode[unicodeIndex++] = 0;
108
}
109
if (byteIndex < numberOfBytes) {
110
/*Then the next byte to the low bits. */
111
unicode[unicodeIndex++] =
112
UDFCompressed[byteIndex++];
113
}
114
}
115
returnValue = unicodeIndex;
116
}
117
return(returnValue);
118
}
/***********************************************************************
121
* DESCRIPTION:
122
* Takes a string of unicode wide characters and returns an OSTA CS0
123
* compressed unicode string. The unicode MUST be in the byte order of
124
* the compiler in order to obtain correct results. Returns an error
125
* if the compression ID is invalid.
126
*
127
* NOTE: This routine assumes the implementation already knows, by
128
* the local environment, how many bits are appropriate and
129
* therefore does no checking to test if the input characters fit
130
* into that number of bits or not.
131
*
132
* RETURN VALUE
133
*
134
* The total number of bytes in the compressed OSTA CS0 string,
135
* including the compression ID.
136
* A -1 is returned if the compression ID is invalid.
137
*/
138
int
139
udf_CompressUnicode(
140
int numberOfChars, /* (Input) number of unicode characters. */
141
int compID, /* (Input) compression ID to be used. */
142
unicode_t *unicode, /* (Input) unicode characters to compress. */
143
byte *UDFCompressed) /* (Output) compressed string, as bytes. */
144
{
145
int byteIndex, unicodeIndex;
146
147
if (compID != 8 && compID != 16) {
148
byteIndex = -1; /* Unsupported compression ID ! */
149
} else {
150
/* Place compression code in first byte. */
151
UDFCompressed[0] = compID;
152
153
byteIndex = 1;
154
unicodeIndex = 0;
155
while (unicodeIndex < numberOfChars) {
156
if (compID == 16) {
157
/* First, place the high bits of the char
158
* into the byte stream.
159
*/
160
UDFCompressed[byteIndex++] =
161
(unicode[unicodeIndex] & 0xFF00) >> 8;
162
}
163
/*Then place the low bits into the stream. */
164
UDFCompressed[byteIndex++] =
165
unicode[unicodeIndex] & 0x00FF;
166
unicodeIndex++;
167
}
168
}
169
return(byteIndex);
170
}

/*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-indexed lookup table for the 16-bit CRC used by the checksum
 * routines in this file.  010041 (octal) is 0x1021, which is also
 * entry 1 of the table.  The table is only ever read, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
unsigned short
212
udf_cksum(unsigned char *s, int n)
213
{
214
unsigned short crc=0;
215
216
while (n-- > 0)
217
crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
218
return crc;
219
}
/* UNICODE Checksum */
222
unsigned short
223
udf_unicode_cksum(unsigned short *s, int n)
224
{
225
unsigned short crc=0;
226
227
while (n-- > 0) {
228
/* Take high order byte first--corresponds to a big endian
229
* byte stream.
230
*/
231
crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
232
crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
233
}
234
return crc;
235
}
#ifdef MAIN
/*
 * Stand-alone self test, compiled only when MAIN is defined: prints the
 * CRC that udf_cksum() computes for a three-byte sample next to the
 * expected value 0x3299.
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return (0);
}
#endif

/*****************************************************************************/
#ifdef NEEDS_ISPRINT
/*-
 **********************************************************************
 * OSTA UDF compliant file name translation routine for OS/2,
 * Windows 95, Windows NT, Macintosh and UNIX.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */

/***********************************************************************
 * To use these routines with different operating systems.
 *
 * OS/2
 *    Define OS2
 *    Define MAXLEN = 254
 *
 * Windows 95
 *    Define WIN_95
 *    Define MAXLEN = 255
 *
 * Windows NT
 *    Define WIN_NT
 *    Define MAXLEN = 255
 *
 * Macintosh:
 *    Define APPLE_MAC.
 *    Define MAXLEN = 31.
 *
 * UNIX
 *    Define UNIX.
 *    Define MAXLEN as specified by unix version.
 */

/* Constants used by the file name translation routine. */
#define	ILLEGAL_CHAR_MARK	0x005F	/* '_': stands in for illegal chars. */
#define	CRC_MARK		0x0023	/* '#': introduces the CRC digits. */
#define	EXT_SIZE		5	/* Longest extension, in characters. */
#define	TRUE			1
#define	FALSE			0
#define	PERIOD			0x002E	/* '.' */
#define	SPACE			0x0020	/* ' ' */

/*** PROTOTYPES ***/
/* Returns non-zero if ch may not appear in a file name on the target OS. */
int IsIllegal(unicode_t ch);

/* Define a function or macro which determines if a Unicode character is
 * printable under your implementation.
 */
int UnicodeIsPrint(unicode_t);
/***********************************************************************
302
* Translates a long file name to one using a MAXLEN and an illegal
303
* char set in accord with the OSTA requirements. Assumes the name has
304
* already been translated to Unicode.
305
*
306
* RETURN VALUE
307
*
308
* Number of unicode characters in translated name.
309
*/
310
int UDFTransName(
311
unicode_t *newName, /* (Output)Translated name. Must be of length
312
* MAXLEN */
313
unicode_t *udfName, /* (Input) Name from UDF volume.*/
314
int udfLen) /* (Input) Length of UDF Name. */
315
{
316
int index, newIndex = 0, needsCRC = FALSE;
317
int extIndex = 0, newExtIndex = 0, hasExt = FALSE;
318
#if defined OS2 || defined WIN_95 || defined WIN_NT
319
int trailIndex = 0;
320
#endif
321
unsigned short valueCRC;
322
unicode_t current;
323
const char hexChar[] = "0123456789ABCDEF";
324
325
for (index = 0; index < udfLen; index++) {
326
current = udfName[index];
327
328
if (IsIllegal(current) || !UnicodeIsPrint(current)) {
329
needsCRC = TRUE;
330
/* Replace Illegal and non-displayable chars with
331
* underscore.
332
*/
333
current = ILLEGAL_CHAR_MARK;
334
/* Skip any other illegal or non-displayable
335
* characters.
336
*/
337
while(index+1 < udfLen && (IsIllegal(udfName[index+1])
338
|| !UnicodeIsPrint(udfName[index+1]))) {
339
index++;
340
}
341
}
342
343
/* Record position of extension, if one is found. */
344
if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) {
345
if (udfLen == index + 1) {
346
/* A trailing period is NOT an extension. */
347
hasExt = FALSE;
348
} else {
349
hasExt = TRUE;
350
extIndex = index;
351
newExtIndex = newIndex;
352
}
353
}
354
355
#if defined OS2 || defined WIN_95 || defined WIN_NT
356
/* Record position of last char which is NOT period or space. */
357
else if (current != PERIOD && current != SPACE) {
358
trailIndex = newIndex;
359
}
360
#endif
361
362
if (newIndex < MAXLEN) {
363
newName[newIndex++] = current;
364
} else {
365
needsCRC = TRUE;
366
}
367
}
368
369
#if defined OS2 || defined WIN_95 || defined WIN_NT
370
/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
371
if (trailIndex != newIndex - 1) {
372
newIndex = trailIndex + 1;
373
needsCRC = TRUE;
374
hasExt = FALSE; /* Trailing period does not make an
375
* extension. */
376
}
377
#endif
378
379
if (needsCRC) {
380
unicode_t ext[EXT_SIZE];
381
int localExtIndex = 0;
382
if (hasExt) {
383
int maxFilenameLen;
384
/* Translate extension, and store it in ext. */
385
for(index = 0; index<EXT_SIZE &&
386
extIndex + index +1 < udfLen; index++ ) {
387
current = udfName[extIndex + index + 1];
388
if (IsIllegal(current) ||
389
!UnicodeIsPrint(current)) {
390
needsCRC = 1;
391
/* Replace Illegal and non-displayable
392
* chars with underscore.
393
*/
394
current = ILLEGAL_CHAR_MARK;
395
/* Skip any other illegal or
396
* non-displayable characters.
397
*/
398
while(index + 1 < EXT_SIZE
399
&& (IsIllegal(udfName[extIndex +
400
index + 2]) ||
401
!isprint(udfName[extIndex +
402
index + 2]))) {
403
index++;
404
}
405
}
406
ext[localExtIndex++] = current;
407
}
408
409
/* Truncate filename to leave room for extension and
410
* CRC.
411
*/
412
maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
413
if (newIndex > maxFilenameLen) {
414
newIndex = maxFilenameLen;
415
} else {
416
newIndex = newExtIndex;
417
}
418
} else if (newIndex > MAXLEN - 5) {
419
/*If no extension, make sure to leave room for CRC. */
420
newIndex = MAXLEN - 5;
421
}
422
newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
423
424
/*Calculate CRC from original filename from FileIdentifier. */
425
valueCRC = udf_unicode_cksum(udfName, udfLen);
426
/* Convert 16-bits of CRC to hex characters. */
427
newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
428
newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
429
newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
430
newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
431
432
/* Place a translated extension at end, if found. */
433
if (hasExt) {
434
newName[newIndex++] = PERIOD;
435
for (index = 0;index < localExtIndex ;index++ ) {
436
newName[newIndex++] = ext[index];
437
}
438
}
439
}
440
return(newIndex);
441
}
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Decides if a Unicode character matches one of a list
 * of ASCII characters.
 * Used by OS2 version of IsIllegal for readability, since all of the
 * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 * Works very similarly to the standard C function strchr(), except
 * that the terminating NUL of the string is never matched.
 *
 * RETURN VALUE
 *
 *    Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	for (; *string != '\0'; string++) {
		/* These types should compare, since both are unsigned
		 * numbers. */
		if (*string == ch) {
			return (TRUE);
		}
	}
	return (FALSE);
}
#endif /* OS2 || WIN_95 || WIN_NT */
/***********************************************************************
473
* Decides whether the given character is illegal for a given OS.
474
*
475
* RETURN VALUE
476
*
477
* Non-zero if char is illegal.
478
*/
479
int IsIllegal(unicode_t ch)
480
{
481
#ifdef APPLE_MAC
482
/* Only illegal character on the MAC is the colon. */
483
if (ch == 0x003A) {
484
return(1);
485
} else {
486
return(0);
487
}
488
489
#elif defined UNIX
490
/* Illegal UNIX characters are NULL and slash. */
491
if (ch == 0x0000 || ch == 0x002F) {
492
return(1);
493
} else {
494
return(0);
495
}
496
497
#elif defined OS2 || defined WIN_95 || defined WIN_NT
498
/* Illegal char's for OS/2 according to WARP toolkit. */
499
if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
500
return(1);
501
} else {
502
return(0);
503
}
504
#endif
505
}
#endif