Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/ksh93/sh/string.c
1810 views
1
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1982-2012 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                    David Korn <[email protected]>                    *
*                                                                      *
***********************************************************************/
20
#pragma prototyped
/*
 * string processing routines for Korn shell
 */

#include	<ast.h>
#include	<ast_wchar.h>
#include	"defs.h"
#include	<stak.h>
#include	<ccode.h>
#include	"shtable.h"
#include	"lexstates.h"
#include	"national.h"

#if _hdr_wctype
#   include <wctype.h>
#endif

/*
 * Fallback used when neither the library nor a macro supplies iswprint():
 * any value with bits set above the low 8 is treated as printable,
 * everything else is deferred to isprint().
 */
#if !_lib_iswprint && !defined(iswprint)
#   define iswprint(c)		(((c)&~0377) || isprint(c))
#endif
42
43
44
/*
45
* Table lookup routine
46
* <table> is searched for string <sp> and corresponding value is returned
47
* This is only used for small tables and is used to save non-sharable memory
48
*/
49
50
const Shtable_t *sh_locate(register const char *sp,const Shtable_t *table,int size)
51
{
52
register int first;
53
register const Shtable_t *tp;
54
register int c;
55
static const Shtable_t empty = {0,0};
56
if(sp==0 || (first= *sp)==0)
57
return(&empty);
58
tp=table;
59
while((c= *tp->sh_name) && (CC_NATIVE!=CC_ASCII || c <= first))
60
{
61
if(first == c && strcmp(sp,tp->sh_name)==0)
62
return(tp);
63
tp = (Shtable_t*)((char*)tp+size);
64
}
65
return(&empty);
66
}
67
68
/*
69
* shtab_options lookup routine
70
*/
71
72
#define sep(c) ((c)=='-'||(c)=='_')
73
74
int sh_lookopt(register const char *sp, int *invert)
75
{
76
register int first;
77
register const Shtable_t *tp;
78
register int c;
79
register const char *s, *t, *sw, *tw;
80
int amb;
81
int hit;
82
int inv;
83
int no;
84
if(sp==0)
85
return(0);
86
if(*sp=='n' && *(sp+1)=='o' && (*(sp+2)!='t' || *(sp+3)!='i'))
87
{
88
sp+=2;
89
if(sep(*sp))
90
sp++;
91
*invert = !*invert;
92
}
93
if((first= *sp)==0)
94
return(0);
95
tp=shtab_options;
96
amb=hit=0;
97
for(;;)
98
{
99
t=tp->sh_name;
100
if(no = *t=='n' && *(t+1)=='o' && *(t+2)!='t')
101
t+=2;
102
if(!(c= *t))
103
break;
104
if(first == c)
105
{
106
if(strcmp(sp,t)==0)
107
{
108
*invert ^= no;
109
return(tp->sh_number);
110
}
111
s=sw=sp;
112
tw=t;
113
for(;;)
114
{
115
if(!*s || *s=='=')
116
{
117
if (*s == '=' && !strtol(s+1, NiL, 0))
118
no = !no;
119
if (!*t)
120
{
121
*invert ^= no;
122
return(tp->sh_number);
123
}
124
if (hit || amb)
125
{
126
hit = 0;
127
amb = 1;
128
}
129
else
130
{
131
hit = tp->sh_number;
132
inv = no;
133
}
134
break;
135
}
136
else if(!*t)
137
break;
138
else if(sep(*s))
139
sw = ++s;
140
else if(sep(*t))
141
tw = ++t;
142
else if(*s==*t)
143
{
144
s++;
145
t++;
146
}
147
else if(s==sw && t==tw)
148
break;
149
else
150
{
151
if(t!=tw)
152
{
153
while(*t && !sep(*t))
154
t++;
155
if(!*t)
156
break;
157
tw = ++t;
158
}
159
while (s>sw && *s!=*t)
160
s--;
161
}
162
}
163
}
164
tp = (Shtable_t*)((char*)tp+sizeof(*shtab_options));
165
}
166
if(hit)
167
*invert ^= inv;
168
return(hit);
169
}
170
171
/*
 * look for the substring <oldsp> in <string> and replace with <newsp>
 * The new string is put on top of the stack
 *
 * Only the first occurrence is replaced.  The stack is reset with
 * stakseek(0) before anything is written and the result is frozen with
 * stakfreeze(1).
 * Returns a null pointer when <string> is empty or <oldsp> does not occur.
 * An empty <oldsp> matches at the very start, so <newsp> is prepended.
 */
char *sh_substitute(const char *string,const char *oldsp,char *newsp)
/*@
	assume string!=NULL && oldsp!=NULL && newsp!=NULL;
	return x satisfying x==NULL ||
		strlen(x)==(strlen(in string)+strlen(in newsp)-strlen(in oldsp));
@*/
{
	register const char *sp = string;
	register const char *cp;
	const char *savesp = 0;	/* start of the most recent failed match attempt */

	stakseek(0);
	if(*sp==0)
		return((char*)0);
	if(*(cp=oldsp) == 0)
		goto found;
#if SHOPT_MULTIBYTE
	mbinit();
#endif /* SHOPT_MULTIBYTE */
	do
	{
		/*
		 * skip to first character which matches start of oldsp;
		 * savesp==sp forces progress past a position that already failed
		 */
		while(*sp && (savesp==sp || *sp != *cp))
		{
#if SHOPT_MULTIBYTE
			/* skip a whole character at a time */
			int c = mbsize(sp);
			/*
			 * NOTE(review): when mbsize() reports an error the byte
			 * is stepped over without being copied to the result;
			 * confirm that dropping it is intended.
			 */
			if(c < 0)
				sp++;
			while(c-- > 0)
#endif /* SHOPT_MULTIBYTE */
			stakputc(*sp++);
		}
		if(*sp == 0)
			return((char*)0);
		savesp = sp;
		for(;*cp;cp++)
		{
			if(*cp != *sp++)
				break;
		}
		if(*cp==0)
			/* match found */
			goto found;
		/* partial match only: rewind and try the next position */
		sp = savesp;
		cp = oldsp;
	}
	while(*sp);
	return((char*)0);

found:
	/* copy new */
	stakputs(newsp);
	/* copy rest of string */
	stakputs(sp);
	return(stakfreeze(1));
}
231
232
/*
 * TRIM(sp)
 * Remove escape characters from characters in <sp> and eliminate quoted nulls.
 *
 * The string is rewritten in place: each backslash is removed and the
 * character after it is kept literally.  A null <sp> is ignored.
 *
 * A backslash immediately followed by a NUL byte (a "quoted null") is
 * dropped as a pair and scanning CONTINUES with the byte after that NUL.
 * The buffer must therefore hold a further terminator whenever it can
 * contain such a pair.
 * NOTE(review): this reads past the first NUL by design of the loop;
 * confirm every caller guarantees the extra terminator.
 */

void	sh_trim(register char *sp)
/*@
	assume sp!=NULL;
	promise  strlen(in sp) <= in strlen(sp);
@*/
{
	register char *dp;	/* write position, never ahead of sp */
	register int c;

	if(!sp)
		return;
	dp = sp;
	while((c = *sp))
	{
#if SHOPT_MULTIBYTE
		int len;
		/* copy multibyte characters whole so that a '\\' byte inside one is not misread */
		if(mbwide() && (len=mbsize(sp))>1)
		{
			memmove(dp, sp, len);
			dp += len;
			sp += len;
			continue;
		}
#endif /* SHOPT_MULTIBYTE */
		sp++;
		if(c == '\\')
			c = *sp++;	/* take the escaped character, even if it is NUL */
		if(c)
			*dp++ = c;
	}
	*dp = 0;
}
269
270
/*
 * copy <str1> to <str2> changing upper case to lower case
 * <str2> must be big enough to hold <str1>, including the terminating NUL
 * <str1> and <str2> may point to the same place.
 *
 * Case is decided byte by byte with isupper()/tolower(), so the result
 * depends on the current locale's single-byte classification.
 */

void sh_utol(register char const *str1,register char *str2)
/*@
	assume str1!=0 && str2!=0
	return x satisfying strlen(in str1)==strlen(in str2);
@*/
{
	register int c;

	/* read through unsigned char so that isupper() never sees a negative value */
	for(; (c = *(const unsigned char*)str1); str1++,str2++)
		*str2 = isupper(c) ? tolower(c) : c;
	*str2 = 0;
}
292
293
/*
 * format string as a csv field
 *
 * A null <string> yields a null pointer.  A string made up only of
 * characters accepted by isaname() is returned unchanged (the argument
 * itself, nothing is written to the stack).  Otherwise the field is
 * written at the current stack offset enclosed in double quotes, with
 * every embedded '"' doubled, and NUL terminated; the returned pointer
 * refers to that unfrozen stack data.
 */
static char	*sh_fmtcsv(const char *string)
{
	register const char *cp = string;
	register int c;
	int offset;

	if(!cp)
		return((char*)0);
	offset = staktell();
	/* find the first character that is not a name character */
	while((c=mbchar(cp)),isaname(c));
	if(c==0)
		return((char*)string);
	stakputc('"');
	/* the prefix, including the character that stopped the scan */
	stakwrite(string,cp-string);
	if(c=='"')
		stakputc('"');
	string = cp;
	while(c=mbchar(cp))
	{
		if(c=='"')
		{
			/* flush through the quote, then add its double */
			stakwrite(string,cp-string);
			string = cp;
			stakputc('"');
		}
	}
	/* cp is one past the terminating NUL here; step back onto it */
	if(--cp>string)
		stakwrite(string,cp-string);
	stakputc('"');
	stakputc(0);
	return(stakptr(offset));
}
327
328
/*
329
* print <str> quoting chars so that it can be read by the shell
330
* puts null terminated result on stack, but doesn't freeze it
331
*/
332
char *sh_fmtq(const char *string)
333
{
334
register const char *cp = string, *op;
335
register int c, state;
336
int offset;
337
if(!cp)
338
return((char*)0);
339
offset = staktell();
340
state = ((c= mbchar(cp))==0);
341
if(isaletter(c))
342
{
343
while((c=mbchar(cp)),isaname(c));
344
if(c==0)
345
return((char*)string);
346
if(c=='=')
347
{
348
if(*cp==0)
349
return((char*)string);
350
if(*cp=='=')
351
cp++;
352
c = cp - string;
353
stakwrite(string,c);
354
string = cp;
355
c = mbchar(cp);
356
}
357
}
358
if(c==0 || c=='#' || c=='~')
359
state = 1;
360
for(;c;c= mbchar(cp))
361
{
362
#if SHOPT_MULTIBYTE
363
if(c=='\'' || c>=128 || c<0 || !iswprint(c))
364
#else
365
if(c=='\'' || !isprint(c))
366
#endif /* SHOPT_MULTIBYTE */
367
state = 2;
368
else if(c==']' || c=='=' || (c!=':' && c<=0x7f && (c=sh_lexstates[ST_NORM][c]) && c!=S_EPAT))
369
state |=1;
370
}
371
if(state<2)
372
{
373
if(state==1)
374
stakputc('\'');
375
if(c = --cp - string)
376
stakwrite(string,c);
377
if(state==1)
378
stakputc('\'');
379
}
380
else
381
{
382
int isbyte=0;
383
stakwrite("$'",2);
384
cp = string;
385
#if SHOPT_MULTIBYTE
386
while(op = cp, c= mbchar(cp))
387
#else
388
while(op = cp, c= *(unsigned char*)cp++)
389
#endif
390
{
391
state=1;
392
switch(c)
393
{
394
case ('a'==97?'\033':39):
395
c = 'E';
396
break;
397
case '\n':
398
c = 'n';
399
break;
400
case '\r':
401
c = 'r';
402
break;
403
case '\t':
404
c = 't';
405
break;
406
case '\f':
407
c = 'f';
408
break;
409
case '\b':
410
c = 'b';
411
break;
412
case '\a':
413
c = 'a';
414
break;
415
case '\\': case '\'':
416
break;
417
default:
418
#if SHOPT_MULTIBYTE
419
isbyte = 0;
420
if(c<0)
421
{
422
c = *((unsigned char *)op);
423
cp = op+1;
424
isbyte = 1;
425
}
426
if(mbwide() && ((cp-op)>1))
427
{
428
sfprintf(staksp,"\\u[%x]",c);
429
continue;
430
}
431
else if(!iswprint(c) || isbyte)
432
#else
433
if(!isprint(c))
434
#endif
435
{
436
sfprintf(staksp,"\\x%.2x",c);
437
continue;
438
}
439
state=0;
440
break;
441
}
442
if(state)
443
{
444
stakputc('\\');
445
stakputc(c);
446
}
447
else
448
stakwrite(op, cp-op);
449
}
450
stakputc('\'');
451
}
452
stakputc(0);
453
return(stakptr(offset));
454
}
455
456
/*
457
* print <str> quoting chars so that it can be read by the shell
458
* puts null terminated result on stack, but doesn't freeze it
459
* single!=0 limits quoting to '...'
460
* fold>0 prints raw newlines and inserts appropriately
461
* escaped newlines every (fold-x) chars
462
*/
463
char *sh_fmtqf(const char *string, int single, int fold)
464
{
465
register const char *cp = string;
466
register const char *bp;
467
register const char *vp;
468
register int c;
469
register int n;
470
register int q;
471
register int a;
472
int offset;
473
474
if (--fold < 8)
475
fold = 0;
476
if(single)
477
return sh_fmtcsv(cp);
478
if (!cp || !*cp || !fold || fold && strlen(string) < fold)
479
return sh_fmtq(cp);
480
offset = staktell();
481
single = single ? 1 : 3;
482
c = mbchar(string);
483
a = isaletter(c) ? '=' : 0;
484
vp = cp + 1;
485
do
486
{
487
q = 0;
488
n = fold;
489
bp = cp;
490
while ((!n || n-- > 0) && (c = mbchar(cp)))
491
{
492
if (a && !isaname(c))
493
a = 0;
494
#if SHOPT_MULTIBYTE
495
if (c >= 0x200)
496
continue;
497
if (c == '\'' || !iswprint(c))
498
#else
499
if (c == '\'' || !isprint(c))
500
#endif /* SHOPT_MULTIBYTE */
501
{
502
q = single;
503
break;
504
}
505
if (c == '\n')
506
q = 1;
507
else if (c == a)
508
{
509
stakwrite(bp, cp - bp);
510
bp = cp;
511
vp = cp + 1;
512
a = 0;
513
}
514
else if ((c == '#' || c == '~') && cp == vp || c == ']' || c != ':' && (c = sh_lexstates[ST_NORM][c]) && c != S_EPAT)
515
q = 1;
516
}
517
if (q & 2)
518
{
519
stakputc('$');
520
stakputc('\'');
521
cp = bp;
522
n = fold - 3;
523
q = 1;
524
while (c = mbchar(cp))
525
{
526
switch (c)
527
{
528
case ('a'==97?'\033':39):
529
c = 'E';
530
break;
531
case '\n':
532
q = 0;
533
n = fold - 1;
534
break;
535
case '\r':
536
c = 'r';
537
break;
538
case '\t':
539
c = 't';
540
break;
541
case '\f':
542
c = 'f';
543
break;
544
case '\b':
545
c = 'b';
546
break;
547
case '\a':
548
c = 'a';
549
break;
550
case '\\':
551
if (*cp == 'n')
552
{
553
c = '\n';
554
q = 0;
555
n = fold - 1;
556
break;
557
}
558
case '\'':
559
break;
560
default:
561
#if SHOPT_MULTIBYTE
562
if(!iswprint(c))
563
#else
564
if(!isprint(c))
565
#endif
566
{
567
if ((n -= 4) <= 0)
568
{
569
stakwrite("'\\\n$'", 5);
570
n = fold - 7;
571
}
572
sfprintf(staksp, "\\%03o", c);
573
continue;
574
}
575
q = 0;
576
break;
577
}
578
if ((n -= q + 1) <= 0)
579
{
580
if (!q)
581
{
582
stakputc('\'');
583
cp = bp;
584
break;
585
}
586
stakwrite("'\\\n$'", 5);
587
n = fold - 5;
588
}
589
if (q)
590
stakputc('\\');
591
else
592
q = 1;
593
stakputc(c);
594
bp = cp;
595
}
596
if (!c)
597
stakputc('\'');
598
}
599
else if (q & 1)
600
{
601
stakputc('\'');
602
cp = bp;
603
n = fold ? (fold - 2) : 0;
604
while (c = mbchar(cp))
605
{
606
if (c == '\n')
607
n = fold - 1;
608
else if (n && --n <= 0)
609
{
610
n = fold - 2;
611
stakwrite(bp, --cp - bp);
612
bp = cp;
613
stakwrite("'\\\n'", 4);
614
}
615
else if (n == 1 && *cp == '\'')
616
{
617
n = fold - 5;
618
stakwrite(bp, --cp - bp);
619
bp = cp;
620
stakwrite("'\\\n\\''", 6);
621
}
622
else if (c == '\'')
623
{
624
stakwrite(bp, cp - bp - 1);
625
bp = cp;
626
if (n && (n -= 4) <= 0)
627
{
628
n = fold - 5;
629
stakwrite("'\\\n\\''", 6);
630
}
631
else
632
stakwrite("'\\''", 4);
633
}
634
}
635
stakwrite(bp, cp - bp - 1);
636
stakputc('\'');
637
}
638
else if (n = fold)
639
{
640
cp = bp;
641
while (c = mbchar(cp))
642
{
643
if (--n <= 0)
644
{
645
n = fold;
646
stakwrite(bp, --cp - bp);
647
bp = cp;
648
stakwrite("\\\n", 2);
649
}
650
}
651
stakwrite(bp, cp - bp - 1);
652
}
653
else
654
stakwrite(bp, cp - bp);
655
if (c)
656
{
657
stakputc('\\');
658
stakputc('\n');
659
}
660
} while (c);
661
stakputc(0);
662
return(stakptr(offset));
663
}
664
665
#if SHOPT_MULTIBYTE
/*
 * multibyte aware search of <string> for the first character of <dp>
 *
 * Returns the byte offset, relative to <string>, of the position just
 * AFTER the first matching character (mbchar() has already advanced past
 * it when the comparison is made), or -1 when there is no match.
 * When <dp> starts with a NUL the offset just past the terminating NUL
 * of <string> is returned.
 */
int	sh_strchr(const char *string, register const char *dp)
{
	wchar_t c, d;
	register const char *cp=string;

	mbinit();
	d = mbchar(dp);
	/* reset the conversion state before decoding the second string */
	mbinit();
	while(c = mbchar(cp))
	{
		if(c==d)
			return(cp-string);
	}
	if(d==0)
		return(cp-string);
	return(-1);
}
#endif /* SHOPT_MULTIBYTE */
683
684
/*
 * return the translation of <message> obtained from ERROR_translate();
 * the argument list passed depends on ERROR_VERSION
 */
const char *_sh_translate(const char *message)
{
#if ERROR_VERSION >= 20000317L
	return(ERROR_translate(0,0,e_dict,message));
#elif ERROR_VERSION >= 20000101L
	return(ERROR_translate(e_dict,message));
#else
	return(ERROR_translate(message,1));
#endif
}
696
697
/*
 * change '['identifier']' to identifier
 * character before <str> must be a '['
 *
 * <str> points just after the '['.  The text is rewritten in place only
 * when the characters scanned from <str> (a letter followed by name
 * characters, or nothing) are directly followed by ']' and, if <last> is
 * non-null, <last> points just after that ']'.
 *
 * On a rewrite the scanned text is moved one byte to the left, over the
 * '['.  With a non-null <last> nothing beyond that is moved and last-2
 * is returned.  With a null <last> the rest of the string is moved two
 * bytes to the left, re-terminated, and a pointer to the position of the
 * old terminating NUL is returned.
 * When nothing is rewritten <last> is returned unchanged (possibly null).
 */
char *sh_checkid(char *str, char *last)
{
	register unsigned char *cp = (unsigned char*)str;
	register unsigned char *v = cp;
	register int c;

	if(c=mbchar(cp),isaletter(c))
		while(c=mbchar(cp),isaname(c));
	/* cp is now one past the character that ended the scan */
	if(c==']' && (!last || ((char*)cp==last)))
	{
		/* eliminate [ and ] */
		while(v < cp)
		{
			v[-1] = *v;
			v++;
		}
		if(last)
			last -=2;
		else
		{
			while(*v)
			{
				v[-2] = *v;
				v++;
			}
			v[-2] = 0;
			last = (char*)v;
		}
	}
	return(last);
}
732
733
#if _AST_VERSION <= 20000317L
/*
 * stand-in compiled only when _AST_VERSION is 20000317 or lower
 * (presumably because such library versions lack fmtident(); confirm):
 * returns <string> unchanged
 */
char *fmtident(const char *string)
{
	return((char*)string);
}
#endif
739
740