Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/tests/all/component_model/strings.rs
1692 views
1
#![cfg(not(miri))]
2
3
use super::REALLOC_AND_FREE;
4
use anyhow::Result;
5
use wasmtime::component::{Component, Linker};
6
use wasmtime::{Engine, Store, StoreContextMut, Trap};
7
8
/// High bit that these tests OR into the length of a `latin1+utf16` string to
/// mark its contents as utf16.
const UTF16_TAG: u32 = 0x8000_0000;
9
10
// Special cases that this tries to test:
11
//
12
// * utf8 -> utf8
13
// * various code point sizes
14
//
15
// * utf8 -> utf16 - the adapter here will make a pessimistic allocation that's
16
// twice the size of the utf8 encoding for the utf16 destination
17
// * utf16 byte size is twice the utf8 size
18
// * utf16 byte size is less than twice the utf8 size
19
//
20
// * utf8 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
21
// pessimistic utf16 allocation that's downsized if necessary
22
// * utf8 fits exactly in latin1
23
// * utf8 fits latin1 but is bigger byte-wise
24
// * utf8 is not latin1 and fits utf16 allocation precisely (NOT POSSIBLE)
25
// * utf8 is not latin1 and utf16 is smaller than allocation
26
//
27
// * utf16 -> utf8 - this starts with an optimistic size and then reallocates to
28
// a pessimistic size, interesting cases are:
29
// * utf8 size is 0.5x the utf16 byte size (perfect fit in initial alloc)
30
// * utf8 size is 1.5x the utf16 byte size (perfect fit in larger alloc)
31
// * utf8 size is 0.5x-1.5x the utf16 size (larger alloc is downsized)
32
//
33
// * utf16 -> utf16
34
// * various code point sizes
35
//
36
// * utf16 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
37
// pessimistic utf16 allocation that's downsized if necessary
38
// * utf16 fits exactly in latin1
39
// * utf16 fits latin1 but is bigger byte-wise (NOT POSSIBLE)
40
// * utf16 is not latin1 and fits utf16 allocation precisely
41
// * utf16 is not latin1 and utf16 is smaller than allocation (NOT POSSIBLE)
42
//
43
// * compact-utf16 -> utf8 dynamically determines between one of
44
// * latin1 -> utf8
45
// * latin1 size matches utf8 size
46
// * latin1 is smaller than utf8 size
47
// * utf16 -> utf8
48
// * covered above
49
//
50
// * compact-utf16 -> utf16 dynamically determines between one of
51
// * latin1 -> utf16 - latin1 size always matches utf16
52
// * test various code points
53
// * utf16 -> utf16
54
// * covered above
55
//
56
// * compact-utf16 -> compact-utf16 dynamically determines between one of
57
// * latin1 -> latin1
58
// * not much interesting here
59
// * utf16 -> compact-utf16-to-compact-probably-utf16
60
// * utf16 actually fits within latin1
61
// * otherwise not more interesting than utf16 -> utf16
62
//
63
/// Strings echoed through every source/destination encoding pair by
/// `roundtrip`.
///
/// The size notes on each entry are what make it interesting for the
/// adapters' allocation logic, so each literal must keep the exact byte and
/// code-unit counts stated next to it.
const STRINGS: &[&str] = &[
    "",
    // 1 byte in utf8, 2 bytes in utf16
    "x",
    "hello this is a particularly long string yes it is it keeps going",
    // 35 bytes in utf8, 23 units in utf16, 23 bytes in latin1
    "à á â ã ä å æ ç è é ê ë",
    // 47 bytes in utf8, 31 units in utf16
    "Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή",
    // 24 bytes in utf8, 8 units in utf16
    //
    // Fullwidth letters U+FF33..=U+FF3A (3 utf8 bytes each). Written as
    // escapes so that Unicode normalization of this file cannot silently turn
    // them into their 1-byte ASCII look-alikes.
    "\u{ff33}\u{ff34}\u{ff35}\u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
    // 16 bytes in utf8, 8 units in utf16
    "ËÌÍÎÏÐÑÒ",
    // 4 bytes in utf8, 2 units (one surrogate pair) in utf16
    "\u{10000}",
    // latin1-compatible prefix followed by utf8/16-requiring suffix
    //
    // 24 bytes in utf8, 13 units in utf16, first 8 usvs are latin1-compatible
    //
    // The suffix is fullwidth U+FF36..=U+FF3A, escaped for the same reason as
    // above.
    "à ascii \u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
];
83
84
/// The `string-encoding` option values that the tests substitute into their
/// component templates, used as both source and destination.
static ENCODINGS: [&str; 3] = [
    "utf8",
    "utf16",
    "latin1+utf16",
];
85
86
/// Runs `test_roundtrip` for every ordered pair of `ENCODINGS`, once with
/// `debug_adapter_modules` enabled and once with it disabled.
#[test]
fn roundtrip() -> Result<()> {
    for debug in [true, false] {
        let engine = {
            let mut config = wasmtime_test_util::component::config();
            config.debug_adapter_modules(debug);
            Engine::new(&config)?
        };
        for src in ENCODINGS {
            ENCODINGS
                .iter()
                .try_for_each(|dst| test_roundtrip(&engine, src, dst))?;
        }
    }
    Ok(())
}
100
101
fn test_roundtrip(engine: &Engine, src: &str, dst: &str) -> Result<()> {
102
println!("src={src} dst={dst}");
103
104
let mk_echo = |name: &str, encoding: &str| {
105
format!(
106
r#"
107
(component {name}
108
(import "echo" (func $echo (param "a" string) (result string)))
109
(core instance $libc (instantiate $libc))
110
(core func $echo (canon lower (func $echo)
111
(memory $libc "memory")
112
(realloc (func $libc "realloc"))
113
string-encoding={encoding}
114
))
115
(core instance $echo (instantiate $echo
116
(with "libc" (instance $libc))
117
(with "" (instance (export "echo" (func $echo))))
118
))
119
(func (export "echo2") (param "a" string) (result string)
120
(canon lift
121
(core func $echo "echo")
122
(memory $libc "memory")
123
(realloc (func $libc "realloc"))
124
string-encoding={encoding}
125
)
126
)
127
)
128
"#
129
)
130
};
131
132
let src = mk_echo("$src", src);
133
let dst = mk_echo("$dst", dst);
134
let component = format!(
135
r#"
136
(component
137
(import "host" (func $host (param "a" string) (result string)))
138
139
(core module $libc
140
(memory (export "memory") 1)
141
{REALLOC_AND_FREE}
142
)
143
(core module $echo
144
(import "" "echo" (func $echo (param i32 i32 i32)))
145
(import "libc" "memory" (memory 0))
146
(import "libc" "realloc" (func $realloc (param i32 i32 i32 i32) (result i32)))
147
148
(func (export "echo") (param i32 i32) (result i32)
149
(local $retptr i32)
150
(local.set $retptr
151
(call $realloc
152
(i32.const 0)
153
(i32.const 0)
154
(i32.const 4)
155
(i32.const 8)))
156
(call $echo
157
(local.get 0)
158
(local.get 1)
159
(local.get $retptr))
160
local.get $retptr
161
)
162
)
163
164
{src}
165
{dst}
166
167
(instance $dst (instantiate $dst (with "echo" (func $host))))
168
(instance $src (instantiate $src (with "echo" (func $dst "echo2"))))
169
(export "echo" (func $src "echo2"))
170
)
171
"#
172
);
173
let component = Component::new(engine, &component)?;
174
let mut store = Store::new(engine, String::new());
175
let mut linker = Linker::new(engine);
176
linker.root().func_wrap(
177
"host",
178
|store: StoreContextMut<String>, (arg,): (String,)| {
179
assert_eq!(*store.data(), arg);
180
Ok((arg,))
181
},
182
)?;
183
let instance = linker.instantiate(&mut store, &component)?;
184
let func = instance.get_typed_func::<(String,), (String,)>(&mut store, "echo")?;
185
186
for string in STRINGS {
187
println!("testing string {string:?}");
188
*store.data_mut() = string.to_string();
189
let (ret,) = func.call(&mut store, (string.to_string(),))?;
190
assert_eq!(ret, *string);
191
func.post_return(&mut store)?;
192
}
193
Ok(())
194
}
195
196
/// Runs `test_ptr_out_of_bounds` for every ordered pair of `ENCODINGS`.
#[test]
fn ptr_out_of_bounds() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for src in ENCODINGS {
        ENCODINGS
            .iter()
            .try_for_each(|dst| test_ptr_out_of_bounds(&engine, src, dst))?;
    }
    Ok(())
}
206
207
/// Checks that passing a string whose pointer is `0x8000_0000` traps, for
/// lengths 0 and 1.
///
/// The call is made from the core module's `start` function, so the trap
/// surfaces as an instantiation error, which must downcast to
/// `Trap::UnreachableCodeReached`.
fn test_ptr_out_of_bounds(engine: &Engine, src: &str, dst: &str) -> Result<()> {
    for len in [0u32, 1] {
        let wat = format!(
            r#"
(component
  (component $c
    (core module $m
      (func (export "") (param i32 i32))
      (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
      (memory (export "memory") 1)
    )
    (core instance $m (instantiate $m))
    (func (export "a") (param "a" string)
      (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
        string-encoding={dst})
    )
  )

  (component $c2
    (import "a" (func $f (param "a" string)))
    (core module $libc
      (memory (export "memory") 1)
    )
    (core instance $libc (instantiate $libc))
    (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
    (core module $m
      (import "" "" (func $f (param i32 i32)))

      (func $start (call $f (i32.const 0x8000_0000) (i32.const {len})))
      (start $start)
    )
    (core instance (instantiate $m (with "" (instance (export "" (func $f))))))
  )

  (instance $c (instantiate $c))
  (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
)
"#
        );
        let component = Component::new(engine, &wat)?;
        let mut store = Store::new(engine, ());
        let outcome = Linker::new(engine).instantiate(&mut store, &component);
        // Instantiation is expected to fail; `.err().unwrap()` panics if it
        // unexpectedly succeeded.
        let trap = outcome.err().unwrap().downcast::<Trap>()?;
        assert_eq!(trap, Trap::UnreachableCodeReached);
    }

    Ok(())
}
262
263
// Test that even if the ptr+len calculation overflows then a trap still
264
// happens.
265
/// Runs `test_ptr_overflow` for every ordered pair of `ENCODINGS`.
#[test]
fn ptr_overflow() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for src in ENCODINGS {
        ENCODINGS
            .iter()
            .try_for_each(|dst| test_ptr_overflow(&engine, src, dst))?;
    }
    Ok(())
}
275
276
fn test_ptr_overflow(engine: &Engine, src: &str, dst: &str) -> Result<()> {
277
let component = format!(
278
r#"
279
(component
280
(component $c
281
(core module $m
282
(func (export "") (param i32 i32))
283
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
284
(memory (export "memory") 1)
285
)
286
(core instance $m (instantiate $m))
287
(func (export "a") (param "a" string)
288
(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
289
string-encoding={dst})
290
)
291
)
292
293
(component $c2
294
(import "a" (func $f (param "a" string)))
295
(core module $libc
296
(memory (export "memory") 1)
297
)
298
(core instance $libc (instantiate $libc))
299
(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
300
(core module $m
301
(import "" "" (func $f (param i32 i32)))
302
303
(func (export "f") (param i32) (call $f (i32.const 1000) (local.get 0)))
304
)
305
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
306
(func (export "f") (param "a" u32) (canon lift (core func $m "f")))
307
)
308
309
(instance $c (instantiate $c))
310
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
311
(export "f" (func $c2 "f"))
312
)
313
"#
314
);
315
316
let component = Component::new(engine, &component)?;
317
let mut store = Store::new(engine, ());
318
319
let mut test_overflow = |size: u32| -> Result<()> {
320
println!("src={src} dst={dst} size={size:#x}");
321
let instance = Linker::new(engine).instantiate(&mut store, &component)?;
322
let func = instance.get_typed_func::<(u32,), ()>(&mut store, "f")?;
323
let trap = func
324
.call(&mut store, (size,))
325
.unwrap_err()
326
.downcast::<Trap>()?;
327
assert_eq!(trap, Trap::UnreachableCodeReached);
328
Ok(())
329
};
330
331
let max = 1 << 31;
332
333
match src {
334
"utf8" => {
335
// This exceeds MAX_STRING_BYTE_LENGTH
336
test_overflow(max)?;
337
338
if dst == "utf16" {
339
// exceeds MAX_STRING_BYTE_LENGTH when multiplied
340
test_overflow(max / 2)?;
341
342
// Technically this fails on the first string, not the second.
343
// Ideally this would test the overflow check on the second
344
// string though.
345
test_overflow(max / 2 - 100)?;
346
} else {
347
// This will point into unmapped memory
348
test_overflow(max - 100)?;
349
}
350
}
351
352
"utf16" => {
353
test_overflow(max / 2)?;
354
test_overflow(max / 2 - 100)?;
355
}
356
357
"latin1+utf16" => {
358
test_overflow((max / 2) | UTF16_TAG)?;
359
// tag a utf16 string with the max length and it should overflow.
360
test_overflow((max / 2 - 100) | UTF16_TAG)?;
361
}
362
363
_ => unreachable!(),
364
}
365
366
Ok(())
367
}
368
369
// Test that the pointer returned from `realloc` is bounds-checked.
370
/// Runs `test_realloc_oob` for every ordered pair of `ENCODINGS`.
#[test]
fn realloc_oob() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for src in ENCODINGS {
        ENCODINGS
            .iter()
            .try_for_each(|dst| test_realloc_oob(&engine, src, dst))?;
    }
    Ok(())
}
380
381
/// Passes a 10-unit string at pointer 1000 to a component whose `realloc`
/// always returns `100_000` while its memory is declared with a single page,
/// and checks that the call fails with `Trap::UnreachableCodeReached`.
fn test_realloc_oob(engine: &Engine, src: &str, dst: &str) -> Result<()> {
    let wat = format!(
        r#"
(component
  (component $c
    (core module $m
      (func (export "") (param i32 i32))
      (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 100_000)
      (memory (export "memory") 1)
    )
    (core instance $m (instantiate $m))
    (func (export "a") (param "a" string)
      (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
        string-encoding={dst})
    )
  )

  (component $c2
    (import "a" (func $f (param "a" string)))
    (core module $libc
      (memory (export "memory") 1)
    )
    (core instance $libc (instantiate $libc))
    (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
    (core module $m
      (import "" "" (func $f (param i32 i32)))

      (func (export "f") (call $f (i32.const 1000) (i32.const 10)))
    )
    (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
    (func (export "f") (canon lift (core func $m "f")))
  )

  (instance $c (instantiate $c))
  (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
  (export "f" (func $c2 "f"))
)
"#
    );

    let component = Component::new(engine, &wat)?;
    let mut store = Store::new(engine, ());

    let instance = Linker::new(engine).instantiate(&mut store, &component)?;
    let func = instance.get_typed_func::<(), ()>(&mut store, "f")?;
    let outcome = func.call(&mut store, ());
    let trap = outcome.unwrap_err().downcast::<Trap>()?;
    assert_eq!(trap, Trap::UnreachableCodeReached);
    Ok(())
}
430
431
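// Test that raw string bytes which are malformed for the source encoding are
// rejected with an "invalid ... encoding" error, and that a well-formed
// string is accepted.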
432
/// Feeds hand-written byte sequences through `test_invalid_string_encoding`
/// for a selection of source/destination encodings, then checks one
/// well-formed `latin1+utf16` string with `test_valid_string_encoding`.
#[test]
fn raw_string_encodings() -> Result<()> {
    // The single 16-bit unit 0xd801 in little-endian byte order, i.e. an
    // unpaired surrogate.
    const LONE_SURROGATE: &[u8] = &[0x01, 0xd8];
    // The unit 0xffff followed by the same unpaired surrogate.
    const FFFF_THEN_SURROGATE: &[u8] = &[0xff, 0xff, 0x01, 0xd8];

    let engine = wasmtime_test_util::component::engine();

    test_invalid_string_encoding(&engine, "utf8", "utf8", &[0xff], 1)?;

    let array = b"valid string until \xffthen valid again";
    let len = array.len() as u32;
    for dst in ["utf8", "utf16"] {
        test_invalid_string_encoding(&engine, "utf8", dst, array, len)?;
    }

    let array = b"symbol \xce\xa3 until \xffthen valid";
    let len = array.len() as u32;
    for dst in ["utf8", "utf16", "latin1+utf16"] {
        test_invalid_string_encoding(&engine, "utf8", dst, array, len)?;
    }

    for dst in ["utf8", "utf16"] {
        test_invalid_string_encoding(&engine, "utf16", dst, LONE_SURROGATE, 1)?;
    }
    test_invalid_string_encoding(&engine, "utf16", "latin1+utf16", FFFF_THEN_SURROGATE, 2)?;

    for dst in ["utf8", "utf16"] {
        test_invalid_string_encoding(
            &engine,
            "latin1+utf16",
            dst,
            LONE_SURROGATE,
            1 | UTF16_TAG,
        )?;
    }
    test_invalid_string_encoding(
        &engine,
        "latin1+utf16",
        "utf16",
        FFFF_THEN_SURROGATE,
        2 | UTF16_TAG,
    )?;
    test_invalid_string_encoding(
        &engine,
        "latin1+utf16",
        "latin1+utf16",
        &[0xab, 0x00, 0xff, 0xff, 0x01, 0xd8],
        3 | UTF16_TAG,
    )?;

    // This latin1+utf16 string should get compressed to latin1 across the
    // boundary.
    test_valid_string_encoding(
        &engine,
        "latin1+utf16",
        "latin1+utf16",
        &[0xab, 0x00, 0xff, 0x00],
        2 | UTF16_TAG,
    )?;
    Ok(())
}
492
493
fn test_invalid_string_encoding(
494
engine: &Engine,
495
src: &str,
496
dst: &str,
497
bytes: &[u8],
498
len: u32,
499
) -> Result<()> {
500
let trap = test_raw_when_encoded(engine, src, dst, bytes, len)?.unwrap();
501
let src = src.replace("latin1+", "");
502
assert!(
503
format!("{trap:?}").contains(&format!("invalid {src} encoding")),
504
"bad error: {trap:?}",
505
);
506
Ok(())
507
}
508
509
fn test_valid_string_encoding(
510
engine: &Engine,
511
src: &str,
512
dst: &str,
513
bytes: &[u8],
514
len: u32,
515
) -> Result<()> {
516
let err = test_raw_when_encoded(engine, src, dst, bytes, len)?;
517
assert!(err.is_none());
518
Ok(())
519
}
520
521
fn test_raw_when_encoded(
522
engine: &Engine,
523
src: &str,
524
dst: &str,
525
bytes: &[u8],
526
len: u32,
527
) -> Result<Option<anyhow::Error>> {
528
let component = format!(
529
r#"
530
(component
531
(component $c
532
(core module $m
533
(func (export "") (param i32 i32))
534
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
535
(memory (export "memory") 1)
536
)
537
(core instance $m (instantiate $m))
538
(func (export "a") (param "a" string)
539
(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
540
string-encoding={dst})
541
)
542
)
543
544
(component $c2
545
(import "a" (func $f (param "a" string)))
546
(core module $libc
547
(memory (export "memory") 1)
548
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
549
)
550
(core instance $libc (instantiate $libc))
551
(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
552
(core module $m
553
(import "" "" (func $f (param i32 i32)))
554
555
(func (export "f") (param i32 i32 i32) (call $f (local.get 0) (local.get 2)))
556
)
557
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
558
(func (export "f") (param "a" (list u8)) (param "b" u32) (canon lift (core func $m "f")
559
(memory $libc "memory")
560
(realloc (func $libc "realloc"))))
561
)
562
563
(instance $c (instantiate $c))
564
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
565
(export "f" (func $c2 "f"))
566
)
567
"#
568
);
569
570
let component = Component::new(engine, &component)?;
571
let mut store = Store::new(engine, ());
572
573
let instance = Linker::new(engine).instantiate(&mut store, &component)?;
574
let func = instance.get_typed_func::<(&[u8], u32), ()>(&mut store, "f")?;
575
match func.call(&mut store, (bytes, len)) {
576
Ok(_) => Ok(None),
577
Err(e) => Ok(Some(e)),
578
}
579
}
580
581