Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/tests/all/component_model/strings.rs
3054 views
1
#![cfg(not(miri))]
2
3
use super::REALLOC_AND_FREE;
4
use wasmtime::Result;
5
use wasmtime::component::{Component, Linker};
6
use wasmtime::{Engine, Store, StoreContextMut, Trap};
7
8
// High bit OR-ed into the length argument by the tests below when the source
// encoding is `latin1+utf16`, tagging the raw bytes as utf16 code units.
const UTF16_TAG: u32 = 1 << 31;
9
10
// Special cases that this tries to test:
//
// * utf8 -> utf8
//   * various code point sizes
//
// * utf8 -> utf16 - the adapter here will make a pessimistic allocation that's
//   twice the size of the utf8 encoding for the utf16 destination
//   * utf16 byte size is twice the utf8 size
//   * utf16 byte size is less than twice the utf8 size
//
// * utf8 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
//   pessimistic utf16 allocation that's downsized if necessary
//   * utf8 fits exactly in latin1
//   * utf8 fits latin1 but is bigger byte-wise
//   * utf8 is not latin1 and fits utf16 allocation precisely (NOT POSSIBLE)
//   * utf8 is not latin1 and utf16 is smaller than allocation
//
// * utf16 -> utf8 - this starts with an optimistic size and then reallocates to
//   a pessimistic size, interesting cases are:
//   * utf8 size is 0.5x the utf16 byte size (perfect fit in initial alloc)
//   * utf8 size is 1.5x the utf16 byte size (perfect fit in larger alloc)
//   * utf8 size is 0.5x-1.5x the utf16 size (larger alloc is downsized)
//
// * utf16 -> utf16
//   * various code point sizes
//
// * utf16 -> latin1+utf16 - attempts to convert to latin1 then falls back to a
//   pessimistic utf16 allocation that's downsized if necessary
//   * utf16 fits exactly in latin1
//   * utf16 fits latin1 but is bigger byte-wise (NOT POSSIBLE)
//   * utf16 is not latin1 and fits utf16 allocation precisely
//   * utf16 is not latin1 and utf16 is smaller than allocation (NOT POSSIBLE)
//
// * compact-utf16 -> utf8 dynamically determines between one of
//   * latin1 -> utf8
//     * latin1 size matches utf8 size
//     * latin1 is smaller than utf8 size
//   * utf16 -> utf8
//     * covered above
//
// * compact-utf16 -> utf16 dynamically determines between one of
//   * latin1 -> utf16 - latin1 size always matches utf16
//     * test various code points
//   * utf16 -> utf16
//     * covered above
//
// * compact-utf16 -> compact-utf16 dynamically determines between one of
//   * latin1 -> latin1
//     * not much interesting here
//   * utf16 -> compact-utf16-to-compact-probably-utf16
//     * utf16 actually fits within latin1
//     * otherwise not more interesting than utf16 -> utf16
//
63
// Strings fed through every (source, destination) encoding pair by the
// `roundtrip` test. The per-entry comments give the encoded sizes that make
// each entry interesting for the transcoding cases listed above.
//
// Code points outside latin1 that are easy to lose to Unicode normalisation
// are spelled with `\u{...}` escapes so the byte counts stay exact.
const STRINGS: &[&str] = &[
    "",
    // 1 byte in utf8, 2 bytes in utf16
    "x",
    "hello this is a particularly long string yes it is it keeps going",
    // 35 bytes in utf8, 23 units in utf16, 23 bytes in latin1
    "à á â ã ä å æ ç è é ê ë",
    // 47 bytes in utf8, 31 units in utf16
    "Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή",
    // 24 bytes in utf8, 8 units in utf16 (fullwidth S..Z: 3 utf8 bytes and one
    // utf16 unit per code point)
    "\u{ff33}\u{ff34}\u{ff35}\u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
    // 16 bytes in utf8, 8 units in utf16
    "ËÌÍÎÏÐÑÒ",
    // 4 bytes in utf8, 2 units in utf16 (a surrogate pair)
    "\u{10000}",
    // latin1-compatible prefix followed by utf8/16-requiring suffix
    //
    // 24 bytes in utf8, 13 units in utf16, first 8 usvs are latin1-compatible
    "à ascii \u{ff36}\u{ff37}\u{ff38}\u{ff39}\u{ff3a}",
];
83
84
// Values substituted for `string-encoding=` in the generated components; the
// tests below iterate over every ordered (source, destination) pair.
static ENCODINGS: [&str; 3] = ["utf8", "utf16", "latin1+utf16"];
85
86
// Runs `test_roundtrip` for every ordered pair of `ENCODINGS`, first with
// `debug_adapter_modules` enabled and then with it disabled.
#[test]
fn roundtrip() -> Result<()> {
    // Builds a fresh engine from the shared test configuration with the
    // requested adapter-debugging setting.
    let make_engine = |debug_adapters: bool| -> Result<Engine> {
        let mut cfg = wasmtime_test_util::component::config();
        cfg.debug_adapter_modules(debug_adapters);
        Engine::new(&cfg)
    };

    for debug_adapters in [true, false] {
        let engine = make_engine(debug_adapters)?;
        ENCODINGS.iter().try_for_each(|src| {
            ENCODINGS
                .iter()
                .try_for_each(|dst| test_roundtrip(&engine, src, dst))
        })?;
    }
    Ok(())
}
100
101
fn test_roundtrip(engine: &Engine, src: &str, dst: &str) -> Result<()> {
102
println!("src={src} dst={dst}");
103
104
let mk_echo = |name: &str, encoding: &str| {
105
format!(
106
r#"
107
(component {name}
108
(import "echo" (func $echo (param "a" string) (result string)))
109
(core instance $libc (instantiate $libc))
110
(core func $echo (canon lower (func $echo)
111
(memory $libc "memory")
112
(realloc (func $libc "realloc"))
113
string-encoding={encoding}
114
))
115
(core instance $echo (instantiate $echo
116
(with "libc" (instance $libc))
117
(with "" (instance (export "echo" (func $echo))))
118
))
119
(func (export "echo2") (param "a" string) (result string)
120
(canon lift
121
(core func $echo "echo")
122
(memory $libc "memory")
123
(realloc (func $libc "realloc"))
124
string-encoding={encoding}
125
)
126
)
127
)
128
"#
129
)
130
};
131
132
let src = mk_echo("$src", src);
133
let dst = mk_echo("$dst", dst);
134
let component = format!(
135
r#"
136
(component
137
(import "host" (func $host (param "a" string) (result string)))
138
139
(core module $libc
140
(memory (export "memory") 1)
141
{REALLOC_AND_FREE}
142
)
143
(core module $echo
144
(import "" "echo" (func $echo (param i32 i32 i32)))
145
(import "libc" "memory" (memory 0))
146
(import "libc" "realloc" (func $realloc (param i32 i32 i32 i32) (result i32)))
147
148
(func (export "echo") (param i32 i32) (result i32)
149
(local $retptr i32)
150
(local.set $retptr
151
(call $realloc
152
(i32.const 0)
153
(i32.const 0)
154
(i32.const 4)
155
(i32.const 8)))
156
(call $echo
157
(local.get 0)
158
(local.get 1)
159
(local.get $retptr))
160
local.get $retptr
161
)
162
)
163
164
{src}
165
{dst}
166
167
(instance $dst (instantiate $dst (with "echo" (func $host))))
168
(instance $src (instantiate $src (with "echo" (func $dst "echo2"))))
169
(export "echo" (func $src "echo2"))
170
)
171
"#
172
);
173
let component = Component::new(engine, &component)?;
174
let mut store = Store::new(engine, String::new());
175
let mut linker = Linker::new(engine);
176
linker.root().func_wrap(
177
"host",
178
|store: StoreContextMut<String>, (arg,): (String,)| {
179
assert_eq!(*store.data(), arg);
180
Ok((arg,))
181
},
182
)?;
183
let instance = linker.instantiate(&mut store, &component)?;
184
let func = instance.get_typed_func::<(String,), (String,)>(&mut store, "echo")?;
185
186
for string in STRINGS {
187
println!("testing string {string:?}");
188
*store.data_mut() = string.to_string();
189
let (ret,) = func.call(&mut store, (string.to_string(),))?;
190
assert_eq!(ret, *string);
191
}
192
Ok(())
193
}
194
195
// Runs `test_ptr_out_of_bounds` for every ordered pair of `ENCODINGS`.
#[test]
fn ptr_out_of_bounds() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    ENCODINGS.iter().try_for_each(|src| {
        ENCODINGS
            .iter()
            .try_for_each(|dst| test_ptr_out_of_bounds(&engine, src, dst))
    })
}
205
206
// Instantiates a component whose start function passes a string at pointer
// `0x8000_0000` (with lengths 0 and 1) from a `src`-encoded lowering to a
// `dst`-encoded lift, and asserts that instantiation fails with
// `Trap::StringOutOfBounds` in both cases.
fn test_ptr_out_of_bounds(engine: &Engine, src: &str, dst: &str) -> Result<()> {
    for len in [0u32, 1] {
        let wat = format!(
            r#"
(component
  (component $c
    (core module $m
      (func (export "") (param i32 i32))
      (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
      (memory (export "memory") 1)
    )
    (core instance $m (instantiate $m))
    (func (export "a") (param "a" string)
      (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
        string-encoding={dst})
    )
  )

  (component $c2
    (import "a" (func $f (param "a" string)))
    (core module $libc
      (memory (export "memory") 1)
    )
    (core instance $libc (instantiate $libc))
    (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
    (core module $m
      (import "" "" (func $f (param i32 i32)))

      (func $start (call $f (i32.const 0x8000_0000) (i32.const {len})))
      (start $start)
    )
    (core instance (instantiate $m (with "" (instance (export "" (func $f))))))
  )

  (instance $c (instantiate $c))
  (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
)
"#
        );
        let compiled = Component::new(engine, &wat)?;
        let mut store = Store::new(engine, ());

        // The call happens in the core module's start function, so the trap
        // surfaces as an instantiation error.
        let instantiation = Linker::new(engine).instantiate(&mut store, &compiled);
        let trap = instantiation.err().unwrap().downcast::<Trap>()?;
        assert_eq!(trap, Trap::StringOutOfBounds);
    }

    Ok(())
}
261
262
// Checks, for every ordered pair of `ENCODINGS`, that a trap is still raised
// when the ptr+len computation overflows.
#[test]
fn ptr_overflow() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    let pairs = ENCODINGS
        .iter()
        .flat_map(|src| ENCODINGS.iter().map(move |dst| (*src, *dst)));
    for (src, dst) in pairs {
        test_ptr_overflow(&engine, src, dst)?;
    }
    Ok(())
}
274
275
fn test_ptr_overflow(engine: &Engine, src: &str, dst: &str) -> Result<()> {
276
let component = format!(
277
r#"
278
(component
279
(component $c
280
(core module $m
281
(func (export "") (param i32 i32))
282
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
283
(memory (export "memory") 1)
284
)
285
(core instance $m (instantiate $m))
286
(func (export "a") (param "a" string)
287
(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
288
string-encoding={dst})
289
)
290
)
291
292
(component $c2
293
(import "a" (func $f (param "a" string)))
294
(core module $libc
295
(memory (export "memory") 1)
296
)
297
(core instance $libc (instantiate $libc))
298
(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
299
(core module $m
300
(import "" "" (func $f (param i32 i32)))
301
302
(func (export "f") (param i32) (call $f (i32.const 1000) (local.get 0)))
303
)
304
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
305
(func (export "f") (param "a" u32) (canon lift (core func $m "f")))
306
)
307
308
(instance $c (instantiate $c))
309
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
310
(export "f" (func $c2 "f"))
311
)
312
"#
313
);
314
315
let component = Component::new(engine, &component)?;
316
317
let test_overflow = |size: u32| -> Result<()> {
318
println!("src={src} dst={dst} size={size:#x}");
319
let mut store = Store::new(engine, ());
320
let instance = Linker::new(engine).instantiate(&mut store, &component)?;
321
let func = instance.get_typed_func::<(u32,), ()>(&mut store, "f")?;
322
let trap = func
323
.call(&mut store, (size,))
324
.unwrap_err()
325
.downcast::<Trap>()?;
326
assert_eq!(trap, Trap::StringOutOfBounds);
327
Ok(())
328
};
329
330
let max = 1 << 31;
331
332
match src {
333
"utf8" => {
334
// This exceeds MAX_STRING_BYTE_LENGTH
335
test_overflow(max)?;
336
337
if dst == "utf16" {
338
// exceeds MAX_STRING_BYTE_LENGTH when multiplied
339
test_overflow(max / 2)?;
340
341
// Technically this fails on the first string, not the second.
342
// Ideally this would test the overflow check on the second
343
// string though.
344
test_overflow(max / 2 - 100)?;
345
} else {
346
// This will point into unmapped memory
347
test_overflow(max - 100)?;
348
}
349
}
350
351
"utf16" => {
352
test_overflow(max / 2)?;
353
test_overflow(max / 2 - 100)?;
354
}
355
356
"latin1+utf16" => {
357
test_overflow((max / 2) | UTF16_TAG)?;
358
// tag a utf16 string with the max length and it should overflow.
359
test_overflow((max / 2 - 100) | UTF16_TAG)?;
360
}
361
362
_ => unreachable!(),
363
}
364
365
Ok(())
366
}
367
368
// Test that the pointer returned from `realloc` is bounds-checked, for every
// ordered pair of `ENCODINGS`.
#[test]
fn realloc_oob() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();
    for (src, dst) in ENCODINGS
        .iter()
        .flat_map(|src| ENCODINGS.iter().map(move |dst| (*src, *dst)))
    {
        test_realloc_oob(&engine, src, dst)?;
    }
    Ok(())
}
379
380
// Builds a component whose destination `realloc` always returns 100_000 while
// its memory is declared with a single page, passes a 10-unit string at
// pointer 1000 from a `src`-encoded lowering to a `dst`-encoded lift, and
// asserts the call fails with `Trap::StringOutOfBounds`.
fn test_realloc_oob(engine: &Engine, src: &str, dst: &str) -> Result<()> {
    let wat = format!(
        r#"
(component
  (component $c
    (core module $m
      (func (export "") (param i32 i32))
      (func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 100_000)
      (memory (export "memory") 1)
    )
    (core instance $m (instantiate $m))
    (func (export "a") (param "a" string)
      (canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
        string-encoding={dst})
    )
  )

  (component $c2
    (import "a" (func $f (param "a" string)))
    (core module $libc
      (memory (export "memory") 1)
    )
    (core instance $libc (instantiate $libc))
    (core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
    (core module $m
      (import "" "" (func $f (param i32 i32)))

      (func (export "f") (call $f (i32.const 1000) (i32.const 10)))
    )
    (core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
    (func (export "f") (canon lift (core func $m "f")))
  )

  (instance $c (instantiate $c))
  (instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
  (export "f" (func $c2 "f"))
)
"#
    );

    let compiled = Component::new(engine, &wat)?;
    let mut store = Store::new(engine, ());
    let linker = Linker::new(engine);

    let instance = linker.instantiate(&mut store, &compiled)?;
    let f = instance.get_typed_func::<(), ()>(&mut store, "f")?;

    let err = f.call(&mut store, ()).unwrap_err();
    let trap = err.downcast::<Trap>()?;
    assert_eq!(trap, Trap::StringOutOfBounds);
    Ok(())
}
429
430
// Feeds hand-built byte sequences through the string adapters: each entry of
// the table below must be rejected with an "invalid ... encoding" error, and
// the final tagged latin1+utf16 input must be accepted.
#[test]
fn raw_string_encodings() -> Result<()> {
    let engine = wasmtime_test_util::component::engine();

    let ascii_then_bad: &[u8] = b"valid string until \xffthen valid again";
    let multibyte_then_bad: &[u8] = b"symbol \xce\xa3 until \xffthen valid";
    // Little-endian 0xd801: a lone surrogate code unit.
    let lone_surrogate: &[u8] = &[0x01, 0xd8];
    let unit_then_surrogate: &[u8] = &[0xff, 0xff, 0x01, 0xd8];

    // (source encoding, destination encoding, raw bytes, length argument)
    let invalid_cases: &[(&str, &str, &[u8], u32)] = &[
        ("utf8", "utf8", &[0xff], 1),
        ("utf8", "utf8", ascii_then_bad, ascii_then_bad.len() as u32),
        ("utf8", "utf16", ascii_then_bad, ascii_then_bad.len() as u32),
        (
            "utf8",
            "utf8",
            multibyte_then_bad,
            multibyte_then_bad.len() as u32,
        ),
        (
            "utf8",
            "utf16",
            multibyte_then_bad,
            multibyte_then_bad.len() as u32,
        ),
        (
            "utf8",
            "latin1+utf16",
            multibyte_then_bad,
            multibyte_then_bad.len() as u32,
        ),
        ("utf16", "utf8", lone_surrogate, 1),
        ("utf16", "utf16", lone_surrogate, 1),
        ("utf16", "latin1+utf16", unit_then_surrogate, 2),
        ("latin1+utf16", "utf8", lone_surrogate, 1 | UTF16_TAG),
        ("latin1+utf16", "utf16", lone_surrogate, 1 | UTF16_TAG),
        ("latin1+utf16", "utf16", unit_then_surrogate, 2 | UTF16_TAG),
        (
            "latin1+utf16",
            "latin1+utf16",
            &[0xab, 0x00, 0xff, 0xff, 0x01, 0xd8],
            3 | UTF16_TAG,
        ),
    ];

    for &(src, dst, bytes, len) in invalid_cases {
        test_invalid_string_encoding(&engine, src, dst, bytes, len)?;
    }

    // This latin1+utf16 string should get compressed to latin1 across the
    // boundary.
    test_valid_string_encoding(
        &engine,
        "latin1+utf16",
        "latin1+utf16",
        &[0xab, 0x00, 0xff, 0x00],
        2 | UTF16_TAG,
    )?;
    Ok(())
}
491
492
fn test_invalid_string_encoding(
493
engine: &Engine,
494
src: &str,
495
dst: &str,
496
bytes: &[u8],
497
len: u32,
498
) -> Result<()> {
499
let trap = test_raw_when_encoded(engine, src, dst, bytes, len)?.unwrap();
500
let src = src.replace("latin1+", "");
501
assert!(
502
format!("{trap:?}").contains(&format!("invalid {src} encoding")),
503
"bad error: {trap:?}",
504
);
505
Ok(())
506
}
507
508
fn test_valid_string_encoding(
509
engine: &Engine,
510
src: &str,
511
dst: &str,
512
bytes: &[u8],
513
len: u32,
514
) -> Result<()> {
515
let err = test_raw_when_encoded(engine, src, dst, bytes, len)?;
516
assert!(err.is_none());
517
Ok(())
518
}
519
520
fn test_raw_when_encoded(
521
engine: &Engine,
522
src: &str,
523
dst: &str,
524
bytes: &[u8],
525
len: u32,
526
) -> Result<Option<wasmtime::Error>> {
527
let component = format!(
528
r#"
529
(component
530
(component $c
531
(core module $m
532
(func (export "") (param i32 i32))
533
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
534
(memory (export "memory") 1)
535
)
536
(core instance $m (instantiate $m))
537
(func (export "a") (param "a" string)
538
(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")
539
string-encoding={dst})
540
)
541
)
542
543
(component $c2
544
(import "a" (func $f (param "a" string)))
545
(core module $libc
546
(memory (export "memory") 1)
547
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
548
)
549
(core instance $libc (instantiate $libc))
550
(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))
551
(core module $m
552
(import "" "" (func $f (param i32 i32)))
553
554
(func (export "f") (param i32 i32 i32) (call $f (local.get 0) (local.get 2)))
555
)
556
(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))
557
(func (export "f") (param "a" (list u8)) (param "b" u32) (canon lift (core func $m "f")
558
(memory $libc "memory")
559
(realloc (func $libc "realloc"))))
560
)
561
562
(instance $c (instantiate $c))
563
(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))
564
(export "f" (func $c2 "f"))
565
)
566
"#
567
);
568
569
let component = Component::new(engine, &component)?;
570
let mut store = Store::new(engine, ());
571
572
let instance = Linker::new(engine).instantiate(&mut store, &component)?;
573
let func = instance.get_typed_func::<(&[u8], u32), ()>(&mut store, "f")?;
574
match func.call(&mut store, (bytes, len)) {
575
Ok(_) => Ok(None),
576
Err(e) => Ok(Some(e)),
577
}
578
}
579
580