Path: blob/main/tests/all/component_model/strings.rs
1692 views
#![cfg(not(miri))]12use super::REALLOC_AND_FREE;3use anyhow::Result;4use wasmtime::component::{Component, Linker};5use wasmtime::{Engine, Store, StoreContextMut, Trap};67const UTF16_TAG: u32 = 1 << 31;89// Special cases that this tries to test:10//11// * utf8 -> utf812// * various code point sizes13//14// * utf8 -> utf16 - the adapter here will make a pessimistic allocation that's15// twice the size of the utf8 encoding for the utf16 destination16// * utf16 byte size is twice the utf8 size17// * utf16 byte size is less than twice the utf8 size18//19// * utf8 -> latin1+utf16 - attempts to convert to latin1 then falls back to a20// pessimistic utf16 allocation that's downsized if necessary21// * utf8 fits exactly in latin122// * utf8 fits latin1 but is bigger byte-wise23// * utf8 is not latin1 and fits utf16 allocation precisely (NOT POSSIBLE)24// * utf8 is not latin1 and utf16 is smaller than allocation25//26// * utf16 -> utf8 - this starts with an optimistic size and then reallocates to27// a pessimistic size, interesting cases are:28// * utf8 size is 0.5x the utf16 byte size (perfect fit in initial alloc)29// * utf8 size is 1.5x the utf16 byte size (perfect fit in larger alloc)30// * utf8 size is 0.5x-1.5x the utf16 size (larger alloc is downsized)31//32// * utf16 -> utf1633// * various code point sizes34//35// * utf16 -> latin1+utf16 - attempts to convert to latin1 then falls back to a36// pessimistic utf16 allocation that's downsized if necessary37// * utf16 fits exactly in latin138// * utf16 fits latin1 but is bigger byte-wise (NOT POSSIBLE)39// * utf16 is not latin1 and fits utf16 allocation precisely40// * utf16 is not latin1 and utf16 is smaller than allocation (NOT POSSIBLE)41//42// * compact-utf16 -> utf8 dynamically determines between one of43// * latin1 -> utf844// * latin1 size matches utf8 size45// * latin1 is smaller than utf8 size46// * utf16 -> utf847// * covered above48//49// * compact-utf16 -> utf16 dynamically determines between one of50// * latin1 -> utf16 - latin1 size always matches utf1651// * test various code points52// * utf16 -> utf1653// * covered above54//55// * compact-utf16 -> compact-utf16 dynamically determines between one of56// * latin1 -> latin157// * not much interesting here58// * utf16 -> compact-utf16-to-compact-probably-utf1659// * utf16 actually fits within latin160// * otherwise not more interesting than utf16 -> utf1661//62const STRINGS: &[&str] = &[63"",64// 1 byte in utf8, 2 bytes in utf1665"x",66"hello this is a particularly long string yes it is it keeps going",67// 35 bytes in utf8, 23 units in utf16, 23 bytes in latin168"à á â ã ä å æ ç è é ê ë",69// 47 bytes in utf8, 31 units in utf1670"Ξ Ο Π Ρ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή",71// 24 bytes in utf8, 8 units in utf1672"STUVWXYZ",73// 16 bytes in utf8, 8 units in utf1674"ËÌÍÎÏÐÑÒ",75// 4 bytes in utf8, 1 unit in utf1676"\u{10000}",77// latin1-compatible prefix followed by utf8/16-requiring suffix78//79// 24 bytes in utf8, 13 units in utf16, first 8 usvs are latin1-compatible80"à ascii VWXYZ",81];8283static ENCODINGS: [&str; 3] = ["utf8", "utf16", "latin1+utf16"];8485#[test]86fn roundtrip() -> Result<()> {87for debug in [true, false] {88let mut config = wasmtime_test_util::component::config();89config.debug_adapter_modules(debug);90let engine = Engine::new(&config)?;91for src in ENCODINGS {92for dst in ENCODINGS {93test_roundtrip(&engine, src, dst)?;94}95}96}97Ok(())98}99100fn test_roundtrip(engine: &Engine, src: &str, dst: &str) -> Result<()> {101println!("src={src} dst={dst}");102103let mk_echo = |name: &str, encoding: &str| {104format!(105r#"106(component {name}107(import "echo" (func $echo (param "a" string) (result string)))108(core instance $libc (instantiate $libc))109(core func $echo (canon lower (func $echo)110(memory $libc "memory")111(realloc (func $libc "realloc"))112string-encoding={encoding}113))114(core instance $echo (instantiate $echo115(with "libc" (instance $libc))116(with "" (instance (export "echo" (func $echo))))117))118(func (export "echo2") (param "a" string) (result string)119(canon lift120(core func $echo "echo")121(memory $libc "memory")122(realloc (func $libc "realloc"))123string-encoding={encoding}124)125)126)127"#128)129};130131let src = mk_echo("$src", src);132let dst = mk_echo("$dst", dst);133let component = format!(134r#"135(component136(import "host" (func $host (param "a" string) (result string)))137138(core module $libc139(memory (export "memory") 1)140{REALLOC_AND_FREE}141)142(core module $echo143(import "" "echo" (func $echo (param i32 i32 i32)))144(import "libc" "memory" (memory 0))145(import "libc" "realloc" (func $realloc (param i32 i32 i32 i32) (result i32)))146147(func (export "echo") (param i32 i32) (result i32)148(local $retptr i32)149(local.set $retptr150(call $realloc151(i32.const 0)152(i32.const 0)153(i32.const 4)154(i32.const 8)))155(call $echo156(local.get 0)157(local.get 1)158(local.get $retptr))159local.get $retptr160)161)162163{src}164{dst}165166(instance $dst (instantiate $dst (with "echo" (func $host))))167(instance $src (instantiate $src (with "echo" (func $dst "echo2"))))168(export "echo" (func $src "echo2"))169)170"#171);172let component = Component::new(engine, &component)?;173let mut store = Store::new(engine, String::new());174let mut linker = Linker::new(engine);175linker.root().func_wrap(176"host",177|store: StoreContextMut<String>, (arg,): (String,)| {178assert_eq!(*store.data(), arg);179Ok((arg,))180},181)?;182let instance = linker.instantiate(&mut store, &component)?;183let func = instance.get_typed_func::<(String,), (String,)>(&mut store, "echo")?;184185for string in STRINGS {186println!("testing string {string:?}");187*store.data_mut() = string.to_string();188let (ret,) = func.call(&mut store, (string.to_string(),))?;189assert_eq!(ret, *string);190func.post_return(&mut store)?;191}192Ok(())193}194195#[test]196fn ptr_out_of_bounds() -> Result<()> {197let engine = wasmtime_test_util::component::engine();198for src in ENCODINGS {199for dst in ENCODINGS {200test_ptr_out_of_bounds(&engine, src, dst)?;201}202}203Ok(())204}205206fn test_ptr_out_of_bounds(engine: &Engine, src: &str, dst: &str) -> Result<()> {207let test = |len: u32| -> Result<()> {208let component = format!(209r#"210(component211(component $c212(core module $m213(func (export "") (param i32 i32))214(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)215(memory (export "memory") 1)216)217(core instance $m (instantiate $m))218(func (export "a") (param "a" string)219(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")220string-encoding={dst})221)222)223224(component $c2225(import "a" (func $f (param "a" string)))226(core module $libc227(memory (export "memory") 1)228)229(core instance $libc (instantiate $libc))230(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))231(core module $m232(import "" "" (func $f (param i32 i32)))233234(func $start (call $f (i32.const 0x8000_0000) (i32.const {len})))235(start $start)236)237(core instance (instantiate $m (with "" (instance (export "" (func $f))))))238)239240(instance $c (instantiate $c))241(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))242)243"#244);245let component = Component::new(engine, &component)?;246let mut store = Store::new(engine, ());247let trap = Linker::new(engine)248.instantiate(&mut store, &component)249.err()250.unwrap()251.downcast::<Trap>()?;252assert_eq!(trap, Trap::UnreachableCodeReached);253Ok(())254};255256test(0)?;257test(1)?;258259Ok(())260}261262// Test that even if the ptr+len calculation overflows then a trap still263// happens.264#[test]265fn ptr_overflow() -> Result<()> {266let engine = wasmtime_test_util::component::engine();267for src in ENCODINGS {268for dst in ENCODINGS {269test_ptr_overflow(&engine, src, dst)?;270}271}272Ok(())273}274275fn test_ptr_overflow(engine: &Engine, src: &str, dst: &str) -> Result<()> {276let component = format!(277r#"278(component279(component $c280(core module $m281(func (export "") (param i32 i32))282(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)283(memory (export "memory") 1)284)285(core instance $m (instantiate $m))286(func (export "a") (param "a" string)287(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")288string-encoding={dst})289)290)291292(component $c2293(import "a" (func $f (param "a" string)))294(core module $libc295(memory (export "memory") 1)296)297(core instance $libc (instantiate $libc))298(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))299(core module $m300(import "" "" (func $f (param i32 i32)))301302(func (export "f") (param i32) (call $f (i32.const 1000) (local.get 0)))303)304(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))305(func (export "f") (param "a" u32) (canon lift (core func $m "f")))306)307308(instance $c (instantiate $c))309(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))310(export "f" (func $c2 "f"))311)312"#313);314315let component = Component::new(engine, &component)?;316let mut store = Store::new(engine, ());317318let mut test_overflow = |size: u32| -> Result<()> {319println!("src={src} dst={dst} size={size:#x}");320let instance = Linker::new(engine).instantiate(&mut store, &component)?;321let func = instance.get_typed_func::<(u32,), ()>(&mut store, "f")?;322let trap = func323.call(&mut store, (size,))324.unwrap_err()325.downcast::<Trap>()?;326assert_eq!(trap, Trap::UnreachableCodeReached);327Ok(())328};329330let max = 1 << 31;331332match src {333"utf8" => {334// This exceeds MAX_STRING_BYTE_LENGTH335test_overflow(max)?;336337if dst == "utf16" {338// exceeds MAX_STRING_BYTE_LENGTH when multiplied339test_overflow(max / 2)?;340341// Technically this fails on the first string, not the second.342// Ideally this would test the overflow check on the second343// string though.344test_overflow(max / 2 - 100)?;345} else {346// This will point into unmapped memory347test_overflow(max - 100)?;348}349}350351"utf16" => {352test_overflow(max / 2)?;353test_overflow(max / 2 - 100)?;354}355356"latin1+utf16" => {357test_overflow((max / 2) | UTF16_TAG)?;358// tag a utf16 string with the max length and it should overflow.359test_overflow((max / 2 - 100) | UTF16_TAG)?;360}361362_ => unreachable!(),363}364365Ok(())366}367368// Test that that the pointer returned from `realloc` is bounds-checked.369#[test]370fn realloc_oob() -> Result<()> {371let engine = wasmtime_test_util::component::engine();372for src in ENCODINGS {373for dst in ENCODINGS {374test_realloc_oob(&engine, src, dst)?;375}376}377Ok(())378}379380fn test_realloc_oob(engine: &Engine, src: &str, dst: &str) -> Result<()> {381let component = format!(382r#"383(component384(component $c385(core module $m386(func (export "") (param i32 i32))387(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 100_000)388(memory (export "memory") 1)389)390(core instance $m (instantiate $m))391(func (export "a") (param "a" string)392(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")393string-encoding={dst})394)395)396397(component $c2398(import "a" (func $f (param "a" string)))399(core module $libc400(memory (export "memory") 1)401)402(core instance $libc (instantiate $libc))403(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))404(core module $m405(import "" "" (func $f (param i32 i32)))406407(func (export "f") (call $f (i32.const 1000) (i32.const 10)))408)409(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))410(func (export "f") (canon lift (core func $m "f")))411)412413(instance $c (instantiate $c))414(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))415(export "f" (func $c2 "f"))416)417"#418);419420let component = Component::new(engine, &component)?;421let mut store = Store::new(engine, ());422423let instance = Linker::new(engine).instantiate(&mut store, &component)?;424let func = instance.get_typed_func::<(), ()>(&mut store, "f")?;425let trap = func.call(&mut store, ()).unwrap_err().downcast::<Trap>()?;426assert_eq!(trap, Trap::UnreachableCodeReached);427Ok(())428}429430// Test that that the pointer returned from `realloc` is bounds-checked.431#[test]432fn raw_string_encodings() -> Result<()> {433let engine = wasmtime_test_util::component::engine();434test_invalid_string_encoding(&engine, "utf8", "utf8", &[0xff], 1)?;435let array = b"valid string until \xffthen valid again";436test_invalid_string_encoding(&engine, "utf8", "utf8", array, array.len() as u32)?;437test_invalid_string_encoding(&engine, "utf8", "utf16", array, array.len() as u32)?;438let array = b"symbol \xce\xa3 until \xffthen valid";439test_invalid_string_encoding(&engine, "utf8", "utf8", array, array.len() as u32)?;440test_invalid_string_encoding(&engine, "utf8", "utf16", array, array.len() as u32)?;441test_invalid_string_encoding(&engine, "utf8", "latin1+utf16", array, array.len() as u32)?;442test_invalid_string_encoding(&engine, "utf16", "utf8", &[0x01, 0xd8], 1)?;443test_invalid_string_encoding(&engine, "utf16", "utf16", &[0x01, 0xd8], 1)?;444test_invalid_string_encoding(445&engine,446"utf16",447"latin1+utf16",448&[0xff, 0xff, 0x01, 0xd8],4492,450)?;451test_invalid_string_encoding(452&engine,453"latin1+utf16",454"utf8",455&[0x01, 0xd8],4561 | UTF16_TAG,457)?;458test_invalid_string_encoding(459&engine,460"latin1+utf16",461"utf16",462&[0x01, 0xd8],4631 | UTF16_TAG,464)?;465test_invalid_string_encoding(466&engine,467"latin1+utf16",468"utf16",469&[0xff, 0xff, 0x01, 0xd8],4702 | UTF16_TAG,471)?;472test_invalid_string_encoding(473&engine,474"latin1+utf16",475"latin1+utf16",476&[0xab, 0x00, 0xff, 0xff, 0x01, 0xd8],4773 | UTF16_TAG,478)?;479480// This latin1+utf16 string should get compressed to latin1 across the481// boundary.482test_valid_string_encoding(483&engine,484"latin1+utf16",485"latin1+utf16",486&[0xab, 0x00, 0xff, 0x00],4872 | UTF16_TAG,488)?;489Ok(())490}491492fn test_invalid_string_encoding(493engine: &Engine,494src: &str,495dst: &str,496bytes: &[u8],497len: u32,498) -> Result<()> {499let trap = test_raw_when_encoded(engine, src, dst, bytes, len)?.unwrap();500let src = src.replace("latin1+", "");501assert!(502format!("{trap:?}").contains(&format!("invalid {src} encoding")),503"bad error: {trap:?}",504);505Ok(())506}507508fn test_valid_string_encoding(509engine: &Engine,510src: &str,511dst: &str,512bytes: &[u8],513len: u32,514) -> Result<()> {515let err = test_raw_when_encoded(engine, src, dst, bytes, len)?;516assert!(err.is_none());517Ok(())518}519520fn test_raw_when_encoded(521engine: &Engine,522src: &str,523dst: &str,524bytes: &[u8],525len: u32,526) -> Result<Option<anyhow::Error>> {527let component = format!(528r#"529(component530(component $c531(core module $m532(func (export "") (param i32 i32))533(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)534(memory (export "memory") 1)535)536(core instance $m (instantiate $m))537(func (export "a") (param "a" string)538(canon lift (core func $m "") (realloc (func $m "realloc")) (memory $m "memory")539string-encoding={dst})540)541)542543(component $c2544(import "a" (func $f (param "a" string)))545(core module $libc546(memory (export "memory") 1)547(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)548)549(core instance $libc (instantiate $libc))550(core func $f (canon lower (func $f) string-encoding={src} (memory $libc "memory")))551(core module $m552(import "" "" (func $f (param i32 i32)))553554(func (export "f") (param i32 i32 i32) (call $f (local.get 0) (local.get 2)))555)556(core instance $m (instantiate $m (with "" (instance (export "" (func $f))))))557(func (export "f") (param "a" (list u8)) (param "b" u32) (canon lift (core func $m "f")558(memory $libc "memory")559(realloc (func $libc "realloc"))))560)561562(instance $c (instantiate $c))563(instance $c2 (instantiate $c2 (with "a" (func $c "a"))))564(export "f" (func $c2 "f"))565)566"#567);568569let component = Component::new(engine, &component)?;570let mut store = Store::new(engine, ());571572let instance = Linker::new(engine).instantiate(&mut store, &component)?;573let func = instance.get_typed_func::<(&[u8], u32), ()>(&mut store, "f")?;574match func.call(&mut store, (bytes, len)) {575Ok(_) => Ok(None),576Err(e) => Ok(Some(e)),577}578}579580581