ion/string/
mod.rs

1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 */
6
7use std::ops::{Deref, DerefMut, Range};
8use std::string::String as RustString;
9use std::{ptr, slice};
10
11use bytemuck::cast_slice;
12use byteorder::NativeEndian;
13use mozjs::jsapi::{
14	JS_CompareStrings, JS_ConcatStrings, JS_DeprecatedStringHasLatin1Chars, JS_GetEmptyString,
15	JS_GetLatin1StringCharsAndLength, JS_GetStringCharAt, JS_GetTwoByteStringCharsAndLength, JS_NewDependentString,
16	JS_NewExternalStringLatin1, JS_NewExternalUCString, JS_NewUCStringCopyN, JS_StringIsLinear, JSString,
17};
18use utf16string::{WStr, WString};
19
20use crate::string::byte::{ByteStr, ByteString, Latin1};
21use crate::string::external::create_callbacks;
22use crate::utils::BoxExt as _;
23use crate::{Context, Error, ErrorKind, Local};
24
25pub mod byte;
26mod external;
27
/// A borrowed view of a string's characters, tagged with the encoding they are stored in.
#[derive(Copy, Clone, Debug)]
pub enum StringRef<'s> {
	/// Characters borrowed as a Latin-1 byte string.
	Latin1(&'s ByteStr<Latin1>),
	/// Characters borrowed as a native-endian UTF-16 string.
	Utf16(&'s WStr<NativeEndian>),
}
33
34impl StringRef<'_> {
35	pub fn is_empty(&self) -> bool {
36		match self {
37			StringRef::Latin1(b) => b.is_empty(),
38			StringRef::Utf16(wstr) => wstr.is_empty(),
39		}
40	}
41
42	pub fn len(&self) -> usize {
43		match self {
44			StringRef::Latin1(b) => b.len(),
45			StringRef::Utf16(wstr) => wstr.len(),
46		}
47	}
48
49	pub fn as_bytes(&self) -> &[u8] {
50		match self {
51			StringRef::Latin1(b) => b,
52			StringRef::Utf16(wstr) => wstr.as_bytes(),
53		}
54	}
55}
56
/// Represents a primitive string in the JS Runtime.
/// Strings in JS are immutable and are copied on modification, other than concatenating and slicing.
///
/// Refer to [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String) for more details.
#[derive(Debug)]
pub struct String<'s> {
	// Rooted pointer to the underlying `JSString`; exposed through the `Deref`/`DerefMut` impls.
	str: Local<'s, *mut JSString>,
}
65
66impl<'s> String<'s> {
67	/// Creates an empty [String].
68	pub fn new(cx: &Context) -> String<'_> {
69		String::from(cx.root(unsafe { JS_GetEmptyString(cx.as_ptr()) }))
70	}
71
72	/// Creates a new [String] with a given string, by copying it to the JS Runtime.
73	pub fn copy_from_str<'cx>(cx: &'cx Context, string: &str) -> Option<String<'cx>> {
74		let utf16: Vec<u16> = string.encode_utf16().collect();
75		let jsstr = unsafe { JS_NewUCStringCopyN(cx.as_ptr(), utf16.as_ptr(), utf16.len()) };
76		if jsstr.is_null() {
77			None
78		} else {
79			Some(String::from(cx.root(jsstr)))
80		}
81	}
82
83	/// Creates a new string by moving ownership of the Latin-1 string to the JS Runtime temporarily.
84	/// Returns the bytes if the creation of the string in the runtime fails.
85	pub fn from_latin1(cx: &Context, string: ByteString<Latin1>) -> Result<String<'_>, ByteString<Latin1>> {
86		let bytes = string.into_vec().into_boxed_slice();
87		let (chars, len) = Box::into_raw_parts(bytes);
88
89		unsafe {
90			let callbacks = create_callbacks(len);
91			let jsstr = JS_NewExternalStringLatin1(cx.as_ptr(), chars, len, callbacks);
92
93			if jsstr.is_null() {
94				let bytes = Box::from_raw_parts(chars, len).into_vec();
95				Err(ByteString::from_unchecked(bytes))
96			} else {
97				Ok(String::from(cx.root(jsstr)))
98			}
99		}
100	}
101
102	/// Creates a new string by moving ownership of the UTF-16 string to the JS Runtime temporarily.
103	/// Returns the string if the creation of the string in the runtime fails.
104	pub fn from_wstring(cx: &Context, string: WString<NativeEndian>) -> Result<String<'_>, WString<NativeEndian>> {
105		let bytes = string.into_bytes().into_boxed_slice();
106		let (chars, len) = Box::into_raw_parts(bytes);
107
108		unsafe {
109			let callbacks = create_callbacks(len);
110			#[expect(clippy::cast_ptr_alignment)]
111			let jsstr = JS_NewExternalUCString(cx.as_ptr(), chars.cast::<u16>(), len / 2, callbacks);
112
113			if jsstr.is_null() {
114				let bytes = Box::from_raw_parts(chars, len).into_vec();
115				Err(WString::from_utf16_unchecked(bytes))
116			} else {
117				Ok(String::from(cx.root(jsstr)))
118			}
119		}
120	}
121
122	/// Returns a slice of a [String] as a new [String].
123	pub fn slice<'cx>(&self, cx: &'cx Context, range: Range<usize>) -> String<'cx> {
124		let Range { start, end } = range;
125		String::from(cx.root(unsafe { JS_NewDependentString(cx.as_ptr(), self.handle().into(), start, end) }))
126	}
127
128	/// Concatenates two [String]s into a new [String].
129	/// The resultant [String] is not linear.
130	pub fn concat<'cx>(&self, cx: &'cx Context, other: &String) -> String<'cx> {
131		String::from(cx.root(unsafe { JS_ConcatStrings(cx.as_ptr(), self.handle().into(), other.handle().into()) }))
132	}
133
134	/// Compares two [String]s.
135	pub fn compare(&self, cx: &Context, other: &String) -> i32 {
136		let mut result = 0;
137		unsafe { JS_CompareStrings(cx.as_ptr(), self.get(), other.get(), &raw mut result) };
138		result
139	}
140
141	/// Checks if a string is linear (contiguous) in memory.
142	pub fn is_linear(&self) -> bool {
143		unsafe { JS_StringIsLinear(self.get()) }
144	}
145
146	/// Checks if a string consists of only Latin-1 characters.
147	pub fn is_latin1(&self) -> bool {
148		unsafe { JS_DeprecatedStringHasLatin1Chars(self.get()) }
149	}
150
151	/// Checks if a string consists of UTF-16 characters.
152	pub fn is_utf16(&self) -> bool {
153		!self.is_latin1()
154	}
155
156	/// Returns the UTF-16 codepoint at the given character.
157	/// Returns [None] if the string is not linear.
158	pub fn char_at(&self, cx: &Context, index: usize) -> u16 {
159		unsafe {
160			let mut char = 0;
161			JS_GetStringCharAt(cx.as_ptr(), self.get(), index, &raw mut char);
162			char
163		}
164	}
165
166	/// Converts the [String] into a [prim@slice] of Latin-1 characters.
167	/// Returns [None] if the string contains non-Latin-1 characters.
168	pub fn as_latin1(&self, cx: &Context) -> Option<&'s [u8]> {
169		self.is_latin1().then(|| unsafe {
170			let mut length = 0;
171			let chars = JS_GetLatin1StringCharsAndLength(cx.as_ptr(), ptr::null(), self.get(), &raw mut length);
172			slice::from_raw_parts(chars, length)
173		})
174	}
175
176	/// Converts the [String] into a [WStr].
177	/// Returns [None] if the string contains only Latin-1 characters.
178	pub fn as_wstr(&self, cx: &Context) -> crate::Result<Option<&'s WStr<NativeEndian>>> {
179		self.as_wtf16(cx)
180			.map(|slice| {
181				WStr::from_utf16(cast_slice(slice))
182					.map_err(|_| Error::new("String contains invalid UTF-16 codepoints", ErrorKind::Type))
183			})
184			.transpose()
185	}
186
187	pub fn as_wtf16(&self, cx: &Context) -> Option<&'s [u16]> {
188		self.is_utf16().then(|| unsafe {
189			let mut length = 0;
190			let chars = JS_GetTwoByteStringCharsAndLength(cx.as_ptr(), ptr::null(), self.get(), &raw mut length);
191			slice::from_raw_parts(chars, length)
192		})
193	}
194
195	pub fn as_ref(&self, cx: &Context) -> StringRef<'s> {
196		let mut length = 0;
197		if self.is_latin1() {
198			let chars =
199				unsafe { JS_GetLatin1StringCharsAndLength(cx.as_ptr(), ptr::null(), self.get(), &raw mut length) };
200			StringRef::Latin1(unsafe { ByteStr::from_unchecked(slice::from_raw_parts(chars, length)) })
201		} else {
202			let mut length = 0;
203			let chars =
204				unsafe { JS_GetTwoByteStringCharsAndLength(cx.as_ptr(), ptr::null(), self.get(), &raw mut length) };
205			let slice = unsafe { slice::from_raw_parts(chars, length) };
206			StringRef::Utf16(WStr::from_utf16(cast_slice(slice)).unwrap())
207		}
208	}
209
210	/// Converts a [String] to an owned [String](RustString).
211	pub fn to_owned(&self, cx: &Context) -> crate::Result<RustString> {
212		if let Some(chars) = self.as_latin1(cx) {
213			let mut string = RustString::with_capacity(chars.len());
214			string.extend(chars.iter().map(|c| *c as char));
215			Ok(string)
216		} else {
217			let string = self.as_wstr(cx)?.unwrap();
218			Ok(string.to_utf8())
219		}
220	}
221}
222
223impl<'s> From<Local<'s, *mut JSString>> for String<'s> {
224	fn from(str: Local<'s, *mut JSString>) -> String<'s> {
225		String { str }
226	}
227}
228
229impl<'s> Deref for String<'s> {
230	type Target = Local<'s, *mut JSString>;
231
232	fn deref(&self) -> &Self::Target {
233		&self.str
234	}
235}
236
237impl DerefMut for String<'_> {
238	fn deref_mut(&mut self) -> &mut Self::Target {
239		&mut self.str
240	}
241}