1use byteorder::ByteOrder;
7
8use std::iter::FusedIterator;
9
10use crate::utf16::{decode_surrogates, is_leading_surrogate, is_trailing_surrogate, Utf16CharExt};
11use crate::{WStrCharIndices, WStrChars};
12
13impl<'a, E> Iterator for WStrChars<'a, E>
14where
15 E: ByteOrder,
16{
17 type Item = char;
18
19 #[inline]
20 fn next(&mut self) -> Option<Self::Item> {
21 let chunk = self.chunks.next()?;
23 let u = E::read_u16(chunk);
24
25 if !is_leading_surrogate(u) {
26 Some(unsafe { std::char::from_u32_unchecked(u as u32) })
28 } else {
29 let chunk = self.chunks.next().expect("missing trailing surrogate");
30 let u2 = E::read_u16(chunk);
31 debug_assert!(
32 is_trailing_surrogate(u2),
33 "code unit not a trailing surrogate"
34 );
35 Some(unsafe { decode_surrogates(u, u2) })
36 }
37 }
38
39 #[inline]
40 fn count(self) -> usize {
41 self.chunks
43 .filter(|bb| !is_trailing_surrogate(E::read_u16(bb)))
44 .count()
45 }
46
47 #[inline]
48 fn last(mut self) -> Option<Self::Item> {
49 self.next_back()
50 }
51}
52
53impl<'a, E> FusedIterator for WStrChars<'a, E> where E: ByteOrder {}
54
55impl<'a, E> DoubleEndedIterator for WStrChars<'a, E>
56where
57 E: ByteOrder,
58{
59 #[inline]
60 fn next_back(&mut self) -> Option<Self::Item> {
61 let chunk = self.chunks.next_back()?;
63 let u = E::read_u16(chunk);
64
65 if !is_trailing_surrogate(u) {
66 Some(unsafe { std::char::from_u32_unchecked(u as u32) })
68 } else {
69 let chunk = self.chunks.next_back().expect("missing leading surrogate");
70 let u2 = E::read_u16(chunk);
71 debug_assert!(
72 is_leading_surrogate(u2),
73 "code unit not a leading surrogate"
74 );
75 Some(unsafe { decode_surrogates(u2, u) })
76 }
77 }
78}
79
80impl<'a, E> Iterator for WStrCharIndices<'a, E>
81where
82 E: ByteOrder,
83{
84 type Item = (usize, char);
85
86 #[inline]
87 fn next(&mut self) -> Option<Self::Item> {
88 let pos = self.index;
89 let c = self.chars.next()?;
90 self.index += c.encoded_utf16_len();
91 Some((pos, c))
92 }
93
94 #[inline]
95 fn count(self) -> usize {
96 self.chars.count()
98 }
99
100 #[inline]
101 fn last(mut self) -> Option<Self::Item> {
102 self.next_back()
103 }
104}
105
106impl<'a, E> DoubleEndedIterator for WStrCharIndices<'a, E>
107where
108 E: ByteOrder,
109{
110 #[inline]
111 fn next_back(&mut self) -> Option<Self::Item> {
112 let c = self.chars.next_back()?;
113 let pos = self.index + self.chars.chunks.len() * std::mem::size_of::<u16>();
114 Some((pos, c))
115 }
116}
117
118impl<'a, E> FusedIterator for WStrCharIndices<'a, E> where E: ByteOrder {}
119
#[cfg(test)]
mod tests {
    use crate::WStr;

    // Forward `chars()`: plain code units, a surrogate pair, and a BMP code unit
    // above the surrogate range.
    #[test]
    fn test_wstr_chars() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        let decoded: Vec<char> = hello.chars().collect();
        assert_eq!(decoded, ['h', 'e', 'l', 'l', 'o']);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        let decoded: Vec<char> = with_pair.chars().collect();
        assert_eq!(decoded, ['\u{10000}', 'x']);

        let high_bmp = WStr::from_utf16le(b"\x41\xf8A\x00").unwrap();
        let decoded: Vec<char> = high_bmp.chars().collect();
        assert_eq!(decoded, ['\u{f841}', 'A']);
    }

    // Reversed `chars()` must yield the same characters in opposite order.
    #[test]
    fn test_wstr_chars_reverse() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        let reversed: Vec<char> = hello.chars().rev().collect();
        assert_eq!(reversed, ['o', 'l', 'l', 'e', 'h']);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        let reversed: Vec<char> = with_pair.chars().rev().collect();
        assert_eq!(reversed, ['x', '\u{10000}']);
    }

    // `last()` on `chars()` returns the final character.
    #[test]
    fn test_wstr_chars_last() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        assert_eq!(hello.chars().last().unwrap(), 'o');

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        assert_eq!(with_pair.chars().last().unwrap(), 'x');
    }

    // `count()` on `chars()` counts characters, not code units.
    #[test]
    fn test_wstr_chars_count() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        assert_eq!(hello.chars().count(), 5);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        assert_eq!(with_pair.chars().count(), 2);
    }

    // Forward `char_indices()` reports byte offsets alongside each character.
    #[test]
    fn test_wstr_char_indices() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        let pairs: Vec<(usize, char)> = hello.char_indices().collect();
        assert_eq!(pairs, [(0, 'h'), (2, 'e'), (4, 'l'), (6, 'l'), (8, 'o')]);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        let pairs: Vec<(usize, char)> = with_pair.char_indices().collect();
        assert_eq!(pairs, [(0, '\u{10000}'), (4, 'x')]);
    }

    // Reversed `char_indices()` must report the same offsets as forward iteration.
    #[test]
    fn test_wstr_char_indices_reverse() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        let pairs: Vec<(usize, char)> = hello.char_indices().rev().collect();
        assert_eq!(pairs, [(8, 'o'), (6, 'l'), (4, 'l'), (2, 'e'), (0, 'h')]);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        let pairs: Vec<(usize, char)> = with_pair.char_indices().rev().collect();
        assert_eq!(pairs, [(4, 'x'), (0, '\u{10000}')]);
    }

    // `last()` on `char_indices()` returns the final character with its offset.
    #[test]
    fn test_wstr_char_indices_last() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        assert_eq!(hello.char_indices().last().unwrap(), (8, 'o'));

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        assert_eq!(with_pair.char_indices().last().unwrap(), (4, 'x'));
    }

    // `count()` on `char_indices()` counts characters, not code units.
    #[test]
    fn test_wstr_char_indices_count() {
        let hello = WStr::from_utf16le(b"h\x00e\x00l\x00l\x00o\x00").unwrap();
        assert_eq!(hello.char_indices().count(), 5);

        let with_pair = WStr::from_utf16le(b"\x00\xd8\x00\xdcx\x00").unwrap();
        assert_eq!(with_pair.char_indices().count(), 2);
    }
}