Struct ICU4XWordSegmenter

Source

pub struct ICU4XWordSegmenter(/* private fields */);

Expand description

An ICU4X word-break segmenter, capable of finding word breakpoints in strings.

Implementations§

Source §

impl ICU4XWordSegmenter

Source

pub fn create_auto( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

Construct an ICU4XWordSegmenter that automatically selects the best available LSTM or dictionary payload data.

Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

Source

pub fn create_lstm( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

Construct an ICU4XWordSegmenter with LSTM payload data for Burmese, Khmer, Lao, and Thai.

Warning: An ICU4XWordSegmenter created by this function doesn’t handle Chinese or Japanese.

Source

pub fn create_dictionary( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

Construct an ICU4XWordSegmenter with dictionary payload data for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.

Source

pub fn segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<ICU4XWordBreakIteratorUtf8<'a>>

Segments a string.

Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

Source

pub fn segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<ICU4XWordBreakIteratorUtf16<'a>>

Segments a string.

Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

Source

pub fn segment_latin1<'a>( &'a self, input: &'a [u8], ) -> Box<ICU4XWordBreakIteratorLatin1<'a>>

Segments a Latin-1 string.

Auto Trait Implementations§

§

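impl Freeze for ICU4XWordSegmenter

§

impl RefUnwindSafe for ICU4XWordSegmenter

§

impl !Send for ICU4XWordSegmenter

§

impl !Sync for ICU4XWordSegmenter

§

impl Unpin for ICU4XWordSegmenter

§

impl UnwindSafe for ICU4XWordSegmenter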

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> Filterable for T

Source §

fn filterable( self, filter_name: &'static str, ) -> RequestFilterDataProvider<T, fn(DataRequest<'_>) -> bool>

Creates a filterable data provider with the given name for debugging. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<T> ErasedDestructor for T
where T: 'static,

Source §

impl<T> MaybeSendSync for T

Layout§

Note: Most layout information is completely unstable and may even differ between compilations. The only exception is types with certain repr(...) attributes. Please see the Rust Reference's “Type Layout” chapter for details on type layout guarantees.

Size: 1752 bytes

ICU4XWordSegmenter

Struct ICU4XWordSegmenter

Implementations§

impl ICU4XWordSegmenter

pub fn create_auto( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn create_lstm( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn create_dictionary( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<ICU4XWordBreakIteratorUtf8<'a>>

pub fn segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<ICU4XWordBreakIteratorUtf16<'a>>

pub fn segment_latin1<'a>( &'a self, input: &'a [u8], ) -> Box<ICU4XWordBreakIteratorLatin1<'a>>

Auto Trait Implementations§

impl Freeze for ICU4XWordSegmenter

impl RefUnwindSafe for ICU4XWordSegmenter

impl !Send for ICU4XWordSegmenter

impl !Sync for ICU4XWordSegmenter

impl Unpin for ICU4XWordSegmenter

impl UnwindSafe for ICU4XWordSegmenter

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> Filterable for T

fn filterable( self, filter_name: &'static str, ) -> RequestFilterDataProvider<T, fn(DataRequest<'_>) -> bool>

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> ErasedDestructor for T
where T: 'static,

impl<T> MaybeSendSync for T

Layout§

ICU4XWordSegmenter

Struct ICU4XWordSegmenter

Implementations§

impl ICU4XWordSegmenter

pub fn create_auto( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn create_lstm( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn create_dictionary( provider: &ICU4XDataProvider, ) -> Result<Box<ICU4XWordSegmenter>, ICU4XError>

pub fn segment_utf8<'a>( &'a self, input: &'a DiplomatStr, ) -> Box<ICU4XWordBreakIteratorUtf8<'a>>

pub fn segment_utf16<'a>( &'a self, input: &'a DiplomatStr16, ) -> Box<ICU4XWordBreakIteratorUtf16<'a>>

pub fn segment_latin1<'a>( &'a self, input: &'a [u8], ) -> Box<ICU4XWordBreakIteratorLatin1<'a>>

Auto Trait Implementations§

impl Freeze for ICU4XWordSegmenter

impl RefUnwindSafe for ICU4XWordSegmenter

impl !Send for ICU4XWordSegmenter

impl !Sync for ICU4XWordSegmenter

impl Unpin for ICU4XWordSegmenter

impl UnwindSafe for ICU4XWordSegmenter

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> Filterable for T

fn filterable( self, filter_name: &'static str, ) -> RequestFilterDataProvider<T, fn(DataRequest<'_>) -> bool>

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> ErasedDestructor for T
where T: 'static,

impl<T> MaybeSendSync for T

Layout§

Struct ICU4XWordSegmenter

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<T> ErasedDestructor for T
where T: 'static,