thread 'tokenizers::japanese::tests::japanese_tokenizer' panicked at library/core/src/panicking.rs:219:5:
unsafe precondition(s) violated: invalid value for `char`
stack backtrace:
0: rust_begin_unwind
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/panicking.rs:652:5
1: core::panicking::panic_nounwind_fmt::runtime
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:110:18
2: core::panicking::panic_nounwind_fmt
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:120:5
3: core::panicking::panic_nounwind
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:219:5
4: core::char::convert::from_u32_unchecked::precondition_check
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ub_checks.rs:68:21
5: core::char::convert::from_u32_unchecked
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ub_checks.rs:75:17
6: core::char::from_u32_unchecked
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/char/mod.rs:131:14
7: <tinysegmenter::B3 as core::ops::deref::Deref>::deref::__static_ref_initialize
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tinysegmenter-0.1.1/src/constants.rs:6:34
8: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5
9: lazy_static::lazy::Lazy<T>::get::{{closure}}
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/inline_lazy.rs:31:41
10: std::sync::once::Once::call_once::{{closure}}
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sync/once.rs:149:41
11: std::sys::sync::once::queue::Once::call
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sys/sync/once/queue.rs:183:21
12: std::sync::once::Once::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sync/once.rs:149:9
13: lazy_static::lazy::Lazy<T>::get
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/inline_lazy.rs:30:9
14: <tinysegmenter::B3 as core::ops::deref::Deref>::deref::__stability
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/lib.rs:135:21
15: <tinysegmenter::B3 as core::ops::deref::Deref>::deref
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/lib.rs:137:17
16: tinysegmenter::tokenize
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tinysegmenter-0.1.1/src/lib.rs:35:10
17: <nlp::tokenizers::japanese::JapaneseTokenizer<T,I> as core::iter::traits::iterator::Iterator>::next
at ./src/tokenizers/japanese.rs:50:35
18: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/spec_from_iter_nested.rs:26:32
19: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/spec_from_iter.rs:33:9
20: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/mod.rs:2970:9
21: core::iter::traits::iterator::Iterator::collect
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/iter/traits/iterator.rs:2005:9
22: nlp::tokenizers::japanese::tests::japanese_tokenizer
at ./src/tokenizers/japanese.rs:78:13
23: nlp::tokenizers::japanese::tests::japanese_tokenizer::{{closure}}
at ./src/tokenizers/japanese.rs:76:28
24: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5
25: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5
Hi,
I've started seeing the following panic on a newer rustc version (I'm on 1.80.1 (3f5fd8dd4 2024-08-06)) while using tinysegmenter:

thread 'tokenizers::japanese::tests::japanese_tokenizer' panicked at library/core/src/panicking.rs:219:5:
unsafe precondition(s) violated: invalid value for `char`
stack backtrace:
0: rust_begin_unwind
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/panicking.rs:652:5
1: core::panicking::panic_nounwind_fmt::runtime
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:110:18
2: core::panicking::panic_nounwind_fmt
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:120:5
3: core::panicking::panic_nounwind
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/panicking.rs:219:5
4: core::char::convert::from_u32_unchecked::precondition_check
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ub_checks.rs:68:21
5: core::char::convert::from_u32_unchecked
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ub_checks.rs:75:17
6: core::char::from_u32_unchecked
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/char/mod.rs:131:14
7: <tinysegmenter::B3 as core::ops::deref::Deref>::deref::__static_ref_initialize
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tinysegmenter-0.1.1/src/constants.rs:6:34
8: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5
9: lazy_static::lazy::Lazy<T>::get::{{closure}}
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/inline_lazy.rs:31:41
10: std::sync::once::Once::call_once::{{closure}}
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sync/once.rs:149:41
11: std::sys::sync::once::queue::Once::call
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sys/sync/once/queue.rs:183:21
12: std::sync::once::Once::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/std/src/sync/once.rs:149:9
13: lazy_static::lazy::Lazy<T>::get
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/inline_lazy.rs:30:9
14: <tinysegmenter::B3 as core::ops::deref::Deref>::deref::__stability
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/lib.rs:135:21
15: <tinysegmenter::B3 as core::ops::deref::Deref>::deref
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lazy_static-1.5.0/src/lib.rs:137:17
16: tinysegmenter::tokenize
at /Users/me/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tinysegmenter-0.1.1/src/lib.rs:35:10
17: <nlp::tokenizers::japanese::JapaneseTokenizer<T,I> as core::iter::traits::iterator::Iterator>::next
at ./src/tokenizers/japanese.rs:50:35
18: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/spec_from_iter_nested.rs:26:32
19: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/spec_from_iter.rs:33:9
20: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/vec/mod.rs:2970:9
21: core::iter::traits::iterator::Iterator::collect
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/iter/traits/iterator.rs:2005:9
22: nlp::tokenizers::japanese::tests::japanese_tokenizer
at ./src/tokenizers/japanese.rs:78:13
23: nlp::tokenizers::japanese::tests::japanese_tokenizer::{{closure}}
at ./src/tokenizers/japanese.rs:76:28
24: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5
25: core::ops::function::FnOnce::call_once
at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250:5