diff options
Diffstat (limited to 'third_party/rust/unicode-normalization/src/recompose.rs')
-rw-r--r-- | third_party/rust/unicode-normalization/src/recompose.rs | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/third_party/rust/unicode-normalization/src/recompose.rs b/third_party/rust/unicode-normalization/src/recompose.rs new file mode 100644 index 000000000..b3737d34a --- /dev/null +++ b/third_party/rust/unicode-normalization/src/recompose.rs @@ -0,0 +1,137 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::collections::VecDeque; +use decompose::Decompositions; + +#[derive(Clone)] +enum RecompositionState { + Composing, + Purging, + Finished +} + +/// External iterator for a string recomposition's characters. +#[derive(Clone)] +pub struct Recompositions<I> { + iter: Decompositions<I>, + state: RecompositionState, + buffer: VecDeque<char>, + composee: Option<char>, + last_ccc: Option<u8> +} + +#[inline] +pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> { + Recompositions { + iter: super::decompose::new_canonical(iter), + state: self::RecompositionState::Composing, + buffer: VecDeque::new(), + composee: None, + last_ccc: None, + } +} + +#[inline] +pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> { + Recompositions { + iter: super::decompose::new_compatible(iter), + state : self::RecompositionState::Composing, + buffer: VecDeque::new(), + composee: None, + last_ccc: None, + } +} + +impl<I: Iterator<Item=char>> Iterator for Recompositions<I> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + use self::RecompositionState::*; + + loop { + match self.state { + Composing => { + for ch in self.iter.by_ref() { + let ch_class = super::char::canonical_combining_class(ch); + if self.composee.is_none() { + if ch_class != 0 { + return Some(ch); + } + self.composee = Some(ch); + continue; + } + let k = self.composee.clone().unwrap(); + + match self.last_ccc { + None => { + match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + if ch_class == 0 { + self.composee = Some(ch); + return Some(k); + } + self.buffer.push_back(ch); + self.last_ccc = Some(ch_class); + } + } + } + Some(l_class) => { + if l_class >= ch_class { + // `ch` is blocked from `composee` + if ch_class == 0 { + self.composee = Some(ch); + self.last_ccc = None; + self.state = Purging; + return Some(k); + } + self.buffer.push_back(ch); + self.last_ccc = Some(ch_class); + continue; + } + match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + self.buffer.push_back(ch); + self.last_ccc = Some(ch_class); + } + } + } + } + } + self.state = Finished; + if self.composee.is_some() { + return self.composee.take(); + } + } + Purging => { + match self.buffer.pop_front() { + None => self.state = Composing, + s => return s + } + } + Finished => { + match self.buffer.pop_front() { + None => return self.composee.take(), + s => return s + } + } + } + } + } +} |