diff options
Diffstat (limited to 'third_party/rust/unicode-normalization/src/decompose.rs')
-rw-r--r-- | third_party/rust/unicode-normalization/src/decompose.rs | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/third_party/rust/unicode-normalization/src/decompose.rs b/third_party/rust/unicode-normalization/src/decompose.rs new file mode 100644 index 000000000..81e5e921a --- /dev/null +++ b/third_party/rust/unicode-normalization/src/decompose.rs @@ -0,0 +1,135 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +// Helper functions used for Unicode normalization +fn canonical_sort(comb: &mut [(char, u8)]) { + let len = comb.len(); + for i in 0..len { + let mut swapped = false; + for j in 1..len-i { + let class_a = comb[j-1].1; + let class_b = comb[j].1; + if class_a != 0 && class_b != 0 && class_a > class_b { + comb.swap(j-1, j); + swapped = true; + } + } + if !swapped { break; } + } +} + +#[derive(Clone)] +enum DecompositionType { + Canonical, + Compatible +} + +/// External iterator for a string decomposition's characters. +#[derive(Clone)] +pub struct Decompositions<I> { + kind: DecompositionType, + iter: I, + buffer: Vec<(char, u8)>, + sorted: bool +} + +#[inline] +pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> { + Decompositions { + iter: iter, + buffer: Vec::new(), + sorted: false, + kind: self::DecompositionType::Canonical, + } +} + +#[inline] +pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> { + Decompositions { + iter: iter, + buffer: Vec::new(), + sorted: false, + kind: self::DecompositionType::Compatible, + } +} + +impl<I: Iterator<Item=char>> Iterator for Decompositions<I> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + use self::DecompositionType::*; + + match self.buffer.first() { + Some(&(c, 0)) => { + self.sorted = false; + self.buffer.remove(0); + return Some(c); + } + Some(&(c, _)) if self.sorted => { + self.buffer.remove(0); + return Some(c); + } + _ => self.sorted = false + } + + if !self.sorted { + for ch in self.iter.by_ref() { + let buffer = &mut self.buffer; + let sorted = &mut self.sorted; + { + let callback = |d| { + let class = + super::char::canonical_combining_class(d); + if class == 0 && !*sorted { + canonical_sort(buffer); + *sorted = true; + } + buffer.push((d, class)); + }; + match self.kind { + Canonical => { + super::char::decompose_canonical(ch, callback) + } + Compatible => { + super::char::decompose_compatible(ch, callback) + } + } + } + if *sorted { + break + } + } + } + + if !self.sorted { + canonical_sort(&mut self.buffer); + self.sorted = true; + } + + if self.buffer.is_empty() { + None + } else { + match self.buffer.remove(0) { + (c, 0) => { + self.sorted = false; + Some(c) + } + (c, _) => Some(c), + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let (lower, _) = self.iter.size_hint(); + (lower, None) + } +} |