summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicode-normalization/src/decompose.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/unicode-normalization/src/decompose.rs')
-rw-r--r--third_party/rust/unicode-normalization/src/decompose.rs135
1 files changed, 135 insertions, 0 deletions
diff --git a/third_party/rust/unicode-normalization/src/decompose.rs b/third_party/rust/unicode-normalization/src/decompose.rs
new file mode 100644
index 000000000..81e5e921a
--- /dev/null
+++ b/third_party/rust/unicode-normalization/src/decompose.rs
@@ -0,0 +1,135 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// Helper functions used for Unicode normalization
+fn canonical_sort(comb: &mut [(char, u8)]) {
+ let len = comb.len();
+ for i in 0..len {
+ let mut swapped = false;
+ for j in 1..len-i {
+ let class_a = comb[j-1].1;
+ let class_b = comb[j].1;
+ if class_a != 0 && class_b != 0 && class_a > class_b {
+ comb.swap(j-1, j);
+ swapped = true;
+ }
+ }
+ if !swapped { break; }
+ }
+}
+
+#[derive(Clone)]
+enum DecompositionType {
+ Canonical,
+ Compatible
+}
+
+/// External iterator for a string decomposition's characters.
+#[derive(Clone)]
+pub struct Decompositions<I> {
+ kind: DecompositionType,
+ iter: I,
+ buffer: Vec<(char, u8)>,
+ sorted: bool
+}
+
+#[inline]
+pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
+ Decompositions {
+ iter: iter,
+ buffer: Vec::new(),
+ sorted: false,
+ kind: self::DecompositionType::Canonical,
+ }
+}
+
+#[inline]
+pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
+ Decompositions {
+ iter: iter,
+ buffer: Vec::new(),
+ sorted: false,
+ kind: self::DecompositionType::Compatible,
+ }
+}
+
+impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
+ type Item = char;
+
+ #[inline]
+ fn next(&mut self) -> Option<char> {
+ use self::DecompositionType::*;
+
+ match self.buffer.first() {
+ Some(&(c, 0)) => {
+ self.sorted = false;
+ self.buffer.remove(0);
+ return Some(c);
+ }
+ Some(&(c, _)) if self.sorted => {
+ self.buffer.remove(0);
+ return Some(c);
+ }
+ _ => self.sorted = false
+ }
+
+ if !self.sorted {
+ for ch in self.iter.by_ref() {
+ let buffer = &mut self.buffer;
+ let sorted = &mut self.sorted;
+ {
+ let callback = |d| {
+ let class =
+ super::char::canonical_combining_class(d);
+ if class == 0 && !*sorted {
+ canonical_sort(buffer);
+ *sorted = true;
+ }
+ buffer.push((d, class));
+ };
+ match self.kind {
+ Canonical => {
+ super::char::decompose_canonical(ch, callback)
+ }
+ Compatible => {
+ super::char::decompose_compatible(ch, callback)
+ }
+ }
+ }
+ if *sorted {
+ break
+ }
+ }
+ }
+
+ if !self.sorted {
+ canonical_sort(&mut self.buffer);
+ self.sorted = true;
+ }
+
+ if self.buffer.is_empty() {
+ None
+ } else {
+ match self.buffer.remove(0) {
+ (c, 0) => {
+ self.sorted = false;
+ Some(c)
+ }
+ (c, _) => Some(c),
+ }
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let (lower, _) = self.iter.size_hint();
+ (lower, None)
+ }
+}