diff options
Diffstat (limited to 'third_party/rust/unicode-normalization/src/lib.rs')
-rw-r--r-- | third_party/rust/unicode-normalization/src/lib.rs | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/third_party/rust/unicode-normalization/src/lib.rs b/third_party/rust/unicode-normalization/src/lib.rs new file mode 100644 index 000000000..592599805 --- /dev/null +++ b/third_party/rust/unicode-normalization/src/lib.rs @@ -0,0 +1,138 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Unicode character composition and decomposition utilities +//! as described in +//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). +//! +//! ```rust +//! extern crate unicode_normalization; +//! +//! use unicode_normalization::char::compose; +//! use unicode_normalization::UnicodeNormalization; +//! +//! fn main() { +//! assert_eq!(compose('A','\u{30a}'), Some('Å')); +//! +//! let s = "ÅΩ"; +//! let c = s.nfc().collect::<String>(); +//! assert_eq!(c, "ÅΩ"); +//! } +//! ``` +//! +//! # crates.io +//! +//! You can use this package in your project by adding the following +//! to your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! unicode-normalization = "0.1.0" +//! ``` + +#![deny(missing_docs, unsafe_code)] +#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", + html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] + +pub use tables::UNICODE_VERSION; +pub use decompose::Decompositions; +pub use recompose::Recompositions; +use std::str::Chars; + +mod decompose; +mod normalize; +mod recompose; +mod tables; + +#[cfg(test)] +mod test; +#[cfg(test)] +mod testdata; + +/// Methods for composing and decomposing characters. +pub mod char { + pub use normalize::{decompose_canonical, decompose_compatible, compose}; + + /// Look up the canonical combining class of a character. + pub use tables::normalization::canonical_combining_class; + + /// Return whether the given character is a combining mark (`General_Category=Mark`) + pub use tables::normalization::is_combining_mark; +} + + +/// Methods for iterating over strings while applying Unicode normalizations +/// as described in +/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). +pub trait UnicodeNormalization<I: Iterator<Item=char>> { + /// Returns an iterator over the string in Unicode Normalization Form D + /// (canonical decomposition). + #[inline] + fn nfd(self) -> Decompositions<I>; + + /// Returns an iterator over the string in Unicode Normalization Form KD + /// (compatibility decomposition). + #[inline] + fn nfkd(self) -> Decompositions<I>; + + /// An Iterator over the string in Unicode Normalization Form C + /// (canonical decomposition followed by canonical composition). + #[inline] + fn nfc(self) -> Recompositions<I>; + + /// An Iterator over the string in Unicode Normalization Form KC + /// (compatibility decomposition followed by canonical composition). + #[inline] + fn nfkc(self) -> Recompositions<I>; +} + +impl<'a> UnicodeNormalization<Chars<'a>> for &'a str { + #[inline] + fn nfd(self) -> Decompositions<Chars<'a>> { + decompose::new_canonical(self.chars()) + } + + #[inline] + fn nfkd(self) -> Decompositions<Chars<'a>> { + decompose::new_compatible(self.chars()) + } + + #[inline] + fn nfc(self) -> Recompositions<Chars<'a>> { + recompose::new_canonical(self.chars()) + } + + #[inline] + fn nfkc(self) -> Recompositions<Chars<'a>> { + recompose::new_compatible(self.chars()) + } +} + +impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I { + #[inline] + fn nfd(self) -> Decompositions<I> { + decompose::new_canonical(self) + } + + #[inline] + fn nfkd(self) -> Decompositions<I> { + decompose::new_compatible(self) + } + + #[inline] + fn nfc(self) -> Recompositions<I> { + recompose::new_canonical(self) + } + + #[inline] + fn nfkc(self) -> Recompositions<I> { + recompose::new_compatible(self) + } +} |