// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Unicode character composition and decomposition utilities //! as described in //! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). //! //! ```rust //! extern crate unicode_normalization; //! //! use unicode_normalization::char::compose; //! use unicode_normalization::UnicodeNormalization; //! //! fn main() { //! assert_eq!(compose('A','\u{30a}'), Some('Å')); //! //! let s = "ÅΩ"; //! let c = s.nfc().collect::(); //! assert_eq!(c, "ÅΩ"); //! } //! ``` //! //! # crates.io //! //! You can use this package in your project by adding the following //! to your `Cargo.toml`: //! //! ```toml //! [dependencies] //! unicode-normalization = "0.1.0" //! ``` #![deny(missing_docs, unsafe_code)] #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] pub use tables::UNICODE_VERSION; pub use decompose::Decompositions; pub use recompose::Recompositions; use std::str::Chars; mod decompose; mod normalize; mod recompose; mod tables; #[cfg(test)] mod test; #[cfg(test)] mod testdata; /// Methods for composing and decomposing characters. pub mod char { pub use normalize::{decompose_canonical, decompose_compatible, compose}; /// Look up the canonical combining class of a character. pub use tables::normalization::canonical_combining_class; /// Return whether the given character is a combining mark (`General_Category=Mark`) pub use tables::normalization::is_combining_mark; } /// Methods for iterating over strings while applying Unicode normalizations /// as described in /// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). pub trait UnicodeNormalization> { /// Returns an iterator over the string in Unicode Normalization Form D /// (canonical decomposition). #[inline] fn nfd(self) -> Decompositions; /// Returns an iterator over the string in Unicode Normalization Form KD /// (compatibility decomposition). #[inline] fn nfkd(self) -> Decompositions; /// An Iterator over the string in Unicode Normalization Form C /// (canonical decomposition followed by canonical composition). #[inline] fn nfc(self) -> Recompositions; /// An Iterator over the string in Unicode Normalization Form KC /// (compatibility decomposition followed by canonical composition). #[inline] fn nfkc(self) -> Recompositions; } impl<'a> UnicodeNormalization> for &'a str { #[inline] fn nfd(self) -> Decompositions> { decompose::new_canonical(self.chars()) } #[inline] fn nfkd(self) -> Decompositions> { decompose::new_compatible(self.chars()) } #[inline] fn nfc(self) -> Recompositions> { recompose::new_canonical(self.chars()) } #[inline] fn nfkc(self) -> Recompositions> { recompose::new_compatible(self.chars()) } } impl> UnicodeNormalization for I { #[inline] fn nfd(self) -> Decompositions { decompose::new_canonical(self) } #[inline] fn nfkd(self) -> Decompositions { decompose::new_compatible(self) } #[inline] fn nfc(self) -> Recompositions { recompose::new_canonical(self) } #[inline] fn nfkc(self) -> Recompositions { recompose::new_compatible(self) } }