"""ICU-based collation. This collation backend uses the International Components for Unicode library to provide accurate and high-performance collation. It supports multiple locales and advanced sorting capabilities. Use this collation backend if possible; it's by far the best. Avoid this backend if... - ICU is not available for your system. """ __all__ = ["Collator"] import collate._abcollator import collate._locale import collate.errors from collate.icu import _icu class Collator(collate._abcollator.Collator): """ICU-based collation.""" def __init__(self, locale, encoding=None): super(Collator, self).__init__(locale, encoding) locale, encoding = collate._locale.getpair(locale, encoding) icu_locale = "root" if locale == "C" else locale self._collator = _icu.Collator(icu_locale) self.locale = self._collator.locale self.encoding = collate._locale.encoding(encoding) if self._collator.used_default_information and locale != "C": raise collate.errors.InvalidLocaleError(locale) try: self._breaker = _icu.WordBreaker(icu_locale) except ValueError: # Thai is the only language with a special break locale, # so this is a harmless error. self._breaker = _icu.WordBreaker("root") def words(self, string): """Split the string along word boundries.""" string = self.unicode(string) return self._breaker.words(string) def key(self, string): """Sort key for a string. If the string is a str instance, it is decoded to a unicode instance according to the 'encoding' attribute of the Collator. """ string = self.unicode(string) return self._collator.key(string)