"""
-import collate.icu._ucol
import collate._abcollator
import collate._locale
import collate.errors
+from collate.icu import _icu
+
class Collator(collate._abcollator.Collator):
"""ICU-based collation."""
def __init__(self, locale, encoding=None):
    """Set up the ICU collator and word breaker for a locale.

    The locale/encoding pair is first resolved through
    ``collate._locale.getpair``. The "C" locale is mapped to ICU's
    "root" locale when constructing the ICU objects.

    Raises ``collate.errors.InvalidLocaleError`` if the ICU collator
    reports that it used default information and the resolved locale
    is not "C".
    """
    locale, encoding = collate._locale.getpair(locale, encoding)
    if locale == "C":
        icu_locale = "root"
    else:
        icu_locale = locale

    collator = _icu.Collator(icu_locale)
    self._collator = collator
    self.locale = collator.locale
    self.encoding = collate._locale.encoding(encoding)

    # A collator built from default information means the requested
    # locale was not recognised; only "C" is allowed to do that.
    if collator.used_default_information:
        if locale != "C":
            raise collate.errors.InvalidLocaleError(locale)

    try:
        breaker = _icu.WordBreaker(icu_locale)
    except ValueError:
        # Thai is the only language with a special break locale,
        # so falling back to the root breaker is harmless.
        breaker = _icu.WordBreaker("root")
    self._breaker = breaker
+
def words(self, string):
    """Split a string into word-break segments, dropping whitespace.

    Byte strings are decoded with ``self.encoding`` (undecodable bytes
    are replaced) before being handed to the word breaker; text strings
    are passed through unchanged.

    Returns a list of the segments yielded by ``self._breaker.words``
    for which ``isspace()`` is false.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+ and the real byte
    # string type on Python 3, so only undecoded input is decoded and
    # text input never hits the (Python 3 nonexistent) ``str.decode``.
    if isinstance(string, bytes):
        string = string.decode(self.encoding, 'replace')
    # A list comprehension returns a list on every Python version,
    # unlike ``filter``, which is lazy on Python 3.
    return [
        segment
        for segment in self._breaker.words(string)
        if not segment.isspace()
    ]
+
def key(self, string):
"""Sort key for a string.