From dd764628164dd6f23c1ec98a13184ce7c69fa40f Mon Sep 17 00:00:00 2001
From: Joe Wreschnig
Date: Sun, 14 Feb 2010 19:04:26 -0800
Subject: [PATCH] icu: Document and tab-fix.

---
 collate/icu/__init__.py | 50 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/collate/icu/__init__.py b/collate/icu/__init__.py
index aa2c24d..eeee418 100644
--- a/collate/icu/__init__.py
+++ b/collate/icu/__init__.py
@@ -1,13 +1,51 @@
+"""ICU-based collation.
+
+This collation backend uses the International Components for Unicode
+library to provide accurate and high-performance collation. It
+supports multiple locales and advanced sorting capabilities.
+
+Use this collation backend if possible; it's by far the best.
+
+Avoid this backend if...
+ - ICU is not available for your system.
+
+"""
+
 import collate.icu._ucol
 import collate._abcollator
+import collate._locale
+import collate.errors
 
 class Collator(collate._abcollator.Collator):
-    def __init__(self, locale, encoding):
-	self._collator = collate.icu._ucol.Collator(locale)
-	self.locale = self._collator.locale
+    """ICU-based collation."""
+
+    def __init__(self, locale, encoding=None):
+        locale, encoding = collate._locale.getpair(locale, encoding)
+        self._collator = collate.icu._ucol.Collator(locale)
+        self.locale = self._collator.locale
+        self.encoding = collate._locale.encoding(encoding)
+        if self._collator.used_default_information and locale != "C":
+            raise collate.errors.InvalidLocaleError(locale)
 
     def key(self, string):
-	return self._collator.key(string)
+        """Sort key for a string.
+
+        If the string is a str instance, it is decoded to a unicode
+        instance according to the 'encoding' attribute of the
+        Collator.
+        """
+        if isinstance(string, str):
+            string = string.decode(self.encoding, 'replace')
+        return self._collator.key(string)
+
+    def cmp(self, a, b):
+        """Return negative if a < b, zero if a == b, positive if a > b.
 
-    def cmp(self, string1, string2):
-	return self._collator.cmp(string1, string2)
+        If strs rather than unicodes are passed in, they are first
+        decoded according to the 'encoding' attribute of the Collator.
+        """
+        if isinstance(a, str):
+            a = a.decode(self.encoding, 'replace')
+        if isinstance(b, str):
+            b = b.decode(self.encoding, 'replace')
+        return self._collator.cmp(a, b)
-- 
2.20.1