From c519e411927761939a0461bdf8d0a12b26d965e9 Mon Sep 17 00:00:00 2001 From: Joe Wreschnig Date: Sun, 14 Feb 2010 17:06:44 -0800 Subject: [PATCH] syslocale: Standardize API. --- collate/_abcollator.py | 3 +- collate/syslocale.py | 65 +++++++++++++++++++++++++++++++++++ collate/syslocale/__init__.py | 36 ------------------- 3 files changed, 66 insertions(+), 38 deletions(-) create mode 100644 collate/syslocale.py delete mode 100644 collate/syslocale/__init__.py diff --git a/collate/_abcollator.py b/collate/_abcollator.py index 094a5de..71f5f54 100644 --- a/collate/_abcollator.py +++ b/collate/_abcollator.py @@ -1,5 +1,4 @@ class Collator(object): def cmp(self, string1, string2): + """Return negative if a < b, zero if a == b, positive if a > b.""" return cmp(self.key(string1), self.key(string2)) - - diff --git a/collate/syslocale.py b/collate/syslocale.py new file mode 100644 index 0000000..b9f9b92 --- /dev/null +++ b/collate/syslocale.py @@ -0,0 +1,65 @@ +"""C library locale-based collation. + +This collation backend uses the system's C library to sort strings. It +is fast and almost always available, but may sort strings outside of +the user's native locale incorrectly or confusingly (for example, +en_US tends to ignore hiragana characters; ja_JP does not case-fold +Latin characters). + +Since the C library only supports one locale active at a time per +process, instantiating a Collator from this module will affect the +locale of all previous collators and anything else using the system +locale information. + +Use this collation backend if... + - You are on a system without ICU or UCA datafiles for the locale, + and DUCET results are not acceptable. + +Avoid this backend if... + - ICU or UCA support is available for the current locale. + - You are sorting strings from alphabets outside the primary locale. + - You need to support collating multiple locales at once. +""" + +import locale + +import collate.errors +import collate._abcollator +import collate._locale + +class Collator(collate._abcollator.Collator): + """C library locale-based collation.""" + + def __init__(self, locale_code, encoding=None): + try: + locale.setlocale(locale.LC_COLLATE, locale_code) + except locale.Error: + raise collate.errors.InvalidLocaleError(locale_code) + self.locale = locale.getlocale(locale.LC_COLLATE)[0] + self.encoding = collate._locale.encoding(encoding) + + def key(self, string): + """Sort key for a string. + + If string is a unicode instance that cannot be processed by + the system locale library, it is first encoded according to + the 'encoding' attribute of the Collator. + """ + try: + return locale.strxfrm(string) + except UnicodeEncodeError: + return locale.strxfrm(string.encode(self.encoding, "replace")) + + def cmp(self, a, b): + """Return negative if a < b, zero if a == b, positive if a > b. + + If strs rather than unicodes are passed in, they are first + decoded according to the 'encoding' attribute of the Collator. + """ + + if isinstance(a, str): + a = a.decode(self.encoding, "replace") + if isinstance(b, str): + b = b.decode(self.encoding, "replace") + return locale.strcoll(a, b) + diff --git a/collate/syslocale/__init__.py b/collate/syslocale/__init__.py deleted file mode 100644 index a76d538..0000000 --- a/collate/syslocale/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -import locale - -import collate.errors -import collate._abcollator - -class Collator(collate._abcollator.Collator): - def __init__(self, locale_code, encoding=None): - default = locale.getdefaultlocale()[0] - for locale in [locale_code, default]: - try: - locale.setlocale(locale.LC_COLLATE, locale_code) - except locale.Error as err: - pass - else: - break - else: - raise collate.errors.InvalidLocaleError("no locale found") - self.locale = locale.getlocale()[0] - try: - self.__encoding = locale_code.split(".")[1] - except IndexError: - self.__encoding = locale_code.split(locale.getpreferredencoding()) - - def key(self, string): - try: - return locale.strxfrm(string) - except UnicodeEncodeError: - return locale.strxfrm(string.encode(self.__encoding, "replace")) - - def cmp(self, string1, string2): - if isinstance(string1, str): - string1 = string1.decode(self.__encoding, "replace") - if isinstance(string2, str): - string2 = string2.decode(self.__encoding, "replace") - return locale.strcoll(string1, string2) - -- 2.30.2