--- /dev/null
+"""C library locale-based collation.
+
+This collation backend uses the system's C library to sort strings. It
+is fast and almost always available, but may sort strings outside of
+the user's native locale incorrectly or confusingly (for example,
+en_US tends to ignore hiragana characters; ja_JP does not case-fold
+Latin characters).
+
+Since the C library only supports one active locale per process,
+instantiating a Collator from this module changes the process-wide
+LC_COLLATE setting. This affects every previously created Collator
+from this module and anything else in the process that relies on the
+C library's collation locale.
+
+Use this collation backend if...
+ - You are on a system without ICU or UCA data files for the locale,
+   and DUCET results are not acceptable.
+
+Avoid this backend if...
+ - ICU or UCA support is available for the current locale.
+ - You are sorting strings from alphabets outside the primary locale.
+ - You need to support collating multiple locales at once.
+"""
+
+import locale
+
+import collate.errors
+import collate._abcollator
+import collate._locale
+
+class Collator(collate._abcollator.Collator):
+    """C library locale-based collation.
+
+    Creating an instance calls locale.setlocale() for LC_COLLATE, so
+    the most recently created Collator determines the ordering used by
+    every instance of this class.
+
+    The 'locale' attribute holds the language code that
+    locale.getlocale() reports for LC_COLLATE once the requested locale
+    is active. The 'encoding' attribute holds the result of
+    collate._locale.encoding() for the 'encoding' argument and is used
+    by key() and cmp() to convert between byte and unicode strings.
+    """
+
+    def __init__(self, locale_code, encoding=None):
+        """Activate locale_code as the collation locale.
+
+        Raises collate.errors.InvalidLocaleError, carrying locale_code,
+        if locale.setlocale() rejects the requested locale.
+        """
+        try:
+            locale.setlocale(locale.LC_COLLATE, locale_code)
+        except locale.Error:
+            raise collate.errors.InvalidLocaleError(locale_code)
+        # Store the code Python reports for the now-active LC_COLLATE
+        # locale instead of echoing the caller's argument back.
+        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
+        self.encoding = collate._locale.encoding(encoding)
+
+    def key(self, string):
+        """Sort key for a string.
+
+        The string is handed to locale.strxfrm() as-is. If that raises
+        UnicodeEncodeError, the string is encoded according to the
+        'encoding' attribute of the Collator (unencodable characters
+        are replaced) and the encoded form is transformed instead.
+        """
+        try:
+            return locale.strxfrm(string)
+        except UnicodeEncodeError:
+            return locale.strxfrm(string.encode(self.encoding, "replace"))
+
+    def cmp(self, a, b):
+        """Return negative if a < b, zero if a == b, positive if a > b.
+
+        If byte strings rather than unicode strings are passed in, they
+        are first decoded according to the 'encoding' attribute of the
+        Collator (undecodable bytes are replaced). The comparison
+        itself is done by locale.strcoll().
+        """
+        # 'bytes' is the same type as 'str' on Python 2, so behaviour
+        # there is unchanged. On Python 3 this avoids calling .decode()
+        # on text strings, which do not have that method.
+        if isinstance(a, bytes):
+            a = a.decode(self.encoding, "replace")
+        if isinstance(b, bytes):
+            b = b.decode(self.encoding, "replace")
+        return locale.strcoll(a, b)
+
+
+++ /dev/null
-import locale
-
-import collate.errors
-import collate._abcollator
-
-class Collator(collate._abcollator.Collator):
- def __init__(self, locale_code, encoding=None):
- default = locale.getdefaultlocale()[0]
- for locale in [locale_code, default]:
- try:
- locale.setlocale(locale.LC_COLLATE, locale_code)
- except locale.Error as err:
- pass
- else:
- break
- else:
- raise collate.errors.InvalidLocaleError("no locale found")
- self.locale = locale.getlocale()[0]
- try:
- self.__encoding = locale_code.split(".")[1]
- except IndexError:
- self.__encoding = locale_code.split(locale.getpreferredencoding())
-
- def key(self, string):
- try:
- return locale.strxfrm(string)
- except UnicodeEncodeError:
- return locale.strxfrm(string.encode(self.__encoding, "replace"))
-
- def cmp(self, string1, string2):
- if isinstance(string1, str):
- string1 = string1.decode(self.__encoding, "replace")
- if isinstance(string2, str):
- string2 = string2.decode(self.__encoding, "replace")
- return locale.strcoll(string1, string2)
-