locale information.
Use this collation backend if...
- - You are on a system without ICU or UCA datafiles for the locale,
- and DUCET results are not acceptable.
+ - You are on a system without ICU.
Avoid this backend if...
- - ICU or UCA support is available for the current locale.
+ - ICU is available for the current locale.
- You are sorting strings from alphabets outside the primary locale.
- You need to support collating multiple locales at once.
+ - You need the same results across multiple platforms.
+
"""
+__all__ = ["Collator"]
+
import locale
+import re
import collate.errors
import collate._abcollator
"""C library locale-based collation."""
def __init__(self, locale_code, encoding=None):
+ """Set LC_COLLATE to the requested locale and record what was applied.
+
+ The locale code and encoding are normalised with
+ collate._locale.getpair() and joined as "code.encoding" before
+ being handed to locale.setlocale(). InvalidLocaleError is raised
+ if the C library rejects that name.
+ """
+ # Pass the caller's locale code on to the base class; the bare name
+ # 'locale' here would be the imported stdlib module, not the argument.
+ super(Collator, self).__init__(locale_code, encoding)
+ locale_code, encoding = collate._locale.getpair(locale_code, encoding)
try:
- locale.setlocale(locale.LC_COLLATE, locale_code)
+ # NOTE(review): assumes getpair() never returns a None encoding; the
+ # concatenation would raise TypeError, which is not caught here.
+ setlocale = locale_code + "." + encoding
+ locale.setlocale(locale.LC_COLLATE, setlocale)
except locale.Error:
- raise collate.errors.InvalidLocaleError(locale_code)
+ raise collate.errors.InvalidLocaleError(setlocale)
self.locale = locale.getlocale(locale.LC_COLLATE)[0]
self.encoding = collate._locale.encoding(encoding)
except UnicodeEncodeError:
return locale.strxfrm(string.encode(self.encoding, "replace"))
- def cmp(self, a, b):
- """Return negative if a < b, zero if a == b, positive if a > b.
-
- If strs rather than unicodes are passed in, they are first
- decoded according to the 'encoding' attribute of the Collator.
- """
+ def words(self, string, sep=re.compile(r"\W+", re.UNICODE)):
+ """Split the string into separate words.
+
+ If a str rather than a unicode is passed in, it is first decoded
+ according to the 'encoding' attribute of the Collator, replacing
+ undecodable bytes. The text is then split on 'sep', which by
+ default matches runs of non-word characters (re.UNICODE).
+ """
+ if isinstance(string, str):
+ string = string.decode(self.encoding, 'replace')
+ return re.split(sep, string)
- if isinstance(a, str):
- a = a.decode(self.encoding, "replace")
- if isinstance(b, str):
- b = b.decode(self.encoding, "replace")
- return locale.strcoll(a, b)
-