X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2Fsyslocale.py;h=5b8adcac87a22a5db3d815289d8d1880a7284163;hp=b9f9b920df28a68fa3948ffdd1cfcb07f6e21e48;hb=7644110ce07ec8a78003ee7db9dcdfe5cbca3854;hpb=c519e411927761939a0461bdf8d0a12b26d965e9

diff --git a/collate/syslocale.py b/collate/syslocale.py
index b9f9b92..5b8adca 100644
--- a/collate/syslocale.py
+++ b/collate/syslocale.py
@@ -12,16 +12,20 @@ locale of all previous collators and anything else using the system
 locale information.
 
 Use this collation backend if...
- - You are on a system without ICU or UCA datafiles for the locale,
-   and DUCET results are not acceptable.
+ - You are on a system without ICU.
 
 Avoid this backend if...
- - ICU or UCA support is available for the current locale.
+ - ICU is available for the current locale.
  - You are sorting strings from alphabets outside the primary locale.
  - You need to support collating multiple locales at once.
+ - You need the same results across multiple platforms.
+ 
 """
 
+__all__ = ["Collator"]
+
 import locale
+import re
 
 import collate.errors
 import collate._abcollator
@@ -31,10 +35,13 @@ class Collator(collate._abcollator.Collator):
     """C library locale-based collation."""
 
     def __init__(self, locale_code, encoding=None):
+        super(Collator, self).__init__(locale_code, encoding)
+        locale_code, encoding = collate._locale.getpair(locale_code, encoding)
         try:
-            locale.setlocale(locale.LC_COLLATE, locale_code)
+            setlocale = locale_code + "." + encoding
+            locale.setlocale(locale.LC_COLLATE, setlocale)
         except locale.Error:
-            raise collate.errors.InvalidLocaleError(locale_code)
+            raise collate.errors.InvalidLocaleError(setlocale)
         self.locale = locale.getlocale(locale.LC_COLLATE)[0]
         self.encoding = collate._locale.encoding(encoding)
 
@@ -50,16 +57,9 @@ class Collator(collate._abcollator.Collator):
         except UnicodeEncodeError:
             return locale.strxfrm(string.encode(self.encoding, "replace"))
 
-    def cmp(self, a, b):
-        """Return negative if a < b, zero if a == b, positive if a > b.
-
-        If strs rather than unicodes are passed in, they are first
-        decoded according to the 'encoding' attribute of the Collator.
-        """
+    def words(self, string, sep=re.compile(r"\W+", re.UNICODE)):
+        """Split string into words on sep (default: runs of non-word chars)."""
+        if isinstance(string, str):
+            string = string.decode(self.encoding, 'replace')
+        return re.split(sep, string)
 
-        if isinstance(a, str):
-            a = a.decode(self.encoding, "replace")
-        if isinstance(b, str):
-            b = b.decode(self.encoding, "replace")
-        return locale.strcoll(a, b)
-