X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2Fsyslocale.py;h=5b8adcac87a22a5db3d815289d8d1880a7284163;hp=b9f9b920df28a68fa3948ffdd1cfcb07f6e21e48;hb=7644110ce07ec8a78003ee7db9dcdfe5cbca3854;hpb=c519e411927761939a0461bdf8d0a12b26d965e9 diff --git a/collate/syslocale.py b/collate/syslocale.py index b9f9b92..5b8adca 100644 --- a/collate/syslocale.py +++ b/collate/syslocale.py @@ -12,16 +12,20 @@ locale of all previous collators and anything else using the system locale information. Use this collation backend if... - - You are on a system without ICU or UCA datafiles for the locale, - and DUCET results are not acceptable. + - You are on a system without ICU. Avoid this backend if... - - ICU or UCA support is available for the current locale. + - ICU is available for the current locale. - You are sorting strings from alphabets outside the primary locale. - You need to support collating multiple locales at once. + - You need the same results across multiple platforms. + """ +__all__ = ["Collator"] + import locale +import re import collate.errors import collate._abcollator @@ -31,10 +35,13 @@ class Collator(collate._abcollator.Collator): """C library locale-based collation.""" def __init__(self, locale_code, encoding=None): + super(Collator, self).__init__(locale, encoding) + locale_code, encoding = collate._locale.getpair(locale_code, encoding) try: - locale.setlocale(locale.LC_COLLATE, locale_code) + setlocale = locale_code + "." + encoding + locale.setlocale(locale.LC_COLLATE, setlocale) except locale.Error: - raise collate.errors.InvalidLocaleError(locale_code) + raise collate.errors.InvalidLocaleError(setlocale) self.locale = locale.getlocale(locale.LC_COLLATE)[0] self.encoding = collate._locale.encoding(encoding) @@ -50,16 +57,9 @@ class Collator(collate._abcollator.Collator): except UnicodeEncodeError: return locale.strxfrm(string.encode(self.encoding, "replace")) - def cmp(self, a, b): - """Return negative if a < b, zero if a == b, positive if a > b. - - If strs rather than unicodes are passed in, they are first - decoded according to the 'encoding' attribute of the Collator. - """ + def words(self, string, sep=re.compile(r"\W+", re.UNICODE)): + """Split the string into separate words.""" + if isinstance(string, str): + string = string.decode(self.encoding, 'replace') + return re.split(sep, string) - if isinstance(a, str): - a = a.decode(self.encoding, "replace") - if isinstance(b, str): - b = b.decode(self.encoding, "replace") - return locale.strcoll(a, b) -