X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2Fsyslocale.py;h=e2aeed9bdeaba636098c13a5a8e41bf90186569c;hp=4a67044c9046c95453f78b83fd68ad73a450d441;hb=2a37219e2d9c0fe58e78d987a21f6e37cfd33940;hpb=f215248530a3ad1d1ae01bba33c54d7cfdbd5413 diff --git a/collate/syslocale.py b/collate/syslocale.py index 4a67044..e2aeed9 100644 --- a/collate/syslocale.py +++ b/collate/syslocale.py @@ -12,11 +12,10 @@ locale of all previous collators and anything else using the system locale information. Use this collation backend if... - - You are on a system without ICU or UCA datafiles for the locale, - and DUCET results are not acceptable. + - You are on a system without ICU. Avoid this backend if... - - ICU or UCA support is available for the current locale. + - ICU is available for the current locale. - You are sorting strings from alphabets outside the primary locale. - You need to support collating multiple locales at once. - You need the same results across multiple platforms. @@ -24,6 +23,7 @@ Avoid this backend if... """ import locale +import re import collate.errors import collate._abcollator @@ -35,9 +35,10 @@ class Collator(collate._abcollator.Collator): def __init__(self, locale_code, encoding=None): locale_code, encoding = collate._locale.getpair(locale_code, encoding) try: - locale.setlocale(locale.LC_COLLATE, locale_code) + setlocale = locale_code + "." + encoding + locale.setlocale(locale.LC_COLLATE, setlocale) except locale.Error: - raise collate.errors.InvalidLocaleError(locale_code) + raise collate.errors.InvalidLocaleError(setlocale) self.locale = locale.getlocale(locale.LC_COLLATE)[0] self.encoding = collate._locale.encoding(encoding) @@ -65,3 +66,10 @@ class Collator(collate._abcollator.Collator): b = b.decode(self.encoding, "replace") return locale.strcoll(a, b) + def words(self, string, sep=re.compile(r"\W+", re.UNICODE)): + """Split the string into separate words. 
+ + This split is done using the locale's notion of a word boundary. + """ + return re.split(sep, string) +