# Reconstructed from a whitespace-collapsed git "blobdiff_plain" dump of
# collate/syslocale.py (added as a new file in the python-collate repository).
"""C library locale-based collation.

This collation backend uses the system's C library to sort strings. It
is fast and almost always available, but may sort strings outside of
the user's native locale incorrectly or confusingly (for example,
en_US tends to ignore hiragana characters; ja_JP does not case-fold
Latin characters).

Since the C library only supports one locale active at a time per
process, instantiating a Collator from this module will affect the
locale of all previous collators and anything else using the system
locale information.

Use this collation backend if...
 - You are on a system without ICU or UCA datafiles for the locale,
   and DUCET results are not acceptable.

Avoid this backend if...
 - ICU or UCA support is available for the current locale.
 - You are sorting strings from alphabets outside the primary locale.
 - You need to support collating multiple locales at once.
"""

import locale

import collate.errors
import collate._abcollator
import collate._locale


class Collator(collate._abcollator.Collator):
    """C library locale-based collation.

    Creating an instance switches the process-wide LC_COLLATE category
    to the requested locale; see the module docstring for the
    consequences of that.

    Attributes set by the constructor:
      locale   -- the locale name reported back by the C library for
                  LC_COLLATE after the switch.
      encoding -- the codec used to convert between byte strings and
                  text, as resolved by collate._locale.encoding().
    """

    def __init__(self, locale_code, encoding=None):
        """Activate locale_code for LC_COLLATE and record the encoding.

        locale_code is handed to locale.setlocale() unchanged.
        encoding is passed through collate._locale.encoding(), which
        decides what to use when it is None.

        Raises collate.errors.InvalidLocaleError if the C library
        rejects locale_code.
        """
        try:
            locale.setlocale(locale.LC_COLLATE, locale_code)
        except locale.Error:
            raise collate.errors.InvalidLocaleError(locale_code)
        # Ask the C library what it actually selected rather than
        # trusting the caller's spelling of the locale name.
        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
        self.encoding = collate._locale.encoding(encoding)

    def key(self, string):
        """Sort key for a string.

        If string is a unicode instance that cannot be processed by
        the system locale library, it is first encoded according to
        the 'encoding' attribute of the Collator.

        On Python 3, where locale.strxfrm only accepts text, a byte
        string is first decoded with the same encoding (undecodable
        bytes are replaced).
        """
        # Only true on Python 3: on Python 2 'bytes' is 'str', so native
        # byte strings keep going straight to strxfrm as before.
        if isinstance(string, bytes) and not isinstance(string, str):
            string = string.decode(self.encoding, "replace")
        try:
            return locale.strxfrm(string)
        except UnicodeEncodeError:
            # Python 2's strxfrm implicitly encodes unicode as ASCII;
            # retry with an explicit, lossy encoding instead.
            return locale.strxfrm(string.encode(self.encoding, "replace"))

    def cmp(self, a, b):
        """Return negative if a < b, zero if a == b, positive if a > b.

        If byte strings rather than text strings are passed in, they
        are first decoded according to the 'encoding' attribute of the
        Collator (undecodable bytes are replaced).
        """
        # Test against 'bytes', not 'str': they are the same type on
        # Python 2.6+, but on Python 3 'str' is text and has no decode().
        if isinstance(a, bytes):
            a = a.decode(self.encoding, "replace")
        if isinstance(b, bytes):
            b = b.decode(self.encoding, "replace")
        return locale.strcoll(a, b)