# Reconstructed from the gitweb blobdiff that adds collate/codepoint.py
# to python-collate.git as a new file.
"""Codepoint-based collation.

This collation backend sorts using only the basic codepoint order. It
is primarily intended to be used as a baseline and example for other
collation backends.

Use this collation backend if...
 - You are writing tests for pycollate.
 - You are writing specialized Unicode software.
 - You are on a system with no locale module.

Avoid this backend if...
 - You are writing a normal program for a normal runtime environment.
 - You are sorting strings to show normal humans.

"""

import collate._abcollator
import collate._locale

# Must list the class defined below: a name that does not exist in the
# module makes ``from collate.codepoint import *`` raise AttributeError.
__all__ = ["Collator"]


class Collator(collate._abcollator.Collator):
    """Codepoint-based collation.

    Arguments
    locale - all parts but encoding ignored, always 'C'
    encoding - try to use this string encoding

    Attributes
    locale - always the string "C"
    encoding - the encoding used by key() to decode byte strings
    """

    def __init__(self, locale=None, encoding=None):
        """Set up the collator.

        The (locale, encoding) pair is resolved through
        collate._locale.getpair; only the encoding half of the result
        is kept, and the locale attribute is fixed to "C".
        """
        # The locale half of the resolved pair is deliberately discarded.
        _, self.encoding = collate._locale.getpair(locale, encoding)
        self.locale = "C"

    def key(self, string):
        """Sort key for a string.

        If string is a byte string, it is first decoded according to
        the 'encoding' attribute of the Collator; undecodable bytes are
        substituted (error handler 'replace') rather than raising.
        Any other value, including text that is already decoded, is
        returned unchanged.
        """
        # ``bytes`` is an alias of ``str`` on Python 2.6+, so this is the
        # same check there; on Python 3 it selects real byte strings and
        # leaves text (which has no .decode) alone.
        if isinstance(string, bytes):
            string = string.decode(self.encoding, 'replace')
        return string