X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2F__init__.py;h=5094071fc4f8cdcd274986e0be1ed609ac1ceabf;hp=6ce11148d836cba02f0d652df60832fff0515af7;hb=e8b221311bf60883d3f14083bab63fd947d34509;hpb=22570a3c13079d27cfb60110f631b164dbd8b831 diff --git a/collate/__init__.py b/collate/__init__.py index 6ce1114..5094071 100644 --- a/collate/__init__.py +++ b/collate/__init__.py @@ -1,67 +1,121 @@ -import locale +"""collate - Sort strings intelligently. -if locale.getlocale()[0] is None: - locale.setlocale(locale.LC_COLLATE, '') +This module provides tools to sort strings in a 'human-expected' order. + +Because human expectations are fuzzy and often self-contradictory, the +sort order is not guaranteed to be stable between versions of this +module (rather the opposite - the primary reason to update it will +probably be changed sort results). + +If available, this module uses the ICU localization library. +Otherwise, it uses the system's locale database (and produces +significantly worse results). + +This module tries very hard not to fail loudly. It tends to ignore +most Unicode recoding errors, and will eventually fall back to the C +locale or raw codepoint-based collation. If you would like loud +failure, you can use the collate.strings module and the individual +Collators directly. + +Trivial Use: +------------ +strings = read_strings(...) +strings.sort(key=collate.key) + +Attributes: +----------- +backend - The default collation backend. If available, this is + collate.icu; otherwise, it is collate.syslocale. In special + situations, it may be collate.codepoint. + +collator - The default collator. This is the collator of the default + backend instantiated with the default system locale and encoding. 
+ +""" + +__all__ = ["collator", "set_collator", "cmp", "key", "default", + "VERSION", "VERSION_STRING"] import collate.errors +import collate._locale +try: + import collate.codepoint as default +except ImportError: + pass try: import collate.syslocale as default - import collate.uca as default +except ImportError: + pass +try: import collate.icu as default except ImportError: pass +VERSION = (0, 1) +VERSION_STRING = ".".join(map(str, VERSION)) + collator = None -preferred_locale = None - -def _get_collator(backend, locale_code): - for code in [locale_code, - locale_code.split("_")[0], - locale.getlocale(locale.LC_COLLATE)[0], - locale.getlocale(locale.LC_COLLATE)[0].split("_")[0], - locale.getdefaultlocale()[0], - locale.getdefaultlocale()[0].split("_")[0], - None]: - try: - return default.Collator(code) - except collate.errors.InvalidLocaleError: - pass - -def set_locale(locale_code): - global collator - global preferred_locale - preferred_locale = locale_code - if collator is None or collator.locale != locale_code: - c = _get_collator(default, locale_code) - if c is not None: - collator = c - else: - raise collate.errors.InvalidLocaleError(locale_code) +def set_collator(backend=None, locale=None, encoding=None): + """Set the default collation backend. + + This function tries very hard not to fail; the resulting Collator + may not have the locale or encoding you specified (at the very + least, they will be normalized). Remember to check + collator.locale and collator.encoding. + + Arguments: + backend - 'icu', 'syslocale', or 'codepoint'; None to not change. + locale - e.g. 'en_US', or None for the system locale. + encoding - e.g. 'utf-8', or None for the system locale encoding -def set_backend(backend): + Returns: + The new default Collator instance, or None if no collator could be + created; if None is returned, the existing default Collator is + left intact. 
+ + """ global collator global default - c = _get_collator(backend, preferred_locale) - if c is not None: - collator = c - default = backend - else: - raise collate.errors.InvalidLocaleError(locale_code) + if backend is None: + backend = default + locales = collate._locale.localelist(locale) + possible = None + for locale in locales: + locale, encoding_ = collate._locale.getpair(locale, encoding) + try: + possible = backend.Collator(locale, encoding_) + except collate.errors.InvalidLocaleError: + pass + else: + break + if possible is not None: + collator = possible + default = backend + return collator + return possible def key(string): - return collator.key(string) + """Return a good sorting key for the string. -def cmp(string1, string2): - return collator.cmp(string1, string2) + The sort key should be considered an opaque value which is only + meaningful when compared to other sort keys from the same + collator. -try: - set_locale(locale.getlocale()[0]) -except collate.errors.InvalidLocaleError: - # There's no way this should fail unless the C locale system is - # fucked or missing all data. - import collator.syslocale - set_backend(collator.syslocale) - set_locale(locale.getlocale(locale.LC_COLLATE)[0]) + This is the same as collate.collator.sortemekey(string). + """ + return collator.sortemekey(string) + +def cmp(a, b): + """Return negative if a < b, zero if a == b, positive if a > b. + + This is the same as collate.collator.cmp(a, b). + """ + return collator.cmp(a, b) + +set_collator() + +if collator is None: + raise collate.errors.InvalidLocaleError("C")