"""collate - Sort strings intelligently.

This module provides tools to sort strings in a 'human-expected'
order. Because human expectations are fuzzy and often
self-contradictory, the sort order is not guaranteed to be stable
between versions of this module (rather the opposite - the primary
reason to update it will probably be changed sort results).

If available, this module uses the ICU localization library.
Otherwise, it uses the system's locale database (and produces
significantly worse results).

This module tries very hard not to fail loudly. It tends to ignore
most Unicode recoding errors, and will eventually fall back to the C
locale or raw codepoint-based collation. If you would like loud
failure, you can use the collate.strings module and the individual
Collators directly.

Trivial Use:
------------
strings = read_strings(...)
strings.sort(key=collate.key)

Attributes:
-----------
default - The default collation backend module. If available, this is
collate.icu; otherwise, it is collate.syslocale. In special
situations, it may be collate.codepoint.

collator - The default collator. This is the collator of the default
backend instantiated with the default system locale and encoding.
"""

__all__ = ["collator", "set_collator", "cmp", "key", "default",
           "VERSION", "VERSION_STRING"]

import importlib

import collate.errors
import collate._locale

# Bind `default` up front so it always exists, even if every backend
# import below fails; set_collator() then reports "no collator" instead
# of dying with a NameError.
default = None

# Each successful import rebinds `default`, so the *last* backend that
# imports wins: icu is preferred over syslocale, which is preferred
# over codepoint.
try:
    import collate.codepoint as default
except ImportError:
    pass

try:
    import collate.syslocale as default
except ImportError:
    pass

try:
    import collate.icu as default
except ImportError:
    pass

VERSION = (0, 2)
VERSION_STRING = ".".join(map(str, VERSION))

# The default Collator instance; populated by set_collator() below.
collator = None


def _resolve_backend(backend):
    """Map a set_collator() backend argument to a backend module.

    None selects the current default backend, a string is treated as
    the name of a collate submodule, and anything else is assumed to
    already be a backend module. Returns None when no backend is
    available (no default, or the named submodule cannot be imported).
    """
    if backend is None:
        return default
    if isinstance(backend, str):
        try:
            return importlib.import_module("collate." + backend)
        except ImportError:
            return None
    return backend


def set_collator(backend=None, locale=None, encoding=None):
    """Set the default collation backend.

    This function tries very hard not to fail; the resulting Collator
    may not have the locale or encoding you specified (at the very
    least, they will be normalized). Remember to check collator.locale
    and collator.encoding.

    Arguments:
    backend - a backend module (e.g. collate.icu) or its name ('icu',
              'syslocale', or 'codepoint'); None to not change.
    locale - e.g. 'en_US', or None for the system locale.
    encoding - e.g. 'utf-8', or None for the system locale encoding

    Returns:
    The new default Collator instance, or None if no collator could be
    created; if None is returned, the existing default Collator is left
    intact.
    """
    global collator
    global default

    backend = _resolve_backend(backend)
    if backend is None:
        return None

    created = None
    # Try each candidate produced by localelist() in order and keep the
    # first one the backend accepts.
    for candidate in collate._locale.localelist(locale):
        name, codec = collate._locale.getpair(candidate, encoding)
        try:
            created = backend.Collator(name, codec)
        except collate.errors.InvalidLocaleError:
            continue
        break

    if created is None:
        # Nothing worked: leave the current collator and backend alone.
        return None

    collator = created
    default = backend
    return collator


def key(string):
    """Return a good sorting key for the string.

    The sort key should be considered an opaque value which is only
    meaningful when compared to other sort keys from the same
    collator.

    This is the same as collate.collator.sortemekey(string).
    """
    # `collator` is looked up at call time, so a later set_collator()
    # call changes which collator is used here.
    return collator.sortemekey(string)


def cmp(a, b):
    """Return negative if a < b, zero if a == b, positive if a > b.

    This is the same as collate.collator.cmp(a, b).
    """
    return collator.cmp(a, b)


# Build the default collator at import time; fail loudly only when no
# collator could be created at all.
set_collator()

if collator is None:
    raise collate.errors.InvalidLocaleError("C")