-import locale
+"""collate - Sort strings intelligently.
+
+This module provides tools to sort strings in a 'human-expected' order.
+
+Because human expectations are fuzzy and often self-contradictory, the
+sort order is not guaranteed to be stable between versions of this
+module (rather the opposite - the primary reason to update it will
+probably be changed sort results).
+
+If available, this module uses the ICU localization library.
+Otherwise, it uses the system's locale database (and produces
+significantly worse results).
+
+This module tries very hard not to fail loudly. It tends to ignore
+most Unicode recoding errors, and will eventually fall back to the C
+locale or raw codepoint-based collation. If you would like loud
+failure, you can use the collate.strings module and the individual
+Collators directly.
+
+Trivial Use:
+------------
+strings = read_strings(...)
+strings.sort(key=collate.key)
+
+Attributes:
+-----------
+default - The default collation backend module. If available, this is
+ collate.icu; otherwise, it is collate.syslocale. In special
+ situations, it may be collate.codepoint.
+
+collator - The default collator. This is the collator of the default
+ backend instantiated with the default system locale and encoding.
+
+"""
+
+__all__ = ["collator", "set_collator", "cmp", "key", "default",
+ "VERSION", "VERSION_STRING"]
import collate.errors
+import collate._locale
+# Pick the default backend. Every import below binds the same name,
+# so a later successful import replaces an earlier one: the resulting
+# preference is icu, then syslocale, then codepoint. A backend whose
+# import fails is skipped.
+try:
+ import collate.codepoint as default
+except ImportError:
+ pass
try:
import collate.syslocale as default
- import collate.uca as default
+except ImportError:
+ pass
+try:
import collate.icu as default
except ImportError:
pass
+# Module version as a tuple, and the same version as a dotted string.
+VERSION = (0, 1)
+VERSION_STRING = ".".join(map(str, VERSION))
+
+# The default Collator instance; None until set_collator() assigns one.
collator = None
-def set_locale(locale_code):
-    global collator
-    if collator is None or collator.locale != locale_code:
-        for code in [locale_code,
-                     locale_code.split("_")[0],
-                     locale.getdefaultlocale()[0],
-                     locale.getdefaultlocale()[0].split("_")[0],
-                     None]:
-            try:
-                collator = default.Collator(code)
-            except collate.errors.InvalidLocaleError:
-                pass
-            else:
-                break
-        else:
-            raise collate.errors.InvalidLocaleError(locale_code)
+def set_collator(backend=None, locale=None, encoding=None):
+    """Set the default collation backend and collator.
+
+    This function tries very hard not to fail; the resulting Collator
+    may not have the locale or encoding you specified (at the very
+    least, they will be normalized). Remember to check
+    collator.locale and collator.encoding.
+
+    Arguments:
+    backend  - a backend name ('icu', 'syslocale', or 'codepoint'), or
+               a backend module such as collate.icu; None to keep the
+               current default backend.
+    locale   - e.g. 'en_US', or None for the system locale.
+    encoding - e.g. 'utf-8', or None for the system locale encoding.
+
+    Returns:
+    The new default Collator instance, or None if no collator could be
+    created (the named backend cannot be imported, or it rejects every
+    candidate locale); if None is returned, the existing default
+    Collator and backend are left intact.
+
+    """
+    global collator
+    global default
+    if backend is None:
+        backend = default
+    elif not hasattr(backend, "Collator"):
+        # A backend given by name, as documented: resolve it to the
+        # collate.<name> module. The non-empty fromlist makes
+        # __import__ return the submodule rather than the package.
+        try:
+            backend = __import__("collate." + backend, {}, {}, ["Collator"])
+        except ImportError:
+            return None
+    chosen = None
+    # localelist() supplies the candidate locales to try, in order.
+    for candidate in collate._locale.localelist(locale):
+        name, codec = collate._locale.getpair(candidate, encoding)
+        try:
+            chosen = backend.Collator(name, codec)
+        except collate.errors.InvalidLocaleError:
+            # This backend rejects the candidate; try the next one.
+            continue
+        break
+    if chosen is not None:
+        # Only commit the new state once a collator actually exists.
+        collator = chosen
+        default = backend
+    return chosen
-def get_locale():
-    return collator.locale
-def set_backend(backend):
-    pass
-def key(string):
-    return collator.key(string)
+def key(string):
+    """Return a sort key for the string from the default collator.
+
+    Treat the result as an opaque value: it is only meaningful when
+    compared to other sort keys produced by the same collator.
+
+    This is the same as collate.collator.sortemekey(string).
+    """
+    make_key = collator.sortemekey
+    return make_key(string)
+
-def cmp(string1, string2):
-    return collator.cmp(string1, string2)
+def cmp(a, b):
+    """Compare two strings using the default collator.
+
+    The result is negative if a < b, zero if a == b, and positive
+    if a > b.
+
+    This is the same as collate.collator.cmp(a, b).
+    """
+    compare = collator.cmp
+    return compare(a, b)
+# Create the default collator at import time, using the default
+# backend with the system locale and encoding.
+set_collator()
-set_locale(locale.getdefaultlocale()[0])
+# set_collator() reports failure by leaving collator as None rather
+# than raising; key() and cmp() cannot work without a collator, so
+# fail the import here instead.
+if collator is None:
+ raise collate.errors.InvalidLocaleError("C")