1 """collate - Sort strings intelligently.
3 This module provides tools to sort strings in a 'human-expected' order.
5 Because human expectations are fuzzy and often self-contradictory, the
6 sort order is not guaranteed to be stable between versions of this
7 module (rather the opposite - the primary reason to update it will
8 probably be changed sort results).
10 If available, this module uses the ICU localization library.
11 Otherwise, it uses the system's locale database (and produces
12 significantly worse results).
14 This module tries very hard not to fail loudly. It tends to ignore
15 most Unicode recoding errors, and will eventually fall back to the C
16 locale or raw codepoint-based collation. If you would like loud
17 failure, you can use the collate.strings module and the individual
22 strings = read_strings(...)
23 strings.sort(key=collate.key)
27 default - The default collation backend. If available, this is
28 collate.icu; otherwise, it is collate.syslocale. In special
29 situations, it may be collate.codepoint.
31 collator - The default collator. This is the collator of the default
32 backend instantiated with the default system locale and encoding.
36 __all__
= ["collator", "set_collator", "cmp", "key", "default",
37 "VERSION", "VERSION_STRING"]
40 import collate
._locale
43 import collate
.codepoint
as default
47 import collate
.syslocale
as default
51 import collate
.icu
as default
56 VERSION_STRING
= ".".join(map(str, VERSION
))
60 def set_collator(backend
=None, locale
=None, encoding
=None):
61 """Set the default collation backend.
63 This function tries very hard not to fail; the resulting Collator
64 may not have the locale or encoding you specified (at the very
65 least, they will be normalized). Remember to check
66 collator.locale and collator.encoding.
69 backend - 'icu', 'syslocale', or 'codepoint'; None to not change.
70 locale - e.g. 'en_US', or None for the system locale.
71 encoding - e.g. 'utf-8', or None for the system locale encoding
74 The new default Collator instance, or None if no collator could be
75 created; if None is returned, the existing default Collator is
84 locales
= collate
._locale
.localelist(locale
)
86 for locale
in locales
:
87 locale
, encoding_
= collate
._locale
.getpair(locale
, encoding
)
89 possible
= backend
.Collator(locale
, encoding_
)
90 except collate
.errors
.InvalidLocaleError
:
94 if possible
is not None:
101 """Return a good sorting key for the string.
103 The sort key should be considered an opaque value which is only
104 meaningful when compared to other sort keys from the same
107 This is the same as collate.collator.sortemekey(string).
109 return collator
.sortemekey(string
)
112 """Return negative if a < b, zero if a == b, positive if a > b.
114 This is the same as collate.collator.cmp(a, b).
116 return collator
.cmp(a
, b
)
121 raise collate
.errors
.InvalidLocaleError("C")