Updates for new hosting.
[python-collate.git] / collate / __init__.py
1 """collate - Sort strings intelligently.
2
3 This module provides tools to sort strings in a 'human-expected' order.
4
5 Because human expectations are fuzzy and often self-contradictory, the
6 sort order is not guaranteed to be stable between versions of this
7 module (rather the opposite - the primary reason to update it will
8 probably be changed sort results).
9
10 If available, this module uses the ICU localization library.
11 Otherwise, it uses the system's locale database (and produces
12 significantly worse results).
13
14 This module tries very hard not to fail loudly. It tends to ignore
15 most Unicode recoding errors, and will eventually fall back to the C
16 locale or raw codepoint-based collation. If you would like loud
17 failure, you can use the collate.strings module and the individual
18 Collators directly.
19
20 Trivial Use:
21 ------------
22 strings = read_strings(...)
23 strings.sort(key=collate.key)
24
25 Attributes:
26 -----------
27 default - The default collation backend. If available, this is
28 collate.icu; otherwise, it is collate.syslocale. In special
29 situations, it may be collate.codepoint.
30
31 collator - The default collator. This is the collator of the default
32 backend instantiated with the default system locale and encoding.
33
34 """
35
# Names exported by "from collate import *".
__all__ = [
    "collator",
    "set_collator",
    "cmp",
    "key",
    "default",
    "VERSION",
    "VERSION_STRING",
]
38
39 import collate.errors
40 import collate._locale
41
42 try:
43 import collate.codepoint as default
44 except ImportError:
45 pass
46 try:
47 import collate.syslocale as default
48 except ImportError:
49 pass
50 try:
51 import collate.icu as default
52 except ImportError:
53 pass
54
# Module version as a tuple of integers, and the same version rendered
# as a dotted string.
VERSION = (0, 2)
VERSION_STRING = ".".join(str(part) for part in VERSION)

# The default Collator instance; assigned by set_collator().
collator = None
59
def set_collator(backend=None, locale=None, encoding=None):
    """Set the default collation backend.

    This function tries very hard not to fail; the resulting Collator
    may not have the locale or encoding you specified (at the very
    least, they will be normalized). Remember to check
    collator.locale and collator.encoding.

    Arguments:
    backend - a backend module (e.g. collate.icu), or one of the names
              'icu', 'syslocale', or 'codepoint'; None to not change.
    locale - e.g. 'en_US', or None for the system locale.
    encoding - e.g. 'utf-8', or None for the system locale encoding

    Returns:
    The new default Collator instance, or None if no collator could be
    created (this includes a backend name that is unknown or whose
    module cannot be imported); if None is returned, the existing
    default Collator and default backend are left intact.

    """
    global collator
    global default

    if backend is None:
        backend = default
    elif isinstance(backend, str):
        # The documented string names must be resolved to the backend
        # module first; calling .Collator on the bare string would
        # raise AttributeError.
        if backend not in ("icu", "syslocale", "codepoint"):
            return None
        import importlib
        try:
            backend = importlib.import_module("collate." + backend)
        except ImportError:
            # Same situation as the optional imports at module level:
            # the backend is simply not available here.
            return None

    # Try each candidate locale in order and keep the first one the
    # backend accepts.
    for candidate in collate._locale.localelist(locale):
        name, codec = collate._locale.getpair(candidate, encoding)
        try:
            created = backend.Collator(name, codec)
        except collate.errors.InvalidLocaleError:
            # This candidate was rejected; move on to the next one.
            continue
        # Only commit the new globals once a Collator actually exists.
        collator = created
        default = backend
        return created
    return None
99
def key(string):
    """Return a sort key for the string from the default collator.

    Treat the result as an opaque value: it is only meaningful when
    compared against other sort keys produced by the same collator.

    Equivalent to collate.collator.sortemekey(string).
    """
    make_key = collator.sortemekey
    return make_key(string)
110
def cmp(a, b):
    """Compare two strings with the default collator.

    The result is negative if a < b, zero if a == b, and positive
    if a > b.

    Equivalent to collate.collator.cmp(a, b).
    """
    compare = collator.cmp
    return compare(a, b)
117
# Create the default collator at import time. set_collator() returns
# None exactly when no Collator could be created, in which case the
# module-level collator is still None and the import fails.
if set_collator() is None:
    raise collate.errors.InvalidLocaleError("C")