Cleanup in preparation for release. Add docstrings, remove basically empty _constants...
[python-collate.git] / collate / __init__.py
1 """collate - Sort strings intelligently.
2
3 This module provides tools to sort strings in a 'human-expected' order.
4
5 Because human expectations are fuzzy and often self-contradictory, the
6 sort order is not guaranteed to be stable between versions of this
7 module (rather the opposite - the primary reason to update it will
8 probably be changed sort results).
9
10 If available, this module uses the ICU localization library.
11 Otherwise, it uses the system's locale database (and produces
12 significantly worse results).
13
14 Trivial Use:
15 ------------
16 strings = read_strings(...)
17 strings.sort(key=collate.key)
18
19 Attributes:
20 -----------
21 default - The default collation backend module. If available, this is
22 collate.icu; otherwise, it is collate.syslocale. In special
23 situations, it may be collate.codepoint.
24
25 collator - The default collator. This is the collator of the default
26 backend instantiated with the default system locale and encoding.
27
28 """
29
# Public names exported by "from collate import *".
__all__ = [
    "collator",
    "set_collator",
    "cmp",
    "key",
    "default",
    "VERSION",
    "VERSION_STRING",
]
32
33 import collate.errors
34 import collate._locale
35
# Select the default backend module.  Every import that succeeds rebinds
# the name `default`, so the last backend that can be imported wins:
# icu is preferred over syslocale, which is preferred over codepoint.
# A backend that fails to import is skipped silently.
try:
    import collate.codepoint as default
except ImportError:
    pass
try:
    import collate.syslocale as default
except ImportError:
    pass
try:
    import collate.icu as default
except ImportError:
    pass
48
# Release number as a tuple, and the same number as a dotted string.
VERSION = (0, 1)
VERSION_STRING = ".".join(str(part) for part in VERSION)

# The default Collator instance; assigned by set_collator().
collator = None
53
def set_collator(backend=None, locale=None, encoding=None):
    """Set the default collation backend.

    This function tries very hard not to fail; the resulting Collator
    may not have the locale or encoding you specified (at the very
    least, they will be normalized). Remember to check
    collator.locale and collator.encoding.

    Arguments:
    backend  - a backend module (e.g. collate.icu), or one of the names
               'icu', 'syslocale', or 'codepoint'; None to not change.
    locale   - e.g. 'en_US', or None for the system locale.
    encoding - e.g. 'utf-8', or None for the system locale encoding.

    Returns:
    The new default Collator instance, or None if no collator could be
    created (including when a named backend cannot be imported); if
    None is returned, the existing default Collator and backend are
    left intact.

    """
    global collator
    global default

    if backend is None:
        backend = default
    elif not hasattr(backend, "Collator"):
        # The documented interface takes a backend *name*; resolve it to
        # the collate.<name> submodule.  Anything that already exposes a
        # Collator (i.e. a backend module) is used as-is.
        import importlib
        try:
            backend = importlib.import_module("collate.%s" % (backend,))
        except ImportError:
            return None
        if not hasattr(backend, "Collator"):
            # Importable, but not a collation backend.
            return None

    chosen = None
    # Try each candidate locale in turn and keep the first one the
    # backend accepts.
    for candidate in collate._locale.localelist(locale):
        name, encoding_ = collate._locale.getpair(candidate, encoding)
        try:
            chosen = backend.Collator(name, encoding_)
        except collate.errors.InvalidLocaleError:
            continue
        break

    if chosen is not None:
        # Only replace the module-level defaults on success.
        collator = chosen
        default = backend
    return chosen
93
def key(string):
    """Build a sort key for the string using the default collator.

    Treat the result as an opaque value: it is only meaningful when
    compared to other sort keys produced by the same collator.

    Equivalent to collate.collator.sortemekey(string).
    """
    make_key = collator.sortemekey
    return make_key(string)
104
def cmp(a, b):
    """Compare two strings using the default collator.

    The result is negative if a < b, zero if a == b, and positive if
    a > b.

    Equivalent to collate.collator.cmp(a, b).
    """
    compare = collator.cmp
    return compare(a, b)
111
# Create the initial default collator at import time.  set_collator()
# returns None exactly when it could not build one, in which case the
# module cannot work and the import fails.
if set_collator() is None:
    raise collate.errors.InvalidLocaleError("C")