Updates for new hosting.
[python-collate.git] / collate / syslocale.py
1 """C library locale-based collation.
2
3 This collation backend uses the system's C library to sort strings. It
4 is fast and almost always available, but may sort strings outside of
5 the user's native locale incorrectly or confusingly (for example,
6 en_US tends to ignore hiragana characters; ja_JP does not case-fold
7 Latin characters).
8
9 Since the C library only supports one locale active at a time per
10 process, instantiating a Collator from this module will affect the
11 locale of all previous collators and anything else using the system
12 locale information.
13
14 Use this collation backend if...
15 - You are on a system without ICU.
16
17 Avoid this backend if...
18 - ICU is available for the current locale.
19 - You are sorting strings from alphabets outside the primary locale.
20 - You need to support collating multiple locales at once.
21 - You need the same results across multiple platforms.
22
23 """
24
25 __all__ = ["Collator"]
26
27 import locale
28
29 import collate.errors
30 import collate._abcollator
31 import collate._locale
32
class Collator(collate._abcollator.Collator):
    """C library locale-based collation.

    Sort keys are produced by the C library's ``strxfrm`` under the
    process-wide ``LC_COLLATE`` locale.  Creating an instance changes
    that locale for the whole process, so it also affects previously
    created collators and any other code relying on ``LC_COLLATE``.

    Attributes set by ``__init__``:
      locale   -- language code reported by ``locale.getlocale`` after
                  the locale has been set.
      encoding -- result of ``collate._locale.encoding(encoding)``; used
                  by ``key`` to encode strings ``strxfrm`` rejects.
    """

    def __init__(self, locale_code, encoding=None):
        """Set the process-wide LC_COLLATE locale for this collator.

        locale_code -- locale identifier requested by the caller.
        encoding    -- optional encoding name; it is resolved together
                       with locale_code by ``collate._locale.getpair``
                       (presumably filling in a default when None --
                       verify against that helper).

        Raises collate.errors.InvalidLocaleError if the C library
        rejects the resulting "<locale_code>.<encoding>" locale name.
        """
        # Pass the caller's locale code to the base class; the name
        # 'locale' here refers to the imported stdlib module.
        super(Collator, self).__init__(locale_code, encoding)
        locale_code, encoding = collate._locale.getpair(locale_code, encoding)
        locale_name = locale_code + "." + encoding
        try:
            # Only setlocale can raise locale.Error, so keep the try
            # body limited to that call.
            locale.setlocale(locale.LC_COLLATE, locale_name)
        except locale.Error:
            raise collate.errors.InvalidLocaleError(locale_name)
        # Record what the C library actually selected, which may differ
        # in spelling from what was requested.
        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
        self.encoding = collate._locale.encoding(encoding)

    def key(self, string):
        """Sort key for a string.

        If string is a unicode instance that cannot be processed by
        the system locale library, it is first encoded according to
        the 'encoding' attribute of the Collator.
        """
        try:
            return locale.strxfrm(string)
        except UnicodeError:
            # Retry with the string encoded to bytes in the collator's
            # encoding.
            return locale.strxfrm(string.encode(self.encoding))