eeee418070483d0797b63b639d1aa7bca219442e
[python-collate.git] / collate / icu / __init__.py
1 """ICU-based collation.
2
3 This collation backend uses the International Components for Unicode
4 library to provide accurate and high-performance collation. It
5 supports multiple locales and advanced sorting capabilities.
6
7 Use this collation backend if possible; it's by far the best.
8
9 Avoid this backend if...
10 - ICU is not available for your system.
11
12 """
13
14 import collate.icu._ucol
15 import collate._abcollator
16 import collate._locale
17 import collate.errors
18
class Collator(collate._abcollator.Collator):
    """ICU-based collation."""

    def __init__(self, locale, encoding=None):
        """Create an ICU collator for a locale.

        The 'locale' and 'encoding' arguments are first resolved with
        collate._locale.getpair(), and the resulting locale is handed
        to the underlying collate.icu._ucol.Collator.

        After construction, the 'locale' attribute holds the locale
        reported by the underlying collator and the 'encoding'
        attribute holds the result of collate._locale.encoding().

        Raises collate.errors.InvalidLocaleError if the underlying
        collator reports that it used default information and the
        requested locale is not "C".
        """
        locale, encoding = collate._locale.getpair(locale, encoding)
        self._collator = collate.icu._ucol.Collator(locale)
        self.locale = self._collator.locale
        self.encoding = collate._locale.encoding(encoding)
        # NOTE(review): "C" is exempt from this check, presumably
        # because ICU has no specific data for it and always falls back
        # to its defaults -- confirm against the _ucol extension.
        if self._collator.used_default_information and locale != "C":
            raise collate.errors.InvalidLocaleError(locale)

    def key(self, string):
        """Sort key for a string.

        If the string is a str instance, it is decoded to a unicode
        instance according to the 'encoding' attribute of the
        Collator. Undecodable bytes are replaced rather than raising
        an error.
        """
        if isinstance(string, str):
            string = string.decode(self.encoding, 'replace')
        return self._collator.key(string)

    def cmp(self, a, b):
        """Return negative if a < b, zero if a == b, positive if a > b.

        If strs rather than unicodes are passed in, they are first
        decoded according to the 'encoding' attribute of the Collator.
        Undecodable bytes are replaced rather than raising an error.
        """
        # Each operand is decoded independently from its own value.
        if isinstance(a, str):
            a = a.decode(self.encoding, 'replace')
        if isinstance(b, str):
            b = b.decode(self.encoding, 'replace')
        return self._collator.cmp(a, b)