"""C library locale-based collation.

This collation backend uses the system's C library to sort strings. It
is fast and almost always available, but may sort strings outside of
the user's native locale incorrectly or confusingly (for example,
en_US tends to ignore hiragana characters; ja_JP does not case-fold
Latin characters).

Since the C library only supports one locale active at a time per
process, instantiating a Collator from this module will affect the
locale of all previous collators and anything else using the system
locale.

Use this collation backend if...
 - You are on a system without ICU or UCA datafiles for the locale,
   and DUCET results are not acceptable.

Avoid this backend if...
 - ICU or UCA support is available for the current locale.
 - You are sorting strings from alphabets outside the primary locale.
 - You need to support collating multiple locales at once.
"""
27 import collate
._abcollator
28 import collate
._locale
class Collator(collate._abcollator.Collator):
    """C library locale-based collation.

    Instantiating this class calls ``locale.setlocale`` for
    ``LC_COLLATE``, which changes the collation locale of the whole
    process, not just of this object.

    Attributes:
        locale: Language code reported by ``locale.getlocale`` for
            ``LC_COLLATE`` after the requested locale was installed.
        encoding: Value returned by ``collate._locale.encoding`` for the
            requested encoding; used to convert between byte strings
            and text in ``key`` and ``cmp``.
    """

    def __init__(self, locale_code, encoding=None):
        """Install ``locale_code`` as the process collation locale.

        Args:
            locale_code: Locale name handed to ``locale.setlocale``.
            encoding: Passed to ``collate._locale.encoding``; presumably
                ``None`` selects a default there (TODO confirm against
                that helper).

        Raises:
            collate.errors.InvalidLocaleError: If the C library rejects
                ``locale_code``.
        """
        try:
            locale.setlocale(locale.LC_COLLATE, locale_code)
        except locale.Error:
            # Translate the stdlib error into the package's own
            # exception type so callers only deal with collate errors.
            raise collate.errors.InvalidLocaleError(locale_code)
        # Store what the C library actually selected, which may be a
        # normalized form of the requested code.
        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
        self.encoding = collate._locale.encoding(encoding)

    def key(self, string):
        """Sort key for a string.

        If string is a unicode instance that cannot be processed by
        the system locale library, it is first encoded according to
        the 'encoding' attribute of the Collator.
        """
        try:
            return locale.strxfrm(string)
        except UnicodeEncodeError:
            # Fall back to an explicit encode; "replace" substitutes
            # unencodable characters instead of raising again.
            return locale.strxfrm(string.encode(self.encoding, "replace"))

    def cmp(self, a, b):
        """Return negative if a < b, zero if a == b, positive if a > b.

        If byte strings rather than text strings are passed in, they
        are first decoded according to the 'encoding' attribute of the
        Collator.
        """
        # ``bytes`` is an alias of ``str`` on Python 2.6+, so this is the
        # same check there; on Python 3 it selects the only type that
        # actually has a ``decode`` method.
        if isinstance(a, bytes):
            a = a.decode(self.encoding, "replace")
        if isinstance(b, bytes):
            b = b.decode(self.encoding, "replace")
        return locale.strcoll(a, b)