"""C library locale-based collation.

This collation backend uses the system's C library to sort strings. It
is fast and almost always available, but may sort strings outside of
the user's native locale incorrectly or confusingly (for example,
en_US tends to ignore hiragana characters; ja_JP does not case-fold).

Since the C library only supports one locale active at a time per
process, instantiating a Collator from this module will affect the
locale of all previous collators and anything else using the system
locale.

Use this collation backend if...
 - You are on a system without ICU.

Avoid this backend if...
 - ICU is available for the current locale.
 - You are sorting strings from alphabets outside the primary locale.
 - You need to support collating multiple locales at once.
 - You need the same results across multiple platforms.
"""
# Public names exported by ``from <this module> import *``.
__all__ = ["Collator"]
import locale

import collate._abcollator
import collate._locale
class Collator(collate._abcollator.Collator):
    """C library locale-based collation.

    Sort keys are produced by ``locale.strxfrm``, which follows the
    process-wide ``LC_COLLATE`` setting. Constructing an instance calls
    ``locale.setlocale(locale.LC_COLLATE, ...)``, so it changes collation
    for everything in the process that relies on that setting, not only
    for this object.

    Attributes set by ``__init__``:
        locale -- first element of ``locale.getlocale(locale.LC_COLLATE)``
            as reported after the locale has been switched.
        encoding -- value returned by ``collate._locale.encoding`` for the
            resolved encoding.
    """

    def __init__(self, locale_code, encoding=None):
        """Activate ``locale_code`` for LC_COLLATE and record the result.

        Arguments:
            locale_code -- locale name to activate.
            encoding -- optional encoding name. Both values are passed
                through ``collate._locale.getpair`` before use
                (presumably to fill in defaults -- confirm against that
                helper).

        Raises:
            collate.errors.InvalidLocaleError -- if the C library rejects
                the ``"<locale_code>.<encoding>"`` locale string.
        """
        # Pass the caller's locale code to the base class; the bare name
        # ``locale`` here would be the standard-library module.
        super(Collator, self).__init__(locale_code, encoding)
        locale_code, encoding = collate._locale.getpair(locale_code, encoding)
        setlocale = locale_code + "." + encoding
        try:
            # Only this call is guarded: it is the one that raises
            # locale.Error for an unsupported locale string.
            locale.setlocale(locale.LC_COLLATE, setlocale)
        except locale.Error:
            raise collate.errors.InvalidLocaleError(setlocale)
        # Store what the C library actually selected, which may be spelled
        # differently from the string that was requested.
        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
        self.encoding = collate._locale.encoding(encoding)

    def key(self, string):
        """Sort key for a string.

        If string is a unicode instance that cannot be processed by
        the system locale library, it is first encoded according to
        the 'encoding' attribute of the Collator.
        """
        try:
            return locale.strxfrm(string)
        except UnicodeEncodeError:
            # Fall back to an explicitly encoded byte string using the
            # collator's own encoding.
            return locale.strxfrm(string.encode(self.encoding))