projects
/
python-collate.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
'Advanced' sorteme functions.
[python-collate.git]
/
collate
/
syslocale.py
diff --git
a/collate/syslocale.py
b/collate/syslocale.py
index
b9f9b92
..
e2aeed9
100644
(file)
--- a/
collate/syslocale.py
+++ b/
collate/syslocale.py
@@
-12,16
+12,18
@@
locale of all previous collators and anything else using the system
locale information.
Use this collation backend if...
locale information.
Use this collation backend if...
- - You are on a system without ICU or UCA datafiles for the locale,
- and DUCET results are not acceptable.
+ - You are on a system without ICU.
Avoid this backend if...
Avoid this backend if...
- - ICU or UCA support is available for the current locale.
+ - ICU is available for the current locale.
- You are sorting strings from alphabets outside the primary locale.
- You need to support collating multiple locales at once.
- You are sorting strings from alphabets outside the primary locale.
- You need to support collating multiple locales at once.
+ - You need the same results across multiple platforms.
+
"""
import locale
"""
import locale
+import re
import collate.errors
import collate._abcollator
import collate.errors
import collate._abcollator
@@
-31,10
+33,12
@@
class Collator(collate._abcollator.Collator):
"""C library locale-based collation."""
def __init__(self, locale_code, encoding=None):
"""C library locale-based collation."""
def __init__(self, locale_code, encoding=None):
+ locale_code, encoding = collate._locale.getpair(locale_code, encoding)
try:
try:
- locale.setlocale(locale.LC_COLLATE, locale_code)
+ setlocale = locale_code + "." + encoding
+ locale.setlocale(locale.LC_COLLATE, setlocale)
except locale.Error:
except locale.Error:
- raise collate.errors.InvalidLocaleError(locale_code)
+ raise collate.errors.InvalidLocaleError(setlocale)
self.locale = locale.getlocale(locale.LC_COLLATE)[0]
self.encoding = collate._locale.encoding(encoding)
self.locale = locale.getlocale(locale.LC_COLLATE)[0]
self.encoding = collate._locale.encoding(encoding)
@@
-56,10
+60,16
@@
class Collator(collate._abcollator.Collator):
If strs rather than unicodes are passed in, they are first
decoded according to the 'encoding' attribute of the Collator.
"""
If strs rather than unicodes are passed in, they are first
decoded according to the 'encoding' attribute of the Collator.
"""
-
if isinstance(a, str):
a = a.decode(self.encoding, "replace")
if isinstance(b, str):
b = b.decode(self.encoding, "replace")
return locale.strcoll(a, b)
if isinstance(a, str):
a = a.decode(self.encoding, "replace")
if isinstance(b, str):
b = b.decode(self.encoding, "replace")
return locale.strcoll(a, b)
+ def words(self, string, sep=re.compile(r"\W+", re.UNICODE)):
+ """Split the string into separate words.
+
+ This split is done using the locale's notion of a word boundary.
+ """
+ return re.split(sep, string)
+