-import locale
-
-if locale.getlocale()[0] is None:
- locale.setlocale(locale.LC_COLLATE, '')
-
import collate.errors
+import collate._locale
+try:
+ import collate.codepoint as default
+except ImportError:
+ pass
try:
import collate.syslocale as default
+except ImportError:
+ pass
+try:
import collate.uca as default
+except ImportError:
+ pass
+try:
import collate.icu as default
except ImportError:
pass
collator = None
-preferred_locale = None
-
-def _get_collator(backend, locale_code):
- for code in [locale_code,
- locale_code.split("_")[0],
- locale.getlocale(locale.LC_COLLATE)[0],
- locale.getlocale(locale.LC_COLLATE)[0].split("_")[0],
- locale.getdefaultlocale()[0],
- locale.getdefaultlocale()[0].split("_")[0],
- None]:
- try:
- return default.Collator(code)
- except collate.errors.InvalidLocaleError:
- pass
-def set_locale(locale_code):
- global collator
- global preferred_locale
-
- preferred_locale = locale_code
- if collator is None or collator.locale != locale_code:
- c = _get_collator(default, locale_code)
- if c is not None:
- collator = c
- else:
- raise collate.errors.InvalidLocaleError(locale_code)
-
-def set_backend(backend):
+def set_default(backend=None, locale=None, encoding=None):
+    """Select the module-wide collator used by key() and cmp().
+
+    Arguments:
+    backend - collation backend module exposing a Collator class;
+              defaults to the currently selected backend
+    locale - preferred locale code; system defaults are tried after it
+    encoding - preferred string encoding, passed on to the backend
+
+    Candidate locales are tried in preference order and the first one
+    the backend accepts is kept. If the backend rejects every candidate
+    the current collator and backend are left untouched.
+    """
     global collator
     global default
-    c = _get_collator(backend, preferred_locale)
+    if backend is None:
+        backend = default
+    c = None
+    # Use distinct local names so the 'locale' argument is not rebound.
+    for candidate in collate._locale.localelist(locale):
+        code, encoding_ = collate._locale.getpair(candidate, encoding)
+        try:
+            c = backend.Collator(code, encoding_)
+        except collate.errors.InvalidLocaleError:
+            continue
+        # Candidates are ordered most-preferred first: stop at the first
+        # success so a later fallback cannot override it.
+        break
     if c is not None:
-        collator = c
-        default = backend
-    else:
-        raise collate.errors.InvalidLocaleError(locale_code)
+        collator = c
+        default = backend
def key(string):
return collator.key(string)
-def cmp(string1, string2):
- return collator.cmp(string1, string2)
+def cmp(a, b):
+ return collator.cmp(a, b)
-try:
- set_locale(locale.getlocale()[0])
-except collate.errors.InvalidLocaleError:
- # There's no way this should fail unless the C locale system is
- # fucked or missing all data.
- import collator.syslocale
- set_backend(collator.syslocale)
- set_locale(locale.getlocale(locale.LC_COLLATE)[0])
+set_default()
--- /dev/null
+"""Locale utility routines."""
+
+import sys
+
+try:
+ import locale
+except ImportError:
+ locale = None
+
+try:
+ import codecs
+except ImportError:
+ codecs = None
+
+__all__ = ["localelist"]
+
+def localelist(*locales):
+    """Normalize and return a list of locales, with appended defaults.
+
+    Each non-empty code is normalized (when the locale module is
+    available), stripped of its encoding, and followed by the same code
+    with the territory stripped. Duplicates are dropped, ignoring case;
+    the first occurrence wins, so the caller's locales stay ahead of
+    the appended defaults.
+
+    e.g. on a system with en_US as the default locale,
+
+    localelist('en_GB.utf8', 'de_DE') =>
+        ['en_GB', 'en', 'de_DE', 'de', 'en_US', 'C']
+
+    """
+
+    candidates = list(locales)
+
+    if locale is not None:
+        # Set the locale if it hasn't already been set, but don't fail
+        # if we can't set it for some reason.
+        if locale.getlocale(locale.LC_COLLATE)[0] is None:
+            try:
+                locale.setlocale(locale.LC_COLLATE, '')
+            except locale.Error:
+                pass
+
+        # Throw in the user's specified collation locale, the current
+        # locale, and the default locale, for free.
+        candidates.append(locale.getlocale(locale.LC_COLLATE)[0])
+        candidates.append(locale.getlocale()[0])
+        candidates.append(locale.getdefaultlocale()[0])
+
+    # Always offer POSIX as the last resort, even when the locale
+    # module is missing, so the result is never empty.
+    candidates.append("C")
+
+    # Don't put the same locale in the return list more than once.
+    seen = set()
+    retlist = []
+
+    for code in candidates:
+        if not code:
+            # None (an unset locale) or an empty string: nothing to add.
+            continue
+        if locale is not None:
+            code = locale.normalize(code)
+        # Strip off encoding if present.
+        code = code.split(".")[0]
+        # Offer the full code first, then the code without its territory.
+        for variant in (code, code.split("_")[0]):
+            folded = variant.lower()
+            if folded not in seen:
+                seen.add(folded)
+                retlist.append(variant)
+
+    return retlist
+
+def encoding(preferred=None):
+    """Try to find an optimal encoding.
+
+    Arguments:
+    preferred - use this encoding if possible
+
+    Otherwise, the locale encoding or the Python system encoding are
+    used. The return value is always an encoding name, never a
+    (language, encoding) tuple.
+    """
+    # can't use any codecs, use the system one (ascii).
+    if codecs is None:
+        return sys.getdefaultencoding()
+
+    # if preferred is a valid codec, use it.
+    if preferred is not None:
+        try:
+            return codecs.lookup(preferred).name
+        except (LookupError, AttributeError):
+            # unknown codec, or a lookup() result without a .name
+            # attribute; fall through to the locale-derived choices.
+            pass
+
+    # preferred is bad and can't get it from locale.
+    if locale is None:
+        return sys.getdefaultencoding()
+
+    # try to get it from the locale, if not there, set it and try again.
+    fromlocale = locale.getlocale(locale.LC_COLLATE)[1]
+    if fromlocale is not None:
+        return fromlocale
+    try:
+        locale.setlocale(locale.LC_COLLATE, '')
+    except locale.Error:
+        pass
+    else:
+        # getlocale() returns a (language, encoding) pair; only the
+        # encoding half is wanted here.
+        fromlocale = locale.getlocale(locale.LC_COLLATE)[1]
+        if fromlocale is not None:
+            return fromlocale
+
+    # okay, LC_COLLATE isn't set, maybe the generic locale is.
+    fromlocale = locale.getlocale()[1]
+    if fromlocale is not None:
+        return fromlocale
+
+    # but we won't reset the generic locale if it isn't, that'd be
+    # rude.
+
+    # if the locale can't even give us a simple encoding, go back
+    # to the system one, and give up.
+    return locale.getpreferredencoding() or sys.getdefaultencoding()
+
+def getpair(locale_, encoding_):
+    """Split a locale code into a (locale, encoding) pair.
+
+    Arguments:
+    locale_ - locale code, optionally with a ".encoding" suffix, or None
+    encoding_ - explicit encoding; when given, it takes precedence
+                over any encoding embedded in locale_
+
+    Returns the locale without its encoding suffix (None is passed
+    through unchanged) and the encoding chosen by encoding().
+    """
+    # locale_ may be None (e.g. a backend built with its defaults);
+    # a membership test on None would raise TypeError.
+    if locale_ is not None and "." in locale_:
+        if encoding_ is None:
+            locale_, encoding_ = locale_.rsplit(".", 1)
+        else:
+            # The explicit encoding wins; just drop the embedded one.
+            locale_ = locale_.split(".", 1)[0]
+    return locale_, encoding(encoding_)
+
--- /dev/null
+"""Codepoint-based collation.
+
+This collation backend sorts using only the basic codepoint order. It
+is primarily intended to be used as a baseline and example for other
+collation backends.
+
+Use this collation backend if...
+ - You are writing tests for pycollate.
+ - You are writing specialized Unicode software.
+ - You are on a system with no locale module.
+
+Avoid this backend if...
+ - You are writing a normal program for a normal runtime environment.
+ - You are sorting strings to show normal humans.
+
+"""
+
+import collate._abcollator
+import collate._locale
+
+# Must name the class actually defined below, otherwise
+# "from collate.codepoint import *" fails with AttributeError.
+__all__ = ["Collator"]
+
+class Collator(collate._abcollator.Collator):
+    """Codepoint-based collation.
+
+    Arguments
+    locale - all parts but encoding ignored, always 'C'
+    encoding - try to use this string encoding
+    """
+
+    def __init__(self, locale=None, encoding=None):
+        # Only the encoding half of the pair is kept; the locale half
+        # is discarded because this backend always reports 'C'.
+        dummy, self.encoding = collate._locale.getpair(locale, encoding)
+        self.locale = "C"
+
+    def key(self, string):
+        """Sort key for a string.
+
+        If string is a str instance, it is first decoded according to
+        the 'encoding' attribute of the Collator. Other values are
+        returned unchanged.
+        """
+        if isinstance(string, str):
+            # Undecodable input is replaced rather than raising.
+            string = string.decode(self.encoding, 'replace')
+        return string
NAME = "ICU"
class Collator(collate._abcollator.Collator):
- def __init__(self, locale):
+ def __init__(self, locale, encoding):
self._collator = collate.icu._ucol.Collator(locale)
self.locale = self._collator.locale
import collate._abcollator
class Collator(collate._abcollator.Collator):
- def __init__(self, locale_code):
+ def __init__(self, locale_code, encoding=None):
default = locale.getdefaultlocale()[0]
for locale in [locale_code, default]:
try:
class Collator(collate._abcollator.Collator):
- def __init__(self, locale_code, strict=False):
+ def __init__(self, locale_code, encoding=None):
self.__table = Trie()
self.locale = locale_code