From 22570a3c13079d27cfb60110f631b164dbd8b831 Mon Sep 17 00:00:00 2001
From: Joe Wreschnig
Date: Wed, 10 Feb 2010 23:26:48 -0800
Subject: [PATCH] Test script. Add NAME to backends. Some smarter system locale handling.

---
Review notes (ignored by git-am; not part of the commit message):
 * collate/__init__.py: _get_collator() now instantiates the backend it
   is given (it always used the module-level default) and skips unset
   (None) locale names instead of calling .split() on them; None, the
   backend's own default, is still the last candidate tried.
 * collate/__init__.py: set_backend() raises InvalidLocaleError with
   preferred_locale; the name locale_code was undefined in that function.
 * collate/__init__.py: the import-time fallback imports
   collate.syslocale (was collator.syslocale, which is not a package).
 * collate/syslocale/__init__.py: the loop variable no longer shadows the
   locale module (that made every call raise UnboundLocalError), the loop
   variable is what gets passed to setlocale(), self.locale reports the
   LC_COLLATE category that was just set, a None locale_code selects the
   environment's locale, and the fallback encoding is the preferred
   encoding itself rather than locale_code.split(<encoding>).
 * All fixes are one-for-one replacements of added lines, so the hunk
   headers and the diffstat below are unchanged.

 collate/__init__.py           | 55 +++++++++++++++++++++++++----------
 collate/_constants.py         |  1 +
 collate/icu/__init__.py       |  3 ++
 collate/syslocale/__init__.py | 28 +++++++++---------
 collate/uca/__init__.py       |  3 ++
 pysort                        | 35 ++++++++++++++++++++++
 6 files changed, 95 insertions(+), 30 deletions(-)
 create mode 100644 collate/_constants.py
 create mode 100755 pysort

diff --git a/collate/__init__.py b/collate/__init__.py
index 1d48178..6ce1114 100644
--- a/collate/__init__.py
+++ b/collate/__init__.py
@@ -1,5 +1,8 @@
 import locale
 
+if locale.getlocale()[0] is None:
+    locale.setlocale(locale.LC_COLLATE, '')
+
 import collate.errors
 
 try:
@@ -10,30 +13,43 @@ except ImportError:
     pass
 
 collator = None
+preferred_locale = None
+
+def _get_collator(backend, locale_code):
+    # Unset (None) locale names are skipped, not split; None itself (the
+    # backend's default) is always the last candidate tried.
+    names = [locale_code,
+             locale.getlocale(locale.LC_COLLATE)[0],
+             locale.getdefaultlocale()[0]]
+    codes = [c for name in names if name for c in (name, name.split("_")[0])]
+    for code in codes + [None]:
+        try:
+            return backend.Collator(code)
+        except collate.errors.InvalidLocaleError:
+            pass
 
 def set_locale(locale_code):
     global collator
+    global preferred_locale
 
+    preferred_locale = locale_code
     if collator is None or collator.locale != locale_code:
-        for code in [locale_code,
-                     locale_code.split("_")[0],
-                     locale.getdefaultlocale()[0],
-                     locale.getdefaultlocale()[0].split("_")[0],
-                     None]:
-            try:
-                collator = default.Collator(code)
-            except collate.errors.InvalidLocaleError:
-                pass
-            else:
-                break
+        c = _get_collator(default, locale_code)
+        if c is not None:
+            collator = c
         else:
             raise collate.errors.InvalidLocaleError(locale_code)
 
-def get_locale():
-    return collator.locale
-
 def set_backend(backend):
-    pass
+    global collator
+    global default
+
+    c = _get_collator(backend, preferred_locale)
+    if c is not None:
+        collator = c
+        default = backend
+    else:
+        raise collate.errors.InvalidLocaleError(preferred_locale)
 
 def key(string):
     return collator.key(string)
@@ -41,4 +57,11 @@ def key(string):
 def cmp(string1, string2):
     return collator.cmp(string1, string2)
 
-set_locale(locale.getdefaultlocale()[0])
+try:
+    set_locale(locale.getlocale()[0])
+except collate.errors.InvalidLocaleError:
+    # There's no way this should fail unless the C locale system is
+    # broken or missing all data.
+    import collate.syslocale
+    set_backend(collate.syslocale)
+    set_locale(locale.getlocale(locale.LC_COLLATE)[0])
diff --git a/collate/_constants.py b/collate/_constants.py
new file mode 100644
index 0000000..eb1786c
--- /dev/null
+++ b/collate/_constants.py
@@ -0,0 +1 @@
+VERSION = 0.1
diff --git a/collate/icu/__init__.py b/collate/icu/__init__.py
index 16ef1e6..1e2ba50 100644
--- a/collate/icu/__init__.py
+++ b/collate/icu/__init__.py
@@ -1,9 +1,12 @@
 import collate.icu._ucol
 import collate._abcollator
 
+NAME = "ICU"
+
 class Collator(collate._abcollator.Collator):
     def __init__(self, locale):
         self._collator = collate.icu._ucol.Collator(locale)
+        self.locale = self._collator.locale
 
     def key(self, string):
         return self._collator.key(string)
diff --git a/collate/syslocale/__init__.py b/collate/syslocale/__init__.py
index 6784a97..43281e2 100644
--- a/collate/syslocale/__init__.py
+++ b/collate/syslocale/__init__.py
@@ -1,25 +1,25 @@
-import sys
 import locale
+
 import collate.errors
 import collate._abcollator
 
 class Collator(collate._abcollator.Collator):
-    def __init__(self, locale_code, strict=False):
+    def __init__(self, locale_code):
+        locale_code = locale_code or ""  # None: use the environment's locale
+        for code in [locale_code, locale.getdefaultlocale()[0]]:
+            try:
+                locale.setlocale(locale.LC_COLLATE, code)
+            except locale.Error:
+                pass
+            else:
+                break
+        else:
+            raise collate.errors.InvalidLocaleError("no locale found")
+        self.locale = locale.getlocale(locale.LC_COLLATE)[0]
         try:
             self.__encoding = locale_code.split(".")[1]
         except IndexError:
-            self.__encoding = locale_code.split(sys.getdefaultencoding())
-        try:
-            locale.setlocale(locale.LC_COLLATE, locale_code)
-        except locale.Error as err:
-            if strict:
-                raise collate.errors.InvalidLocaleError(str(err))
-            try:
-                locale.setlocale(
-                    locale.LC_COLLATE, locale.getdefaultlocale()[0])
-            except locale.Error as err:
-                raise collate.errors.InvalidLocaleError(str(err))
-        self.locale = locale.getdefaultlocale()[0]
+            self.__encoding = locale.getpreferredencoding()
 
     def key(self, string):
         try:
diff --git a/collate/uca/__init__.py b/collate/uca/__init__.py
index ba20b48..228a24c 100644
--- a/collate/uca/__init__.py
+++ b/collate/uca/__init__.py
@@ -2,6 +2,9 @@ import os
 
 import collate.errors
 import collate._abcollator
+import collate._constants
+
+NAME = "Python UCA %g" % collate._constants.VERSION
 
 class Trie(object):
 
diff --git a/pysort b/pysort
new file mode 100755
index 0000000..00da855
--- /dev/null
+++ b/pysort
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import locale
+
+import collate
+
+def main(argv):
+    lines = []
+    encoding = locale.getpreferredencoding()
+
+    if not argv:
+        argv.append("-")
+    for filename in argv:
+        if filename == "-":
+            fileobj = sys.stdin
+        else:
+            fileobj = open(filename, "rU")
+        for line in fileobj:
+            line = line.strip()
+            line = line.decode(encoding, "replace")
+            lines.append(line)
+    lines.sort(key=collate.key)
+
+    for line in lines:
+        print line.encode(encoding, "replace")
+
+if __name__ == "__main__":
+    try:
+        locale.setlocale(locale.LC_ALL, '')
+    except locale.Error:
+        pass
+
+    main(sys.argv[1:])
-- 
2.30.2