Word-splitting.
[python-collate.git] / collate / _locale.py
1 """Locale utility routines."""
2
3 import sys
4
5 try:
6 import locale
7 except ImportError:
8 locale = None
9
10 try:
11 import codecs
12 except ImportError:
13 codecs = None
14
15 __all__ = ["localelist"]
16
def localelist(*locales):
    """Normalize and return a list of locales, with appended defaults.

    Each requested locale is normalized with locale.normalize(), its
    encoding suffix is dropped, and it is followed by its bare language
    code.  When the locale module is available, the current LC_COLLATE
    locale, the current generic locale, the user's default locale and
    "C" are appended after the requested ones.  Empty or None entries
    are skipped, and every code appears only once (compared
    case-insensitively).

    e.g. on a system with en_US as the default locale,

    localelist('en_GB.utf8', 'de_DE') =>
    ['en_GB', 'en', 'de_DE', 'de', 'en_US', 'C']

    Side effect: if LC_COLLATE has not been set yet, this tries to set
    it from the environment (failures are ignored).
    """

    candidates = list(locales)

    if locale is not None:
        # Set the locale if it hasn't already been set, but don't fail
        # if we can't set it for some reason.
        try:
            collate_unset = locale.getlocale(locale.LC_COLLATE)[0] is None
        except ValueError:
            # An unparseable locale name still means *something* is set.
            collate_unset = False
        if collate_unset:
            try:
                locale.setlocale(locale.LC_COLLATE, '')
            except locale.Error:
                pass

        # Throw in the user's specified collation locale, the current
        # locale, the default locale, and POSIX, for free.
        lookups = (
            lambda: locale.getlocale(locale.LC_COLLATE),
            locale.getlocale,
            # Deprecated in newer Pythons; tolerate it being absent.
            getattr(locale, "getdefaultlocale", None),
        )
        for lookup in lookups:
            if lookup is None:
                continue
            try:
                candidates.append(lookup()[0])
            except ValueError:
                # Raised for locale names Python cannot parse; these
                # defaults are best-effort, so just skip this one.
                continue
        candidates.append("C")

    # Don't put the same locale in the return list more than once.
    seen = set()
    result = []

    for code in candidates:
        if not code:
            continue
        if locale is not None:
            code = locale.normalize(code)
        # Strip off encoding if present.
        full = code.split(".")[0]
        # Offer the full code first, then the territory-less language.
        for variant in (full, full.split("_")[0]):
            key = variant.lower()
            if key not in seen:
                seen.add(key)
                result.append(variant)

    return result
66
def encoding(preferred=None):
    """Try to find an optimal encoding.

    Arguments:
    preferred - use this encoding if possible

    Otherwise, the locale encoding or the Python system encoding are
    used.

    Returns an encoding name as a string.  A usable `preferred` is
    returned under its canonical codec name; names taken from the
    locale are returned as the locale module reports them.

    Side effect: if LC_COLLATE has no encoding yet, this tries to set
    LC_COLLATE from the environment (failures are ignored).
    """
    # can't use any codecs, use the system one (ascii).
    if codecs is None:
        return sys.getdefaultencoding()

    # if preferred is a valid codec, use it.
    if preferred is not None:
        try:
            return codecs.lookup(preferred).name
        except (LookupError, AttributeError, TypeError):
            # Unknown codec, a lookup result without .name, or a
            # non-string preference: fall through to the locale.
            pass

    # preferred is bad and can't get it from locale.
    if locale is None:
        return sys.getdefaultencoding()

    def locale_encoding(*category):
        # getlocale() raises ValueError for locale names it cannot
        # parse; treat that the same as "no encoding known".
        try:
            return locale.getlocale(*category)[1]
        except ValueError:
            return None

    # try to get it from the locale, if not there, set it and try again.
    fromlocale = locale_encoding(locale.LC_COLLATE)
    if fromlocale is not None:
        return fromlocale
    try:
        # setlocale() returns a plain string; only the side effect matters.
        locale.setlocale(locale.LC_COLLATE, '')
    except locale.Error:
        pass
    else:
        fromlocale = locale_encoding(locale.LC_COLLATE)
        if fromlocale is not None:
            return fromlocale

    # okay, LC_COLLATE isn't set, maybe the generic locale is.
    fromlocale = locale_encoding()
    if fromlocale is not None:
        return fromlocale

    # but we won't reset the generic locale if it isn't, that'd be
    # rude.

    # if the locale can't even give us a simple encoding, go back
    # to the system one, and give up.
    return locale.getpreferredencoding() or sys.getdefaultencoding()
115
def getpair(locale_, encoding_):
    """Split a locale string into a (locale, encoding) pair.

    Arguments:
    locale_ - locale name, optionally carrying a ".encoding" suffix
    encoding_ - explicit encoding, or None to use the suffix (if any)

    The encoding half of the result is always passed through
    encoding(), so it falls back to a default when nothing usable
    was given.
    """
    if "." not in locale_:
        # Nothing embedded in the name; resolve whatever was passed.
        return locale_, encoding(encoding_)

    if encoding_ is not None:
        # An explicit encoding wins; keep only the text before the
        # first dot of the locale name.
        return locale_.partition(".")[0], encoding(encoding_)

    # No explicit encoding: take the text after the last dot.
    name, embedded = locale_.rsplit(".", 1)
    return name, encoding(embedded)
123