import collate._abcollator
import collate._constants
-class Trie(object):
+DIRNAME = os.path.dirname(__file__)
+class Trie(object):
def __init__(self):
self.root = [None, {}]
remainder = remainder[1:]
return (curr_node[0], remainder)
+def load_trie(fileobj, trie):
+    """Read collation-element lines from fileobj and add them to trie.
+
+    Each data line has the form
+    ``<hex code points> ; [<alt><hex>.<hex>...][...]``.  The code points
+    before the ";" are parsed as base-16 integers; every bracketed element
+    becomes an ``(alt, weights)`` pair, where ``alt`` is the character
+    right after "[" and ``weights`` is the list of "."-separated base-16
+    integers up to the closing "]".  The result is handed to
+    ``trie.add(code_points, elements)``.
+
+    Text from the first "#" or "%" to the end of a line is treated as a
+    comment.  Blank lines, comment-only lines and lines starting with "@"
+    are skipped.
+
+    fileobj -- any iterable of text lines (e.g. an open file)
+    trie    -- object providing ``add(code_points, elements)``
+    """
+    for line in fileobj:
+        # Split instead of slicing at find(): find() returns -1 when the
+        # marker is absent, which is only harmless while the line still
+        # ends in "\n"; a final line without a newline lost its last
+        # character (the closing "]").
+        line = line.split("#", 1)[0].split("%", 1)[0].strip()
+        if not line:
+            # Blank or comment-only (possibly indented) line: skip it so
+            # that no empty key is registered in the trie.
+            continue
-class Collator(collate._abcollator.Collator):
+        if line.startswith("@"):
+            # Lines starting with "@" are skipped.
+            continue
+        semicolon = line.find(";")
+        charList = line[:semicolon].strip().split()
+        x = line[semicolon:]
+        collElements = []
+        while True:
+            begin = x.find("[")
+            if begin == -1:
+                break
+            end = x[begin:].find("]")
+            collElement = x[begin:begin+end+1]
+            # Step past this "[" so the next find() sees the next element.
+            x = x[begin + 1:]
- def __init__(self, locale_code, encoding=None):
+            alt = collElement[1]
+            chars = collElement[2:-1].split(".")
+            chars = [int(_, 16) for _ in chars]
- self.__table = Trie()
- self.locale = locale_code
- dirname = os.path.dirname(__file__)
- locale_code = locale_code.split(".")[0].lower()
- short_code = locale_code.split("_")[0]
- filenames = [os.path.join(dirname, locale_code + ".txt"),
- os.path.join(dirname, short_code + ".txt"),
- os.path.join(dirname, "allkeys.txt")]
- for filename in filenames:
+            collElements.append((alt, chars))
+        integer_points = [int(ch, 16) for ch in charList]
+        trie.add(integer_points, collElements)
+
+class Collator(collate._abcollator.Collator):
+
+ def __init__(self, locale, encoding=None):
+ self.locale, self.encoding = collate._locale.getpair(locale, encoding)
+ if self.locale == "C":
+ self.__table = _DUCET
+ else:
+ self.__table = Trie()
+ filename = os.path.join(DIRNAME, locale.lower() + ".txt")
try:
fileobj = open(filename, "rU")
except EnvironmentError:
- pass
+ raise collate.errors.InvalidLocaleError(self.locale)
else:
- self.__load(fileobj)
- break
- else:
- raise collate.errors.InvalidLocaleError(locale_code)
-
- def __load(self, fileobj):
- for line in fileobj:
- if line.startswith("#") or line.startswith("%"):
- continue
- if line.strip() == "":
- continue
- line = line[:line.find("#")] + "\n"
- line = line[:line.find("%")] + "\n"
- line = line.strip()
-
- if line.startswith("@"):
- pass
- else:
- semicolon = line.find(";")
- charList = line[:semicolon].strip().split()
- x = line[semicolon:]
- collElements = []
- while True:
- begin = x.find("[")
- if begin == -1:
- break
- end = x[begin:].find("]")
- collElement = x[begin:begin+end+1]
- x = x[begin + 1:]
-
- alt = collElement[1]
- chars = collElement[2:-1].split(".")
-
- collElements.append((alt, chars))
- integer_points = [int(ch, 16) for ch in charList]
- self.__table.add(integer_points, collElements)
+ load_trie(fileobj, self.__table)
def __implicit_weight(self, cp):
# UCA 7.1.3.
bbbb = (cp & 0x7FFF) | 0x8000
# FIXME(jfw): Reread standard to make sure the 4th element is
# right.
- return [('.', ["%04X" % aaaa, "0020", "0002", "0002"]),
- ('.', ["%04X" % bbbb, "0000", "0000", "0000"])]
+ return [('.', [aaaa, 0x20, 0x2, 0x2]),
+ ('.', [bbbb, 0x0, 0x0, 0x0])]
def key(self, string):
if level:
sort_key.append(0) # level separator
for element in collation_elements:
- ce_l = int(element[1][level], 16)
+ ce_l = element[1][level]
if ce_l:
sort_key.append(ce_l)
return tuple(sort_key)
+
+# Load the default collation table once, at import time, from "allkeys.txt"
+# next to this module; the import fails if that file cannot be opened.
+try:
+    # open() rather than file(), matching how Collator.__init__ opens
+    # its table file.
+    fileobj = open(os.path.join(DIRNAME, "allkeys.txt"), "rU")
+except EnvironmentError:
+    raise ImportError("no DUCET information available")
+else:
+    _DUCET = Trie()
+    try:
+        load_trie(fileobj, _DUCET)
+    finally:
+        # Release the handle even when parsing the table raises.
+        fileobj.close()
+    del(fileobj)