uca is not going to make the cut.
[python-collate.git] / collate / uca / __init__.py
diff --git a/collate/uca/__init__.py b/collate/uca/__init__.py
deleted file mode 100644 (file)
index 21a819a..0000000
--- a/collate/uca/__init__.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-
-import collate.errors
-import collate._abcollator
-import collate._constants
-
-DIRNAME = os.path.dirname(__file__)
-
-class Trie(object):
-    def __init__(self):
-        self.root = [None, {}]
-
-    def add(self, key, value):
-        curr_node = self.root
-        for part in key:
-            curr_node = curr_node[1].setdefault(part, [None, {}])
-        curr_node[0] = value
-
-    def find_prefix(self, key):
-        curr_node = self.root
-        remainder = key
-        for part in key:
-            if part not in curr_node[1]:
-                break
-            curr_node = curr_node[1][part]
-            remainder = remainder[1:]
-        return (curr_node[0], remainder)
-
-def load_trie(fileobj, trie):
-    for line in fileobj:
-        if line.startswith("#") or line.startswith("%"):
-            continue
-        if line.strip() == "":
-            continue
-        line = line[:line.find("#")] + "\n"
-        line = line[:line.find("%")] + "\n"
-        line = line.strip()
-
-        if line.startswith("@"):
-            pass
-        else:
-            semicolon = line.find(";")
-            charList = line[:semicolon].strip().split()
-            x = line[semicolon:]
-            collElements = []
-            while True:
-                begin = x.find("[")
-                if begin == -1:
-                    break                
-                end = x[begin:].find("]")
-                collElement = x[begin:begin+end+1]
-                x = x[begin + 1:]
-
-                alt = collElement[1]
-                chars = collElement[2:-1].split(".")
-                chars = [int(_, 16) for _ in chars]
-
-                collElements.append((alt, chars))
-            integer_points = [int(ch, 16) for ch in charList]
-            trie.add(integer_points, collElements)
-
-class Collator(collate._abcollator.Collator):
-
-    def __init__(self, locale, encoding=None):
-        self.locale, self.encoding = collate._locale.getpair(locale, encoding)
-        if self.locale == "C":
-            self.__table = _DUCET
-        else:
-            self.__table = Trie()
-            filename = os.path.join(DIRNAME, locale.lower() + ".txt")
-            try:
-                fileobj = open(filename, "rU")
-            except EnvironmentError:
-                raise collate.errors.InvalidLocaleError(self.locale)
-            else:
-                load_trie(fileobj, self.__table)
-
-    def __implicit_weight(self, cp):
-        # UCA 7.1.3.
-        if (0x4E00 <= cp <= 0x9FCB
-            or (cp in [0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
-                       0xFA1F, 0xFA21, 0xFA23, 0XFA24, 0XFA27,
-                       0xFA28, 0xFA29])):
-            base = 0xFB40
-        elif (0x3400 <= cp <= 0x4DB5
-              or 0x20000 <= cp <= 0x2A6D6
-              or 0x2A700 <= cp <= 0x2B734):
-            base = 0xFB80
-        else:
-            base = 0xFBC0
-                       
-        aaaa = base + (cp >> 15)
-        bbbb = (cp & 0x7FFF) | 0x8000
-        # FIXME(jfw): Reread standard to make sure the 4th element is
-        # right.
-        return [('.', [aaaa, 0x20, 0x2, 0x2]),
-                ('.', [bbbb, 0x0, 0x0, 0x0])]
-
-    def key(self, string):
-        
-        collation_elements = []
-
-        lookup_key = [ord(ch) for ch in string]
-        while lookup_key:
-            value, lookup_key = self.__table.find_prefix(lookup_key)
-            if value is None:
-                value = self.__implicit_weight(lookup_key.pop(0))
-            collation_elements.extend(value)
-    
-        sort_key = []
-        
-        for level in range(4):
-            if level:
-                sort_key.append(0) # level separator
-            for element in collation_elements:
-                ce_l = element[1][level]
-                if ce_l:
-                    sort_key.append(ce_l)
-        
-        return tuple(sort_key)
-
-try:
-    fileobj = file(os.path.join(DIRNAME, "allkeys.txt"), "rU")
-except EnvironmentError:
-    raise ImportError("no DUCET information available")
-else:
-    _DUCET = Trie()
-    load_trie(fileobj, _DUCET)
-    fileobj.close()
-    del(fileobj)