4 import collate
._abcollator
5 import collate
._constants
7 DIRNAME
= os
.path
.dirname(__file__
)
11 self
.root
= [None, {}]
13 def add(self
, key
, value
):
16 curr_node
= curr_node
[1].setdefault(part
, [None, {}])
19 def find_prefix(self
, key
):
23 if part
not in curr_node
[1]:
25 curr_node
= curr_node
[1][part
]
26 remainder
= remainder
[1:]
27 return (curr_node
[0], remainder
)
29 def load_trie(fileobj
, trie
):
31 if line
.startswith("#") or line
.startswith("%"):
33 if line
.strip() == "":
35 line
= line
[:line
.find("#")] + "\n"
36 line
= line
[:line
.find("%")] + "\n"
39 if line
.startswith("@"):
42 semicolon
= line
.find(";")
43 charList
= line
[:semicolon
].strip().split()
50 end
= x
[begin
:].find("]")
51 collElement
= x
[begin
:begin
+end
+1]
55 chars
= collElement
[2:-1].split(".")
56 chars
= [int(_
, 16) for _
in chars
]
58 collElements
.append((alt
, chars
))
59 integer_points
= [int(ch
, 16) for ch
in charList
]
60 trie
.add(integer_points
, collElements
)
62 class Collator(collate
._abcollator
.Collator
):
64 def __init__(self
, locale
, encoding
=None):
65 self
.locale
, self
.encoding
= collate
._locale
.getpair(locale
, encoding
)
66 if self
.locale
== "C":
70 filename
= os
.path
.join(DIRNAME
, locale
.lower() + ".txt")
72 fileobj
= open(filename
, "rU")
73 except EnvironmentError:
74 raise collate
.errors
.InvalidLocaleError(self
.locale
)
76 load_trie(fileobj
, self
.__table
)
78 def __implicit_weight(self
, cp
):
80 if (0x4E00 <= cp
<= 0x9FCB
81 or (cp
in [0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
82 0xFA1F, 0xFA21, 0xFA23, 0XFA24, 0XFA27,
85 elif (0x3400 <= cp
<= 0x4DB5
86 or 0x20000 <= cp
<= 0x2A6D6
87 or 0x2A700 <= cp
<= 0x2B734):
92 aaaa
= base
+ (cp
>> 15)
93 bbbb
= (cp
& 0x7FFF) |
0x8000
94 # FIXME(jfw): Reread standard to make sure the 4th element is
96 return [('.', [aaaa
, 0x20, 0x2, 0x2]),
97 ('.', [bbbb
, 0x0, 0x0, 0x0])]
99 def key(self
, string
):
101 collation_elements
= []
103 lookup_key
= [ord(ch
) for ch
in string
]
105 value
, lookup_key
= self
.__table
.find_prefix(lookup_key
)
107 value
= self
.__implicit
_weight
(lookup_key
.pop(0))
108 collation_elements
.extend(value
)
112 for level
in range(4):
114 sort_key
.append(0) # level separator
115 for element
in collation_elements
:
116 ce_l
= element
[1][level
]
118 sort_key
.append(ce_l
)
120 return tuple(sort_key
)
123 fileobj
= file(os
.path
.join(DIRNAME
, "allkeys.txt"), "rU")
124 except EnvironmentError:
125 raise ImportError("no DUCET information available")
128 load_trie(fileobj
, _DUCET
)