4 import collate
._abcollator
5 import collate
._constants
10 self
.root
= [None, {}]
12 def add(self
, key
, value
):
15 curr_node
= curr_node
[1].setdefault(part
, [None, {}])
18 def find_prefix(self
, key
):
22 if part
not in curr_node
[1]:
24 curr_node
= curr_node
[1][part
]
25 remainder
= remainder
[1:]
26 return (curr_node
[0], remainder
)
29 class Collator(collate
._abcollator
.Collator
):
31 def __init__(self
, locale_code
, encoding
=None):
34 self
.locale
= locale_code
35 dirname
= os
.path
.dirname(__file__
)
36 locale_code
= locale_code
.split(".")[0].lower()
37 short_code
= locale_code
.split("_")[0]
38 filenames
= [os
.path
.join(dirname
, locale_code
+ ".txt"),
39 os
.path
.join(dirname
, short_code
+ ".txt"),
40 os
.path
.join(dirname
, "allkeys.txt")]
41 for filename
in filenames
:
43 fileobj
= open(filename
, "rU")
44 except EnvironmentError:
50 raise collate
.errors
.InvalidLocaleError(locale_code
)
52 def __load(self
, fileobj
):
54 if line
.startswith("#") or line
.startswith("%"):
56 if line
.strip() == "":
58 line
= line
[:line
.find("#")] + "\n"
59 line
= line
[:line
.find("%")] + "\n"
62 if line
.startswith("@"):
65 semicolon
= line
.find(";")
66 charList
= line
[:semicolon
].strip().split()
73 end
= x
[begin
:].find("]")
74 collElement
= x
[begin
:begin
+end
+1]
78 chars
= collElement
[2:-1].split(".")
80 collElements
.append((alt
, chars
))
81 integer_points
= [int(ch
, 16) for ch
in charList
]
82 self
.__table
.add(integer_points
, collElements
)
84 def __implicit_weight(self
, cp
):
86 if (0x4E00 <= cp
<= 0x9FCB
87 or (cp
in [0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
88 0xFA1F, 0xFA21, 0xFA23, 0XFA24, 0XFA27,
91 elif (0x3400 <= cp
<= 0x4DB5
92 or 0x20000 <= cp
<= 0x2A6D6
93 or 0x2A700 <= cp
<= 0x2B734):
98 aaaa
= base
+ (cp
>> 15)
99 bbbb
= (cp
& 0x7FFF) |
0x8000
100 # FIXME(jfw): Reread standard to make sure the 4th element is
102 return [('.', ["%04X" % aaaa
, "0020", "0002", "0002"]),
103 ('.', ["%04X" % bbbb
, "0000", "0000", "0000"])]
105 def key(self
, string
):
107 collation_elements
= []
109 lookup_key
= [ord(ch
) for ch
in string
]
111 value
, lookup_key
= self
.__table
.find_prefix(lookup_key
)
113 value
= self
.__implicit
_weight
(lookup_key
.pop(0))
114 collation_elements
.extend(value
)
118 for level
in range(4):
120 sort_key
.append(0) # level separator
121 for element
in collation_elements
:
122 ce_l
= int(element
[1][level
], 16)
124 sort_key
.append(ce_l
)
126 return tuple(sort_key
)