ba20b487bd1e4559f522c909f6cab161b68b2767
4 import collate
._abcollator
11 def add(self
, key
, value
):
14 curr_node
= curr_node
[1].setdefault(part
, [None, {}])
17 def find_prefix(self
, key
):
21 if part
not in curr_node
[1]:
23 curr_node
= curr_node
[1][part
]
24 remainder
= remainder
[1:]
25 return (curr_node
[0], remainder
)
28 class Collator(collate
._abcollator
.Collator
):
30 def __init__(self
, locale_code
, strict
=False):
33 self
.locale
= locale_code
34 dirname
= os
.path
.dirname(__file__
)
35 locale_code
= locale_code
.split(".")[0].lower()
36 short_code
= locale_code
.split("_")[0]
37 filenames
= [os
.path
.join(dirname
, locale_code
+ ".txt"),
38 os
.path
.join(dirname
, short_code
+ ".txt"),
39 os
.path
.join(dirname
, "allkeys.txt")]
40 for filename
in filenames
:
42 fileobj
= open(filename
, "rU")
43 except EnvironmentError:
49 raise collate
.errors
.InvalidLocaleError(locale_code
)
51 def __load(self
, fileobj
):
53 if line
.startswith("#") or line
.startswith("%"):
55 if line
.strip() == "":
57 line
= line
[:line
.find("#")] + "\n"
58 line
= line
[:line
.find("%")] + "\n"
61 if line
.startswith("@"):
64 semicolon
= line
.find(";")
65 charList
= line
[:semicolon
].strip().split()
72 end
= x
[begin
:].find("]")
73 collElement
= x
[begin
:begin
+end
+1]
77 chars
= collElement
[2:-1].split(".")
79 collElements
.append((alt
, chars
))
80 integer_points
= [int(ch
, 16) for ch
in charList
]
81 self
.__table
.add(integer_points
, collElements
)
83 def __implicit_weight(self
, cp
):
85 if (0x4E00 <= cp
<= 0x9FCB
86 or (cp
in [0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
87 0xFA1F, 0xFA21, 0xFA23, 0XFA24, 0XFA27,
90 elif (0x3400 <= cp
<= 0x4DB5
91 or 0x20000 <= cp
<= 0x2A6D6
92 or 0x2A700 <= cp
<= 0x2B734):
97 aaaa
= base
+ (cp
>> 15)
98 bbbb
= (cp
& 0x7FFF) |
0x8000
99 # FIXME(jfw): Reread standard to make sure the 4th element is
101 return [('.', ["%04X" % aaaa
, "0020", "0002", "0002"]),
102 ('.', ["%04X" % bbbb
, "0000", "0000", "0000"])]
104 def key(self
, string
):
106 collation_elements
= []
108 lookup_key
= [ord(ch
) for ch
in string
]
110 value
, lookup_key
= self
.__table
.find_prefix(lookup_key
)
112 value
= self
.__implicit
_weight
(lookup_key
.pop(0))
113 collation_elements
.extend(value
)
117 for level
in range(4):
119 sort_key
.append(0) # level separator
120 for element
in collation_elements
:
121 ce_l
= int(element
[1][level
], 16)
123 sort_key
.append(ce_l
)
125 return tuple(sort_key
)