4 import collate
._abcollator
5 import collate
._constants
7 NAME
= "Python UCA %g" % collate
._constants
.VERSION
12 self
.root
= [None, {}]
14 def add(self
, key
, value
):
17 curr_node
= curr_node
[1].setdefault(part
, [None, {}])
20 def find_prefix(self
, key
):
24 if part
not in curr_node
[1]:
26 curr_node
= curr_node
[1][part
]
27 remainder
= remainder
[1:]
28 return (curr_node
[0], remainder
)
31 class Collator(collate
._abcollator
.Collator
):
33 def __init__(self
, locale_code
, encoding
=None):
36 self
.locale
= locale_code
37 dirname
= os
.path
.dirname(__file__
)
38 locale_code
= locale_code
.split(".")[0].lower()
39 short_code
= locale_code
.split("_")[0]
40 filenames
= [os
.path
.join(dirname
, locale_code
+ ".txt"),
41 os
.path
.join(dirname
, short_code
+ ".txt"),
42 os
.path
.join(dirname
, "allkeys.txt")]
43 for filename
in filenames
:
45 fileobj
= open(filename
, "rU")
46 except EnvironmentError:
52 raise collate
.errors
.InvalidLocaleError(locale_code
)
54 def __load(self
, fileobj
):
56 if line
.startswith("#") or line
.startswith("%"):
58 if line
.strip() == "":
60 line
= line
[:line
.find("#")] + "\n"
61 line
= line
[:line
.find("%")] + "\n"
64 if line
.startswith("@"):
67 semicolon
= line
.find(";")
68 charList
= line
[:semicolon
].strip().split()
75 end
= x
[begin
:].find("]")
76 collElement
= x
[begin
:begin
+end
+1]
80 chars
= collElement
[2:-1].split(".")
82 collElements
.append((alt
, chars
))
83 integer_points
= [int(ch
, 16) for ch
in charList
]
84 self
.__table
.add(integer_points
, collElements
)
86 def __implicit_weight(self
, cp
):
88 if (0x4E00 <= cp
<= 0x9FCB
89 or (cp
in [0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14,
90 0xFA1F, 0xFA21, 0xFA23, 0XFA24, 0XFA27,
93 elif (0x3400 <= cp
<= 0x4DB5
94 or 0x20000 <= cp
<= 0x2A6D6
95 or 0x2A700 <= cp
<= 0x2B734):
100 aaaa
= base
+ (cp
>> 15)
101 bbbb
= (cp
& 0x7FFF) |
0x8000
102 # FIXME(jfw): Reread standard to make sure the 4th element is
104 return [('.', ["%04X" % aaaa
, "0020", "0002", "0002"]),
105 ('.', ["%04X" % bbbb
, "0000", "0000", "0000"])]
107 def key(self
, string
):
109 collation_elements
= []
111 lookup_key
= [ord(ch
) for ch
in string
]
113 value
, lookup_key
= self
.__table
.find_prefix(lookup_key
)
115 value
= self
.__implicit
_weight
(lookup_key
.pop(0))
116 collation_elements
.extend(value
)
120 for level
in range(4):
122 sort_key
.append(0) # level separator
123 for element
in collation_elements
:
124 ce_l
= int(element
[1][level
], 16)
126 sort_key
.append(ce_l
)
128 return tuple(sort_key
)