X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2F_abcollator.py;h=0ae5d451f59defb65cb34fb4a2ca6b2f36152212;hp=02cb733c79d889462a03e4680ece539e664cb696;hb=b0490f43bebb2aca90496fcdb5b65fa89642b409;hpb=d46d035bdc1ef7276af7c41880034226d0cdfbfc diff --git a/collate/_abcollator.py b/collate/_abcollator.py index 02cb733..0ae5d45 100644 --- a/collate/_abcollator.py +++ b/collate/_abcollator.py @@ -1,4 +1,4 @@ -import collate._strings +import collate.strings class Collator(object): def cmp(self, string1, string2): @@ -6,18 +6,27 @@ class Collator(object): return cmp(self.key(string1), self.key(string2)) def words(self, string): - """Split the string into separate words. - - This split is done using Unicode's definition of whitespace. - """ + """Split the string along word boundries.""" + if isinstance(string, str): + string = string.decode(self.encoding, 'replace') return string.split() - def sortemes(self, string): - return collate._strings.alnumsplit(string) - def sortemekey(self, string, invalid=float('inf')): - words = [] - for sorteme in self.sortemes(string): - num, alpha = collate._strings.numeric(sorteme, invalid) - words.append((num, self.key(alpha))) - return words + """Return a key based on sortemes of a string. + + If the string is a str instance, it is decoded to a unicode + instance according to the 'encoding' attribute of the + Collator. + """ + keys = [] + if isinstance(string, str): + string = string.decode(self.encoding, 'replace') + for sorteme in collate.strings.sortemes(string): + num, alpha = collate.strings.numeric(sorteme, invalid) + if num == invalid: + keys.append(self.key(alpha)) + else: + keys.append(num) + # Shove the sortkeyed original string on the end to resolve + # ties intelligently. + return (keys, self.key(string))