X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2F_abcollator.py;h=9dce32bee99f5a7d9b746310fa7b3e4a90281af7;hp=2e6ab9567338e48884c6e46778b112e914d229e5;hb=f1717db91d1ab7b37937cbe5ab17965cb8b7b592;hpb=7c67e10286c784b572703666a980e85b39b858ee diff --git a/collate/_abcollator.py b/collate/_abcollator.py index 2e6ab95..9dce32b 100644 --- a/collate/_abcollator.py +++ b/collate/_abcollator.py @@ -1,4 +1,4 @@ -import collate._strings +import collate.strings class Collator(object): def cmp(self, string1, string2): @@ -6,19 +6,23 @@ class Collator(object): return cmp(self.key(string1), self.key(string2)) def words(self, string): - """Split the string into separate words. + """Split the string along word boundries.""" + if isinstance(string, str): + string = string.decode(self.encoding, 'replace') + return string.split() + + def sortemekey(self, string, invalid=float('inf')): + """Return a key based on sortemes of a string. - This split is done using Unicode's definition of whitespace. + If the string is a str instance, it is decoded to a unicode + instance according to the 'encoding' attribute of the + Collator. """ - return string.split() + if isinstance(string, str): + string = string.decode(self.encoding, 'replace') - def sortemes(self, string): - return collate._strings.alnumsplit(string) + # Shove the sortkeyed original string on the end to resolve + # ties intelligently. + return (collate.strings.sortemes(string, self.key), + self.key(string)) - def sortemekey(self, string, invalid=float('inf')): - words = [] - for sorteme in self.sortemes(string): - num, alpha = collate._strings.numeric(sorteme, invalid) - alpha = self.key(collate._strings.strip_punc(alpha)) - words.append((num, alpha)) - return words