Category-based splitting.
[python-collate.git] / collate / _abcollator.py
1 import collate._strings
2
class Collator(object):
    """Base collator offering comparison and splitting helpers.

    Every comparison is expressed in terms of ``self.key(string)``.
    NOTE(review): ``key`` is not defined in this class; presumably concrete
    collators provide it -- confirm against the subclasses.
    """

    def cmp(self, string1, string2):
        """Return negative if a < b, zero if a == b, positive if a > b.

        The two strings are compared through their collation keys
        (``self.key``), not through their raw values.
        """
        key1 = self.key(string1)
        key2 = self.key(string2)
        # The builtin cmp() no longer exists on Python 3; this expression
        # yields the same -1 / 0 / 1 result on both Python 2 and Python 3.
        return (key1 > key2) - (key1 < key2)

    def words(self, string):
        """Split the string into separate words.

        This split is done using Unicode's definition of whitespace.
        """
        return string.split()

    def sortemes(self, string):
        """Split the string into sortemes.

        Delegates to ``collate._strings.alnumsplit``.
        NOTE(review): presumably this separates alphabetic runs from
        numeric runs -- confirm against that helper.
        """
        return collate._strings.alnumsplit(string)

    def sortemekey(self, string, invalid=float('inf')):
        """Return a list of ``(num, alpha)`` pairs, one per sorteme.

        Each sorteme from ``self.sortemes(string)`` is handed to
        ``collate._strings.numeric`` together with ``invalid``; the
        alphabetic part it returns is stripped with
        ``collate._strings.strip_punc`` and then converted with
        ``self.key``.
        NOTE(review): ``invalid`` is presumably the numeric value used for
        sortemes that are not numbers -- confirm against
        ``collate._strings.numeric``.
        """
        pairs = []
        for sorteme in self.sortemes(string):
            num, alpha = collate._strings.numeric(sorteme, invalid)
            alpha = self.key(collate._strings.strip_punc(alpha))
            pairs.append((num, alpha))
        return pairs