-import collate._strings
+import collate.strings
class Collator(object):
def cmp(self, string1, string2):
+ """Compare two strings by their collation keys.
+
+ Returns the result of the built-in cmp() applied to
+ self.key(string1) and self.key(string2).
+ """
return cmp(self.key(string1), self.key(string2))
def words(self, string):
- """Split the string into separate words.
-
- This split is done using Unicode's definition of whitespace.
- """
+ """Split the string into separate words.
+
+ The split is done on whitespace, using split() with no
+ arguments. If the string is a str instance, it is first decoded
+ to a unicode instance according to the 'encoding' attribute of
+ the Collator, using the 'replace' error handler.
+ """
+ if isinstance(string, str):
+ string = string.decode(self.encoding, 'replace')
return string.split()
- def sortemes(self, string):
- return collate._strings.alnumsplit(string)
-
def sortemekey(self, string, invalid=float('inf')):
- words = []
- for sorteme in self.sortemes(string):
- num, alpha = collate._strings.numeric(sorteme, invalid)
- alpha = self.key(collate._strings.strip_punc(alpha))
- words.append((num, alpha))
- return words
+ """Return a key based on sortemes of a string.
+
+ If the string is a str instance, it is decoded to a unicode
+ instance according to the 'encoding' attribute of the
+ Collator.
+
+ Each sorteme produced by collate.strings.sortemes() is passed
+ to collate.strings.numeric() together with 'invalid'. When the
+ number that comes back equals 'invalid', the key of the alpha
+ part is used for that sorteme; otherwise the number itself is
+ used.
+
+ Returns a tuple (keys, self.key(string)), where keys is the
+ list of per-sorteme keys.
+ """
+ keys = []
+ if isinstance(string, str):
+ string = string.decode(self.encoding, 'replace')
+ for sorteme in collate.strings.sortemes(string):
+ num, alpha = collate.strings.numeric(sorteme, invalid)
+ # NOTE(review): presumably numeric() hands back 'invalid' when
+ # the sorteme has no numeric value -- confirm in collate.strings.
+ if num == invalid:
+ keys.append(self.key(alpha))
+ else:
+ keys.append(num)
+ # Put the sort key of the whole string last in the returned
+ # tuple to resolve ties between equal sorteme keys.
+ return (keys, self.key(string))