New approach - find split points based on Unicode categories.
[python-collate.git] / collate / _abcollator.py
index 094a5de..fdd7783 100644 (file)
@@ -1,5 +1,18 @@
+import collate._strings
+
 class Collator(object):
     def cmp(self, string1, string2):
+        """Return <0 if string1 < string2, 0 if equal, >0 if string1 > string2."""
         return cmp(self.key(string1), self.key(string2))
 
-        
+    def sortemekey(self, string, invalid=float('inf')):
+        keys = []
+        for sorteme in collate._strings.sortemes(string):
+            num, alpha = collate._strings.numeric(sorteme, invalid)
+            if num == invalid:
+                keys.append(self.key(alpha))
+            else:
+                keys.append(num)
+        # Append the sort key of the whole original string as a final
+        # tie-breaker for strings whose per-sorteme keys compare equal.
+        return (keys, self.key(string))