projects
/
python-collate.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
New approach - find split points based on Unicode categories.
[python-collate.git]
/
collate
/
_abcollator.py
diff --git a/collate/_abcollator.py b/collate/_abcollator.py
index 2e6ab95..fdd7783 100644
(file)
--- a/collate/_abcollator.py
+++ b/collate/_abcollator.py
@@ -5,20 +5,14 @@ class Collator(object):
"""Return negative if a < b, zero if a == b, positive if a > b."""
return cmp(self.key(string1), self.key(string2))
"""Return negative if a < b, zero if a == b, positive if a > b."""
return cmp(self.key(string1), self.key(string2))
- def words(self, string):
- """Split the string into separate words.
-
- This split is done using Unicode's definition of whitespace.
- """
- return string.split()
-
- def sortemes(self, string):
- return collate._strings.alnumsplit(string)
-
def sortemekey(self, string, invalid=float('inf')):
def sortemekey(self, string, invalid=float('inf')):
- words = []
- for sorteme in self.sortemes(string):
+ keys = []
+ for sorteme in collate._strings.sortemes(string):
num, alpha = collate._strings.numeric(sorteme, invalid)
num, alpha = collate._strings.numeric(sorteme, invalid)
- alpha = self.key(collate._strings.strip_punc(alpha))
- words.append((num, alpha))
- return words
+ if num == invalid:
+ keys.append(self.key(alpha))
+ else:
+ keys.append(num)
+ # Shove the sortkeyed original string on the end to resolve
+ # ties intelligently.
+ return (keys, self.key(string))