return string.split()
def sortemes(self, string):
    """Split `string` into the pieces ("sortemes") used for sorting.

    The whole string is handed to `collate._strings.alnumsplit` in one
    call and its result is returned unchanged.  This replaces the earlier
    body that split into words first, ran `alnumsplit` on each word and
    filtered the result with `wordlike`.
    """
    return collate._strings.alnumsplit(string)
def sortemekey(self, string, invalid=float('inf')):
    """Build a list of sort keys, one per sorteme of `string`.

    Each sorteme from `self.sortemes(string)` is passed to
    `collate._strings.numeric` together with `invalid`; the returned pair
    is unpacked as `(num, alpha)` and stored as `(num, self.key(alpha))`.

    `invalid` is forwarded untouched to `numeric`; it defaults to positive
    infinity, so existing one-argument callers keep working.
    """
    keyed = []
    for sorteme in self.sortemes(string):
        num, alpha = collate._strings.numeric(sorteme, invalid)
        keyed.append((num, self.key(alpha)))
    return keyed
import unicodedata
def strip_nonalnum(string):
    """Return `string` without leading and trailing non-alphanumeric characters.

    A character counts as alphanumeric when `isalpha()` or `isnumeric()`
    is true for it.  Characters in the interior are never removed.  A
    falsy argument (for example an empty string) is returned as is.
    """
    if not string:
        return string

    def is_alnum(char):
        return char.isalpha() or char.isnumeric()

    # Move two indices inward and slice once, rather than copying the
    # string again for every character that is dropped.
    start, end = 0, len(string)
    while start < end and not is_alnum(string[start]):
        start += 1
    while end > start and not is_alnum(string[end - 1]):
        end -= 1
    return string[start:end]
+
# Split `string` into a list of pieces.  A new piece starts when an
# alphabetic character follows a numeric run, when whitespace follows a
# numeric run, or when a numeric character follows a non-numeric run.
# Every piece is trimmed with strip_nonalnum() before it is stored, and a
# falsy `string` yields an empty list.
#
# This span is a patch excerpt: lines starting with "-" are the previous
# implementation, which collected characters in `word`; lines starting
# with "+" are the replacement, which slices `string` between `start`
# and the current index instead.
def alnumsplit(string):
+ if not string:
+ return []
string = unicode(string)
strings = []
- word = []
# `numeric` records the kind of the current run; None means the run has
# not been classified yet.
numeric = None
- for char in string:
+ start = 0
+ for i, char in enumerate(string):
if numeric is None:
broke = False
if char.isnumeric():
# NOTE(review): setting `numeric = False` right after an isnumeric() test
# looks wrong; presumably lines between these two were lost at a hunk
# boundary (e.g. the `numeric = True` assignment and an alphabetic
# branch).  Confirm against the complete file.
numeric = False
elif numeric and char.isalpha():
broke = True
+ numeric = False
+ elif numeric and char.isspace():
+ broke = True
# Resetting to None makes the next character go through the
# `numeric is None` branch above again.
+ numeric = None
elif not numeric and char.isnumeric():
broke = True
+ numeric = True
if broke:
- if word:
- strings.append(u"".join(word))
- word = []
- numeric = None
- word.append(char)
- if word:
- strings.append(u"".join(word))
# Store the piece that ended just before index i; the next piece starts at i.
+ strings.append(strip_nonalnum(string[start:i]))
+ start = i
+ broke = False
# Store the final piece, up to and including the last index the loop reached.
+ strings.append(strip_nonalnum(string[start:i + 1]))
return strings
def wordlike(string):
def numeric(orig, invalid=float('inf')):
if not orig:
return (invalid, '')
+
string = unicode(orig)
for c in string:
if c.isnumeric():
mult = -mult
string = string[1:]
+ if not string[:1].isnumeric():
+ return (invalid, orig)
+
# Early out if possible.
try:
return (float(string) * mult, orig)