import unicodedata
def strip_nonalnum(string):
    """Return ``string`` without leading/trailing non-alphanumeric characters.

    A character counts as alphanumeric when ``isalpha()`` or ``isnumeric()``
    is true for it; everything else (whitespace, punctuation, symbols) is
    trimmed from both ends.  Characters in the interior are never touched.

    Args:
        string: Text to trim.  Its characters must provide ``isalpha()`` and
            ``isnumeric()``.  A falsy value (empty string, ``None``) is
            returned unchanged.

    Returns:
        The trimmed slice of ``string``; an empty string when no character
        is alphanumeric.
    """
    if not string:
        return string

    def is_alnum(char):
        return char.isalpha() or char.isnumeric()

    # Walk two indices inward and slice once at the end, rather than
    # re-slicing the whole string for every stripped character.
    start, end = 0, len(string)
    while start < end and not is_alnum(string[start]):
        start += 1
    while end > start and not is_alnum(string[end - 1]):
        end -= 1
    return string[start:end]
+
def alnumsplit(string):
+ if not string:
+ return []
string = unicode(string)
strings = []
- word = []
numeric = None
- for char in string:
+ start = 0
+ for i, char in enumerate(string):
if numeric is None:
broke = False
if char.isnumeric():
numeric = False
elif numeric and char.isalpha():
broke = True
+ numeric = False
+ elif numeric and char.isspace():
+ broke = True
+ numeric = None
elif not numeric and char.isnumeric():
broke = True
+ numeric = True
if broke:
- if word:
- strings.append(u"".join(word))
- word = []
- numeric = None
- word.append(char)
- if word:
- strings.append(u"".join(word))
+ strings.append(strip_nonalnum(string[start:i]))
+ start = i
+ broke = False
+ strings.append(strip_nonalnum(string[start:i + 1]))
return strings
def wordlike(string):
def numeric(orig, invalid=float('inf')):
if not orig:
return (invalid, '')
+
string = unicode(orig)
for c in string:
if c.isnumeric():
mult = -mult
string = string[1:]
+ if not string[:1].isnumeric():
+ return (invalid, orig)
+
# Early out if possible.
try:
return (float(string) * mult, orig)