X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2F_strings.py;h=f81bfd7e20be5bc65529f85e3bccab5fb0f15d60;hp=fd18bb917008dcb56aa06246dc86d1a721fe601c;hb=2a37219e2d9c0fe58e78d987a21f6e37cfd33940;hpb=53e1676b8d68cccd2b0692654d3871e44e0ba6b6 diff --git a/collate/_strings.py b/collate/_strings.py index fd18bb9..f81bfd7 100644 --- a/collate/_strings.py +++ b/collate/_strings.py @@ -1,11 +1,20 @@ import unicodedata +def strip_nonalnum(string): + while string and not (string[0].isalpha() or string[0].isnumeric()): + string = string[1:] + while string and not (string[-1].isalpha() or string[-1].isnumeric()): + string = string[:-1] + return string + def alnumsplit(string): + if not string: + return [] string = unicode(string) strings = [] - word = [] numeric = None - for char in string: + start = 0 + for i, char in enumerate(string): if numeric is None: broke = False if char.isnumeric(): @@ -14,16 +23,18 @@ def alnumsplit(string): numeric = False elif numeric and char.isalpha(): broke = True + numeric = False + elif numeric and char.isspace(): + broke = True + numeric = None elif not numeric and char.isnumeric(): broke = True + numeric = True if broke: - if word: - strings.append(u"".join(word)) - word = [] - numeric = None - word.append(char) - if word: - strings.append(u"".join(word)) + strings.append(strip_nonalnum(string[start:i])) + start = i + broke = False + strings.append(strip_nonalnum(string[start:i + 1])) return strings def wordlike(string): @@ -44,6 +55,7 @@ def wordlike(string): def numeric(orig, invalid=float('inf')): if not orig: return (invalid, '') + string = unicode(orig) for c in string: if c.isnumeric(): @@ -57,6 +69,9 @@ def numeric(orig, invalid=float('inf')): mult = -mult string = string[1:] + if not string[:1].isnumeric(): + return (invalid, orig) + # Early out if possible. try: return (float(string) * mult, orig)