- # Explicit loop is faster than:
- #return any(map(type(string).isalnum, string))
+ # Split if we find two pieces of punctuation in a row, even
+ # if we should otherwise continue.
+ elif i > 0 and prev_category[0] == "P" and category[0] == "P":
+ broke = True
+ mode = UNKNOWN
+
+ if broke and start is not None and last is not None:
+ # If we read two strings separated by weird punctuation,
+ # pretend the punctuation isn't there.
+ if (this_mode == previous_mode == LETTER
+ and (category[0] == "P" or prev_category[0] == "P")
+ and words):
+ words[-1] += u" " + string[start:last+1]
+ else:
+ # This ensures "foo2 bar" sorts as ["foo ", 2, "bar"],
+ # which sorts after ["foo", "bar"].
+ if this_mode == NUMBER and previous_mode == LETTER and words:
+ words[-1] += u" "
+ words.append(string[start:last+1])
+ previous_mode = this_mode