- elif prev_category[0] in "P" and category[0] in "P":
- broke = True
- mode = UNKNOWN
-
- if broke and start is not None and last is not None:
- # If we read two strings separated by weird punctuation,
- # pretend the punctuation isn't there.
- if this_mode == previous_mode == LETTER:
- words[-1] += BREAKER + string[start:last+1]
- else:
- if this_mode == NUMBER and previous_mode == LETTER:
- words[-1] += BREAKER
- words.append(string[start:last+1])
- previous_mode = this_mode
-
- if broke:
- start = i
- last = None
- if category[0] in "LN":
- last = i
- this_mode = mode
- if start is not None and last is not None:
- if this_mode == LETTER and previous_mode == LETTER and words:
- words[-1] += BREAKER + string[start:last+1]
+ elif i and categories[i-1][0] in "P" and category[0] in "P":
+ if letters:
+ words.append(u"".join(letters))
+ previous = LETTER
+ letters = []
+ if digits:
+ words.append(u"".join(digits))
+ previous = NUMBER
+ digits = []
+