UNKNOWN, LETTER, NUMBER = range(3)
-BREAKER = u"\u2029" # Paragraph break character
+BREAKER = u"\u2028" # U+2028 LINE SEPARATOR: Unicode line break character
+HBREAKER = u"\u2029" # U+2029 PARAGRAPH SEPARATOR: Unicode paragraph break character
INFINITY = float('inf')
KEEP_IN_NUMBERS = u"'.,"
elif category[0] == "N":
digits.append(uchar)
if letters:
+ if unicodedata.category(letters[-1])[0] == "L":
+ letters.append(HBREAKER)
aletters(u"".join(letters))
letters = []
previous = LETTER