strings.sortemes: Use a line break to separate letters and non-continuing-characters...
[python-collate.git] / collate / strings.py
index 8d6af99..60685c5 100644 (file)
@@ -14,7 +14,8 @@ CONTINUE_ON = frozenset([
 
 UNKNOWN, LETTER, NUMBER = range(3)
 
-BREAKER = u"\u2029" # Paragraph break character
+BREAKER = u"\u2028" # Line separator character (U+2028 LINE SEPARATOR)
+HBREAKER = u"\u2029" # Paragraph separator character (U+2029 PARAGRAPH SEPARATOR)
 INFINITY = float('inf')
 
 KEEP_IN_NUMBERS = u"'.,"
@@ -81,6 +82,8 @@ def sortemes(string, key=lambda s: s):
         elif category[0] == "N":
             digits.append(uchar)
             if letters:
+                if unicodedata.category(letters[-1])[0] == "L":
+                    letters.append(HBREAKER)
                 aletters(u"".join(letters))
                 letters = []
                 previous = LETTER