strings.sortemes: Use a line break to separate letters and non-continuing-characters...
author Joe Wreschnig <joe.wreschnig@gmail.com>
Wed, 24 Feb 2010 09:21:33 +0000 (01:21 -0800)
committer Joe Wreschnig <joe.wreschnig@gmail.com>
Wed, 24 Feb 2010 09:21:33 +0000 (01:21 -0800)
MANIFEST.in
NEWS.txt [new file with mode: 0644]
collate/strings.py

index 11aaf94..528146e 100644 (file)
@@ -3,3 +3,4 @@ include tests/*/*.list.txt
 include tests/*/*.py
 include ZPL.txt
 include pycollate
+include NEWS.txt
diff --git a/NEWS.txt b/NEWS.txt
new file mode 100644 (file)
index 0000000..8bb6e70
--- /dev/null
+++ b/NEWS.txt
@@ -0,0 +1,5 @@
+2010 - 0.2 - Dedicated to ２ (U+FF12)
+ - 直感~ before 直感2~.
+
+2010.02.22 - 0.1 - Dedicated to 勘 (U+52D8)
+ - Initial release.
index 8d6af99..60685c5 100644 (file)
@@ -14,7 +14,8 @@ CONTINUE_ON = frozenset([
 
 UNKNOWN, LETTER, NUMBER = range(3)
 
-BREAKER = u"\u2029" # Paragraph break character
+BREAKER = u"\u2028" # Line break character
+HBREAKER = u"\u2029" # Paragraph break character
 INFINITY = float('inf')
 
 KEEP_IN_NUMBERS = u"'.,"
@@ -81,6 +82,8 @@ def sortemes(string, key=lambda s: s):
         elif category[0] == "N":
             digits.append(uchar)
             if letters:
+                if unicodedata.category(letters[-1])[0] == "L":
+                    letters.append(HBREAKER)
                 aletters(u"".join(letters))
                 letters = []
                 previous = LETTER