strings.sortemes: Use a line break to separate letters and non-continuing-characters...
author: Joe Wreschnig <joe.wreschnig@gmail.com>
Wed, 24 Feb 2010 09:21:33 +0000 (01:21 -0800)
committer: Joe Wreschnig <joe.wreschnig@gmail.com>
Wed, 24 Feb 2010 09:21:33 +0000 (01:21 -0800)
MANIFEST.in
NEWS.txt [new file with mode: 0644]
collate/strings.py
diff --git a/MANIFEST.in b/MANIFEST.in
index 11aaf942fcbc904515c450956a0eb277cec2ad9f..528146ee9b7fac1c3ae4df8f502a196ae2fec7e1 100644 (file)
@@ -3,3 +3,4 @@ include tests/*/*.list.txt
 include tests/*/*.py
 include ZPL.txt
 include pycollate
+include NEWS.txt
diff --git a/NEWS.txt b/NEWS.txt
new file mode 100644 (file)
index 0000000..8bb6e70
--- /dev/null
+++ b/NEWS.txt
@@ -0,0 +1,5 @@
+2010 - 0.2 - Dedicated to 2 (U+FF12)
+ - 直感~ before 直感2~.
+
+2010.02.22 - 0.1 - Dedicated to 勘 (U+52D8)
+ - Initial release.
diff --git a/collate/strings.py b/collate/strings.py
index 8d6af993cd72d5f0fcb4fc04eb6e1953f72904c9..60685c57a3415e763a63d05545340429113ec222 100644 (file)
@@ -14,7 +14,8 @@ CONTINUE_ON = frozenset([
 
 UNKNOWN, LETTER, NUMBER = range(3)
 
-BREAKER = u"\u2029" # Paragraph break character
+BREAKER = u"\u2028" # Line break character
+HBREAKER = u"\u2029" # Paragraph break character
 INFINITY = float('inf')
 
 KEEP_IN_NUMBERS = u"'.,"
@@ -81,6 +82,8 @@ def sortemes(string, key=lambda s: s):
         elif category[0] == "N":
             digits.append(uchar)
             if letters:
+                if unicodedata.category(letters[-1])[0] == "L":
+                    letters.append(HBREAKER)
                 aletters(u"".join(letters))
                 letters = []
                 previous = LETTER