From: Joe Wreschnig Date: Wed, 24 Feb 2010 09:21:33 +0000 (-0800) Subject: strings.sortemes: Use a line break to separate letters and non-continuing-characters... X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=commitdiff_plain;h=ee7317abef6e6c235c6344f3f173af134e52cf8c strings.sortemes: Use a line break to separate letters and non-continuing-characters and a paragraph break to separate letters and numbers. (Fixes issue #2) --- diff --git a/MANIFEST.in b/MANIFEST.in index 11aaf94..528146e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include tests/*/*.list.txt include tests/*/*.py include ZPL.txt include pycollate +include NEWS.txt diff --git a/NEWS.txt b/NEWS.txt new file mode 100644 index 0000000..8bb6e70 --- /dev/null +++ b/NEWS.txt @@ -0,0 +1,5 @@ +2010 - 0.2 - Dedicated to ２ (U+FF12) + - 直感~ before 直感2~. + +2010.02.22 - 0.1 - Dedicated to 勘 (U+52D8) + - Initial release. diff --git a/collate/strings.py b/collate/strings.py index 8d6af99..60685c5 100644 --- a/collate/strings.py +++ b/collate/strings.py @@ -14,7 +14,8 @@ CONTINUE_ON = frozenset([ UNKNOWN, LETTER, NUMBER = range(3) -BREAKER = u"\u2029" # Paragraph break character +BREAKER = u"\u2028" # Line break character +HBREAKER = u"\u2029" # Paragraph break character INFINITY = float('inf') KEEP_IN_NUMBERS = u"'.," @@ -81,6 +82,8 @@ def sortemes(string, key=lambda s: s): elif category[0] == "N": digits.append(uchar) if letters: + if unicodedata.category(letters[-1])[0] == "L": + letters.append(HBREAKER) aletters(u"".join(letters)) letters = [] previous = LETTER