projects
/
python-collate.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
strings.sortemes: Use a line break to separate letters and non-continuing-characters...
[python-collate.git]
/
collate
/
strings.py
diff --git
a/collate/strings.py
b/collate/strings.py
index
8d6af99
..
60685c5
100644
(file)
--- a/
collate/strings.py
+++ b/
collate/strings.py
@@
-14,7
+14,8
@@
CONTINUE_ON = frozenset([
UNKNOWN, LETTER, NUMBER = range(3)
UNKNOWN, LETTER, NUMBER = range(3)
-BREAKER = u"\u2029" # Paragraph break character
+BREAKER = u"\u2028" # Line break character
+HBREAKER = u"\u2029" # Paragraph break character
INFINITY = float('inf')
KEEP_IN_NUMBERS = u"'.,"
INFINITY = float('inf')
KEEP_IN_NUMBERS = u"'.,"
@@
-81,6
+82,8
@@
def sortemes(string, key=lambda s: s):
elif category[0] == "N":
digits.append(uchar)
if letters:
elif category[0] == "N":
digits.append(uchar)
if letters:
+ if unicodedata.category(letters[-1])[0] == "L":
+ letters.append(HBREAKER)
aletters(u"".join(letters))
letters = []
previous = LETTER
aletters(u"".join(letters))
letters = []
previous = LETTER