dd7ea1430445548736e8f3d754ab9325210ac117
1 """Abstract base collator."""
7 class Collator(object):
8 """Abstract base class for Collators.
11 locale - the collator follows rules for this locale
12 encoding - assumed string encoding
18 def __init__(self
, locale
=None, encoding
=None):
def cmp(self, string1, string2):
    """Compare two strings by their sort keys.

    Both arguments are passed through self.key() and the resulting
    keys are compared with each other.

    Returns a negative number if string1 sorts before string2, zero
    if their keys are equal, and a positive number if string1 sorts
    after string2.
    """
    key1 = self.key(string1)
    key2 = self.key(string2)
    # The cmp() builtin only exists on Python 2.  Subtracting the two
    # boolean comparisons yields the same -1/0/1 result everywhere.
    return (key1 > key2) - (key1 < key2)
def key(self, string):
    """Build the sort key used to order the given string.

    Treat the result as an opaque value: it is only meaningful when
    compared with other sort keys.  This implementation hands back
    the result of self.unicode(string) unchanged.
    """
    decoded = self.unicode(string)
    return decoded
def words(self, string, sep=re.compile(r"(\s+)", re.UNICODE)):
    """Break the string into a list of words.

    The string is first converted with self.unicode() and then split
    on sep.  The default pattern captures each run of whitespace, so
    those runs are kept in the result between the words.
    """
    text = self.unicode(string)
    pieces = re.split(sep, text)
    return pieces
38 def sortemekey(self
, string
):
39 """Return a key based on sortemes of a string.
41 A sorteme, by analogy with grapheme/morpheme/etc. is an atom
42 of sort information. This is larger than a word boundary but
43 smaller than a sentence boundary; roughly, a sorteme boundary
44 occurs between letters and numbers, between numbers and
45 numbers if 'too much' punctuation exists in between, between
48 string
= self
.unicode(string
)
49 # Shove the sortkeyed original string on the end to resolve
51 return (collate
.strings
.sortemes(string
, self
.key
),
54 def unicode(self
, string
):
55 """Convert a str to a unicode using the collator encoding."""
57 return unicode(string
)
59 return string
.decode(self
.encoding
, 'replace')
61 def str(self
, string
):
62 """Convert a unicode to a str using the collator encoding."""
66 return string
.encode(self
.encoding
, 'replace')
69 self
, string
, strip
=collate
.strings
.INITIAL_STOPS
, append
=u
", "):
70 """Strip words and whitespace from the start of a string.
72 If append is not empty, it and the words stripped from the
73 front are appended to the end.
75 string
= self
.unicode(string
)
77 words
= self
.words(string
)
78 while words
and (words
[0].isspace() or words
[0].lower() in strip
):
79 stripped
.append(words
.pop(0))
80 while stripped
and stripped
[-1].isspace():
82 if append
and stripped
:
85 words
.extend(stripped
)
86 return u
"".join(words
)
89 self
, string
, strip
=collate
.strings
.INITIAL_STOPS
, append
=u
", "):
90 """Return a key based on sortemes of a prefix-stripped string."""
91 string
= self
.unicode(string
)
92 stripped
= self
.lstripwords(string
, strip
, append
)
93 return (self
.sortemekey(stripped
), self
.key(string
))