+"""Abstract base collator."""
+
import collate.strings
class Collator(object):
+ """Abstract base class for Collators.
+
+ Attributes:
+ locale - the collator follows rules for this locale
+ encoding - assumed string encoding
+ """
+
+ locale = "C"
encoding = "ascii"
+ def __init__(self, locale=None, encoding=None):
+ pass
+
def cmp(self, string1, string2):
"""Return negative if a < b, zero if a == b, positive if a > b."""
return cmp(self.key(string1), self.key(string2))
def key(self, string):
+ """Return a good sorting key for the string.
+
+ The sort key should be considered an opaque value which is
+ only meaningful when compared to other sort keys from the same
+ collator.
+ """
+ if isinstance(string, str):
+ string = string.decode(self.encoding, 'replace')
return string
def words(self, string):
def sortemekey(self, string):
"""Return a key based on sortemes of a string.
- If the string is a str instance, it is decoded to a unicode
- instance according to the 'encoding' attribute of the
- Collator.
+ A sorteme, by analogy with grapheme/morpheme/etc. is an atom
+ of sort information. This is larger than a word boundary but
+ smaller than a sentence boundary; roughly, a sorteme boundary
+ occurs between letters and numbers, between numbers and
+ numbers if 'too much' punctuation exists in between, between
+ lines.
"""
if isinstance(string, str):
string = string.decode(self.encoding, 'replace')