622766d3b5056c27ca9b9a98ce4609961dd11c1d
[python-collate.git] / collate / _abcollator.py
1 """Abstract base collator."""
2
3 import collate.strings
4
class Collator(object):
    """Abstract base class for Collators.

    This base implementation applies no locale-specific rules: byte
    strings are decoded using ``encoding`` and the resulting text is
    used directly as the sort key.

    Attributes:
    locale - the collator follows rules for this locale
    encoding - assumed string encoding
    """

    locale = "C"
    encoding = "ascii"

    def __init__(self, locale=None, encoding=None):
        """Create a collator.

        Both arguments are accepted but ignored by this base
        implementation; the class-level ``locale`` and ``encoding``
        values stay in effect.
        """
        pass

    def _decode(self, string):
        """Return ``string`` as text, decoding it if it is a byte string.

        Byte strings are decoded with ``self.encoding``; undecodable
        bytes are replaced instead of raising. Any other value is
        returned unchanged.
        """
        # ``bytes`` is an alias of ``str`` on Python 2 and the distinct
        # byte-string type on Python 3, so this check only ever decodes
        # values that actually have a ``decode`` method.
        if isinstance(string, bytes):
            return string.decode(self.encoding, 'replace')
        return string

    def cmp(self, string1, string2):
        """Return negative if a < b, zero if a == b, positive if a > b.

        The comparison is made on the sort keys of the two strings, not
        on the strings themselves.
        """
        key1 = self.key(string1)
        key2 = self.key(string2)
        # Equivalent to the Python 2 builtin cmp(), which no longer
        # exists on Python 3.
        return (key1 > key2) - (key1 < key2)

    def key(self, string):
        """Return a good sorting key for the string.

        The sort key should be considered an opaque value which is
        only meaningful when compared to other sort keys from the same
        collator.
        """
        return self._decode(string)

    def words(self, string):
        """Split the string along word boundaries.

        Returns a list of the whitespace-separated pieces of the
        (decoded) string.
        """
        return self._decode(string).split()

    def sortemekey(self, string):
        """Return a key based on sortemes of a string.

        A sorteme, by analogy with grapheme/morpheme/etc. is an atom
        of sort information. This is larger than a word boundary but
        smaller than a sentence boundary; roughly, a sorteme boundary
        occurs between letters and numbers, between numbers and
        numbers if 'too much' punctuation exists in between, between
        lines.

        Returns a 2-tuple: the result of collate.strings.sortemes()
        for the string, followed by the plain sort key of the string.
        """
        string = self._decode(string)

        # Shove the sortkeyed original string on the end to resolve
        # ties intelligently.
        return (collate.strings.sortemes(string, self.key),
                self.key(string))
57