# Value of each Roman-numeral character, keyed by the lowercase/decomposed
# form that deroman() looks up. The ASCII letters cover U+2160..U+217F after
# NFKD normalization; U+2180..U+2188 are listed explicitly.
ROMAN = {
    u"i": 1,
    u"v": 5,
    u"x": 10,
    u"l": 50,
    u"c": 100,
    u"d": 500,
    u"m": 1000,
    u"\u2180": 1000,
    u"\u2181": 5000,
    u"\u2182": 10000,
    u"\u2183": 100,
    u"\u2184": 100,
    u"\u2185": 6,
    u"\u2186": 50,
    u"\u2187": 50000,
    u"\u2188": 100000,
}


def _as_text(value):
    """Coerce *value* to a text string on both Python 2 and Python 3."""
    try:
        text_type = unicode  # Python 2: unicode() also decodes byte strings.
    except NameError:
        # Python 3: str(bytes) would return the repr ("b'...'"), so decode
        # explicitly instead of parsing the repr as a numeral.
        if isinstance(value, bytes):
            return value.decode("ascii")
        text_type = str
    return text_type(value)


def deroman(string):
    """Turn a Roman numeral into an integer.

    The input is NFKD-normalized and lowercased, so the precomposed
    numerals decompose into the ASCII letters found in ROMAN. Characters
    that have no entry in ROMAN are skipped rather than rejected, and an
    input with no numeral characters at all yields 0.

    numeric() passes a string here as soon as it contains any character in
    the range U+2160..U+2188.

    Arguments:
    string -- the numeral, as text (or an ASCII byte string)

    Returns the integer value of the numeral.
    """
    text = unicodedata.normalize('NFKD', _as_text(string)).lower()
    previous = 0
    total = 0
    # Walk right to left: a symbol smaller than its right-hand neighbour is
    # subtractive (the "i" in "iv"), anything else is additive.
    for char in reversed(text):
        value = ROMAN.get(char)
        if value is None:
            continue
        if value < previous:
            total -= value
        else:
            total += value
        previous = value
    return total