2010 - 0.2 - Dedicated to ２ (U+FF12)
- 直感~ before 直感2~.
+ - Detect Unicode Roman numeral codepoints and sort them numerically.
2010.02.22 - 0.1 - Dedicated to 勘 (U+52D8)
- Initial release.
# Apostrophe, full stop and comma.
# NOTE(review): presumably the separator characters that are preserved when
# they occur inside a numeric token -- confirm against the number parser.
KEEP_IN_NUMBERS = u"'.,"
# Everything in KEEP_IN_NUMBERS plus the underscore.
# NOTE(review): presumably the full set of non-digit characters tolerated
# inside a number -- confirm against the number parser.
ALLOWED_IN_NUMBERS = KEEP_IN_NUMBERS + u"_"
# Value of every character that may appear in a Roman numeral once the text
# has been NFKD-normalized and lowercased (which is what deroman() does before
# looking characters up here).  Only lowercase / caseless keys are needed.
ROMAN = {
    # Plain Latin letters.
    u"i": 1,
    u"v": 5,
    u"x": 10,
    u"l": 50,
    u"c": 100,
    u"d": 500,
    u"m": 1000,
    # Number Forms code points that have no compatibility decomposition.
    u"\u2180": 1000,    # ROMAN NUMERAL ONE THOUSAND C D
    u"\u2181": 5000,    # ROMAN NUMERAL FIVE THOUSAND
    u"\u2182": 10000,   # ROMAN NUMERAL TEN THOUSAND
    u"\u2183": 100,     # ROMAN NUMERAL REVERSED ONE HUNDRED
    u"\u2184": 100,     # LATIN SMALL LETTER REVERSED C (lowercase of U+2183)
    u"\u2185": 6,       # ROMAN NUMERAL SIX LATE FORM
    u"\u2186": 50,      # ROMAN NUMERAL FIFTY EARLY FORM
    u"\u2187": 50000,   # ROMAN NUMERAL FIFTY THOUSAND
    u"\u2188": 100000,  # ROMAN NUMERAL ONE HUNDRED THOUSAND
}
+
def stripends(word):
"""Strip punctuation and symbols from the ends of a string."""
while word and unicodedata.category(word[0])[0] in "PS":
else:
return invalid
+ for char in string:
+ if u"\u2160" <= char <= u"\u2188":
+ return deroman(string)
+
mult = 1
while string[:1] == u"-" or string[:1] == u"+":
if string[:1] == u"-":
string = string.replace(u".", u"")
return string or "NaN"
+
def deroman(string):
    """Turn a Roman numeral into an integer.

    The input is coerced to unicode, NFKD-normalized and lowercased, so that
    precomposed numeral code points (for example U+2167 ROMAN NUMERAL EIGHT,
    which decomposes to "VIII") become plain letters before lookup.

    Characters that have no entry in ROMAN are skipped rather than treated
    as an error, so a string with no numeral characters at all yields 0.

    Returns the integer value of the numeral.
    """
    string = unicodedata.normalize('NFKD', unicode(string)).lower()
    previous = 0
    building = 0
    # Walk from the least significant end: a symbol smaller than the one to
    # its right (e.g. the "i" in "iv") is subtractive, anything else adds.
    for char in reversed(string):
        value = ROMAN.get(char)
        if value is None:
            # Not a numeral character; ignore it and keep `previous` as is.
            continue
        if value < previous:
            building -= value
        else:
            building += value
        previous = value
    return building