--- /dev/null
def alnumsplit(string):
    """Split a string into alternating alphabetic and numeric runs.

    The input is converted to text and scanned one character at a time.
    A new chunk starts whenever a numeric character follows an
    alphabetic run, or an alphabetic character follows a numeric run.
    Characters that are neither (spaces, punctuation, ...) never start
    a chunk; they stay attached to the chunk currently being built.
    A character that is both numeric and alphabetic is treated as
    numeric.

    Returns a list of text chunks whose concatenation equals the
    converted input. An empty input gives an empty list.
    """
    try:
        text = unicode(string)  # Python 2 text type
    except NameError:
        text = str(string)  # Python 3: str is the text type

    chunks = []
    current = []
    # Kind of the run being built: True for numeric, False for
    # alphabetic, None while only unclassified characters were seen.
    in_number = None
    for char in text:
        if char.isnumeric():
            kind = True
        elif char.isalpha():
            kind = False
        else:
            kind = None

        if kind is not None:
            if in_number is not None and kind != in_number:
                chunks.append("".join(current))
                current = []
            # Remember the kind of the character that opened the new
            # run; forgetting it would let the following character
            # re-classify the run and glue e.g. "1b" together.
            in_number = kind
        current.append(char)

    if current:
        chunks.append("".join(current))
    return chunks
+
def wordlike(string):
    """Tell whether a string looks like a word.

    A string counts as word-like when at least one of its characters
    is alphanumeric; the empty string is therefore not word-like.
    """
    # Use the method of the string's own type, applied to each character.
    is_alnum = type(string).isalnum
    for char in string:
        if is_alnum(char):
            return True
    return False
+
def numeric(string, invalid=float('inf')):
    """Interpret a string as a number, tolerating signs and separators.

    The input is converted to text. Any run of leading "+" / "-" signs
    is folded into a single sign, then the remainder is parsed with
    float(). If that fails, the text is passed through normalize_dots()
    to strip grouping separators and pick a decimal point, and parsed
    again.

    Args:
        string: the value to interpret; converted to text first.
        invalid: the value placed first in the result when no number
            can be extracted (positive infinity by default).

    Returns:
        A float when the text could be parsed. Otherwise the tuple
        ``(invalid, text)`` where text is the input converted to text.
        Note the two result shapes differ; callers must handle both.
    """
    try:
        text = unicode(string)  # Python 2 text type
    except NameError:
        text = str(string)  # Python 3: str is the text type

    # Without a single numeric character there is nothing to parse.
    # This also covers the empty string.
    if not any(char.isnumeric() for char in text):
        return (invalid, text)

    sign = 1
    digits = text
    while digits[:1] in ("-", "+"):
        if digits[0] == "-":
            sign = -sign
        digits = digits[1:]

    # Maybe we got lucky and this is a trivial case...
    try:
        return float(digits) * sign
    except ValueError:
        pass

    # Otherwise we need to do this the hard way.
    try:
        return sign * float(normalize_dots(digits))
    except ValueError:
        # Some characters are numeric without being parseable by
        # float() (fractions, superscripts, ...); report them as
        # invalid instead of letting the error escape.
        return (invalid, text)
+
def normalize_dots(string):
    """Rewrite a number with assorted separators into float() syntax.

    Leading and trailing separators are stripped, then every character
    that is neither numeric nor one of ``, . '`` is dropped. The
    remaining separators are resolved with these heuristics:

    * a separator that occurs more than once is a grouping mark and is
      removed;
    * with all three separators present once, the middle one (by
      position) is the decimal point;
    * with two different separators present once, the later one is the
      decimal point and the earlier one is a grouping mark;
    * a lone comma is a thousands separator when it sits exactly three
      characters from the end of a string of at most seven characters,
      otherwise a decimal point;
    * a lone quote is treated as a decimal point (as in MM'SS");
    * a lone stop is left as it is.

    Returns the rewritten text, in which "." is the only separator
    that can remain.
    """
    try:
        text = unicode(string)  # Python 2 text type
    except NameError:
        text = str(string)  # Python 3: str is the text type
    text = text.strip(",.'")
    # Build a string explicitly: filter() returns an iterator on
    # Python 3, which has no count()/replace().
    text = "".join(
        char for char in text if char.isnumeric() or char in ",.'")

    # If anything occurs more than once, it's a separator.
    for separator in ",.'":
        if text.count(separator) > 1:
            text = text.replace(separator, "")

    has_comma = "," in text
    has_stop = "." in text
    has_quote = "'" in text

    def resolve_pair(first, second):
        # One of each - the earlier one is grouping, the later one is
        # the decimal point. Only reads `text`, never rebinds it, so
        # the closure lookup is safe.
        if text.rindex(first) > text.rindex(second):
            grouping, point = second, first
        else:
            grouping, point = first, second
        return text.replace(grouping, "").replace(point, ".")

    if has_comma and has_stop and has_quote:
        # If all three, assume the middle is the decimal point.
        #   A,AAA.BB'CC
        #   A.AAA,BB'CC
        #   A,AAA'BB.CC
        #   A.AAA'BB,CC
        # Not really valid, so do whatever we want...
        #   A'AAA.BB,CC
        #   A'AAA,BB.CC
        point = sorted(",.'", key=text.index)[1]
        for separator in ",.'":
            if separator != point:
                text = text.replace(separator, "")
        text = text.replace(point, ".")

    elif has_stop and has_quote:
        text = resolve_pair(".", "'")

    elif has_comma and has_quote:
        text = resolve_pair(",", "'")

    elif has_comma and has_stop:
        text = resolve_pair(",", ".")

    elif has_comma:
        if text[-4:-3] == "," and len(text) <= 7:
            # Single comma as a thousands separator.
            text = text.replace(",", "")
        else:
            # Single comma, not thousands - probably a decimal point.
            text = text.replace(",", ".")

    elif has_quote:
        # Single quote, probably MM'SS", equivalent to a decimal point.
        text = text.replace("'", ".")

    return text