6bcfd9c5393c824e12edf84257c92866b9c48555
1 def alnumsplit(string
):
2 string
= unicode(string
)
13 elif numeric
and char
.isalpha():
15 elif not numeric
and char
.isnumeric():
19 strings
.append("".join(word
))
24 strings
.append("".join(word
))
28 """Check if a string is 'word-like'.
30 Word-like strings contain at least one alphanumeric character.
32 return any(map(type(string
).isalnum
, string
))
34 def numeric(string
, invalid
=float('inf')):
35 string
= unicode(string
)
36 if not any(map(type(string
).isnumeric
, string
)):
37 return (invalid
, string
)
42 while string
[:1] == "-" or string
[:1] == "+":
47 # Maybe we got lucky and this is a trivial case...
49 return float(string
) * mult
53 # Otherwise we need to do this the hard way.
54 return mult
* float(normalize_dots(string
))
56 def normalize_dots(string
):
57 string
= unicode(string
.strip(",.'"))
58 string
= filter(lambda u
: u
.isnumeric() or u
in ",.'", string
)
59 commas
= string
.count(",")
60 stops
= string
.count(".")
61 quotes
= string
.count("'")
63 # If anything occurs more than once, it's a separator.
65 string
= string
.replace(",", "")
68 string
= string
.replace(".", "")
71 string
= string
.replace("'", "")
74 def normalize_two(a
, b
):
75 # One of each - assume the first is grouping, second is point.
76 a_idx
= string
.rindex(a
)
77 b_idx
= string
.rindex(b
)
79 string
= string
.replace(b
, "").replace(a
, ".")
81 string
= string
.replace(a
, "").replace(b
, ".")
84 if commas
and stops
and quotes
:
85 # If all three, assume the middle is the decimal point.
90 # Not really valid, so do whatever we want...
93 comma_idx
= string
.index(",")
94 stops_idx
= string
.index(".")
95 quotes_idx
= string
.index("'")
96 if (comma_idx
< stops_idx
< quotes_idx
97 or quotes_idx
< stops_idx
< comma_idx
):
98 string
= string
.replace(",", "").replace("'", "")
99 elif (comma_idx
< quotes_idx
< stops_idx
100 or stops_idx
< quotes_idx
< comma_idx
):
101 string
= string
.replace(",", "").replace(".", "").replace("'", ".")
103 string
= string
.replace("'", "").replace(".", "").replace(",", ".")
105 elif stops
and quotes
:
106 string
= normalize_two('.', "'")
108 elif commas
and quotes
:
109 string
= normalize_two(',', "'")
111 elif commas
and stops
:
112 string
= normalize_two(',', '.')
115 if string
[-4:-3] == "," and len(string
) <= 7:
116 # Single comma as a thousands separator.
117 string
= string
.replace(",", "")
119 # Single comma, not thousands - probably a decimal point.
120 string
= string
.replace(",", ".")
123 # Single quote, probably MM'SS", equivalent to a decimal point.
124 string
= string
.replace("'", ".")