+ try:
+ self._breaker = _icu.WordBreaker(icu_locale)
+ except ValueError:
+ # Thai is the only language with a special break locale,
+ # so this is a harmless error.
+ self._breaker = _icu.WordBreaker("root")
+
def words(self, string):
    """Return the non-whitespace segments of ``string`` as a list.

    ``string`` may be text or a byte string.  Byte strings are decoded
    with ``self.encoding`` first; undecodable bytes are replaced rather
    than raising.  The text is then handed to ``self._breaker.words`` and
    every segment consisting solely of whitespace is dropped.  Empty
    segments, if the breaker produces any, are kept because
    ``"".isspace()`` is false.
    """
    # Test against ``bytes`` rather than ``str``: on Python 2 the two are
    # the same type, while on Python 3 only ``bytes`` has ``.decode`` and
    # ``str`` is already text.
    if isinstance(string, bytes):
        string = string.decode(self.encoding, 'replace')
    # A comprehension yields a real list on both Python 2 and Python 3,
    # whereas ``filter`` is lazy on Python 3.
    return [
        segment
        for segment in self._breaker.words(string)
        if not segment.isspace()
    ]
+