X-Git-Url: https://git.korewanetadesu.com/?p=python-collate.git;a=blobdiff_plain;f=collate%2Ficu%2F_ucol.pyx;fp=collate%2Ficu%2F_ucol.pyx;h=0000000000000000000000000000000000000000;hp=e54dd5319604a1b0cdf0fde89aadcb0a26951d73;hb=f73c4c6cd3ed326c5735ab33a6896697227d07e3;hpb=08e8f0a8bb8d7276d114087f8ff8dbce4acdb1cf diff --git a/collate/icu/_ucol.pyx b/collate/icu/_ucol.pyx deleted file mode 100644 index e54dd53..0000000 --- a/collate/icu/_ucol.pyx +++ /dev/null @@ -1,199 +0,0 @@ -############################################################################## -# -# Copyright (c) 2004 Zope Corporation and Contributors. -# All Rights Reserved. -# -# This software is subject to the provisions of the Zope Public License, -# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. -# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED -# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS -# FOR A PARTICULAR PURPOSE. 
#
##############################################################################
"""Simple wrapper for ICU ucol API

"""

import sys

cdef extern from "unicode/utypes.h":

    cdef enum UErrorCode:
        U_USING_DEFAULT_WARNING = -127
        U_USING_FALLBACK_WARNING = -128
        U_ZERO_ERROR = 0
        U_ILLEGAL_ARGUMENT_ERROR = 1
    ctypedef int int32_t
    ctypedef char uint8_t
    int U_FAILURE(UErrorCode status)

cdef extern from "unicode/utf.h":

    ctypedef int UChar
    ctypedef int UChar32

cdef extern from "unicode/ustring.h":

    UChar *u_strFromUTF32(UChar *dest, int32_t destCapacity,
                          int32_t *pDestLength,
                          UChar32 *src, int32_t srcLength,
                          UErrorCode *status)

cdef extern from "unicode/ucol.h":

    ctypedef struct UCollator:
        pass
    UCollator *ucol_open(char *locale, UErrorCode *status)
    void ucol_close(UCollator *collator)
    int32_t ucol_getSortKey(UCollator *coll,
                            UChar *source, int32_t sourceLength,
                            uint8_t *result,
                            int32_t resultLength
                            )
    int ucol_strcoll(UCollator *coll,
                     UChar *source, int32_t sourceLength,
                     UChar *target, int32_t targetLength)

cdef extern from "Python.h":

    int PyUnicode_Check(ob)
    int PyString_Check(ob)

    ctypedef int Py_UNICODE
    Py_UNICODE *PyUnicode_AS_UNICODE(ob)
    int PyUnicode_GET_SIZE(ob)
    char *PyString_AS_STRING(ob)

    void *PyMem_Malloc(int size)
    void PyMem_Free(void *p)
    object PyString_FromStringAndSize(char *v, int l)


cdef class UCharString:
    """Wrapper for ICU UChar arrays

    Holds the ICU (UChar) representation of a Python unicode string.

    Attributes (read-only from Python):
      length       -- number of UChar units in the array
      base         -- the unicode object whose buffer is borrowed, when the
                      data is not a private copy
      need_to_free -- true when the array was allocated here and must be
                      released on deallocation

    Raises TypeError if text is neither a unicode nor a byte string,
    MemoryError if the conversion buffer cannot be allocated, and
    ValueError if ICU fails to convert the data.
    """

    cdef UChar *data
    cdef readonly int32_t length
    cdef readonly object base
    cdef readonly int need_to_free

    def __cinit__(self, text):
        cdef int32_t buffsize
        cdef UErrorCode status
        cdef Py_UNICODE *src
        cdef int length

        if not PyUnicode_Check(text):
            if PyString_Check(text):
                # Byte strings go through the interpreter's default decoding.
                text = unicode(text)
                assert PyUnicode_Check(text)
            else:
                raise TypeError("Expected unicode string")

        length = PyUnicode_GET_SIZE(text)
        src = PyUnicode_AS_UNICODE(text)

        if sizeof(Py_UNICODE) == 2:
            # Python's own buffer is reused as-is (assumes ICU's UChar is
            # also 2 bytes wide); base keeps the owning string alive.
            self.data = <UChar*>src
            self.length = length
            self.base = text
            self.need_to_free = 0
        else:
            # Two UChars per source character plus one spare slot
            # (presumably surrogate pairs and a terminator).
            buffsize = 2*length + 1
            self.data = <UChar*>PyMem_Malloc(buffsize*sizeof(UChar))
            if self.data == NULL:
                raise MemoryError
            # Take ownership immediately so __dealloc__ releases the buffer
            # even if one of the checks below raises.
            self.need_to_free = 1
            status = U_ZERO_ERROR
            u_strFromUTF32(self.data, buffsize, &(self.length),
                           <UChar32*>src, length, &status)
            # Report conversion failures before sanity-checking the length,
            # so a failed conversion surfaces as ValueError.
            if U_FAILURE(status):
                raise ValueError(
                    "Couldn't convert Python unicode data to ICU unicode data."
                    )
            assert self.length <= buffsize

    def __dealloc__(self):
        if self.need_to_free and self.data != NULL:
            PyMem_Free(self.data)
            self.data = NULL


cdef class Collator:
    """Compute a collation key for a unicode string.

    Wraps an ICU collator opened for the given locale (a byte string).

    Attributes (read-only from Python):
      locale                   -- the locale string the collator was
                                  opened with
      used_default_information -- 1 when ICU reported a default or fallback
                                  warning while opening the collator,
                                  otherwise the status ICU returned

    Raises TypeError if locale is not a byte string and ValueError if the
    collator cannot be created.
    """

    cdef UCollator *collator
    cdef readonly object locale
    cdef readonly int used_default_information

    def __cinit__(self, locale):
        cdef UCollator *collator
        cdef UErrorCode status

        if not PyString_Check(locale):
            raise TypeError("String locale expected")

        status = U_ZERO_ERROR
        collator = ucol_open(PyString_AS_STRING(locale), &status)
        if U_FAILURE(status) or collator == NULL:
            # Never keep a half-open handle around.
            if collator != NULL:
                ucol_close(collator)
            raise ValueError("Couldn't create a collator")
        self.collator = collator
        self.locale = locale
        if (status == U_USING_DEFAULT_WARNING
            or status == U_USING_FALLBACK_WARNING):
            # Collapse both warnings into a simple true (1) flag.
            status = U_ILLEGAL_ARGUMENT_ERROR
        self.used_default_information = status

    def __dealloc__(self):
        if self.collator != NULL:
            ucol_close(self.collator)

    def key(self, text):
        """Compute a collation key for the given unicode text.

        Of course, the key is only valid for the given locale.

        Returns a byte string holding the sort key produced by
        ucol_getSortKey, with the length that call reported.  Raises
        MemoryError if the key buffer cannot be allocated, plus whatever
        UCharString raises for unsuitable text.
        """
        cdef UCharString icutext
        cdef char *keybuf
        cdef int32_t bufsize
        cdef int32_t size

        icutext = UCharString(text)
        bufsize = icutext.length*2+10

        # the +1 below is needed to avoid an apparent buffer overflow bug
        # in ICU
        keybuf = <char*>PyMem_Malloc(bufsize + 1)
        if keybuf == NULL:
            raise MemoryError
        try:
            size = ucol_getSortKey(self.collator,
                                   icutext.data,
                                   icutext.length,
                                   <uint8_t*>keybuf, bufsize)
            # ICU reports the size it needs; grow and retry until it fits.
            while size > bufsize:
                bufsize = size
                PyMem_Free(keybuf)
                keybuf = <char*>PyMem_Malloc(bufsize + 1)  # See above +1
                if keybuf == NULL:
                    raise MemoryError
                size = ucol_getSortKey(self.collator,
                                       icutext.data,
                                       icutext.length,
                                       <uint8_t*>keybuf, bufsize)

            return PyString_FromStringAndSize(keybuf, size)
        finally:
            # Runs on every exit path; PyMem_Free(NULL) is a no-op, so this
            # is safe after a failed reallocation too.
            PyMem_Free(keybuf)

    def cmp(self, o1, o2):
        """Compare two strings using this collator.

        Both arguments are converted with UCharString.  Returns the integer
        result of ucol_strcoll for (o1, o2).
        """
        cdef UCharString u1
        cdef UCharString u2

        u1 = UCharString(o1)
        u2 = UCharString(o2)
        return ucol_strcoll(
            self.collator,
            u1.data,
            u1.length,
            u2.data,
            u2.length,
            )