aboutsummaryrefslogtreecommitdiff
blob: 0728761d6c442faa68c2ff4a66415c8dc38353c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from rpython.rlib import rutf8

from pypy.interpreter.baseobjspace import W_Root
from pypy.interpreter.gateway import interp2app, unwrap_spec
from pypy.interpreter.typedef import TypeDef
from pypy.interpreter.error import OperationError, oefmt
from pypy.module._multibytecodec import c_codecs
from pypy.module._codecs.interp_codecs import CodecState


# Interpreter-level codec object: pairs a codec name with the codec handle
# that is passed to c_codecs.decode() / c_codecs.encode().
class MultibyteCodec(W_Root):
    def __init__(self, name, codec):
        self.codec = codec
        self.name = name

    # Decode the byte string `input` with this codec.
    # `errors` falls back to 'strict' when None.  Returns a wrapped 2-tuple:
    # the decoded text and len(input).  c_codecs.EncodeDecodeError is
    # re-raised through wrap_unicodedecodeerror(), RuntimeError through
    # wrap_runtimeerror().
    @unwrap_spec(input='bytes', errors="text_or_none")
    def decode(self, space, input, errors=None):
        if errors is None:
            errors = 'strict'
        codec_state = space.fromcache(CodecState)
        on_error = codec_state.decode_error_handler
        try:
            decoded = c_codecs.decode(self.codec, input, errors,
                                      on_error, self.name)
        except c_codecs.EncodeDecodeError as err:
            raise wrap_unicodedecodeerror(space, err, input, self.name)
        except RuntimeError:
            raise wrap_runtimeerror(space)
        # check_utf8 supplies the length and flag that newutf8() needs
        length, flag = rutf8.check_utf8(decoded, True)
        w_text = space.newutf8(decoded, length, flag)
        w_consumed = space.newint(len(input))
        return space.newtuple([w_text, w_consumed])

    # Encode the utf-8 string `input` with this codec.
    # `errors` falls back to 'strict' when None.  Returns a wrapped 2-tuple:
    # the encoded bytes and the length of the input once decoded from utf-8.
    # c_codecs.EncodeDecodeError is re-raised through
    # wrap_unicodeencodeerror(), RuntimeError through wrap_runtimeerror().
    @unwrap_spec(input='utf8', errors="text_or_none")
    def encode(self, space, input, errors=None):
        if errors is None:
            errors = 'strict'
        codec_state = space.fromcache(CodecState)
        # c_codecs.encode() is handed the decoded form of the utf-8 input
        unicode_input = input.decode('utf8')
        on_error = codec_state.encode_error_handler
        try:
            encoded = c_codecs.encode(self.codec, unicode_input, errors,
                                      on_error, self.name)
        except c_codecs.EncodeDecodeError as err:
            raise wrap_unicodeencodeerror(space, err, input,
                                          len(unicode_input), self.name)
        except RuntimeError:
            raise wrap_runtimeerror(space)
        w_bytes = space.newbytes(encoded)
        w_consumed = space.newint(len(unicode_input))
        return space.newtuple([w_bytes, w_consumed])


# Type definition for MultibyteCodec, named 'MultibyteCodec'; its decode and
# encode methods are wrapped with interp2app.
MultibyteCodec.typedef = TypeDef(
    'MultibyteCodec',
    decode = interp2app(MultibyteCodec.decode),
    encode = interp2app(MultibyteCodec.encode),
    )
# NOTE(review): presumably this prevents application-level code from
# subclassing the type -- confirm against TypeDef.
MultibyteCodec.typedef.acceptable_as_base_class = False


# Look up the codec called `name` via c_codecs.getcodec() and wrap the result
# in a MultibyteCodec.  A KeyError from the lookup is turned into an
# application-level LookupError.
@unwrap_spec(name='text')
def getcodec(space, name):
    try:
        handle = c_codecs.getcodec(name)
    except KeyError:
        raise oefmt(space.w_LookupError, "no such codec is supported.")
    else:
        return MultibyteCodec(name, handle)


def wrap_unicodedecodeerror(space, e, input, name):
    """Build (and return, not raise) an OperationError for UnicodeDecodeError.

    The exception arguments are, in order: the codec `name`, the `input`
    bytes, `e.start`, `e.end` and `e.reason`.  The caller is expected to
    raise the returned object.
    """
    w_exc_type = space.w_UnicodeDecodeError
    w_args = space.newtuple([
        space.newtext(name),
        space.newbytes(input),
        space.newint(e.start),
        space.newint(e.end),
        space.newtext(e.reason),
    ])
    return OperationError(w_exc_type, w_args)

def wrap_unicodeencodeerror(space, e, input, inputlen, name):
    """Raise an OperationError for UnicodeEncodeError.

    `input` is the utf-8 string that was being encoded and `inputlen` the
    length passed to space.newutf8() alongside it.  The exception arguments
    are, in order: the codec `name`, the wrapped input, `e.start`, `e.end`
    and `e.reason`.

    Note that this function raises by itself instead of returning the
    exception, so a `raise wrap_unicodeencodeerror(...)` at the call site
    never gets to evaluate its own `raise`.
    """
    # only the flag is needed here; the length comes from `inputlen`
    _, flag = rutf8.check_utf8(input, True)
    w_exc_type = space.w_UnicodeEncodeError
    w_args = space.newtuple([
        space.newtext(name),
        space.newutf8(input, inputlen, flag),
        space.newint(e.start),
        space.newint(e.end),
        space.newtext(e.reason),
    ])
    raise OperationError(w_exc_type, w_args)

def wrap_runtimeerror(space):
    """Raise an application-level RuntimeError for an internal codec failure.

    This function raises by itself rather than returning the exception.
    """
    w_exc_type = space.w_RuntimeError
    raise oefmt(w_exc_type, "internal codec error")