| field | value | date |
|---|---|---|
| author | Anand Avati <avati@redhat.com> | 2013-03-06 01:11:59 -0800 |
| committer | Anand Avati <avati@redhat.com> | 2013-09-03 11:25:33 -0700 |
| commit | 0d60175bd684cf6a14f750579d82dbd1ba97fcbc (patch) | |
| tree | 1571f530548196006526442f3fc027cb623bb6fa /contrib/qemu/qobject/json-lexer.c | |
| parent | 7dbfbfd3694e02b90e8f3ce509f5279da1523a02 (diff) | |
contrib/qemu: Import qemu block source code
This imports the qemu block format source code and its minimal
set of dependency files; the next patch will use them to
implement a qemu-block format translator.
Change-Id: Ic87638972f7ea9b3df84d7a0539512a250c11c1c
BUG: 986775
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/5366
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Diffstat (limited to 'contrib/qemu/qobject/json-lexer.c')
| -rw-r--r-- | contrib/qemu/qobject/json-lexer.c | 373 | 
1 file changed, 373 insertions(+), 0 deletions(-)
```diff
diff --git a/contrib/qemu/qobject/json-lexer.c b/contrib/qemu/qobject/json-lexer.c
new file mode 100644
index 000000000..440df6039
--- /dev/null
+++ b/contrib/qemu/qobject/json-lexer.c
@@ -0,0 +1,373 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
+#include "qemu-common.h"
+#include "qapi/qmp/json-lexer.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20)
+
+/*
+ * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
+ * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
+ * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
+ * [{}\[\],:]
+ * [a-z]+
+ *
+ */
+
+enum json_lexer_state {
+    IN_ERROR = 0,
+    IN_DQ_UCODE3,
+    IN_DQ_UCODE2,
+    IN_DQ_UCODE1,
+    IN_DQ_UCODE0,
+    IN_DQ_STRING_ESCAPE,
+    IN_DQ_STRING,
+    IN_SQ_UCODE3,
+    IN_SQ_UCODE2,
+    IN_SQ_UCODE1,
+    IN_SQ_UCODE0,
+    IN_SQ_STRING_ESCAPE,
+    IN_SQ_STRING,
+    IN_ZERO,
+    IN_DIGITS,
+    IN_DIGIT,
+    IN_EXP_E,
+    IN_MANTISSA,
+    IN_MANTISSA_DIGITS,
+    IN_NONZERO_NUMBER,
+    IN_NEG_NONZERO_NUMBER,
+    IN_KEYWORD,
+    IN_ESCAPE,
+    IN_ESCAPE_L,
+    IN_ESCAPE_LL,
+    IN_ESCAPE_I,
+    IN_ESCAPE_I6,
+    IN_ESCAPE_I64,
+    IN_WHITESPACE,
+    IN_START,
+};
+
+#define TERMINAL(state) [0 ... 0x7F] = (state)
+
+/* Return whether TERMINAL is a terminal state and the transition to it
+   from OLD_STATE required lookahead.  This happens whenever the table
+   below uses the TERMINAL macro.  */
+#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
+            (json_lexer[(old_state)][0] == (terminal))
+
+static const uint8_t json_lexer[][256] =  {
+    /* double quote string */
+    [IN_DQ_UCODE3] = {
+        ['0' ... '9'] = IN_DQ_STRING,
+        ['a' ... 'f'] = IN_DQ_STRING,
+        ['A' ... 'F'] = IN_DQ_STRING,
+    },
+    [IN_DQ_UCODE2] = {
+        ['0' ... '9'] = IN_DQ_UCODE3,
+        ['a' ... 'f'] = IN_DQ_UCODE3,
+        ['A' ... 'F'] = IN_DQ_UCODE3,
+    },
+    [IN_DQ_UCODE1] = {
+        ['0' ... '9'] = IN_DQ_UCODE2,
+        ['a' ... 'f'] = IN_DQ_UCODE2,
+        ['A' ... 'F'] = IN_DQ_UCODE2,
+    },
+    [IN_DQ_UCODE0] = {
+        ['0' ... '9'] = IN_DQ_UCODE1,
+        ['a' ... 'f'] = IN_DQ_UCODE1,
+        ['A' ... 'F'] = IN_DQ_UCODE1,
+    },
+    [IN_DQ_STRING_ESCAPE] = {
+        ['b'] = IN_DQ_STRING,
+        ['f'] =  IN_DQ_STRING,
+        ['n'] =  IN_DQ_STRING,
+        ['r'] =  IN_DQ_STRING,
+        ['t'] =  IN_DQ_STRING,
+        ['/'] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING,
+        ['\''] = IN_DQ_STRING,
+        ['\"'] = IN_DQ_STRING,
+        ['u'] = IN_DQ_UCODE0,
+    },
+    [IN_DQ_STRING] = {
+        [1 ... 0xBF] = IN_DQ_STRING,
+        [0xC2 ... 0xF4] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING_ESCAPE,
+        ['"'] = JSON_STRING,
+    },
+
+    /* single quote string */
+    [IN_SQ_UCODE3] = {
+        ['0' ... '9'] = IN_SQ_STRING,
+        ['a' ... 'f'] = IN_SQ_STRING,
+        ['A' ... 'F'] = IN_SQ_STRING,
+    },
+    [IN_SQ_UCODE2] = {
+        ['0' ... '9'] = IN_SQ_UCODE3,
+        ['a' ... 'f'] = IN_SQ_UCODE3,
+        ['A' ... 'F'] = IN_SQ_UCODE3,
+    },
+    [IN_SQ_UCODE1] = {
+        ['0' ... '9'] = IN_SQ_UCODE2,
+        ['a' ... 'f'] = IN_SQ_UCODE2,
+        ['A' ... 'F'] = IN_SQ_UCODE2,
+    },
+    [IN_SQ_UCODE0] = {
+        ['0' ... '9'] = IN_SQ_UCODE1,
+        ['a' ... 'f'] = IN_SQ_UCODE1,
+        ['A' ... 'F'] = IN_SQ_UCODE1,
+    },
+    [IN_SQ_STRING_ESCAPE] = {
+        ['b'] = IN_SQ_STRING,
+        ['f'] =  IN_SQ_STRING,
+        ['n'] =  IN_SQ_STRING,
+        ['r'] =  IN_SQ_STRING,
+        ['t'] =  IN_SQ_STRING,
+        ['/'] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING,
+        ['\''] = IN_SQ_STRING,
+        ['\"'] = IN_SQ_STRING,
+        ['u'] = IN_SQ_UCODE0,
+    },
+    [IN_SQ_STRING] = {
+        [1 ... 0xBF] = IN_SQ_STRING,
+        [0xC2 ... 0xF4] = IN_SQ_STRING,
+        ['\\'] = IN_SQ_STRING_ESCAPE,
+        ['\''] = JSON_STRING,
+    },
+
+    /* Zero */
+    [IN_ZERO] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = IN_ERROR,
+        ['.'] = IN_MANTISSA,
+    },
+
+    /* Float */
+    [IN_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_DIGIT] = {
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_EXP_E] = {
+        ['-'] = IN_DIGIT,
+        ['+'] = IN_DIGIT,
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_MANTISSA_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+    },
+
+    [IN_MANTISSA] = {
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+    },
+
+    /* Number */
+    [IN_NONZERO_NUMBER] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = IN_NONZERO_NUMBER,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+        ['.'] = IN_MANTISSA,
+    },
+
+    [IN_NEG_NONZERO_NUMBER] = {
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_NONZERO_NUMBER,
+    },
+
+    /* keywords */
+    [IN_KEYWORD] = {
+        TERMINAL(JSON_KEYWORD),
+        ['a' ... 'z'] = IN_KEYWORD,
+    },
+
+    /* whitespace */
+    [IN_WHITESPACE] = {
+        TERMINAL(JSON_SKIP),
+        [' '] = IN_WHITESPACE,
+        ['\t'] = IN_WHITESPACE,
+        ['\r'] = IN_WHITESPACE,
+        ['\n'] = IN_WHITESPACE,
+    },
+
+    /* escape */
+    [IN_ESCAPE_LL] = {
+        ['d'] = JSON_ESCAPE,
+    },
+
+    [IN_ESCAPE_L] = {
+        ['d'] = JSON_ESCAPE,
+        ['l'] = IN_ESCAPE_LL,
+    },
+
+    [IN_ESCAPE_I64] = {
+        ['d'] = JSON_ESCAPE,
+    },
+
+    [IN_ESCAPE_I6] = {
+        ['4'] = IN_ESCAPE_I64,
+    },
+
+    [IN_ESCAPE_I] = {
+        ['6'] = IN_ESCAPE_I6,
+    },
+
+    [IN_ESCAPE] = {
+        ['d'] = JSON_ESCAPE,
+        ['i'] = JSON_ESCAPE,
+        ['p'] = JSON_ESCAPE,
+        ['s'] = JSON_ESCAPE,
+        ['f'] = JSON_ESCAPE,
+        ['l'] = IN_ESCAPE_L,
+        ['I'] = IN_ESCAPE_I,
+    },
+
+    /* top level rule */
+    [IN_START] = {
+        ['"'] = IN_DQ_STRING,
+        ['\''] = IN_SQ_STRING,
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_NONZERO_NUMBER,
+        ['-'] = IN_NEG_NONZERO_NUMBER,
+        ['{'] = JSON_OPERATOR,
+        ['}'] = JSON_OPERATOR,
+        ['['] = JSON_OPERATOR,
+        [']'] = JSON_OPERATOR,
+        [','] = JSON_OPERATOR,
+        [':'] = JSON_OPERATOR,
+        ['a' ... 'z'] = IN_KEYWORD,
+        ['%'] = IN_ESCAPE,
+        [' '] = IN_WHITESPACE,
+        ['\t'] = IN_WHITESPACE,
+        ['\r'] = IN_WHITESPACE,
+        ['\n'] = IN_WHITESPACE,
+    },
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
+{
+    lexer->emit = func;
+    lexer->state = IN_START;
+    lexer->token = qstring_new();
+    lexer->x = lexer->y = 0;
+}
+
+static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
+{
+    int char_consumed, new_state;
+
+    lexer->x++;
+    if (ch == '\n') {
+        lexer->x = 0;
+        lexer->y++;
+    }
+
+    do {
+        new_state = json_lexer[lexer->state][(uint8_t)ch];
+        char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
+        if (char_consumed) {
+            qstring_append_chr(lexer->token, ch);
+        }
+
+        switch (new_state) {
+        case JSON_OPERATOR:
+        case JSON_ESCAPE:
+        case JSON_INTEGER:
+        case JSON_FLOAT:
+        case JSON_KEYWORD:
+        case JSON_STRING:
+            lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
+            /* fall through */
+        case JSON_SKIP:
+            QDECREF(lexer->token);
+            lexer->token = qstring_new();
+            new_state = IN_START;
+            break;
+        case IN_ERROR:
+            /* XXX: To avoid having previous bad input leaving the parser in an
+             * unresponsive state where we consume unpredictable amounts of
+             * subsequent "good" input, percolate this error state up to the
+             * tokenizer/parser by forcing a NULL object to be emitted, then
+             * reset state.
+             *
+             * Also note that this handling is required for reliable channel
+             * negotiation between QMP and the guest agent, since chr(0xFF)
+             * is placed at the beginning of certain events to ensure proper
+             * delivery when the channel is in an unknown state. chr(0xFF) is
+             * never a valid ASCII/UTF-8 sequence, so this should reliably
+             * induce an error/flush state.
+             */
+            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
+            QDECREF(lexer->token);
+            lexer->token = qstring_new();
+            new_state = IN_START;
+            lexer->state = new_state;
+            return 0;
+        default:
+            break;
+        }
+        lexer->state = new_state;
+    } while (!char_consumed && !flush);
+
+    /* Do not let a single token grow to an arbitrarily large size,
+     * this is a security consideration.
+     */
+    if (lexer->token->length > MAX_TOKEN_SIZE) {
+        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
+        QDECREF(lexer->token);
+        lexer->token = qstring_new();
+        lexer->state = IN_START;
+    }
+
+    return 0;
+}
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
+{
+    size_t i;
+
+    for (i = 0; i < size; i++) {
+        int err;
+
+        err = json_lexer_feed_char(lexer, buffer[i], false);
+        if (err < 0) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+int json_lexer_flush(JSONLexer *lexer)
+{
+    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
+}
+
+void json_lexer_destroy(JSONLexer *lexer)
+{
+    QDECREF(lexer->token);
+}
```
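For readers new to this lexer, the imported file exposes four entry points: json_lexer_init() registers an emitter callback, json_lexer_feed() pushes bytes through the state table, json_lexer_flush() forces out a token that still needs lookahead (numbers and keywords only terminate once a following character arrives), and json_lexer_destroy() releases the token buffer. The sketch below shows how a caller might drive it; it is illustrative only, not part of this patch, and assumes the JSONLexer, JSONLexerEmitter and JSONTokenType definitions from the imported qapi/qmp/json-lexer.h header plus qstring_get_str() from qstring.h, with the emitter signature inferred from the lexer->emit() call site above.

```c
/* Minimal caller sketch (not part of the imported sources).
 * Assumptions: qapi/qmp/json-lexer.h defines JSONLexer, JSONLexerEmitter and
 * the JSON_* token types; the emitter signature mirrors the lexer->emit()
 * call site in json-lexer.c. */
#include <stdio.h>
#include <string.h>
#include "qapi/qmp/json-lexer.h"
#include "qapi/qmp/qstring.h"

/* Called once per completed token, synchronously from json_lexer_feed(). */
static void print_token(JSONLexer *lexer, QString *token, JSONTokenType type,
                        int x, int y)
{
    /* 'type' is one of JSON_OPERATOR, JSON_INTEGER, JSON_FLOAT, JSON_KEYWORD,
     * JSON_STRING, JSON_ESCAPE or JSON_ERROR; x/y track column and line. */
    printf("token type %d at line %d col %d: %s\n",
           (int)type, y, x, qstring_get_str(token));
}

int main(void)
{
    JSONLexer lexer;
    const char *input = "{ \"key\": [1, 2.5, true] }";

    json_lexer_init(&lexer, print_token);
    json_lexer_feed(&lexer, input, strlen(input));
    json_lexer_flush(&lexer);    /* emit any token still awaiting lookahead */
    json_lexer_destroy(&lexer);
    return 0;
}
```

The emitter runs inline from json_lexer_feed(), one call per recognized token; in the qemu sources the JSON streamer and parser sit behind this callback and assemble the token stream into QObject values.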
