From c292a38e0e46ee5a930673fb6a31e63e74b28d99 Mon Sep 17 00:00:00 2001 From: miker Date: Sun, 26 Nov 2006 18:05:29 +0000 Subject: [PATCH] handle the first byte all special-like with utf-8 git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@802 9efc2488-bf62-4759-914b-345cdb29e865 --- src/utils/utils.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/utils/utils.c b/src/utils/utils.c index 004e027..ffe738e 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -254,20 +254,45 @@ int buffer_add_char(growing_buffer* gb, char c) { char* uescape( const char* string, int size, int full_escape ) { growing_buffer* buf = buffer_init(size + 64); + int clen = 1; int idx = 0; - int c = 0; + unsigned long int c = 0x0; while (string[idx]) { - c ^= c; + c = 0x0; + + if ((unsigned char)string[idx] >= 0x80) { // not ASCII + + if ((unsigned char)string[idx] >= 0xC0 && (unsigned char)string[idx] <= 0xF4) { // starts a UTF8 string + + clen = 1; + if (((unsigned char)string[idx] & 0xF0) == 0xF0) { + clen = 4; + c = (unsigned char)string[idx] ^ 0xF0; + + } else if (((unsigned char)string[idx] & 0xE0) == 0xE0) { + clen = 3; + c = (unsigned char)string[idx] ^ 0xE0; + + } else if (((unsigned char)string[idx] & 0xC0) == 0xC0) { + clen = 2; + c = (unsigned char)string[idx] ^ 0xC0; + } + + for (;clen;--clen) { + + idx++; // look at the next byte + c = (c << 6) | ((unsigned char)string[idx] & 0x3F); // add this byte worth + + } + + buffer_fadd(buf, "\\u%04x", c); + + } else { + return NULL; + } - if (!((unsigned char)string[idx] < 0x80)) { // not ASCII - if ((unsigned char)string[idx] >= 0xc0 && (unsigned char)string[idx] <= 0xfd) { // starts a UTF8 string - do { - c = (c << 6) | ((unsigned char)string[idx] & 0x3f); // add this byte worth - } while (((unsigned char)string[idx + 1] >= 0x80 && (unsigned char)string[idx + 1] <= 0xbf) && idx++); // and continue if there's more - buffer_fadd(buf, "\\u%0.4x", c); - } else return NULL; } else { c = string[idx]; -- 2.43.2