Paul Cercueil / libxml2

Dependents:   libiio

Committer:
pcercuei
Date:
Thu Aug 25 10:07:34 2016 +0000
Revision:
1:26f20484cbdc
Parent:
0:03b5121a232e
Add config.h and dummy.c containing empty functions

Who changed what in which revision?

User | Revision | Line number | New contents of line
pcercuei 0:03b5121a232e 1 /*
pcercuei 0:03b5121a232e 2 * string.c : an XML string utilities module
pcercuei 0:03b5121a232e 3 *
pcercuei 0:03b5121a232e 4 * This module provides various utility functions for manipulating
pcercuei 0:03b5121a232e 5 * the xmlChar* type. All functions named xmlStr* have been moved here
pcercuei 0:03b5121a232e 6 * from the parser.c file (their original home).
pcercuei 0:03b5121a232e 7 *
pcercuei 0:03b5121a232e 8 * See Copyright for the status of this software.
pcercuei 0:03b5121a232e 9 *
pcercuei 0:03b5121a232e 10 * UTF8 string routines from:
pcercuei 0:03b5121a232e 11 * William Brack <wbrack@mmm.com.hk>
pcercuei 0:03b5121a232e 12 *
pcercuei 0:03b5121a232e 13 * daniel@veillard.com
pcercuei 0:03b5121a232e 14 */
pcercuei 0:03b5121a232e 15
pcercuei 0:03b5121a232e 16 #define IN_LIBXML
pcercuei 0:03b5121a232e 17 #include "libxml.h"
pcercuei 0:03b5121a232e 18
pcercuei 0:03b5121a232e 19 #include <stdlib.h>
pcercuei 0:03b5121a232e 20 #include <string.h>
pcercuei 0:03b5121a232e 21 #include <libxml/xmlmemory.h>
pcercuei 0:03b5121a232e 22 #include <libxml/parserInternals.h>
pcercuei 0:03b5121a232e 23 #include <libxml/xmlstring.h>
pcercuei 0:03b5121a232e 24
pcercuei 0:03b5121a232e 25 /************************************************************************
pcercuei 0:03b5121a232e 26 * *
pcercuei 0:03b5121a232e 27 * Commodity functions to handle xmlChars *
pcercuei 0:03b5121a232e 28 * *
pcercuei 0:03b5121a232e 29 ************************************************************************/
pcercuei 0:03b5121a232e 30
pcercuei 0:03b5121a232e 31 /**
pcercuei 0:03b5121a232e 32 * xmlStrndup:
pcercuei 0:03b5121a232e 33 * @cur: the input xmlChar *
pcercuei 0:03b5121a232e 34 * @len: the len of @cur
pcercuei 0:03b5121a232e 35 *
pcercuei 0:03b5121a232e 36 * a strndup for array of xmlChar's
pcercuei 0:03b5121a232e 37 *
pcercuei 0:03b5121a232e 38 * Returns a new xmlChar * or NULL
pcercuei 0:03b5121a232e 39 */
pcercuei 0:03b5121a232e 40 xmlChar *
pcercuei 0:03b5121a232e 41 xmlStrndup(const xmlChar *cur, int len) {
pcercuei 0:03b5121a232e 42 xmlChar *ret;
pcercuei 0:03b5121a232e 43
pcercuei 0:03b5121a232e 44 if ((cur == NULL) || (len < 0)) return(NULL);
pcercuei 0:03b5121a232e 45 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
pcercuei 0:03b5121a232e 46 if (ret == NULL) {
pcercuei 0:03b5121a232e 47 xmlErrMemory(NULL, NULL);
pcercuei 0:03b5121a232e 48 return(NULL);
pcercuei 0:03b5121a232e 49 }
pcercuei 0:03b5121a232e 50 memcpy(ret, cur, len * sizeof(xmlChar));
pcercuei 0:03b5121a232e 51 ret[len] = 0;
pcercuei 0:03b5121a232e 52 return(ret);
pcercuei 0:03b5121a232e 53 }
pcercuei 0:03b5121a232e 54
pcercuei 0:03b5121a232e 55 /**
pcercuei 0:03b5121a232e 56 * xmlStrdup:
pcercuei 0:03b5121a232e 57 * @cur: the input xmlChar *
pcercuei 0:03b5121a232e 58 *
pcercuei 0:03b5121a232e 59 * a strdup for array of xmlChar's. Since they are supposed to be
pcercuei 0:03b5121a232e 60 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
pcercuei 0:03b5121a232e 61 * a termination mark of '0'.
pcercuei 0:03b5121a232e 62 *
pcercuei 0:03b5121a232e 63 * Returns a new xmlChar * or NULL
pcercuei 0:03b5121a232e 64 */
pcercuei 0:03b5121a232e 65 xmlChar *
pcercuei 0:03b5121a232e 66 xmlStrdup(const xmlChar *cur) {
pcercuei 0:03b5121a232e 67 const xmlChar *p = cur;
pcercuei 0:03b5121a232e 68
pcercuei 0:03b5121a232e 69 if (cur == NULL) return(NULL);
pcercuei 0:03b5121a232e 70 while (*p != 0) p++; /* non input consuming */
pcercuei 0:03b5121a232e 71 return(xmlStrndup(cur, p - cur));
pcercuei 0:03b5121a232e 72 }
pcercuei 0:03b5121a232e 73
pcercuei 0:03b5121a232e 74 /**
pcercuei 0:03b5121a232e 75 * xmlCharStrndup:
pcercuei 0:03b5121a232e 76 * @cur: the input char *
pcercuei 0:03b5121a232e 77 * @len: the len of @cur
pcercuei 0:03b5121a232e 78 *
pcercuei 0:03b5121a232e 79 * a strndup for char's to xmlChar's
pcercuei 0:03b5121a232e 80 *
pcercuei 0:03b5121a232e 81 * Returns a new xmlChar * or NULL
pcercuei 0:03b5121a232e 82 */
pcercuei 0:03b5121a232e 83
pcercuei 0:03b5121a232e 84 xmlChar *
pcercuei 0:03b5121a232e 85 xmlCharStrndup(const char *cur, int len) {
pcercuei 0:03b5121a232e 86 int i;
pcercuei 0:03b5121a232e 87 xmlChar *ret;
pcercuei 0:03b5121a232e 88
pcercuei 0:03b5121a232e 89 if ((cur == NULL) || (len < 0)) return(NULL);
pcercuei 0:03b5121a232e 90 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
pcercuei 0:03b5121a232e 91 if (ret == NULL) {
pcercuei 0:03b5121a232e 92 xmlErrMemory(NULL, NULL);
pcercuei 0:03b5121a232e 93 return(NULL);
pcercuei 0:03b5121a232e 94 }
pcercuei 0:03b5121a232e 95 for (i = 0;i < len;i++) {
pcercuei 0:03b5121a232e 96 ret[i] = (xmlChar) cur[i];
pcercuei 0:03b5121a232e 97 if (ret[i] == 0) return(ret);
pcercuei 0:03b5121a232e 98 }
pcercuei 0:03b5121a232e 99 ret[len] = 0;
pcercuei 0:03b5121a232e 100 return(ret);
pcercuei 0:03b5121a232e 101 }
pcercuei 0:03b5121a232e 102
pcercuei 0:03b5121a232e 103 /**
pcercuei 0:03b5121a232e 104 * xmlCharStrdup:
pcercuei 0:03b5121a232e 105 * @cur: the input char *
pcercuei 0:03b5121a232e 106 *
pcercuei 0:03b5121a232e 107 * a strdup for char's to xmlChar's
pcercuei 0:03b5121a232e 108 *
pcercuei 0:03b5121a232e 109 * Returns a new xmlChar * or NULL
pcercuei 0:03b5121a232e 110 */
pcercuei 0:03b5121a232e 111
pcercuei 0:03b5121a232e 112 xmlChar *
pcercuei 0:03b5121a232e 113 xmlCharStrdup(const char *cur) {
pcercuei 0:03b5121a232e 114 const char *p = cur;
pcercuei 0:03b5121a232e 115
pcercuei 0:03b5121a232e 116 if (cur == NULL) return(NULL);
pcercuei 0:03b5121a232e 117 while (*p != '\0') p++; /* non input consuming */
pcercuei 0:03b5121a232e 118 return(xmlCharStrndup(cur, p - cur));
pcercuei 0:03b5121a232e 119 }
pcercuei 0:03b5121a232e 120
pcercuei 0:03b5121a232e 121 /**
pcercuei 0:03b5121a232e 122 * xmlStrcmp:
pcercuei 0:03b5121a232e 123 * @str1: the first xmlChar *
pcercuei 0:03b5121a232e 124 * @str2: the second xmlChar *
pcercuei 0:03b5121a232e 125 *
pcercuei 0:03b5121a232e 126 * a strcmp for xmlChar's
pcercuei 0:03b5121a232e 127 *
pcercuei 0:03b5121a232e 128 * Returns the integer result of the comparison
pcercuei 0:03b5121a232e 129 */
pcercuei 0:03b5121a232e 130
pcercuei 0:03b5121a232e 131 int
pcercuei 0:03b5121a232e 132 xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
pcercuei 0:03b5121a232e 133 register int tmp;
pcercuei 0:03b5121a232e 134
pcercuei 0:03b5121a232e 135 if (str1 == str2) return(0);
pcercuei 0:03b5121a232e 136 if (str1 == NULL) return(-1);
pcercuei 0:03b5121a232e 137 if (str2 == NULL) return(1);
pcercuei 0:03b5121a232e 138 do {
pcercuei 0:03b5121a232e 139 tmp = *str1++ - *str2;
pcercuei 0:03b5121a232e 140 if (tmp != 0) return(tmp);
pcercuei 0:03b5121a232e 141 } while (*str2++ != 0);
pcercuei 0:03b5121a232e 142 return 0;
pcercuei 0:03b5121a232e 143 }
pcercuei 0:03b5121a232e 144
pcercuei 0:03b5121a232e 145 /**
pcercuei 0:03b5121a232e 146 * xmlStrEqual:
pcercuei 0:03b5121a232e 147 * @str1: the first xmlChar *
pcercuei 0:03b5121a232e 148 * @str2: the second xmlChar *
pcercuei 0:03b5121a232e 149 *
pcercuei 0:03b5121a232e 150 * Check if both strings are equal of have same content.
pcercuei 0:03b5121a232e 151 * Should be a bit more readable and faster than xmlStrcmp()
pcercuei 0:03b5121a232e 152 *
pcercuei 0:03b5121a232e 153 * Returns 1 if they are equal, 0 if they are different
pcercuei 0:03b5121a232e 154 */
pcercuei 0:03b5121a232e 155
pcercuei 0:03b5121a232e 156 int
pcercuei 0:03b5121a232e 157 xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
pcercuei 0:03b5121a232e 158 if (str1 == str2) return(1);
pcercuei 0:03b5121a232e 159 if (str1 == NULL) return(0);
pcercuei 0:03b5121a232e 160 if (str2 == NULL) return(0);
pcercuei 0:03b5121a232e 161 do {
pcercuei 0:03b5121a232e 162 if (*str1++ != *str2) return(0);
pcercuei 0:03b5121a232e 163 } while (*str2++);
pcercuei 0:03b5121a232e 164 return(1);
pcercuei 0:03b5121a232e 165 }
pcercuei 0:03b5121a232e 166
pcercuei 0:03b5121a232e 167 /**
pcercuei 0:03b5121a232e 168 * xmlStrQEqual:
pcercuei 0:03b5121a232e 169 * @pref: the prefix of the QName
pcercuei 0:03b5121a232e 170 * @name: the localname of the QName
pcercuei 0:03b5121a232e 171 * @str: the second xmlChar *
pcercuei 0:03b5121a232e 172 *
pcercuei 0:03b5121a232e 173 * Check if a QName is Equal to a given string
pcercuei 0:03b5121a232e 174 *
pcercuei 0:03b5121a232e 175 * Returns 1 if they are equal, 0 if they are different
pcercuei 0:03b5121a232e 176 */
pcercuei 0:03b5121a232e 177
pcercuei 0:03b5121a232e 178 int
pcercuei 0:03b5121a232e 179 xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
pcercuei 0:03b5121a232e 180 if (pref == NULL) return(xmlStrEqual(name, str));
pcercuei 0:03b5121a232e 181 if (name == NULL) return(0);
pcercuei 0:03b5121a232e 182 if (str == NULL) return(0);
pcercuei 0:03b5121a232e 183
pcercuei 0:03b5121a232e 184 do {
pcercuei 0:03b5121a232e 185 if (*pref++ != *str) return(0);
pcercuei 0:03b5121a232e 186 } while ((*str++) && (*pref));
pcercuei 0:03b5121a232e 187 if (*str++ != ':') return(0);
pcercuei 0:03b5121a232e 188 do {
pcercuei 0:03b5121a232e 189 if (*name++ != *str) return(0);
pcercuei 0:03b5121a232e 190 } while (*str++);
pcercuei 0:03b5121a232e 191 return(1);
pcercuei 0:03b5121a232e 192 }
pcercuei 0:03b5121a232e 193
pcercuei 0:03b5121a232e 194 /**
pcercuei 0:03b5121a232e 195 * xmlStrncmp:
pcercuei 0:03b5121a232e 196 * @str1: the first xmlChar *
pcercuei 0:03b5121a232e 197 * @str2: the second xmlChar *
pcercuei 0:03b5121a232e 198 * @len: the max comparison length
pcercuei 0:03b5121a232e 199 *
pcercuei 0:03b5121a232e 200 * a strncmp for xmlChar's
pcercuei 0:03b5121a232e 201 *
pcercuei 0:03b5121a232e 202 * Returns the integer result of the comparison
pcercuei 0:03b5121a232e 203 */
pcercuei 0:03b5121a232e 204
pcercuei 0:03b5121a232e 205 int
pcercuei 0:03b5121a232e 206 xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
pcercuei 0:03b5121a232e 207 register int tmp;
pcercuei 0:03b5121a232e 208
pcercuei 0:03b5121a232e 209 if (len <= 0) return(0);
pcercuei 0:03b5121a232e 210 if (str1 == str2) return(0);
pcercuei 0:03b5121a232e 211 if (str1 == NULL) return(-1);
pcercuei 0:03b5121a232e 212 if (str2 == NULL) return(1);
pcercuei 0:03b5121a232e 213 #ifdef __GNUC__
pcercuei 0:03b5121a232e 214 tmp = strncmp((const char *)str1, (const char *)str2, len);
pcercuei 0:03b5121a232e 215 return tmp;
pcercuei 0:03b5121a232e 216 #else
pcercuei 0:03b5121a232e 217 do {
pcercuei 0:03b5121a232e 218 tmp = *str1++ - *str2;
pcercuei 0:03b5121a232e 219 if (tmp != 0 || --len == 0) return(tmp);
pcercuei 0:03b5121a232e 220 } while (*str2++ != 0);
pcercuei 0:03b5121a232e 221 return 0;
pcercuei 0:03b5121a232e 222 #endif
pcercuei 0:03b5121a232e 223 }
pcercuei 0:03b5121a232e 224
pcercuei 0:03b5121a232e 225 static const xmlChar casemap[256] = {
pcercuei 0:03b5121a232e 226 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
pcercuei 0:03b5121a232e 227 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
pcercuei 0:03b5121a232e 228 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
pcercuei 0:03b5121a232e 229 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
pcercuei 0:03b5121a232e 230 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
pcercuei 0:03b5121a232e 231 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
pcercuei 0:03b5121a232e 232 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
pcercuei 0:03b5121a232e 233 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
pcercuei 0:03b5121a232e 234 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
pcercuei 0:03b5121a232e 235 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
pcercuei 0:03b5121a232e 236 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
pcercuei 0:03b5121a232e 237 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
pcercuei 0:03b5121a232e 238 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
pcercuei 0:03b5121a232e 239 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
pcercuei 0:03b5121a232e 240 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
pcercuei 0:03b5121a232e 241 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
pcercuei 0:03b5121a232e 242 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
pcercuei 0:03b5121a232e 243 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
pcercuei 0:03b5121a232e 244 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
pcercuei 0:03b5121a232e 245 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
pcercuei 0:03b5121a232e 246 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
pcercuei 0:03b5121a232e 247 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
pcercuei 0:03b5121a232e 248 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
pcercuei 0:03b5121a232e 249 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
pcercuei 0:03b5121a232e 250 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
pcercuei 0:03b5121a232e 251 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
pcercuei 0:03b5121a232e 252 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
pcercuei 0:03b5121a232e 253 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
pcercuei 0:03b5121a232e 254 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
pcercuei 0:03b5121a232e 255 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
pcercuei 0:03b5121a232e 256 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
pcercuei 0:03b5121a232e 257 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
pcercuei 0:03b5121a232e 258 };
pcercuei 0:03b5121a232e 259
pcercuei 0:03b5121a232e 260 /**
pcercuei 0:03b5121a232e 261 * xmlStrcasecmp:
pcercuei 0:03b5121a232e 262 * @str1: the first xmlChar *
pcercuei 0:03b5121a232e 263 * @str2: the second xmlChar *
pcercuei 0:03b5121a232e 264 *
pcercuei 0:03b5121a232e 265 * a strcasecmp for xmlChar's
pcercuei 0:03b5121a232e 266 *
pcercuei 0:03b5121a232e 267 * Returns the integer result of the comparison
pcercuei 0:03b5121a232e 268 */
pcercuei 0:03b5121a232e 269
pcercuei 0:03b5121a232e 270 int
pcercuei 0:03b5121a232e 271 xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
pcercuei 0:03b5121a232e 272 register int tmp;
pcercuei 0:03b5121a232e 273
pcercuei 0:03b5121a232e 274 if (str1 == str2) return(0);
pcercuei 0:03b5121a232e 275 if (str1 == NULL) return(-1);
pcercuei 0:03b5121a232e 276 if (str2 == NULL) return(1);
pcercuei 0:03b5121a232e 277 do {
pcercuei 0:03b5121a232e 278 tmp = casemap[*str1++] - casemap[*str2];
pcercuei 0:03b5121a232e 279 if (tmp != 0) return(tmp);
pcercuei 0:03b5121a232e 280 } while (*str2++ != 0);
pcercuei 0:03b5121a232e 281 return 0;
pcercuei 0:03b5121a232e 282 }
pcercuei 0:03b5121a232e 283
pcercuei 0:03b5121a232e 284 /**
pcercuei 0:03b5121a232e 285 * xmlStrncasecmp:
pcercuei 0:03b5121a232e 286 * @str1: the first xmlChar *
pcercuei 0:03b5121a232e 287 * @str2: the second xmlChar *
pcercuei 0:03b5121a232e 288 * @len: the max comparison length
pcercuei 0:03b5121a232e 289 *
pcercuei 0:03b5121a232e 290 * a strncasecmp for xmlChar's
pcercuei 0:03b5121a232e 291 *
pcercuei 0:03b5121a232e 292 * Returns the integer result of the comparison
pcercuei 0:03b5121a232e 293 */
pcercuei 0:03b5121a232e 294
pcercuei 0:03b5121a232e 295 int
pcercuei 0:03b5121a232e 296 xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
pcercuei 0:03b5121a232e 297 register int tmp;
pcercuei 0:03b5121a232e 298
pcercuei 0:03b5121a232e 299 if (len <= 0) return(0);
pcercuei 0:03b5121a232e 300 if (str1 == str2) return(0);
pcercuei 0:03b5121a232e 301 if (str1 == NULL) return(-1);
pcercuei 0:03b5121a232e 302 if (str2 == NULL) return(1);
pcercuei 0:03b5121a232e 303 do {
pcercuei 0:03b5121a232e 304 tmp = casemap[*str1++] - casemap[*str2];
pcercuei 0:03b5121a232e 305 if (tmp != 0 || --len == 0) return(tmp);
pcercuei 0:03b5121a232e 306 } while (*str2++ != 0);
pcercuei 0:03b5121a232e 307 return 0;
pcercuei 0:03b5121a232e 308 }
pcercuei 0:03b5121a232e 309
pcercuei 0:03b5121a232e 310 /**
pcercuei 0:03b5121a232e 311 * xmlStrchr:
pcercuei 0:03b5121a232e 312 * @str: the xmlChar * array
pcercuei 0:03b5121a232e 313 * @val: the xmlChar to search
pcercuei 0:03b5121a232e 314 *
pcercuei 0:03b5121a232e 315 * a strchr for xmlChar's
pcercuei 0:03b5121a232e 316 *
pcercuei 0:03b5121a232e 317 * Returns the xmlChar * for the first occurrence or NULL.
pcercuei 0:03b5121a232e 318 */
pcercuei 0:03b5121a232e 319
pcercuei 0:03b5121a232e 320 const xmlChar *
pcercuei 0:03b5121a232e 321 xmlStrchr(const xmlChar *str, xmlChar val) {
pcercuei 0:03b5121a232e 322 if (str == NULL) return(NULL);
pcercuei 0:03b5121a232e 323 while (*str != 0) { /* non input consuming */
pcercuei 0:03b5121a232e 324 if (*str == val) return((xmlChar *) str);
pcercuei 0:03b5121a232e 325 str++;
pcercuei 0:03b5121a232e 326 }
pcercuei 0:03b5121a232e 327 return(NULL);
pcercuei 0:03b5121a232e 328 }
pcercuei 0:03b5121a232e 329
pcercuei 0:03b5121a232e 330 /**
pcercuei 0:03b5121a232e 331 * xmlStrstr:
pcercuei 0:03b5121a232e 332 * @str: the xmlChar * array (haystack)
pcercuei 0:03b5121a232e 333 * @val: the xmlChar to search (needle)
pcercuei 0:03b5121a232e 334 *
pcercuei 0:03b5121a232e 335 * a strstr for xmlChar's
pcercuei 0:03b5121a232e 336 *
pcercuei 0:03b5121a232e 337 * Returns the xmlChar * for the first occurrence or NULL.
pcercuei 0:03b5121a232e 338 */
pcercuei 0:03b5121a232e 339
pcercuei 0:03b5121a232e 340 const xmlChar *
pcercuei 0:03b5121a232e 341 xmlStrstr(const xmlChar *str, const xmlChar *val) {
pcercuei 0:03b5121a232e 342 int n;
pcercuei 0:03b5121a232e 343
pcercuei 0:03b5121a232e 344 if (str == NULL) return(NULL);
pcercuei 0:03b5121a232e 345 if (val == NULL) return(NULL);
pcercuei 0:03b5121a232e 346 n = xmlStrlen(val);
pcercuei 0:03b5121a232e 347
pcercuei 0:03b5121a232e 348 if (n == 0) return(str);
pcercuei 0:03b5121a232e 349 while (*str != 0) { /* non input consuming */
pcercuei 0:03b5121a232e 350 if (*str == *val) {
pcercuei 0:03b5121a232e 351 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
pcercuei 0:03b5121a232e 352 }
pcercuei 0:03b5121a232e 353 str++;
pcercuei 0:03b5121a232e 354 }
pcercuei 0:03b5121a232e 355 return(NULL);
pcercuei 0:03b5121a232e 356 }
pcercuei 0:03b5121a232e 357
pcercuei 0:03b5121a232e 358 /**
pcercuei 0:03b5121a232e 359 * xmlStrcasestr:
pcercuei 0:03b5121a232e 360 * @str: the xmlChar * array (haystack)
pcercuei 0:03b5121a232e 361 * @val: the xmlChar to search (needle)
pcercuei 0:03b5121a232e 362 *
pcercuei 0:03b5121a232e 363 * a case-ignoring strstr for xmlChar's
pcercuei 0:03b5121a232e 364 *
pcercuei 0:03b5121a232e 365 * Returns the xmlChar * for the first occurrence or NULL.
pcercuei 0:03b5121a232e 366 */
pcercuei 0:03b5121a232e 367
pcercuei 0:03b5121a232e 368 const xmlChar *
pcercuei 0:03b5121a232e 369 xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
pcercuei 0:03b5121a232e 370 int n;
pcercuei 0:03b5121a232e 371
pcercuei 0:03b5121a232e 372 if (str == NULL) return(NULL);
pcercuei 0:03b5121a232e 373 if (val == NULL) return(NULL);
pcercuei 0:03b5121a232e 374 n = xmlStrlen(val);
pcercuei 0:03b5121a232e 375
pcercuei 0:03b5121a232e 376 if (n == 0) return(str);
pcercuei 0:03b5121a232e 377 while (*str != 0) { /* non input consuming */
pcercuei 0:03b5121a232e 378 if (casemap[*str] == casemap[*val])
pcercuei 0:03b5121a232e 379 if (!xmlStrncasecmp(str, val, n)) return(str);
pcercuei 0:03b5121a232e 380 str++;
pcercuei 0:03b5121a232e 381 }
pcercuei 0:03b5121a232e 382 return(NULL);
pcercuei 0:03b5121a232e 383 }
pcercuei 0:03b5121a232e 384
pcercuei 0:03b5121a232e 385 /**
pcercuei 0:03b5121a232e 386 * xmlStrsub:
pcercuei 0:03b5121a232e 387 * @str: the xmlChar * array (haystack)
pcercuei 0:03b5121a232e 388 * @start: the index of the first char (zero based)
pcercuei 0:03b5121a232e 389 * @len: the length of the substring
pcercuei 0:03b5121a232e 390 *
pcercuei 0:03b5121a232e 391 * Extract a substring of a given string
pcercuei 0:03b5121a232e 392 *
pcercuei 0:03b5121a232e 393 * Returns the xmlChar * for the first occurrence or NULL.
pcercuei 0:03b5121a232e 394 */
pcercuei 0:03b5121a232e 395
pcercuei 0:03b5121a232e 396 xmlChar *
pcercuei 0:03b5121a232e 397 xmlStrsub(const xmlChar *str, int start, int len) {
pcercuei 0:03b5121a232e 398 int i;
pcercuei 0:03b5121a232e 399
pcercuei 0:03b5121a232e 400 if (str == NULL) return(NULL);
pcercuei 0:03b5121a232e 401 if (start < 0) return(NULL);
pcercuei 0:03b5121a232e 402 if (len < 0) return(NULL);
pcercuei 0:03b5121a232e 403
pcercuei 0:03b5121a232e 404 for (i = 0;i < start;i++) {
pcercuei 0:03b5121a232e 405 if (*str == 0) return(NULL);
pcercuei 0:03b5121a232e 406 str++;
pcercuei 0:03b5121a232e 407 }
pcercuei 0:03b5121a232e 408 if (*str == 0) return(NULL);
pcercuei 0:03b5121a232e 409 return(xmlStrndup(str, len));
pcercuei 0:03b5121a232e 410 }
pcercuei 0:03b5121a232e 411
pcercuei 0:03b5121a232e 412 /**
pcercuei 0:03b5121a232e 413 * xmlStrlen:
pcercuei 0:03b5121a232e 414 * @str: the xmlChar * array
pcercuei 0:03b5121a232e 415 *
pcercuei 0:03b5121a232e 416 * length of a xmlChar's string
pcercuei 0:03b5121a232e 417 *
pcercuei 0:03b5121a232e 418 * Returns the number of xmlChar contained in the ARRAY.
pcercuei 0:03b5121a232e 419 */
pcercuei 0:03b5121a232e 420
pcercuei 0:03b5121a232e 421 int
pcercuei 0:03b5121a232e 422 xmlStrlen(const xmlChar *str) {
pcercuei 0:03b5121a232e 423 int len = 0;
pcercuei 0:03b5121a232e 424
pcercuei 0:03b5121a232e 425 if (str == NULL) return(0);
pcercuei 0:03b5121a232e 426 while (*str != 0) { /* non input consuming */
pcercuei 0:03b5121a232e 427 str++;
pcercuei 0:03b5121a232e 428 len++;
pcercuei 0:03b5121a232e 429 }
pcercuei 0:03b5121a232e 430 return(len);
pcercuei 0:03b5121a232e 431 }
pcercuei 0:03b5121a232e 432
pcercuei 0:03b5121a232e 433 /**
pcercuei 0:03b5121a232e 434 * xmlStrncat:
pcercuei 0:03b5121a232e 435 * @cur: the original xmlChar * array
pcercuei 0:03b5121a232e 436 * @add: the xmlChar * array added
pcercuei 0:03b5121a232e 437 * @len: the length of @add
pcercuei 0:03b5121a232e 438 *
pcercuei 0:03b5121a232e 439 * a strncat for array of xmlChar's, it will extend @cur with the len
pcercuei 0:03b5121a232e 440 * first bytes of @add. Note that if @len < 0 then this is an API error
pcercuei 0:03b5121a232e 441 * and NULL will be returned.
pcercuei 0:03b5121a232e 442 *
pcercuei 0:03b5121a232e 443 * Returns a new xmlChar *, the original @cur is reallocated if needed
pcercuei 0:03b5121a232e 444 * and should not be freed
pcercuei 0:03b5121a232e 445 */
pcercuei 0:03b5121a232e 446
pcercuei 0:03b5121a232e 447 xmlChar *
pcercuei 0:03b5121a232e 448 xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
pcercuei 0:03b5121a232e 449 int size;
pcercuei 0:03b5121a232e 450 xmlChar *ret;
pcercuei 0:03b5121a232e 451
pcercuei 0:03b5121a232e 452 if ((add == NULL) || (len == 0))
pcercuei 0:03b5121a232e 453 return(cur);
pcercuei 0:03b5121a232e 454 if (len < 0)
pcercuei 0:03b5121a232e 455 return(NULL);
pcercuei 0:03b5121a232e 456 if (cur == NULL)
pcercuei 0:03b5121a232e 457 return(xmlStrndup(add, len));
pcercuei 0:03b5121a232e 458
pcercuei 0:03b5121a232e 459 size = xmlStrlen(cur);
pcercuei 0:03b5121a232e 460 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
pcercuei 0:03b5121a232e 461 if (ret == NULL) {
pcercuei 0:03b5121a232e 462 xmlErrMemory(NULL, NULL);
pcercuei 0:03b5121a232e 463 return(cur);
pcercuei 0:03b5121a232e 464 }
pcercuei 0:03b5121a232e 465 memcpy(&ret[size], add, len * sizeof(xmlChar));
pcercuei 0:03b5121a232e 466 ret[size + len] = 0;
pcercuei 0:03b5121a232e 467 return(ret);
pcercuei 0:03b5121a232e 468 }
pcercuei 0:03b5121a232e 469
pcercuei 0:03b5121a232e 470 /**
pcercuei 0:03b5121a232e 471 * xmlStrncatNew:
pcercuei 0:03b5121a232e 472 * @str1: first xmlChar string
pcercuei 0:03b5121a232e 473 * @str2: second xmlChar string
pcercuei 0:03b5121a232e 474 * @len: the len of @str2 or < 0
pcercuei 0:03b5121a232e 475 *
pcercuei 0:03b5121a232e 476 * same as xmlStrncat, but creates a new string. The original
pcercuei 0:03b5121a232e 477 * two strings are not freed. If @len is < 0 then the length
pcercuei 0:03b5121a232e 478 * will be calculated automatically.
pcercuei 0:03b5121a232e 479 *
pcercuei 0:03b5121a232e 480 * Returns a new xmlChar * or NULL
pcercuei 0:03b5121a232e 481 */
pcercuei 0:03b5121a232e 482 xmlChar *
pcercuei 0:03b5121a232e 483 xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
pcercuei 0:03b5121a232e 484 int size;
pcercuei 0:03b5121a232e 485 xmlChar *ret;
pcercuei 0:03b5121a232e 486
pcercuei 0:03b5121a232e 487 if (len < 0)
pcercuei 0:03b5121a232e 488 len = xmlStrlen(str2);
pcercuei 0:03b5121a232e 489 if ((str2 == NULL) || (len == 0))
pcercuei 0:03b5121a232e 490 return(xmlStrdup(str1));
pcercuei 0:03b5121a232e 491 if (str1 == NULL)
pcercuei 0:03b5121a232e 492 return(xmlStrndup(str2, len));
pcercuei 0:03b5121a232e 493
pcercuei 0:03b5121a232e 494 size = xmlStrlen(str1);
pcercuei 0:03b5121a232e 495 ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
pcercuei 0:03b5121a232e 496 if (ret == NULL) {
pcercuei 0:03b5121a232e 497 xmlErrMemory(NULL, NULL);
pcercuei 0:03b5121a232e 498 return(xmlStrndup(str1, size));
pcercuei 0:03b5121a232e 499 }
pcercuei 0:03b5121a232e 500 memcpy(ret, str1, size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 501 memcpy(&ret[size], str2, len * sizeof(xmlChar));
pcercuei 0:03b5121a232e 502 ret[size + len] = 0;
pcercuei 0:03b5121a232e 503 return(ret);
pcercuei 0:03b5121a232e 504 }
pcercuei 0:03b5121a232e 505
pcercuei 0:03b5121a232e 506 /**
pcercuei 0:03b5121a232e 507 * xmlStrcat:
pcercuei 0:03b5121a232e 508 * @cur: the original xmlChar * array
pcercuei 0:03b5121a232e 509 * @add: the xmlChar * array added
pcercuei 0:03b5121a232e 510 *
pcercuei 0:03b5121a232e 511 * a strcat for array of xmlChar's. Since they are supposed to be
pcercuei 0:03b5121a232e 512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
pcercuei 0:03b5121a232e 513 * a termination mark of '0'.
pcercuei 0:03b5121a232e 514 *
pcercuei 0:03b5121a232e 515 * Returns a new xmlChar * containing the concatenated string.
pcercuei 0:03b5121a232e 516 */
pcercuei 0:03b5121a232e 517 xmlChar *
pcercuei 0:03b5121a232e 518 xmlStrcat(xmlChar *cur, const xmlChar *add) {
pcercuei 0:03b5121a232e 519 const xmlChar *p = add;
pcercuei 0:03b5121a232e 520
pcercuei 0:03b5121a232e 521 if (add == NULL) return(cur);
pcercuei 0:03b5121a232e 522 if (cur == NULL)
pcercuei 0:03b5121a232e 523 return(xmlStrdup(add));
pcercuei 0:03b5121a232e 524
pcercuei 0:03b5121a232e 525 while (*p != 0) p++; /* non input consuming */
pcercuei 0:03b5121a232e 526 return(xmlStrncat(cur, add, p - add));
pcercuei 0:03b5121a232e 527 }
pcercuei 0:03b5121a232e 528
pcercuei 0:03b5121a232e 529 /**
pcercuei 0:03b5121a232e 530 * xmlStrPrintf:
pcercuei 0:03b5121a232e 531 * @buf: the result buffer.
pcercuei 0:03b5121a232e 532 * @len: the result buffer length.
pcercuei 0:03b5121a232e 533 * @msg: the message with printf formatting.
pcercuei 0:03b5121a232e 534 * @...: extra parameters for the message.
pcercuei 0:03b5121a232e 535 *
pcercuei 0:03b5121a232e 536 * Formats @msg and places result into @buf.
pcercuei 0:03b5121a232e 537 *
pcercuei 0:03b5121a232e 538 * Returns the number of characters written to @buf or -1 if an error occurs.
pcercuei 0:03b5121a232e 539 */
pcercuei 0:03b5121a232e 540 int XMLCDECL
pcercuei 0:03b5121a232e 541 xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
pcercuei 0:03b5121a232e 542 va_list args;
pcercuei 0:03b5121a232e 543 int ret;
pcercuei 0:03b5121a232e 544
pcercuei 0:03b5121a232e 545 if((buf == NULL) || (msg == NULL)) {
pcercuei 0:03b5121a232e 546 return(-1);
pcercuei 0:03b5121a232e 547 }
pcercuei 0:03b5121a232e 548
pcercuei 0:03b5121a232e 549 va_start(args, msg);
pcercuei 0:03b5121a232e 550 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
pcercuei 0:03b5121a232e 551 va_end(args);
pcercuei 0:03b5121a232e 552 buf[len - 1] = 0; /* be safe ! */
pcercuei 0:03b5121a232e 553
pcercuei 0:03b5121a232e 554 return(ret);
pcercuei 0:03b5121a232e 555 }
pcercuei 0:03b5121a232e 556
pcercuei 0:03b5121a232e 557 /**
pcercuei 0:03b5121a232e 558 * xmlStrVPrintf:
pcercuei 0:03b5121a232e 559 * @buf: the result buffer.
pcercuei 0:03b5121a232e 560 * @len: the result buffer length.
pcercuei 0:03b5121a232e 561 * @msg: the message with printf formatting.
pcercuei 0:03b5121a232e 562 * @ap: extra parameters for the message.
pcercuei 0:03b5121a232e 563 *
pcercuei 0:03b5121a232e 564 * Formats @msg and places result into @buf.
pcercuei 0:03b5121a232e 565 *
pcercuei 0:03b5121a232e 566 * Returns the number of characters written to @buf or -1 if an error occurs.
pcercuei 0:03b5121a232e 567 */
pcercuei 0:03b5121a232e 568 int
pcercuei 0:03b5121a232e 569 xmlStrVPrintf(xmlChar *buf, int len, const xmlChar *msg, va_list ap) {
pcercuei 0:03b5121a232e 570 int ret;
pcercuei 0:03b5121a232e 571
pcercuei 0:03b5121a232e 572 if((buf == NULL) || (msg == NULL)) {
pcercuei 0:03b5121a232e 573 return(-1);
pcercuei 0:03b5121a232e 574 }
pcercuei 0:03b5121a232e 575
pcercuei 0:03b5121a232e 576 ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
pcercuei 0:03b5121a232e 577 buf[len - 1] = 0; /* be safe ! */
pcercuei 0:03b5121a232e 578
pcercuei 0:03b5121a232e 579 return(ret);
pcercuei 0:03b5121a232e 580 }
pcercuei 0:03b5121a232e 581
pcercuei 0:03b5121a232e 582 /************************************************************************
pcercuei 0:03b5121a232e 583 * *
pcercuei 0:03b5121a232e 584 * Generic UTF8 handling routines *
pcercuei 0:03b5121a232e 585 * *
pcercuei 0:03b5121a232e 586 * From rfc2044: encoding of the Unicode values on UTF-8: *
pcercuei 0:03b5121a232e 587 * *
pcercuei 0:03b5121a232e 588 * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
pcercuei 0:03b5121a232e 589 * 0000 0000-0000 007F 0xxxxxxx *
pcercuei 0:03b5121a232e 590 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
pcercuei 0:03b5121a232e 591 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
pcercuei 0:03b5121a232e 592 * *
pcercuei 0:03b5121a232e 593 * I hope we won't use values > 0xFFFF anytime soon ! *
pcercuei 0:03b5121a232e 594 * *
pcercuei 0:03b5121a232e 595 ************************************************************************/
pcercuei 0:03b5121a232e 596
pcercuei 0:03b5121a232e 597
pcercuei 0:03b5121a232e 598 /**
pcercuei 0:03b5121a232e 599 * xmlUTF8Size:
pcercuei 0:03b5121a232e 600 * @utf: pointer to the UTF8 character
pcercuei 0:03b5121a232e 601 *
pcercuei 0:03b5121a232e 602 * calculates the internal size of a UTF8 character
pcercuei 0:03b5121a232e 603 *
pcercuei 0:03b5121a232e 604 * returns the numbers of bytes in the character, -1 on format error
pcercuei 0:03b5121a232e 605 */
pcercuei 0:03b5121a232e 606 int
pcercuei 0:03b5121a232e 607 xmlUTF8Size(const xmlChar *utf) {
pcercuei 0:03b5121a232e 608 xmlChar mask;
pcercuei 0:03b5121a232e 609 int len;
pcercuei 0:03b5121a232e 610
pcercuei 0:03b5121a232e 611 if (utf == NULL)
pcercuei 0:03b5121a232e 612 return -1;
pcercuei 0:03b5121a232e 613 if (*utf < 0x80)
pcercuei 0:03b5121a232e 614 return 1;
pcercuei 0:03b5121a232e 615 /* check valid UTF8 character */
pcercuei 0:03b5121a232e 616 if (!(*utf & 0x40))
pcercuei 0:03b5121a232e 617 return -1;
pcercuei 0:03b5121a232e 618 /* determine number of bytes in char */
pcercuei 0:03b5121a232e 619 len = 2;
pcercuei 0:03b5121a232e 620 for (mask=0x20; mask != 0; mask>>=1) {
pcercuei 0:03b5121a232e 621 if (!(*utf & mask))
pcercuei 0:03b5121a232e 622 return len;
pcercuei 0:03b5121a232e 623 len++;
pcercuei 0:03b5121a232e 624 }
pcercuei 0:03b5121a232e 625 return -1;
pcercuei 0:03b5121a232e 626 }
pcercuei 0:03b5121a232e 627
pcercuei 0:03b5121a232e 628 /**
pcercuei 0:03b5121a232e 629 * xmlUTF8Charcmp:
pcercuei 0:03b5121a232e 630 * @utf1: pointer to first UTF8 char
pcercuei 0:03b5121a232e 631 * @utf2: pointer to second UTF8 char
pcercuei 0:03b5121a232e 632 *
pcercuei 0:03b5121a232e 633 * compares the two UCS4 values
pcercuei 0:03b5121a232e 634 *
pcercuei 0:03b5121a232e 635 * returns result of the compare as with xmlStrncmp
pcercuei 0:03b5121a232e 636 */
pcercuei 0:03b5121a232e 637 int
pcercuei 0:03b5121a232e 638 xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
pcercuei 0:03b5121a232e 639
pcercuei 0:03b5121a232e 640 if (utf1 == NULL ) {
pcercuei 0:03b5121a232e 641 if (utf2 == NULL)
pcercuei 0:03b5121a232e 642 return 0;
pcercuei 0:03b5121a232e 643 return -1;
pcercuei 0:03b5121a232e 644 }
pcercuei 0:03b5121a232e 645 return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
pcercuei 0:03b5121a232e 646 }
pcercuei 0:03b5121a232e 647
pcercuei 0:03b5121a232e 648 /**
pcercuei 0:03b5121a232e 649 * xmlUTF8Strlen:
pcercuei 0:03b5121a232e 650 * @utf: a sequence of UTF-8 encoded bytes
pcercuei 0:03b5121a232e 651 *
pcercuei 0:03b5121a232e 652 * compute the length of an UTF8 string, it doesn't do a full UTF8
pcercuei 0:03b5121a232e 653 * checking of the content of the string.
pcercuei 0:03b5121a232e 654 *
pcercuei 0:03b5121a232e 655 * Returns the number of characters in the string or -1 in case of error
pcercuei 0:03b5121a232e 656 */
pcercuei 0:03b5121a232e 657 int
pcercuei 0:03b5121a232e 658 xmlUTF8Strlen(const xmlChar *utf) {
pcercuei 0:03b5121a232e 659 int ret = 0;
pcercuei 0:03b5121a232e 660
pcercuei 0:03b5121a232e 661 if (utf == NULL)
pcercuei 0:03b5121a232e 662 return(-1);
pcercuei 0:03b5121a232e 663
pcercuei 0:03b5121a232e 664 while (*utf != 0) {
pcercuei 0:03b5121a232e 665 if (utf[0] & 0x80) {
pcercuei 0:03b5121a232e 666 if ((utf[1] & 0xc0) != 0x80)
pcercuei 0:03b5121a232e 667 return(-1);
pcercuei 0:03b5121a232e 668 if ((utf[0] & 0xe0) == 0xe0) {
pcercuei 0:03b5121a232e 669 if ((utf[2] & 0xc0) != 0x80)
pcercuei 0:03b5121a232e 670 return(-1);
pcercuei 0:03b5121a232e 671 if ((utf[0] & 0xf0) == 0xf0) {
pcercuei 0:03b5121a232e 672 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
pcercuei 0:03b5121a232e 673 return(-1);
pcercuei 0:03b5121a232e 674 utf += 4;
pcercuei 0:03b5121a232e 675 } else {
pcercuei 0:03b5121a232e 676 utf += 3;
pcercuei 0:03b5121a232e 677 }
pcercuei 0:03b5121a232e 678 } else {
pcercuei 0:03b5121a232e 679 utf += 2;
pcercuei 0:03b5121a232e 680 }
pcercuei 0:03b5121a232e 681 } else {
pcercuei 0:03b5121a232e 682 utf++;
pcercuei 0:03b5121a232e 683 }
pcercuei 0:03b5121a232e 684 ret++;
pcercuei 0:03b5121a232e 685 }
pcercuei 0:03b5121a232e 686 return(ret);
pcercuei 0:03b5121a232e 687 }
pcercuei 0:03b5121a232e 688
/**
 * xmlGetUTF8Char:
 * @utf: a sequence of UTF-8 encoded bytes
 * @len: a pointer to the minimum number of bytes present in
 *       the sequence.  This is used to assure the next character
 *       is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf.  The lead byte must be a
 * valid start byte (a 10xxxxxx trailing byte in lead position is
 * rejected) and every following byte must have the 10xxxxxx form.
 * Over-long encodings and the 0x10ffff upper bound are not checked.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *         the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /* a trailing byte (10xxxxxx) can never start a character */
        if ((c & 0xc0) != 0xc0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                /* 11111xxx would announce more than four bytes */
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                /* 4-byte code */
                *len = 4;
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return((int) c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
pcercuei 0:03b5121a232e 758
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8.  @utf is assumed to be
 * null-terminated.  The check is deliberately lenient: only the lead
 * byte class and the 10xxxxxx form of the trailing bytes are verified,
 * so over-long encodings are accepted.  Sequences longer than 4 bytes
 * are rejected, but the 0x10ffff maximum value is not enforced.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *p;
    int trailing;

    if (utf == NULL)
        return(0);

    /*
     * Accepted layouts (in "bit format"):
     *    0xxxxxxx                                      1-byte
     *    110xxxxx 10xxxxxx                             2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           4-byte
     */
    p = utf;
    while (*p != 0) {                      /* string is 0-terminated */
        unsigned char lead = *p++;

        if ((lead & 0x80) == 0x00)         /* 1-byte code, starts with 0 */
            trailing = 0;
        else if ((lead & 0xe0) == 0xc0)    /* 2-byte code, starts with 110 */
            trailing = 1;
        else if ((lead & 0xf0) == 0xe0)    /* 3-byte code, starts with 1110 */
            trailing = 2;
        else if ((lead & 0xf8) == 0xf0)    /* 4-byte code, starts with 11110 */
            trailing = 3;
        else                               /* unknown encoding */
            return(0);

        /* a terminator inside a sequence fails the 10xxxxxx test too */
        while (trailing-- > 0) {
            if ((*p & 0xc0) != 0x80)
                return(0);
            p++;
        }
    }
    return(1);
}
pcercuei 0:03b5121a232e 811
pcercuei 0:03b5121a232e 812 /**
pcercuei 0:03b5121a232e 813 * xmlUTF8Strsize:
pcercuei 0:03b5121a232e 814 * @utf: a sequence of UTF-8 encoded bytes
pcercuei 0:03b5121a232e 815 * @len: the number of characters in the array
pcercuei 0:03b5121a232e 816 *
pcercuei 0:03b5121a232e 817 * storage size of an UTF8 string
pcercuei 0:03b5121a232e 818 * the behaviour is not garanteed if the input string is not UTF-8
pcercuei 0:03b5121a232e 819 *
pcercuei 0:03b5121a232e 820 * Returns the storage size of
pcercuei 0:03b5121a232e 821 * the first 'len' characters of ARRAY
pcercuei 0:03b5121a232e 822 */
pcercuei 0:03b5121a232e 823
pcercuei 0:03b5121a232e 824 int
pcercuei 0:03b5121a232e 825 xmlUTF8Strsize(const xmlChar *utf, int len) {
pcercuei 0:03b5121a232e 826 const xmlChar *ptr=utf;
pcercuei 0:03b5121a232e 827 xmlChar ch;
pcercuei 0:03b5121a232e 828
pcercuei 0:03b5121a232e 829 if (utf == NULL)
pcercuei 0:03b5121a232e 830 return(0);
pcercuei 0:03b5121a232e 831
pcercuei 0:03b5121a232e 832 if (len <= 0)
pcercuei 0:03b5121a232e 833 return(0);
pcercuei 0:03b5121a232e 834
pcercuei 0:03b5121a232e 835 while ( len-- > 0) {
pcercuei 0:03b5121a232e 836 if ( !*ptr )
pcercuei 0:03b5121a232e 837 break;
pcercuei 0:03b5121a232e 838 if ( (ch = *ptr++) & 0x80)
pcercuei 0:03b5121a232e 839 while ((ch<<=1) & 0x80 ) {
pcercuei 0:03b5121a232e 840 ptr++;
pcercuei 0:03b5121a232e 841 if (*ptr == 0) break;
pcercuei 0:03b5121a232e 842 }
pcercuei 0:03b5121a232e 843 }
pcercuei 0:03b5121a232e 844 return (ptr - utf);
pcercuei 0:03b5121a232e 845 }
pcercuei 0:03b5121a232e 846
pcercuei 0:03b5121a232e 847
pcercuei 0:03b5121a232e 848 /**
pcercuei 0:03b5121a232e 849 * xmlUTF8Strndup:
pcercuei 0:03b5121a232e 850 * @utf: the input UTF8 *
pcercuei 0:03b5121a232e 851 * @len: the len of @utf (in chars)
pcercuei 0:03b5121a232e 852 *
pcercuei 0:03b5121a232e 853 * a strndup for array of UTF8's
pcercuei 0:03b5121a232e 854 *
pcercuei 0:03b5121a232e 855 * Returns a new UTF8 * or NULL
pcercuei 0:03b5121a232e 856 */
pcercuei 0:03b5121a232e 857 xmlChar *
pcercuei 0:03b5121a232e 858 xmlUTF8Strndup(const xmlChar *utf, int len) {
pcercuei 0:03b5121a232e 859 xmlChar *ret;
pcercuei 0:03b5121a232e 860 int i;
pcercuei 0:03b5121a232e 861
pcercuei 0:03b5121a232e 862 if ((utf == NULL) || (len < 0)) return(NULL);
pcercuei 0:03b5121a232e 863 i = xmlUTF8Strsize(utf, len);
pcercuei 0:03b5121a232e 864 ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
pcercuei 0:03b5121a232e 865 if (ret == NULL) {
pcercuei 0:03b5121a232e 866 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 867 "malloc of %ld byte failed\n",
pcercuei 0:03b5121a232e 868 (len + 1) * (long)sizeof(xmlChar));
pcercuei 0:03b5121a232e 869 return(NULL);
pcercuei 0:03b5121a232e 870 }
pcercuei 0:03b5121a232e 871 memcpy(ret, utf, i * sizeof(xmlChar));
pcercuei 0:03b5121a232e 872 ret[i] = 0;
pcercuei 0:03b5121a232e 873 return(ret);
pcercuei 0:03b5121a232e 874 }
pcercuei 0:03b5121a232e 875
pcercuei 0:03b5121a232e 876 /**
pcercuei 0:03b5121a232e 877 * xmlUTF8Strpos:
pcercuei 0:03b5121a232e 878 * @utf: the input UTF8 *
pcercuei 0:03b5121a232e 879 * @pos: the position of the desired UTF8 char (in chars)
pcercuei 0:03b5121a232e 880 *
pcercuei 0:03b5121a232e 881 * a function to provide the equivalent of fetching a
pcercuei 0:03b5121a232e 882 * character from a string array
pcercuei 0:03b5121a232e 883 *
pcercuei 0:03b5121a232e 884 * Returns a pointer to the UTF8 character or NULL
pcercuei 0:03b5121a232e 885 */
pcercuei 0:03b5121a232e 886 const xmlChar *
pcercuei 0:03b5121a232e 887 xmlUTF8Strpos(const xmlChar *utf, int pos) {
pcercuei 0:03b5121a232e 888 xmlChar ch;
pcercuei 0:03b5121a232e 889
pcercuei 0:03b5121a232e 890 if (utf == NULL) return(NULL);
pcercuei 0:03b5121a232e 891 if (pos < 0)
pcercuei 0:03b5121a232e 892 return(NULL);
pcercuei 0:03b5121a232e 893 while (pos--) {
pcercuei 0:03b5121a232e 894 if ((ch=*utf++) == 0) return(NULL);
pcercuei 0:03b5121a232e 895 if ( ch & 0x80 ) {
pcercuei 0:03b5121a232e 896 /* if not simple ascii, verify proper format */
pcercuei 0:03b5121a232e 897 if ( (ch & 0xc0) != 0xc0 )
pcercuei 0:03b5121a232e 898 return(NULL);
pcercuei 0:03b5121a232e 899 /* then skip over remaining bytes for this char */
pcercuei 0:03b5121a232e 900 while ( (ch <<= 1) & 0x80 )
pcercuei 0:03b5121a232e 901 if ( (*utf++ & 0xc0) != 0x80 )
pcercuei 0:03b5121a232e 902 return(NULL);
pcercuei 0:03b5121a232e 903 }
pcercuei 0:03b5121a232e 904 }
pcercuei 0:03b5121a232e 905 return((xmlChar *)utf);
pcercuei 0:03b5121a232e 906 }
pcercuei 0:03b5121a232e 907
pcercuei 0:03b5121a232e 908 /**
pcercuei 0:03b5121a232e 909 * xmlUTF8Strloc:
pcercuei 0:03b5121a232e 910 * @utf: the input UTF8 *
pcercuei 0:03b5121a232e 911 * @utfchar: the UTF8 character to be found
pcercuei 0:03b5121a232e 912 *
pcercuei 0:03b5121a232e 913 * a function to provide the relative location of a UTF8 char
pcercuei 0:03b5121a232e 914 *
pcercuei 0:03b5121a232e 915 * Returns the relative character position of the desired char
pcercuei 0:03b5121a232e 916 * or -1 if not found
pcercuei 0:03b5121a232e 917 */
pcercuei 0:03b5121a232e 918 int
pcercuei 0:03b5121a232e 919 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
pcercuei 0:03b5121a232e 920 int i, size;
pcercuei 0:03b5121a232e 921 xmlChar ch;
pcercuei 0:03b5121a232e 922
pcercuei 0:03b5121a232e 923 if (utf==NULL || utfchar==NULL) return -1;
pcercuei 0:03b5121a232e 924 size = xmlUTF8Strsize(utfchar, 1);
pcercuei 0:03b5121a232e 925 for(i=0; (ch=*utf) != 0; i++) {
pcercuei 0:03b5121a232e 926 if (xmlStrncmp(utf, utfchar, size)==0)
pcercuei 0:03b5121a232e 927 return(i);
pcercuei 0:03b5121a232e 928 utf++;
pcercuei 0:03b5121a232e 929 if ( ch & 0x80 ) {
pcercuei 0:03b5121a232e 930 /* if not simple ascii, verify proper format */
pcercuei 0:03b5121a232e 931 if ( (ch & 0xc0) != 0xc0 )
pcercuei 0:03b5121a232e 932 return(-1);
pcercuei 0:03b5121a232e 933 /* then skip over remaining bytes for this char */
pcercuei 0:03b5121a232e 934 while ( (ch <<= 1) & 0x80 )
pcercuei 0:03b5121a232e 935 if ( (*utf++ & 0xc0) != 0x80 )
pcercuei 0:03b5121a232e 936 return(-1);
pcercuei 0:03b5121a232e 937 }
pcercuei 0:03b5121a232e 938 }
pcercuei 0:03b5121a232e 939
pcercuei 0:03b5121a232e 940 return(-1);
pcercuei 0:03b5121a232e 941 }
pcercuei 0:03b5121a232e 942 /**
pcercuei 0:03b5121a232e 943 * xmlUTF8Strsub:
pcercuei 0:03b5121a232e 944 * @utf: a sequence of UTF-8 encoded bytes
pcercuei 0:03b5121a232e 945 * @start: relative pos of first char
pcercuei 0:03b5121a232e 946 * @len: total number to copy
pcercuei 0:03b5121a232e 947 *
pcercuei 0:03b5121a232e 948 * Create a substring from a given UTF-8 string
pcercuei 0:03b5121a232e 949 * Note: positions are given in units of UTF-8 chars
pcercuei 0:03b5121a232e 950 *
pcercuei 0:03b5121a232e 951 * Returns a pointer to a newly created string
pcercuei 0:03b5121a232e 952 * or NULL if any problem
pcercuei 0:03b5121a232e 953 */
pcercuei 0:03b5121a232e 954
pcercuei 0:03b5121a232e 955 xmlChar *
pcercuei 0:03b5121a232e 956 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
pcercuei 0:03b5121a232e 957 int i;
pcercuei 0:03b5121a232e 958 xmlChar ch;
pcercuei 0:03b5121a232e 959
pcercuei 0:03b5121a232e 960 if (utf == NULL) return(NULL);
pcercuei 0:03b5121a232e 961 if (start < 0) return(NULL);
pcercuei 0:03b5121a232e 962 if (len < 0) return(NULL);
pcercuei 0:03b5121a232e 963
pcercuei 0:03b5121a232e 964 /*
pcercuei 0:03b5121a232e 965 * Skip over any leading chars
pcercuei 0:03b5121a232e 966 */
pcercuei 0:03b5121a232e 967 for (i = 0;i < start;i++) {
pcercuei 0:03b5121a232e 968 if ((ch=*utf++) == 0) return(NULL);
pcercuei 0:03b5121a232e 969 if ( ch & 0x80 ) {
pcercuei 0:03b5121a232e 970 /* if not simple ascii, verify proper format */
pcercuei 0:03b5121a232e 971 if ( (ch & 0xc0) != 0xc0 )
pcercuei 0:03b5121a232e 972 return(NULL);
pcercuei 0:03b5121a232e 973 /* then skip over remaining bytes for this char */
pcercuei 0:03b5121a232e 974 while ( (ch <<= 1) & 0x80 )
pcercuei 0:03b5121a232e 975 if ( (*utf++ & 0xc0) != 0x80 )
pcercuei 0:03b5121a232e 976 return(NULL);
pcercuei 0:03b5121a232e 977 }
pcercuei 0:03b5121a232e 978 }
pcercuei 0:03b5121a232e 979
pcercuei 0:03b5121a232e 980 return(xmlUTF8Strndup(utf, len));
pcercuei 0:03b5121a232e 981 }
pcercuei 0:03b5121a232e 982
pcercuei 0:03b5121a232e 983 #define bottom_xmlstring
pcercuei 0:03b5121a232e 984 #include "elfgcchack.h"
pcercuei 0:03b5121a232e 985