mbed I/F binding for mruby
Dependents: mruby_mbed_web mirb_mbed
string.c
00001 /* 00002 ** string.c - String class 00003 ** 00004 ** See Copyright Notice in mruby.h 00005 */ 00006 00007 #include <ctype.h> 00008 #include <float.h> 00009 #include <limits.h> 00010 #include <stddef.h> 00011 #include <stdlib.h> 00012 #include <string.h> 00013 #include "mruby.h" 00014 #include "mruby/array.h" 00015 #include "mruby/class.h" 00016 #include "mruby/range.h" 00017 #include "mruby/string.h" 00018 #include "mruby/re.h" 00019 00020 const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; 00021 00022 typedef struct mrb_shared_string { 00023 mrb_bool nofree : 1; 00024 int refcnt; 00025 char *ptr; 00026 mrb_int len; 00027 } mrb_shared_string; 00028 00029 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2); 00030 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len); 00031 00032 MRB_API mrb_int 00033 mrb_str_strlen(mrb_state *mrb, struct RString *s) 00034 { 00035 mrb_int i, max = RSTR_LEN(s); 00036 char *p = RSTR_PTR(s); 00037 00038 if (!p) return 0; 00039 for (i=0; i<max; i++) { 00040 if (p[i] == '\0') { 00041 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); 00042 } 00043 } 00044 return max; 00045 } 00046 00047 static inline void 00048 resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity) 00049 { 00050 if (RSTR_EMBED_P(s)) { 00051 if (RSTRING_EMBED_LEN_MAX < capacity) { 00052 char *const tmp = (char *)mrb_malloc(mrb, capacity+1); 00053 const mrb_int len = RSTR_EMBED_LEN(s); 00054 memcpy(tmp, s->as.ary, len); 00055 RSTR_UNSET_EMBED_FLAG(s); 00056 s->as.heap.ptr = tmp; 00057 s->as.heap.len = len; 00058 s->as.heap.aux.capa = capacity; 00059 } 00060 } 00061 else { 00062 s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1); 00063 s->as.heap.aux.capa = capacity; 00064 } 00065 } 00066 00067 static void 00068 str_decref(mrb_state *mrb, mrb_shared_string *shared) 00069 { 00070 shared->refcnt--; 00071 if (shared->refcnt == 0) { 00072 if (!shared->nofree) { 00073 mrb_free(mrb, shared->ptr); 00074 } 00075 mrb_free(mrb, shared); 00076 } 00077 } 00078 00079 MRB_API void 00080 mrb_str_modify(mrb_state *mrb, struct RString *s) 00081 { 00082 if (RSTR_SHARED_P(s)) { 00083 mrb_shared_string *shared = s->as.heap.aux.shared; 00084 00085 if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { 00086 s->as.heap.ptr = shared->ptr; 00087 s->as.heap.aux.capa = shared->len; 00088 RSTR_PTR(s)[s->as.heap.len] = '\0'; 00089 mrb_free(mrb, shared); 00090 } 00091 else { 00092 char *ptr, *p; 00093 mrb_int len; 00094 00095 p = RSTR_PTR(s); 00096 len = s->as.heap.len; 00097 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1); 00098 if (p) { 00099 memcpy(ptr, p, len); 00100 } 00101 ptr[len] = '\0'; 00102 s->as.heap.ptr = ptr; 00103 s->as.heap.aux.capa = len; 00104 str_decref(mrb, shared); 00105 } 00106 RSTR_UNSET_SHARED_FLAG(s); 00107 return; 00108 } 00109 if (RSTR_NOFREE_P(s)) { 00110 char *p = s->as.heap.ptr; 00111 00112 s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1); 00113 if (p) { 00114 memcpy(RSTR_PTR(s), p, s->as.heap.len); 00115 } 00116 RSTR_PTR(s)[s->as.heap.len] = '\0'; 00117 s->as.heap.aux.capa = s->as.heap.len; 00118 RSTR_UNSET_NOFREE_FLAG(s); 00119 return; 00120 } 00121 } 00122 00123 MRB_API mrb_value 00124 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) 00125 { 00126 mrb_int slen; 00127 struct RString *s = mrb_str_ptr(str); 00128 00129 mrb_str_modify(mrb, s); 00130 slen = RSTR_LEN(s); 00131 if (len != slen) { 00132 if (slen < len || slen - len > 256) { 00133 resize_capa(mrb, s, len); 00134 } 00135 RSTR_SET_LEN(s, len); 00136 RSTR_PTR(s)[len] = '\0'; /* sentinel */ 00137 } 00138 return str; 00139 } 00140 00141 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) 00142 00143 static struct RString* 00144 str_new_static(mrb_state *mrb, const char *p, size_t len) 00145 { 00146 struct RString *s; 00147 00148 if (len >= MRB_INT_MAX) { 00149 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 00150 } 00151 s = mrb_obj_alloc_string(mrb); 00152 s->as.heap.len = len; 00153 s->as.heap.aux.capa = 0; /* nofree */ 00154 s->as.heap.ptr = (char *)p; 00155 s->flags = MRB_STR_NOFREE; 00156 00157 return s; 00158 } 00159 00160 static struct RString* 00161 str_new(mrb_state *mrb, const char *p, size_t len) 00162 { 00163 struct RString *s; 00164 00165 if (mrb_ro_data_p(p)) { 00166 return str_new_static(mrb, p, len); 00167 } 00168 s = mrb_obj_alloc_string(mrb); 00169 if (len < RSTRING_EMBED_LEN_MAX) { 00170 RSTR_SET_EMBED_FLAG(s); 00171 RSTR_SET_EMBED_LEN(s, len); 00172 if (p) { 00173 memcpy(s->as.ary, p, len); 00174 } 00175 } else { 00176 if (len >= MRB_INT_MAX) { 00177 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 00178 } 00179 s->as.heap.len = len; 00180 s->as.heap.aux.capa = len; 00181 s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1); 00182 if (p) { 00183 memcpy(s->as.heap.ptr, p, len); 00184 } 00185 } 00186 RSTR_PTR(s)[len] = '\0'; 00187 return s; 00188 } 00189 00190 static inline void 00191 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) 00192 { 00193 s->c = mrb_str_ptr(obj)->c; 00194 } 00195 00196 static mrb_value 00197 mrb_str_new_empty(mrb_state *mrb, mrb_value str) 00198 { 00199 struct RString *s = str_new(mrb, 0, 0); 00200 00201 str_with_class(mrb, s, str); 00202 return mrb_obj_value(s); 00203 } 00204 00205 #ifndef MRB_STR_BUF_MIN_SIZE 00206 # define MRB_STR_BUF_MIN_SIZE 128 00207 #endif 00208 00209 MRB_API mrb_value 00210 mrb_str_buf_new(mrb_state *mrb, size_t capa) 00211 { 00212 struct RString *s; 00213 00214 s = mrb_obj_alloc_string(mrb); 00215 00216 if (capa >= MRB_INT_MAX) { 00217 mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big"); 00218 } 00219 if (capa < MRB_STR_BUF_MIN_SIZE) { 00220 capa = MRB_STR_BUF_MIN_SIZE; 00221 } 00222 s->as.heap.len = 0; 00223 s->as.heap.aux.capa = capa; 00224 s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1); 00225 RSTR_PTR(s)[0] = '\0'; 00226 00227 return mrb_obj_value(s); 00228 } 00229 00230 static void 00231 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len) 00232 { 00233 size_t capa; 00234 size_t total; 00235 ptrdiff_t off = -1; 00236 00237 if (len == 0) return; 00238 mrb_str_modify(mrb, s); 00239 if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) { 00240 off = ptr - RSTR_PTR(s); 00241 } 00242 00243 if (RSTR_EMBED_P(s)) 00244 capa = RSTRING_EMBED_LEN_MAX; 00245 else 00246 capa = s->as.heap.aux.capa; 00247 00248 if (RSTR_LEN(s) >= MRB_INT_MAX - (mrb_int)len) { 00249 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 00250 } 00251 total = RSTR_LEN(s)+len; 00252 if (capa <= total) { 00253 while (total > capa) { 00254 if (capa + 1 >= MRB_INT_MAX / 2) { 00255 capa = (total + 4095) / 4096; 00256 break; 00257 } 00258 capa = (capa + 1) * 2; 00259 } 00260 resize_capa(mrb, s, capa); 00261 } 00262 if (off != -1) { 00263 ptr = RSTR_PTR(s) + off; 00264 } 00265 memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len); 00266 mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX); 00267 RSTR_SET_LEN(s, total); 00268 RSTR_PTR(s)[total] = '\0'; /* sentinel */ 00269 } 00270 00271 MRB_API mrb_value 00272 mrb_str_new(mrb_state *mrb, const char *p, size_t len) 00273 { 00274 return mrb_obj_value(str_new(mrb, p, len)); 00275 } 00276 00277 /* 00278 * call-seq: (Caution! NULL string) 00279 * String.new(str="") => new_str 00280 * 00281 * Returns a new string object containing a copy of <i>str</i>. 00282 */ 00283 00284 MRB_API mrb_value 00285 mrb_str_new_cstr(mrb_state *mrb, const char *p) 00286 { 00287 struct RString *s; 00288 size_t len; 00289 00290 if (p) { 00291 len = strlen(p); 00292 } 00293 else { 00294 len = 0; 00295 } 00296 00297 s = str_new(mrb, p, len); 00298 00299 return mrb_obj_value(s); 00300 } 00301 00302 MRB_API mrb_value 00303 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len) 00304 { 00305 struct RString *s = str_new_static(mrb, p, len); 00306 return mrb_obj_value(s); 00307 } 00308 00309 void 00310 mrb_gc_free_str(mrb_state *mrb, struct RString *str) 00311 { 00312 if (RSTR_EMBED_P(str)) 00313 /* no code */; 00314 else if (RSTR_SHARED_P(str)) 00315 str_decref(mrb, str->as.heap.aux.shared); 00316 else if (!RSTR_NOFREE_P(str)) 00317 mrb_free(mrb, str->as.heap.ptr); 00318 } 00319 00320 MRB_API char* 00321 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) 00322 { 00323 struct RString *s; 00324 00325 if (!mrb_string_p(str0)) { 00326 mrb_raise(mrb, E_TYPE_ERROR, "expected String"); 00327 } 00328 00329 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); 00330 if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) { 00331 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); 00332 } 00333 return RSTR_PTR(s); 00334 } 00335 00336 static void 00337 str_make_shared(mrb_state *mrb, struct RString *s) 00338 { 00339 if (!RSTR_SHARED_P(s)) { 00340 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); 00341 00342 shared->refcnt = 1; 00343 if (RSTR_EMBED_P(s)) { 00344 const mrb_int len = RSTR_EMBED_LEN(s); 00345 char *const tmp = (char *)mrb_malloc(mrb, len+1); 00346 memcpy(tmp, s->as.ary, len); 00347 tmp[len] = '\0'; 00348 RSTR_UNSET_EMBED_FLAG(s); 00349 s->as.heap.ptr = tmp; 00350 s->as.heap.len = len; 00351 shared->nofree = FALSE; 00352 shared->ptr = s->as.heap.ptr; 00353 } 00354 else if (RSTR_NOFREE_P(s)) { 00355 shared->nofree = TRUE; 00356 shared->ptr = s->as.heap.ptr; 00357 RSTR_UNSET_NOFREE_FLAG(s); 00358 } 00359 else { 00360 shared->nofree = FALSE; 00361 if (s->as.heap.aux.capa > s->as.heap.len) { 00362 s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1); 00363 } 00364 else { 00365 shared->ptr = s->as.heap.ptr; 00366 } 00367 } 00368 shared->len = s->as.heap.len; 00369 s->as.heap.aux.shared = shared; 00370 RSTR_SET_SHARED_FLAG(s); 00371 } 00372 } 00373 00374 /* 00375 * call-seq: (Caution! String("abcd") change) 00376 * String("abcdefg") = String("abcd") + String("efg") 00377 * 00378 * Returns a new string object containing a copy of <i>str</i>. 00379 */ 00380 MRB_API void 00381 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) 00382 { 00383 struct RString *s1 = mrb_str_ptr(self), *s2; 00384 mrb_int len; 00385 00386 mrb_str_modify(mrb, s1); 00387 if (!mrb_string_p(other)) { 00388 other = mrb_str_to_str(mrb, other); 00389 } 00390 s2 = mrb_str_ptr(other); 00391 len = RSTR_LEN(s1) + RSTR_LEN(s2); 00392 00393 if (RSTRING_CAPA(self) < len) { 00394 resize_capa(mrb, s1, len); 00395 } 00396 memcpy(RSTR_PTR(s1)+RSTR_LEN(s1), RSTR_PTR(s2), RSTR_LEN(s2)); 00397 RSTR_SET_LEN(s1, len); 00398 RSTR_PTR(s1)[len] = '\0'; 00399 } 00400 00401 /* 00402 * call-seq: (Caution! String("abcd") remain) 00403 * String("abcdefg") = String("abcd") + String("efg") 00404 * 00405 * Returns a new string object containing a copy of <i>str</i>. 00406 */ 00407 MRB_API mrb_value 00408 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) 00409 { 00410 struct RString *s = mrb_str_ptr(a); 00411 struct RString *s2 = mrb_str_ptr(b); 00412 struct RString *t; 00413 00414 t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2)); 00415 memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s)); 00416 memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2)); 00417 00418 return mrb_obj_value(t); 00419 } 00420 00421 /* 15.2.10.5.2 */ 00422 00423 /* 00424 * call-seq: (Caution! String("abcd") remain) for stack_argument 00425 * String("abcdefg") = String("abcd") + String("efg") 00426 * 00427 * Returns a new string object containing a copy of <i>str</i>. 00428 */ 00429 static mrb_value 00430 mrb_str_plus_m(mrb_state *mrb, mrb_value self) 00431 { 00432 mrb_value str; 00433 00434 mrb_get_args(mrb, "S", &str); 00435 return mrb_str_plus(mrb, self, str); 00436 } 00437 00438 /* 15.2.10.5.26 */ 00439 /* 15.2.10.5.33 */ 00440 /* 00441 * call-seq: 00442 * len = strlen(String("abcd")) 00443 * 00444 * Returns the length of string. 00445 */ 00446 static mrb_value 00447 mrb_str_size(mrb_state *mrb, mrb_value self) 00448 { 00449 struct RString *s = mrb_str_ptr(self); 00450 return mrb_fixnum_value(RSTR_LEN(s)); 00451 } 00452 00453 /* 15.2.10.5.1 */ 00454 /* 00455 * call-seq: 00456 * str * integer => new_str 00457 * 00458 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of 00459 * the receiver. 00460 * 00461 * "Ho! " * 3 #=> "Ho! Ho! Ho! " 00462 */ 00463 static mrb_value 00464 mrb_str_times(mrb_state *mrb, mrb_value self) 00465 { 00466 mrb_int n,len,times; 00467 struct RString *str2; 00468 char *p; 00469 00470 mrb_get_args(mrb, "i", ×); 00471 if (times < 0) { 00472 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); 00473 } 00474 if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) { 00475 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); 00476 } 00477 00478 len = RSTRING_LEN(self)*times; 00479 str2 = str_new(mrb, 0, len); 00480 str_with_class(mrb, str2, self); 00481 p = RSTR_PTR(str2); 00482 if (len > 0) { 00483 n = RSTRING_LEN(self); 00484 memcpy(p, RSTRING_PTR(self), n); 00485 while (n <= len/2) { 00486 memcpy(p + n, p, n); 00487 n *= 2; 00488 } 00489 memcpy(p + n, p, len-n); 00490 } 00491 p[RSTR_LEN(str2)] = '\0'; 00492 00493 return mrb_obj_value(str2); 00494 } 00495 /* -------------------------------------------------------------- */ 00496 00497 #define lesser(a,b) (((a)>(b))?(b):(a)) 00498 00499 /* ---------------------------*/ 00500 /* 00501 * call-seq: 00502 * mrb_value str1 <=> mrb_value str2 => int 00503 * > 1 00504 * = 0 00505 * < -1 00506 */ 00507 MRB_API int 00508 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) 00509 { 00510 mrb_int len; 00511 mrb_int retval; 00512 struct RString *s1 = mrb_str_ptr(str1); 00513 struct RString *s2 = mrb_str_ptr(str2); 00514 00515 len = lesser(RSTR_LEN(s1), RSTR_LEN(s2)); 00516 retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len); 00517 if (retval == 0) { 00518 if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0; 00519 if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1; 00520 return -1; 00521 } 00522 if (retval > 0) return 1; 00523 return -1; 00524 } 00525 00526 /* 15.2.10.5.3 */ 00527 00528 /* 00529 * call-seq: 00530 * str <=> other_str => -1, 0, +1 00531 * 00532 * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if 00533 * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than 00534 * <i>str</i>. If the strings are of different lengths, and the strings are 00535 * equal when compared up to the shortest length, then the longer string is 00536 * considered greater than the shorter one. If the variable <code>$=</code> is 00537 * <code>false</code>, the comparison is based on comparing the binary values 00538 * of each character in the string. In older versions of Ruby, setting 00539 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated 00540 * in favor of using <code>String#casecmp</code>. 00541 * 00542 * <code><=></code> is the basis for the methods <code><</code>, 00543 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>, 00544 * included from module <code>Comparable</code>. The method 00545 * <code>String#==</code> does not use <code>Comparable#==</code>. 00546 * 00547 * "abcdef" <=> "abcde" #=> 1 00548 * "abcdef" <=> "abcdef" #=> 0 00549 * "abcdef" <=> "abcdefg" #=> -1 00550 * "abcdef" <=> "ABCDEF" #=> 1 00551 */ 00552 static mrb_value 00553 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) 00554 { 00555 mrb_value str2; 00556 mrb_int result; 00557 00558 mrb_get_args(mrb, "o", &str2); 00559 if (!mrb_string_p(str2)) { 00560 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) { 00561 return mrb_nil_value(); 00562 } 00563 else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) { 00564 return mrb_nil_value(); 00565 } 00566 else { 00567 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); 00568 00569 if (mrb_nil_p(tmp)) return mrb_nil_value(); 00570 if (!mrb_fixnum(tmp)) { 00571 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); 00572 } 00573 result = -mrb_fixnum(tmp); 00574 } 00575 } 00576 else { 00577 result = mrb_str_cmp(mrb, str1, str2); 00578 } 00579 return mrb_fixnum_value(result); 00580 } 00581 00582 static mrb_bool 00583 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) 00584 { 00585 const mrb_int len = RSTRING_LEN(str1); 00586 00587 if (len != RSTRING_LEN(str2)) return FALSE; 00588 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0) 00589 return TRUE; 00590 return FALSE; 00591 } 00592 00593 MRB_API mrb_bool 00594 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) 00595 { 00596 if (mrb_immediate_p(str2)) return FALSE; 00597 if (!mrb_string_p(str2)) { 00598 if (mrb_nil_p(str2)) return FALSE; 00599 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { 00600 return FALSE; 00601 } 00602 str2 = mrb_funcall(mrb, str2, "to_str", 0); 00603 return mrb_equal(mrb, str2, str1); 00604 } 00605 return str_eql(mrb, str1, str2); 00606 } 00607 00608 /* 15.2.10.5.4 */ 00609 /* 00610 * call-seq: 00611 * str == obj => true or false 00612 * 00613 * Equality--- 00614 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>. 00615 * Otherwise, returns <code>false</code> or <code>true</code> 00616 * 00617 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero. 00618 */ 00619 static mrb_value 00620 mrb_str_equal_m(mrb_state *mrb, mrb_value str1) 00621 { 00622 mrb_value str2; 00623 00624 mrb_get_args(mrb, "o", &str2); 00625 00626 return mrb_bool_value(mrb_str_equal(mrb, str1, str2)); 00627 } 00628 /* ---------------------------------- */ 00629 MRB_API mrb_value 00630 mrb_str_to_str(mrb_state *mrb, mrb_value str) 00631 { 00632 mrb_value s; 00633 00634 if (!mrb_string_p(str)) { 00635 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); 00636 if (mrb_nil_p(s)) { 00637 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); 00638 } 00639 return s; 00640 } 00641 return str; 00642 } 00643 00644 MRB_API const char* 00645 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr) 00646 { 00647 mrb_value str = mrb_str_to_str(mrb, ptr); 00648 return RSTRING_PTR(str); 00649 } 00650 00651 void 00652 mrb_noregexp(mrb_state *mrb, mrb_value self) 00653 { 00654 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented"); 00655 } 00656 00657 void 00658 mrb_regexp_check(mrb_state *mrb, mrb_value obj) 00659 { 00660 if (mrb_regexp_p(mrb, obj)) { 00661 mrb_noregexp(mrb, obj); 00662 } 00663 } 00664 00665 static inline mrb_int 00666 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) 00667 { 00668 const unsigned char *x = xs, *xe = xs + m; 00669 const unsigned char *y = ys; 00670 int i, qstable[256]; 00671 00672 /* Preprocessing */ 00673 for (i = 0; i < 256; ++i) 00674 qstable[i] = m + 1; 00675 for (; x < xe; ++x) 00676 qstable[*x] = xe - x; 00677 /* Searching */ 00678 for (; y + m <= ys + n; y += *(qstable + y[m])) { 00679 if (*xs == *y && memcmp(xs, y, m) == 0) 00680 return y - ys; 00681 } 00682 return -1; 00683 } 00684 00685 static mrb_int 00686 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n) 00687 { 00688 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0; 00689 00690 if (m > n) return -1; 00691 else if (m == n) { 00692 return memcmp(x0, y0, m) == 0 ? 0 : -1; 00693 } 00694 else if (m < 1) { 00695 return 0; 00696 } 00697 else if (m == 1) { 00698 const unsigned char *ys = y, *ye = ys + n; 00699 for (; y < ye; ++y) { 00700 if (*x == *y) 00701 return y - ys; 00702 } 00703 return -1; 00704 } 00705 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); 00706 } 00707 00708 static mrb_int 00709 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) 00710 { 00711 mrb_int pos; 00712 char *s, *sptr; 00713 mrb_int len, slen; 00714 00715 len = RSTRING_LEN(str); 00716 slen = RSTRING_LEN(sub); 00717 if (offset < 0) { 00718 offset += len; 00719 if (offset < 0) return -1; 00720 } 00721 if (len - offset < slen) return -1; 00722 s = RSTRING_PTR(str); 00723 if (offset) { 00724 s += offset; 00725 } 00726 if (slen == 0) return offset; 00727 /* need proceed one character at a time */ 00728 sptr = RSTRING_PTR(sub); 00729 slen = RSTRING_LEN(sub); 00730 len = RSTRING_LEN(str) - offset; 00731 pos = mrb_memsearch(sptr, slen, s, len); 00732 if (pos < 0) return pos; 00733 return pos + offset; 00734 } 00735 00736 MRB_API mrb_value 00737 mrb_str_dup(mrb_state *mrb, mrb_value str) 00738 { 00739 struct RString *s = mrb_str_ptr(str); 00740 struct RString *dup = str_new(mrb, 0, 0); 00741 00742 str_with_class(mrb, dup, str); 00743 return str_replace(mrb, dup, s); 00744 } 00745 00746 static mrb_value 00747 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) 00748 { 00749 mrb_int idx; 00750 00751 mrb_regexp_check(mrb, indx); 00752 switch (mrb_type(indx)) { 00753 case MRB_TT_FIXNUM: 00754 idx = mrb_fixnum(indx); 00755 00756 num_index: 00757 str = mrb_str_substr(mrb, str, idx, 1); 00758 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); 00759 return str; 00760 00761 case MRB_TT_STRING: 00762 if (mrb_str_index(mrb, str, indx, 0) != -1) 00763 return mrb_str_dup(mrb, indx); 00764 return mrb_nil_value(); 00765 00766 case MRB_TT_RANGE: 00767 /* check if indx is Range */ 00768 { 00769 mrb_int beg, len; 00770 00771 len = RSTRING_LEN(str); 00772 if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) { 00773 return mrb_str_subseq(mrb, str, beg, len); 00774 } 00775 else { 00776 return mrb_nil_value(); 00777 } 00778 } 00779 default: 00780 idx = mrb_fixnum(indx); 00781 goto num_index; 00782 } 00783 return mrb_nil_value(); /* not reached */ 00784 } 00785 00786 /* 15.2.10.5.6 */ 00787 /* 15.2.10.5.34 */ 00788 /* 00789 * call-seq: 00790 * str[fixnum] => fixnum or nil 00791 * str[fixnum, fixnum] => new_str or nil 00792 * str[range] => new_str or nil 00793 * str[regexp] => new_str or nil 00794 * str[regexp, fixnum] => new_str or nil 00795 * str[other_str] => new_str or nil 00796 * str.slice(fixnum) => fixnum or nil 00797 * str.slice(fixnum, fixnum) => new_str or nil 00798 * str.slice(range) => new_str or nil 00799 * str.slice(other_str) => new_str or nil 00800 * 00801 * Element Reference---If passed a single <code>Fixnum</code>, returns the code 00802 * of the character at that position. If passed two <code>Fixnum</code> 00803 * objects, returns a substring starting at the offset given by the first, and 00804 * a length given by the second. If given a range, a substring containing 00805 * characters at offsets given by the range is returned. In all three cases, if 00806 * an offset is negative, it is counted from the end of <i>str</i>. Returns 00807 * <code>nil</code> if the initial offset falls outside the string, the length 00808 * is negative, or the beginning of the range is greater than the end. 00809 * 00810 * If a <code>String</code> is given, that string is returned if it occurs in 00811 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no 00812 * match. 00813 * 00814 * a = "hello there" 00815 * a[1] #=> 101(1.8.7) "e"(1.9.2) 00816 * a[1,3] #=> "ell" 00817 * a[1..3] #=> "ell" 00818 * a[-3,2] #=> "er" 00819 * a[-4..-2] #=> "her" 00820 * a[12..-1] #=> nil 00821 * a[-2..-4] #=> "" 00822 * a["lo"] #=> "lo" 00823 * a["bye"] #=> nil 00824 */ 00825 static mrb_value 00826 mrb_str_aref_m(mrb_state *mrb, mrb_value str) 00827 { 00828 mrb_value a1, a2; 00829 int argc; 00830 00831 argc = mrb_get_args(mrb, "o|o", &a1, &a2); 00832 if (argc == 2) { 00833 mrb_regexp_check(mrb, a1); 00834 return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2)); 00835 } 00836 if (argc != 1) { 00837 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc)); 00838 } 00839 return mrb_str_aref(mrb, str, a1); 00840 } 00841 00842 /* 15.2.10.5.8 */ 00843 /* 00844 * call-seq: 00845 * str.capitalize! => str or nil 00846 * 00847 * Modifies <i>str</i> by converting the first character to uppercase and the 00848 * remainder to lowercase. Returns <code>nil</code> if no changes are made. 00849 * 00850 * a = "hello" 00851 * a.capitalize! #=> "Hello" 00852 * a #=> "Hello" 00853 * a.capitalize! #=> nil 00854 */ 00855 static mrb_value 00856 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) 00857 { 00858 char *p, *pend; 00859 mrb_bool modify = FALSE; 00860 struct RString *s = mrb_str_ptr(str); 00861 00862 mrb_str_modify(mrb, s); 00863 if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value(); 00864 p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s); 00865 if (ISLOWER(*p)) { 00866 *p = TOUPPER(*p); 00867 modify = TRUE; 00868 } 00869 while (++p < pend) { 00870 if (ISUPPER(*p)) { 00871 *p = TOLOWER(*p); 00872 modify = TRUE; 00873 } 00874 } 00875 if (modify) return str; 00876 return mrb_nil_value(); 00877 } 00878 00879 /* 15.2.10.5.7 */ 00880 /* 00881 * call-seq: 00882 * str.capitalize => new_str 00883 * 00884 * Returns a copy of <i>str</i> with the first character converted to uppercase 00885 * and the remainder to lowercase. 00886 * 00887 * "hello".capitalize #=> "Hello" 00888 * "HELLO".capitalize #=> "Hello" 00889 * "123ABC".capitalize #=> "123abc" 00890 */ 00891 static mrb_value 00892 mrb_str_capitalize(mrb_state *mrb, mrb_value self) 00893 { 00894 mrb_value str; 00895 00896 str = mrb_str_dup(mrb, self); 00897 mrb_str_capitalize_bang(mrb, str); 00898 return str; 00899 } 00900 00901 /* 15.2.10.5.10 */ 00902 /* 00903 * call-seq: 00904 * str.chomp!(separator=$/) => str or nil 00905 * 00906 * Modifies <i>str</i> in place as described for <code>String#chomp</code>, 00907 * returning <i>str</i>, or <code>nil</code> if no modifications were made. 00908 */ 00909 static mrb_value 00910 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) 00911 { 00912 mrb_value rs; 00913 mrb_int newline; 00914 char *p, *pp; 00915 mrb_int rslen; 00916 mrb_int len; 00917 struct RString *s = mrb_str_ptr(str); 00918 00919 mrb_str_modify(mrb, s); 00920 len = RSTR_LEN(s); 00921 if (mrb_get_args(mrb, "|S", &rs) == 0) { 00922 if (len == 0) return mrb_nil_value(); 00923 smart_chomp: 00924 if (RSTR_PTR(s)[len-1] == '\n') { 00925 RSTR_SET_LEN(s, RSTR_LEN(s) - 1); 00926 if (RSTR_LEN(s) > 0 && 00927 RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') { 00928 RSTR_SET_LEN(s, RSTR_LEN(s) - 1); 00929 } 00930 } 00931 else if (RSTR_PTR(s)[len-1] == '\r') { 00932 RSTR_SET_LEN(s, RSTR_LEN(s) - 1); 00933 } 00934 else { 00935 return mrb_nil_value(); 00936 } 00937 RSTR_PTR(s)[RSTR_LEN(s)] = '\0'; 00938 return str; 00939 } 00940 00941 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value(); 00942 p = RSTR_PTR(s); 00943 rslen = RSTRING_LEN(rs); 00944 if (rslen == 0) { 00945 while (len>0 && p[len-1] == '\n') { 00946 len--; 00947 if (len>0 && p[len-1] == '\r') 00948 len--; 00949 } 00950 if (len < RSTR_LEN(s)) { 00951 RSTR_SET_LEN(s, len); 00952 p[len] = '\0'; 00953 return str; 00954 } 00955 return mrb_nil_value(); 00956 } 00957 if (rslen > len) return mrb_nil_value(); 00958 newline = RSTRING_PTR(rs)[rslen-1]; 00959 if (rslen == 1 && newline == '\n') 00960 newline = RSTRING_PTR(rs)[rslen-1]; 00961 if (rslen == 1 && newline == '\n') 00962 goto smart_chomp; 00963 00964 pp = p + len - rslen; 00965 if (p[len-1] == newline && 00966 (rslen <= 1 || 00967 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { 00968 RSTR_SET_LEN(s, len - rslen); 00969 p[RSTR_LEN(s)] = '\0'; 00970 return str; 00971 } 00972 return mrb_nil_value(); 00973 } 00974 00975 /* 15.2.10.5.9 */ 00976 /* 00977 * call-seq: 00978 * str.chomp(separator=$/) => new_str 00979 * 00980 * Returns a new <code>String</code> with the given record separator removed 00981 * from the end of <i>str</i> (if present). If <code>$/</code> has not been 00982 * changed from the default Ruby record separator, then <code>chomp</code> also 00983 * removes carriage return characters (that is it will remove <code>\n</code>, 00984 * <code>\r</code>, and <code>\r\n</code>). 00985 * 00986 * "hello".chomp #=> "hello" 00987 * "hello\n".chomp #=> "hello" 00988 * "hello\r\n".chomp #=> "hello" 00989 * "hello\n\r".chomp #=> "hello\n" 00990 * "hello\r".chomp #=> "hello" 00991 * "hello \n there".chomp #=> "hello \n there" 00992 * "hello".chomp("llo") #=> "he" 00993 */ 00994 static mrb_value 00995 mrb_str_chomp(mrb_state *mrb, mrb_value self) 00996 { 00997 mrb_value str; 00998 00999 str = mrb_str_dup(mrb, self); 01000 mrb_str_chomp_bang(mrb, str); 01001 return str; 01002 } 01003 01004 /* 15.2.10.5.12 */ 01005 /* 01006 * call-seq: 01007 * str.chop! => str or nil 01008 * 01009 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>, 01010 * or <code>nil</code> if <i>str</i> is the empty string. See also 01011 * <code>String#chomp!</code>. 01012 */ 01013 static mrb_value 01014 mrb_str_chop_bang(mrb_state *mrb, mrb_value str) 01015 { 01016 struct RString *s = mrb_str_ptr(str); 01017 01018 mrb_str_modify(mrb, s); 01019 if (RSTR_LEN(s) > 0) { 01020 mrb_int len; 01021 len = RSTR_LEN(s) - 1; 01022 if (RSTR_PTR(s)[len] == '\n') { 01023 if (len > 0 && 01024 RSTR_PTR(s)[len-1] == '\r') { 01025 len--; 01026 } 01027 } 01028 RSTR_SET_LEN(s, len); 01029 RSTR_PTR(s)[len] = '\0'; 01030 return str; 01031 } 01032 return mrb_nil_value(); 01033 } 01034 01035 /* 15.2.10.5.11 */ 01036 /* 01037 * call-seq: 01038 * str.chop => new_str 01039 * 01040 * Returns a new <code>String</code> with the last character removed. If the 01041 * string ends with <code>\r\n</code>, both characters are removed. Applying 01042 * <code>chop</code> to an empty string returns an empty 01043 * string. <code>String#chomp</code> is often a safer alternative, as it leaves 01044 * the string unchanged if it doesn't end in a record separator. 01045 * 01046 * "string\r\n".chop #=> "string" 01047 * "string\n\r".chop #=> "string\n" 01048 * "string\n".chop #=> "string" 01049 * "string".chop #=> "strin" 01050 * "x".chop #=> "" 01051 */ 01052 static mrb_value 01053 mrb_str_chop(mrb_state *mrb, mrb_value self) 01054 { 01055 mrb_value str; 01056 str = mrb_str_dup(mrb, self); 01057 mrb_str_chop_bang(mrb, str); 01058 return str; 01059 } 01060 01061 /* 15.2.10.5.14 */ 01062 /* 01063 * call-seq: 01064 * str.downcase! => str or nil 01065 * 01066 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no 01067 * changes were made. 01068 */ 01069 static mrb_value 01070 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) 01071 { 01072 char *p, *pend; 01073 mrb_bool modify = FALSE; 01074 struct RString *s = mrb_str_ptr(str); 01075 01076 mrb_str_modify(mrb, s); 01077 p = RSTR_PTR(s); 01078 pend = RSTR_PTR(s) + RSTR_LEN(s); 01079 while (p < pend) { 01080 if (ISUPPER(*p)) { 01081 *p = TOLOWER(*p); 01082 modify = TRUE; 01083 } 01084 p++; 01085 } 01086 01087 if (modify) return str; 01088 return mrb_nil_value(); 01089 } 01090 01091 /* 15.2.10.5.13 */ 01092 /* 01093 * call-seq: 01094 * str.downcase => new_str 01095 * 01096 * Returns a copy of <i>str</i> with all uppercase letters replaced with their 01097 * lowercase counterparts. The operation is locale insensitive---only 01098 * characters ``A'' to ``Z'' are affected. 01099 * 01100 * "hEllO".downcase #=> "hello" 01101 */ 01102 static mrb_value 01103 mrb_str_downcase(mrb_state *mrb, mrb_value self) 01104 { 01105 mrb_value str; 01106 01107 str = mrb_str_dup(mrb, self); 01108 mrb_str_downcase_bang(mrb, str); 01109 return str; 01110 } 01111 01112 /* 15.2.10.5.16 */ 01113 /* 01114 * call-seq: 01115 * str.empty? => true or false 01116 * 01117 * Returns <code>true</code> if <i>str</i> has a length of zero. 01118 * 01119 * "hello".empty? #=> false 01120 * "".empty? #=> true 01121 */ 01122 static mrb_value 01123 mrb_str_empty_p(mrb_state *mrb, mrb_value self) 01124 { 01125 struct RString *s = mrb_str_ptr(self); 01126 01127 return mrb_bool_value(RSTR_LEN(s) == 0); 01128 } 01129 01130 /* 15.2.10.5.17 */ 01131 /* 01132 * call-seq: 01133 * str.eql?(other) => true or false 01134 * 01135 * Two strings are equal if the have the same length and content. 01136 */ 01137 static mrb_value 01138 mrb_str_eql(mrb_state *mrb, mrb_value self) 01139 { 01140 mrb_value str2; 01141 mrb_bool eql_p; 01142 01143 mrb_get_args(mrb, "o", &str2); 01144 eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2); 01145 01146 return mrb_bool_value(eql_p); 01147 } 01148 01149 static mrb_value 01150 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) 01151 { 01152 struct RString *orig, *s; 01153 mrb_shared_string *shared; 01154 01155 orig = mrb_str_ptr(str); 01156 if (RSTR_EMBED_P(orig)) { 01157 s = str_new(mrb, orig->as.ary+beg, len); 01158 } else { 01159 str_make_shared(mrb, orig); 01160 shared = orig->as.heap.aux.shared; 01161 s = mrb_obj_alloc_string(mrb); 01162 s->as.heap.ptr = orig->as.heap.ptr + beg; 01163 s->as.heap.len = len; 01164 s->as.heap.aux.shared = shared; 01165 RSTR_SET_SHARED_FLAG(s); 01166 shared->refcnt++; 01167 } 01168 01169 return mrb_obj_value(s); 01170 } 01171 01172 MRB_API mrb_value 01173 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) 01174 { 01175 if (len < 0) return mrb_nil_value(); 01176 if (!RSTRING_LEN(str)) { 01177 len = 0; 01178 } 01179 if (beg > RSTRING_LEN(str)) return mrb_nil_value(); 01180 if (beg < 0) { 01181 beg += RSTRING_LEN(str); 01182 if (beg < 0) return mrb_nil_value(); 01183 } 01184 if (beg + len > RSTRING_LEN(str)) 01185 len = RSTRING_LEN(str) - beg; 01186 if (len <= 0) { 01187 len = 0; 01188 } 01189 return mrb_str_subseq(mrb, str, beg, len); 01190 } 01191 01192 mrb_int 01193 mrb_str_hash(mrb_state *mrb, mrb_value str) 01194 { 01195 /* 1-8-7 */ 01196 struct RString *s = mrb_str_ptr(str); 01197 mrb_int len = RSTR_LEN(s); 01198 char *p = RSTR_PTR(s); 01199 mrb_int key = 0; 01200 01201 while (len--) { 01202 key = key*65599 + *p; 01203 p++; 01204 } 01205 return key + (key>>5); 01206 } 01207 01208 /* 15.2.10.5.20 */ 01209 /* 01210 * call-seq: 01211 * str.hash => fixnum 01212 * 01213 * Return a hash based on the string's length and content. 01214 */ 01215 static mrb_value 01216 mrb_str_hash_m(mrb_state *mrb, mrb_value self) 01217 { 01218 mrb_int key = mrb_str_hash(mrb, self); 01219 return mrb_fixnum_value(key); 01220 } 01221 01222 /* 15.2.10.5.21 */ 01223 /* 01224 * call-seq: 01225 * str.include? other_str => true or false 01226 * str.include? fixnum => true or false 01227 * 01228 * Returns <code>true</code> if <i>str</i> contains the given string or 01229 * character. 01230 * 01231 * "hello".include? "lo" #=> true 01232 * "hello".include? "ol" #=> false 01233 * "hello".include? ?h #=> true 01234 */ 01235 static mrb_value 01236 mrb_str_include(mrb_state *mrb, mrb_value self) 01237 { 01238 mrb_int i; 01239 mrb_value str2; 01240 mrb_bool include_p; 01241 01242 mrb_get_args(mrb, "o", &str2); 01243 if (mrb_fixnum_p(str2)) { 01244 include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL); 01245 } 01246 else { 01247 str2 = mrb_str_to_str(mrb, str2); 01248 i = mrb_str_index(mrb, self, str2, 0); 01249 01250 include_p = (i != -1); 01251 } 01252 01253 return mrb_bool_value(include_p); 01254 } 01255 01256 /* 15.2.10.5.22 */ 01257 /* 01258 * call-seq: 01259 * str.index(substring [, offset]) => fixnum or nil 01260 * str.index(fixnum [, offset]) => fixnum or nil 01261 * str.index(regexp [, offset]) => fixnum or nil 01262 * 01263 * Returns the index of the first occurrence of the given 01264 * <i>substring</i>, 01265 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. 01266 * Returns 01267 * <code>nil</code> if not found. 01268 * If the second parameter is present, it 01269 * specifies the position in the string to begin the search. 01270 * 01271 * "hello".index('e') #=> 1 01272 * "hello".index('lo') #=> 3 01273 * "hello".index('a') #=> nil 01274 * "hello".index(101) #=> 1(101=0x65='e') 01275 * "hello".index(/[aeiou]/, -3) #=> 4 01276 */ 01277 static mrb_value 01278 mrb_str_index_m(mrb_state *mrb, mrb_value str) 01279 { 01280 mrb_value *argv; 01281 mrb_int argc; 01282 mrb_value sub; 01283 mrb_int pos; 01284 01285 mrb_get_args(mrb, "*", &argv, &argc); 01286 if (argc == 2) { 01287 pos = mrb_fixnum(argv[1]); 01288 sub = argv[0]; 01289 } 01290 else { 01291 pos = 0; 01292 if (argc > 0) 01293 sub = argv[0]; 01294 else 01295 sub = mrb_nil_value(); 01296 } 01297 mrb_regexp_check(mrb, sub); 01298 if (pos < 0) { 01299 pos += RSTRING_LEN(str); 01300 if (pos < 0) { 01301 return mrb_nil_value(); 01302 } 01303 } 01304 01305 switch (mrb_type(sub)) { 01306 case MRB_TT_FIXNUM: { 01307 int c = mrb_fixnum(sub); 01308 mrb_int len = RSTRING_LEN(str); 01309 unsigned char *p = (unsigned char*)RSTRING_PTR(str); 01310 01311 for (;pos<len;pos++) { 01312 if (p[pos] == c) return mrb_fixnum_value(pos); 01313 } 01314 return mrb_nil_value(); 01315 } 01316 01317 default: { 01318 mrb_value tmp; 01319 01320 tmp = mrb_check_string_type(mrb, sub); 01321 if (mrb_nil_p(tmp)) { 01322 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); 01323 } 01324 sub = tmp; 01325 } 01326 /* fall through */ 01327 case MRB_TT_STRING: 01328 pos = mrb_str_index(mrb, str, sub, pos); 01329 break; 01330 } 01331 01332 if (pos == -1) return mrb_nil_value(); 01333 return mrb_fixnum_value(pos); 01334 } 01335 01336 #define STR_REPLACE_SHARED_MIN 10 01337 01338 static mrb_value 01339 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) 01340 { 01341 long len; 01342 01343 len = RSTR_LEN(s2); 01344 if (RSTR_SHARED_P(s1)) { 01345 str_decref(mrb, s1->as.heap.aux.shared); 01346 } 01347 else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) { 01348 mrb_free(mrb, s1->as.heap.ptr); 01349 } 01350 01351 RSTR_UNSET_NOFREE_FLAG(s1); 01352 01353 if (RSTR_SHARED_P(s2)) { 01354 L_SHARE: 01355 RSTR_UNSET_EMBED_FLAG(s1); 01356 s1->as.heap.ptr = s2->as.heap.ptr; 01357 s1->as.heap.len = len; 01358 s1->as.heap.aux.shared = s2->as.heap.aux.shared; 01359 RSTR_SET_SHARED_FLAG(s1); 01360 s1->as.heap.aux.shared->refcnt++; 01361 } 01362 else { 01363 if (len <= RSTRING_EMBED_LEN_MAX) { 01364 RSTR_UNSET_SHARED_FLAG(s1); 01365 RSTR_SET_EMBED_FLAG(s1); 01366 memcpy(s1->as.ary, RSTR_PTR(s2), len); 01367 RSTR_SET_EMBED_LEN(s1, len); 01368 } 01369 else { 01370 str_make_shared(mrb, s2); 01371 goto L_SHARE; 01372 } 01373 } 01374 01375 return mrb_obj_value(s1); 01376 } 01377 01378 /* 15.2.10.5.24 */ 01379 /* 15.2.10.5.28 */ 01380 /* 01381 * call-seq: 01382 * str.replace(other_str) => str 01383 * 01384 * s = "hello" #=> "hello" 01385 * s.replace "world" #=> "world" 01386 */ 01387 static mrb_value 01388 mrb_str_replace(mrb_state *mrb, mrb_value str) 01389 { 01390 mrb_value str2; 01391 01392 mrb_get_args(mrb, "S", &str2); 01393 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2)); 01394 } 01395 01396 /* 15.2.10.5.23 */ 01397 /* 01398 * call-seq: 01399 * String.new(str="") => new_str 01400 * 01401 * Returns a new string object containing a copy of <i>str</i>. 01402 */ 01403 static mrb_value 01404 mrb_str_init(mrb_state *mrb, mrb_value self) 01405 { 01406 mrb_value str2; 01407 01408 if (mrb_get_args(mrb, "|S", &str2) == 1) { 01409 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2)); 01410 } 01411 return self; 01412 } 01413 01414 /* 15.2.10.5.25 */ 01415 /* 15.2.10.5.41 */ 01416 /* 01417 * call-seq: 01418 * str.intern => symbol 01419 * str.to_sym => symbol 01420 * 01421 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the 01422 * symbol if it did not previously exist. See <code>Symbol#id2name</code>. 01423 * 01424 * "Koala".intern #=> :Koala 01425 * s = 'cat'.to_sym #=> :cat 01426 * s == :cat #=> true 01427 * s = '@cat'.to_sym #=> :@cat 01428 * s == :@cat #=> true 01429 * 01430 * This can also be used to create symbols that cannot be represented using the 01431 * <code>:xxx</code> notation. 01432 * 01433 * 'cat and dog'.to_sym #=> :"cat and dog" 01434 */ 01435 MRB_API mrb_value 01436 mrb_str_intern(mrb_state *mrb, mrb_value self) 01437 { 01438 return mrb_symbol_value(mrb_intern_str(mrb, self)); 01439 } 01440 /* ---------------------------------- */ 01441 MRB_API mrb_value 01442 mrb_obj_as_string(mrb_state *mrb, mrb_value obj) 01443 { 01444 mrb_value str; 01445 01446 if (mrb_string_p(obj)) { 01447 return obj; 01448 } 01449 str = mrb_funcall(mrb, obj, "to_s", 0); 01450 if (!mrb_string_p(str)) 01451 return mrb_any_to_s(mrb, obj); 01452 return str; 01453 } 01454 01455 MRB_API mrb_value 01456 mrb_ptr_to_str(mrb_state *mrb, void *p) 01457 { 01458 struct RString *p_str; 01459 char *p1; 01460 char *p2; 01461 uintptr_t n = (uintptr_t)p; 01462 01463 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4); 01464 p1 = RSTR_PTR(p_str); 01465 *p1++ = '0'; 01466 *p1++ = 'x'; 01467 p2 = p1; 01468 01469 do { 01470 *p2++ = mrb_digitmap[n % 16]; 01471 n /= 16; 01472 } while (n > 0); 01473 *p2 = '\0'; 01474 RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str))); 01475 01476 while (p1 < p2) { 01477 const char c = *p1; 01478 *p1++ = *--p2; 01479 *p2 = c; 01480 } 01481 01482 return mrb_obj_value(p_str); 01483 } 01484 01485 MRB_API mrb_value 01486 mrb_string_type(mrb_state *mrb, mrb_value str) 01487 { 01488 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); 01489 } 01490 01491 MRB_API mrb_value 01492 mrb_check_string_type(mrb_state *mrb, mrb_value str) 01493 { 01494 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); 01495 } 01496 01497 /* ---------------------------------- */ 01498 /* 15.2.10.5.29 */ 01499 /* 01500 * call-seq: 01501 * str.reverse => new_str 01502 * 01503 * Returns a new string with the characters from <i>str</i> in reverse order. 01504 * 01505 * "stressed".reverse #=> "desserts" 01506 */ 01507 static mrb_value 01508 mrb_str_reverse(mrb_state *mrb, mrb_value str) 01509 { 01510 struct RString *s2; 01511 char *s, *e, *p; 01512 01513 if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str); 01514 01515 s2 = str_new(mrb, 0, RSTRING_LEN(str)); 01516 str_with_class(mrb, s2, str); 01517 s = RSTRING_PTR(str); e = RSTRING_END(str) - 1; 01518 p = RSTR_PTR(s2); 01519 01520 while (e >= s) { 01521 *p++ = *e--; 01522 } 01523 return mrb_obj_value(s2); 01524 } 01525 01526 /* 15.2.10.5.30 */ 01527 /* 01528 * call-seq: 01529 * str.reverse! => str 01530 * 01531 * Reverses <i>str</i> in place. 01532 */ 01533 static mrb_value 01534 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) 01535 { 01536 struct RString *s = mrb_str_ptr(str); 01537 char *p, *e; 01538 char c; 01539 01540 mrb_str_modify(mrb, s); 01541 if (RSTR_LEN(s) > 1) { 01542 p = RSTR_PTR(s); 01543 e = p + RSTR_LEN(s) - 1; 01544 while (p < e) { 01545 c = *p; 01546 *p++ = *e; 01547 *e-- = c; 01548 } 01549 } 01550 return str; 01551 } 01552 01553 /* 01554 * call-seq: 01555 * str.rindex(substring [, fixnum]) => fixnum or nil 01556 * str.rindex(fixnum [, fixnum]) => fixnum or nil 01557 * str.rindex(regexp [, fixnum]) => fixnum or nil 01558 * 01559 * Returns the index of the last occurrence of the given <i>substring</i>, 01560 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns 01561 * <code>nil</code> if not found. If the second parameter is present, it 01562 * specifies the position in the string to end the search---characters beyond 01563 * this point will not be considered. 01564 * 01565 * "hello".rindex('e') #=> 1 01566 * "hello".rindex('l') #=> 3 01567 * "hello".rindex('a') #=> nil 01568 * "hello".rindex(101) #=> 1 01569 * "hello".rindex(/[aeiou]/, -2) #=> 1 01570 */ 01571 static mrb_int 01572 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) 01573 { 01574 char *s, *sbeg, *t; 01575 struct RString *ps = mrb_str_ptr(str); 01576 mrb_int len = RSTRING_LEN(sub); 01577 01578 /* substring longer than string */ 01579 if (RSTR_LEN(ps) < len) return -1; 01580 if (RSTR_LEN(ps) - pos < len) { 01581 pos = RSTR_LEN(ps) - len; 01582 } 01583 sbeg = RSTR_PTR(ps); 01584 s = RSTR_PTR(ps) + pos; 01585 t = RSTRING_PTR(sub); 01586 if (len) { 01587 while (sbeg <= s) { 01588 if (memcmp(s, t, len) == 0) { 01589 return s - RSTR_PTR(ps); 01590 } 01591 s--; 01592 } 01593 return -1; 01594 } 01595 else { 01596 return pos; 01597 } 01598 } 01599 01600 /* 15.2.10.5.31 */ 01601 /* 01602 * call-seq: 01603 * str.rindex(substring [, fixnum]) => fixnum or nil 01604 * str.rindex(fixnum [, fixnum]) => fixnum or nil 01605 * str.rindex(regexp [, fixnum]) => fixnum or nil 01606 * 01607 * Returns the index of the last occurrence of the given <i>substring</i>, 01608 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns 01609 * <code>nil</code> if not found. If the second parameter is present, it 01610 * specifies the position in the string to end the search---characters beyond 01611 * this point will not be considered. 01612 * 01613 * "hello".rindex('e') #=> 1 01614 * "hello".rindex('l') #=> 3 01615 * "hello".rindex('a') #=> nil 01616 * "hello".rindex(101) #=> 1 01617 * "hello".rindex(/[aeiou]/, -2) #=> 1 01618 */ 01619 static mrb_value 01620 mrb_str_rindex_m(mrb_state *mrb, mrb_value str) 01621 { 01622 mrb_value *argv; 01623 mrb_int argc; 01624 mrb_value sub; 01625 mrb_value vpos; 01626 mrb_int pos, len = RSTRING_LEN(str); 01627 01628 mrb_get_args(mrb, "*", &argv, &argc); 01629 if (argc == 2) { 01630 sub = argv[0]; 01631 vpos = argv[1]; 01632 pos = mrb_fixnum(vpos); 01633 if (pos < 0) { 01634 pos += len; 01635 if (pos < 0) { 01636 mrb_regexp_check(mrb, sub); 01637 return mrb_nil_value(); 01638 } 01639 } 01640 if (pos > len) pos = len; 01641 } 01642 else { 01643 pos = len; 01644 if (argc > 0) 01645 sub = argv[0]; 01646 else 01647 sub = mrb_nil_value(); 01648 } 01649 mrb_regexp_check(mrb, sub); 01650 01651 switch (mrb_type(sub)) { 01652 case MRB_TT_FIXNUM: { 01653 int c = mrb_fixnum(sub); 01654 unsigned char *p = (unsigned char*)RSTRING_PTR(str); 01655 01656 for (pos=len-1;pos>=0;pos--) { 01657 if (p[pos] == c) return mrb_fixnum_value(pos); 01658 } 01659 return mrb_nil_value(); 01660 } 01661 01662 default: { 01663 mrb_value tmp; 01664 01665 tmp = mrb_check_string_type(mrb, sub); 01666 if (mrb_nil_p(tmp)) { 01667 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); 01668 } 01669 sub = tmp; 01670 } 01671 /* fall through */ 01672 case MRB_TT_STRING: 01673 pos = mrb_str_rindex(mrb, str, sub, pos); 01674 if (pos >= 0) return mrb_fixnum_value(pos); 01675 break; 01676 01677 } /* end of switch (TYPE(sub)) */ 01678 return mrb_nil_value(); 01679 } 01680 01681 /* 15.2.10.5.35 */ 01682 01683 /* 01684 * call-seq: 01685 * str.split(pattern=$;, [limit]) => anArray 01686 * 01687 * Divides <i>str</i> into substrings based on a delimiter, returning an array 01688 * of these substrings. 01689 * 01690 * If <i>pattern</i> is a <code>String</code>, then its contents are used as 01691 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single 01692 * space, <i>str</i> is split on whitespace, with leading whitespace and runs 01693 * of contiguous whitespace characters ignored. 01694 * 01695 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the 01696 * pattern matches. Whenever the pattern matches a zero-length string, 01697 * <i>str</i> is split into individual characters. 01698 * 01699 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If 01700 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is 01701 * split on whitespace as if ` ' were specified. 01702 * 01703 * If the <i>limit</i> parameter is omitted, trailing null fields are 01704 * suppressed. If <i>limit</i> is a positive number, at most that number of 01705 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire 01706 * string is returned as the only entry in an array). If negative, there is no 01707 * limit to the number of fields returned, and trailing null fields are not 01708 * suppressed. 01709 * 01710 * " now's the time".split #=> ["now's", "the", "time"] 01711 * " now's the time".split(' ') #=> ["now's", "the", "time"] 01712 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] 01713 * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] 01714 * "hello".split(//) #=> ["h", "e", "l", "l", "o"] 01715 * "hello".split(//, 3) #=> ["h", "e", "llo"] 01716 * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] 01717 * 01718 * "mellow yellow".split("ello") #=> ["m", "w y", "w"] 01719 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] 01720 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] 01721 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] 01722 */ 01723 01724 static mrb_value 01725 mrb_str_split_m(mrb_state *mrb, mrb_value str) 01726 { 01727 int argc; 01728 mrb_value spat = mrb_nil_value(); 01729 enum {awk, string, regexp} split_type = string; 01730 long i = 0, lim_p; 01731 mrb_int beg; 01732 mrb_int end; 01733 mrb_int lim = 0; 01734 mrb_value result, tmp; 01735 01736 argc = mrb_get_args(mrb, "|oi", &spat, &lim); 01737 lim_p = (lim > 0 && argc == 2); 01738 if (argc == 2) { 01739 if (lim == 1) { 01740 if (RSTRING_LEN(str) == 0) 01741 return mrb_ary_new_capa(mrb, 0); 01742 return mrb_ary_new_from_values(mrb, 1, &str); 01743 } 01744 i = 1; 01745 } 01746 01747 if (argc == 0 || mrb_nil_p(spat)) { 01748 split_type = awk; 01749 } 01750 else { 01751 if (mrb_string_p(spat)) { 01752 split_type = string; 01753 if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') { 01754 split_type = awk; 01755 } 01756 } 01757 else { 01758 mrb_noregexp(mrb, str); 01759 } 01760 } 01761 01762 result = mrb_ary_new(mrb); 01763 beg = 0; 01764 if (split_type == awk) { 01765 char *ptr = RSTRING_PTR(str); 01766 char *eptr = RSTRING_END(str); 01767 char *bptr = ptr; 01768 mrb_bool skip = TRUE; 01769 unsigned int c; 01770 01771 end = beg; 01772 while (ptr < eptr) { 01773 int ai = mrb_gc_arena_save(mrb); 01774 c = (unsigned char)*ptr++; 01775 if (skip) { 01776 if (ISSPACE(c)) { 01777 beg = ptr - bptr; 01778 } 01779 else { 01780 end = ptr - bptr; 01781 skip = FALSE; 01782 if (lim_p && lim <= i) break; 01783 } 01784 } 01785 else if (ISSPACE(c)) { 01786 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); 01787 mrb_gc_arena_restore(mrb, ai); 01788 skip = TRUE; 01789 beg = ptr - bptr; 01790 if (lim_p) ++i; 01791 } 01792 else { 01793 end = ptr - bptr; 01794 } 01795 } 01796 } 01797 else if (split_type == string) { 01798 char *ptr = RSTRING_PTR(str); /* s->as.ary */ 01799 char *temp = ptr; 01800 char *eptr = RSTRING_END(str); 01801 mrb_int slen = RSTRING_LEN(spat); 01802 01803 if (slen == 0) { 01804 int ai = mrb_gc_arena_save(mrb); 01805 while (ptr < eptr) { 01806 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1)); 01807 mrb_gc_arena_restore(mrb, ai); 01808 ptr++; 01809 if (lim_p && lim <= ++i) break; 01810 } 01811 } 01812 else { 01813 char *sptr = RSTRING_PTR(spat); 01814 int ai = mrb_gc_arena_save(mrb); 01815 01816 while (ptr < eptr && 01817 (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) { 01818 mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end)); 01819 mrb_gc_arena_restore(mrb, ai); 01820 ptr += end + slen; 01821 if (lim_p && lim <= ++i) break; 01822 } 01823 } 01824 beg = ptr - temp; 01825 } 01826 else { 01827 mrb_noregexp(mrb, str); 01828 } 01829 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) { 01830 if (RSTRING_LEN(str) == beg) { 01831 tmp = mrb_str_new_empty(mrb, str); 01832 } 01833 else { 01834 tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); 01835 } 01836 mrb_ary_push(mrb, result, tmp); 01837 } 01838 if (!lim_p && lim == 0) { 01839 mrb_int len; 01840 while ((len = RARRAY_LEN(result)) > 0 && 01841 (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) 01842 mrb_ary_pop(mrb, result); 01843 } 01844 01845 return result; 01846 } 01847 01848 MRB_API mrb_value 01849 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) 01850 { 01851 const char *p; 01852 char sign = 1; 01853 int c, uscore; 01854 unsigned long n = 0; 01855 mrb_int val; 01856 01857 #define conv_digit(c) \ 01858 (ISDIGIT(c) ? ((c) - '0') : \ 01859 ISLOWER(c) ? ((c) - 'a' + 10) : \ 01860 ISUPPER(c) ? ((c) - 'A' + 10) : \ 01861 -1) 01862 01863 if (!str) { 01864 if (badcheck) goto bad; 01865 return mrb_fixnum_value(0); 01866 } 01867 while (ISSPACE(*str)) str++; 01868 01869 if (str[0] == '+') { 01870 str++; 01871 } 01872 else if (str[0] == '-') { 01873 str++; 01874 sign = 0; 01875 } 01876 if (str[0] == '+' || str[0] == '-') { 01877 if (badcheck) goto bad; 01878 return mrb_fixnum_value(0); 01879 } 01880 if (base <= 0) { 01881 if (str[0] == '0') { 01882 switch (str[1]) { 01883 case 'x': case 'X': 01884 base = 16; 01885 break; 01886 case 'b': case 'B': 01887 base = 2; 01888 break; 01889 case 'o': case 'O': 01890 base = 8; 01891 break; 01892 case 'd': case 'D': 01893 base = 10; 01894 break; 01895 default: 01896 base = 8; 01897 } 01898 } 01899 else if (base < -1) { 01900 base = -base; 01901 } 01902 else { 01903 base = 10; 01904 } 01905 } 01906 switch (base) { 01907 case 2: 01908 if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) { 01909 str += 2; 01910 } 01911 break; 01912 case 3: 01913 break; 01914 case 8: 01915 if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) { 01916 str += 2; 01917 } 01918 case 4: case 5: case 6: case 7: 01919 break; 01920 case 10: 01921 if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) { 01922 str += 2; 01923 } 01924 case 9: case 11: case 12: case 13: case 14: case 15: 01925 break; 01926 case 16: 01927 if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) { 01928 str += 2; 01929 } 01930 break; 01931 default: 01932 if (base < 2 || 36 < base) { 01933 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base)); 01934 } 01935 break; 01936 } /* end of switch (base) { */ 01937 if (*str == '0') { /* squeeze preceeding 0s */ 01938 uscore = 0; 01939 while ((c = *++str) == '0' || c == '_') { 01940 if (c == '_') { 01941 if (++uscore >= 2) 01942 break; 01943 } 01944 else 01945 uscore = 0; 01946 } 01947 if (!(c = *str) || ISSPACE(c)) --str; 01948 } 01949 c = *str; 01950 c = conv_digit(c); 01951 if (c < 0 || c >= base) { 01952 if (badcheck) goto bad; 01953 return mrb_fixnum_value(0); 01954 } 01955 01956 uscore = 0; 01957 for (p=str;*p;p++) { 01958 if (*p == '_') { 01959 if (uscore == 0) { 01960 uscore++; 01961 continue; 01962 } 01963 if (badcheck) goto bad; 01964 break; 01965 } 01966 uscore = 0; 01967 c = conv_digit(*p); 01968 if (c < 0 || c >= base) { 01969 if (badcheck) goto bad; 01970 break; 01971 } 01972 n *= base; 01973 n += c; 01974 } 01975 if (n > MRB_INT_MAX) { 01976 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str)); 01977 } 01978 val = n; 01979 if (badcheck) { 01980 if (p == str) goto bad; /* no number */ 01981 while (*p && ISSPACE(*p)) p++; 01982 if (*p) goto bad; /* trailing garbage */ 01983 } 01984 01985 return mrb_fixnum_value(sign ? val : -val); 01986 bad: 01987 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str)); 01988 /* not reached */ 01989 return mrb_fixnum_value(0); 01990 } 01991 01992 MRB_API const char* 01993 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) 01994 { 01995 struct RString *ps = mrb_str_ptr(*ptr); 01996 mrb_int len = mrb_str_strlen(mrb, ps); 01997 char *p = RSTR_PTR(ps); 01998 01999 if (!p || p[len] != '\0') { 02000 mrb_str_modify(mrb, ps); 02001 return RSTR_PTR(ps); 02002 } 02003 return p; 02004 } 02005 02006 MRB_API mrb_value 02007 mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck) 02008 { 02009 const char *s; 02010 mrb_int len; 02011 02012 str = mrb_str_to_str(mrb, str); 02013 if (badcheck) { 02014 s = mrb_string_value_cstr(mrb, &str); 02015 } 02016 else { 02017 s = RSTRING_PTR(str); 02018 } 02019 if (s) { 02020 len = RSTRING_LEN(str); 02021 if (s[len]) { /* no sentinel somehow */ 02022 struct RString *temp_str = str_new(mrb, s, len); 02023 s = RSTR_PTR(temp_str); 02024 } 02025 } 02026 return mrb_cstr_to_inum(mrb, s, base, badcheck); 02027 } 02028 02029 /* 15.2.10.5.38 */ 02030 /* 02031 * call-seq: 02032 * str.to_i(base=10) => integer 02033 * 02034 * Returns the result of interpreting leading characters in <i>str</i> as an 02035 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the 02036 * end of a valid number are ignored. If there is not a valid number at the 02037 * start of <i>str</i>, <code>0</code> is returned. This method never raises an 02038 * exception. 02039 * 02040 * "12345".to_i #=> 12345 02041 * "99 red balloons".to_i #=> 99 02042 * "0a".to_i #=> 0 02043 * "0a".to_i(16) #=> 10 02044 * "hello".to_i #=> 0 02045 * "1100101".to_i(2) #=> 101 02046 * "1100101".to_i(8) #=> 294977 02047 * "1100101".to_i(10) #=> 1100101 02048 * "1100101".to_i(16) #=> 17826049 02049 */ 02050 static mrb_value 02051 mrb_str_to_i(mrb_state *mrb, mrb_value self) 02052 { 02053 mrb_int base = 10; 02054 02055 mrb_get_args(mrb, "|i", &base); 02056 if (base < 0) { 02057 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base)); 02058 } 02059 return mrb_str_to_inum(mrb, self, base, FALSE); 02060 } 02061 02062 MRB_API double 02063 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck) 02064 { 02065 char *end; 02066 double d; 02067 02068 enum {max_width = 20}; 02069 02070 if (!p) return 0.0; 02071 while (ISSPACE(*p)) p++; 02072 02073 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 02074 return 0.0; 02075 } 02076 d = strtod(p, &end); 02077 if (p == end) { 02078 if (badcheck) { 02079 bad: 02080 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p)); 02081 /* not reached */ 02082 } 02083 return d; 02084 } 02085 if (*end) { 02086 char buf[DBL_DIG * 4 + 10]; 02087 char *n = buf; 02088 char *e = buf + sizeof(buf) - 1; 02089 char prev = 0; 02090 02091 while (p < end && n < e) prev = *n++ = *p++; 02092 while (*p) { 02093 if (*p == '_') { 02094 /* remove underscores between digits */ 02095 if (badcheck) { 02096 if (n == buf || !ISDIGIT(prev)) goto bad; 02097 ++p; 02098 if (!ISDIGIT(*p)) goto bad; 02099 } 02100 else { 02101 while (*++p == '_'); 02102 continue; 02103 } 02104 } 02105 prev = *p++; 02106 if (n < e) *n++ = prev; 02107 } 02108 *n = '\0'; 02109 p = buf; 02110 02111 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 02112 return 0.0; 02113 } 02114 02115 d = strtod(p, &end); 02116 if (badcheck) { 02117 if (!end || p == end) goto bad; 02118 while (*end && ISSPACE(*end)) end++; 02119 if (*end) goto bad; 02120 } 02121 } 02122 return d; 02123 } 02124 02125 MRB_API double 02126 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) 02127 { 02128 char *s; 02129 mrb_int len; 02130 02131 str = mrb_str_to_str(mrb, str); 02132 s = RSTRING_PTR(str); 02133 len = RSTRING_LEN(str); 02134 if (s) { 02135 if (badcheck && memchr(s, '\0', len)) { 02136 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); 02137 } 02138 if (s[len]) { /* no sentinel somehow */ 02139 struct RString *temp_str = str_new(mrb, s, len); 02140 s = RSTR_PTR(temp_str); 02141 } 02142 } 02143 return mrb_cstr_to_dbl(mrb, s, badcheck); 02144 } 02145 02146 /* 15.2.10.5.39 */ 02147 /* 02148 * call-seq: 02149 * str.to_f => float 02150 * 02151 * Returns the result of interpreting leading characters in <i>str</i> as a 02152 * floating point number. Extraneous characters past the end of a valid number 02153 * are ignored. If there is not a valid number at the start of <i>str</i>, 02154 * <code>0.0</code> is returned. This method never raises an exception. 02155 * 02156 * "123.45e1".to_f #=> 1234.5 02157 * "45.67 degrees".to_f #=> 45.67 02158 * "thx1138".to_f #=> 0.0 02159 */ 02160 static mrb_value 02161 mrb_str_to_f(mrb_state *mrb, mrb_value self) 02162 { 02163 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE)); 02164 } 02165 02166 /* 15.2.10.5.40 */ 02167 /* 02168 * call-seq: 02169 * str.to_s => str 02170 * str.to_str => str 02171 * 02172 * Returns the receiver. 02173 */ 02174 static mrb_value 02175 mrb_str_to_s(mrb_state *mrb, mrb_value self) 02176 { 02177 if (mrb_obj_class(mrb, self) != mrb->string_class) { 02178 return mrb_str_dup(mrb, self); 02179 } 02180 return self; 02181 } 02182 02183 /* 15.2.10.5.43 */ 02184 /* 02185 * call-seq: 02186 * str.upcase! => str or nil 02187 * 02188 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes 02189 * were made. 02190 */ 02191 static mrb_value 02192 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) 02193 { 02194 struct RString *s = mrb_str_ptr(str); 02195 char *p, *pend; 02196 mrb_bool modify = FALSE; 02197 02198 mrb_str_modify(mrb, s); 02199 p = RSTRING_PTR(str); 02200 pend = RSTRING_END(str); 02201 while (p < pend) { 02202 if (ISLOWER(*p)) { 02203 *p = TOUPPER(*p); 02204 modify = TRUE; 02205 } 02206 p++; 02207 } 02208 02209 if (modify) return str; 02210 return mrb_nil_value(); 02211 } 02212 02213 /* 15.2.10.5.42 */ 02214 /* 02215 * call-seq: 02216 * str.upcase => new_str 02217 * 02218 * Returns a copy of <i>str</i> with all lowercase letters replaced with their 02219 * uppercase counterparts. The operation is locale insensitive---only 02220 * characters ``a'' to ``z'' are affected. 02221 * 02222 * "hEllO".upcase #=> "HELLO" 02223 */ 02224 static mrb_value 02225 mrb_str_upcase(mrb_state *mrb, mrb_value self) 02226 { 02227 mrb_value str; 02228 02229 str = mrb_str_dup(mrb, self); 02230 mrb_str_upcase_bang(mrb, str); 02231 return str; 02232 } 02233 02234 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) 02235 02236 /* 02237 * call-seq: 02238 * str.dump -> new_str 02239 * 02240 * Produces a version of <i>str</i> with all nonprinting characters replaced by 02241 * <code>\nnn</code> notation and all special characters escaped. 02242 */ 02243 mrb_value 02244 mrb_str_dump(mrb_state *mrb, mrb_value str) 02245 { 02246 mrb_int len; 02247 const char *p, *pend; 02248 char *q; 02249 struct RString *result; 02250 02251 len = 2; /* "" */ 02252 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); 02253 while (p < pend) { 02254 unsigned char c = *p++; 02255 switch (c) { 02256 case '"': case '\\': 02257 case '\n': case '\r': 02258 case '\t': case '\f': 02259 case '\013': case '\010': case '\007': case '\033': 02260 len += 2; 02261 break; 02262 02263 case '#': 02264 len += IS_EVSTR(p, pend) ? 2 : 1; 02265 break; 02266 02267 default: 02268 if (ISPRINT(c)) { 02269 len++; 02270 } 02271 else { 02272 len += 4; /* \NNN */ 02273 } 02274 break; 02275 } 02276 } 02277 02278 result = str_new(mrb, 0, len); 02279 str_with_class(mrb, result, str); 02280 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); 02281 q = RSTR_PTR(result); 02282 *q++ = '"'; 02283 while (p < pend) { 02284 unsigned char c = *p++; 02285 02286 switch (c) { 02287 case '"': 02288 case '\\': 02289 *q++ = '\\'; 02290 *q++ = c; 02291 break; 02292 02293 case '\n': 02294 *q++ = '\\'; 02295 *q++ = 'n'; 02296 break; 02297 02298 case '\r': 02299 *q++ = '\\'; 02300 *q++ = 'r'; 02301 break; 02302 02303 case '\t': 02304 *q++ = '\\'; 02305 *q++ = 't'; 02306 break; 02307 02308 case '\f': 02309 *q++ = '\\'; 02310 *q++ = 'f'; 02311 break; 02312 02313 case '\013': 02314 *q++ = '\\'; 02315 *q++ = 'v'; 02316 break; 02317 02318 case '\010': 02319 *q++ = '\\'; 02320 *q++ = 'b'; 02321 break; 02322 02323 case '\007': 02324 *q++ = '\\'; 02325 *q++ = 'a'; 02326 break; 02327 02328 case '\033': 02329 *q++ = '\\'; 02330 *q++ = 'e'; 02331 break; 02332 02333 case '#': 02334 if (IS_EVSTR(p, pend)) *q++ = '\\'; 02335 *q++ = '#'; 02336 break; 02337 02338 default: 02339 if (ISPRINT(c)) { 02340 *q++ = c; 02341 } 02342 else { 02343 *q++ = '\\'; 02344 q[2] = '0' + c % 8; c /= 8; 02345 q[1] = '0' + c % 8; c /= 8; 02346 q[0] = '0' + c % 8; 02347 q += 3; 02348 } 02349 } 02350 } 02351 *q = '"'; 02352 return mrb_obj_value(result); 02353 } 02354 02355 MRB_API mrb_value 02356 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) 02357 { 02358 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len); 02359 return str; 02360 } 02361 02362 MRB_API mrb_value 02363 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) 02364 { 02365 return mrb_str_cat(mrb, str, ptr, strlen(ptr)); 02366 } 02367 02368 MRB_API mrb_value 02369 mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) 02370 { 02371 return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); 02372 } 02373 02374 MRB_API mrb_value 02375 mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) 02376 { 02377 str2 = mrb_str_to_str(mrb, str2); 02378 return mrb_str_cat_str(mrb, str, str2); 02379 } 02380 02381 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ 02382 02383 /* 02384 * call-seq: 02385 * str.inspect -> string 02386 * 02387 * Returns a printable version of _str_, surrounded by quote marks, 02388 * with special characters escaped. 02389 * 02390 * str = "hello" 02391 * str[3] = "\b" 02392 * str.inspect #=> "\"hel\\bo\"" 02393 */ 02394 mrb_value 02395 mrb_str_inspect(mrb_state *mrb, mrb_value str) 02396 { 02397 const char *p, *pend; 02398 char buf[CHAR_ESC_LEN + 1]; 02399 mrb_value result = mrb_str_new_lit(mrb, "\""); 02400 02401 p = RSTRING_PTR(str); pend = RSTRING_END(str); 02402 for (;p < pend; p++) { 02403 unsigned char c, cc; 02404 02405 c = *p; 02406 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) { 02407 buf[0] = '\\'; buf[1] = c; 02408 mrb_str_cat(mrb, result, buf, 2); 02409 continue; 02410 } 02411 if (ISPRINT(c)) { 02412 buf[0] = c; 02413 mrb_str_cat(mrb, result, buf, 1); 02414 continue; 02415 } 02416 switch (c) { 02417 case '\n': cc = 'n'; break; 02418 case '\r': cc = 'r'; break; 02419 case '\t': cc = 't'; break; 02420 case '\f': cc = 'f'; break; 02421 case '\013': cc = 'v'; break; 02422 case '\010': cc = 'b'; break; 02423 case '\007': cc = 'a'; break; 02424 case 033: cc = 'e'; break; 02425 default: cc = 0; break; 02426 } 02427 if (cc) { 02428 buf[0] = '\\'; 02429 buf[1] = (char)cc; 02430 mrb_str_cat(mrb, result, buf, 2); 02431 continue; 02432 } 02433 else { 02434 buf[0] = '\\'; 02435 buf[3] = '0' + c % 8; c /= 8; 02436 buf[2] = '0' + c % 8; c /= 8; 02437 buf[1] = '0' + c % 8; 02438 mrb_str_cat(mrb, result, buf, 4); 02439 continue; 02440 } 02441 } 02442 mrb_str_cat_lit(mrb, result, "\""); 02443 02444 return result; 02445 } 02446 02447 /* 02448 * call-seq: 02449 * str.bytes -> array of fixnums 02450 * 02451 * Returns an array of bytes in _str_. 02452 * 02453 * str = "hello" 02454 * str.bytes #=> [104, 101, 108, 108, 111] 02455 */ 02456 static mrb_value 02457 mrb_str_bytes(mrb_state *mrb, mrb_value str) 02458 { 02459 struct RString *s = mrb_str_ptr(str); 02460 mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s)); 02461 unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s); 02462 02463 while (p < pend) { 02464 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0])); 02465 p++; 02466 } 02467 return a; 02468 } 02469 02470 /* ---------------------------*/ 02471 void 02472 mrb_init_string(mrb_state *mrb) 02473 { 02474 struct RClass *s; 02475 02476 mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string"); 02477 02478 s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ 02479 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); 02480 02481 mrb_define_method(mrb, s, "bytesize", mrb_str_size, MRB_ARGS_NONE()); 02482 02483 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */ 02484 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */ 02485 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ 02486 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ 02487 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ 02488 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ 02489 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ 02490 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ 02491 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */ 02492 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */ 02493 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */ 02494 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */ 02495 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */ 02496 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */ 02497 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */ 02498 02499 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */ 02500 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ 02501 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */ 02502 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ 02503 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */ 02504 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */ 02505 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */ 02506 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */ 02507 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */ 02508 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */ 02509 mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */ 02510 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */ 02511 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */ 02512 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */ 02513 02514 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */ 02515 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ 02516 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ 02517 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); 02518 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ 02519 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ 02520 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ 02521 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ 02522 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); 02523 } 02524
Generated on Tue Jul 12 2022 18:00:35 by 1.7.2