mbed I/F binding for mruby

Dependents:   mruby_mbed_web mirb_mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers string.c Source File

string.c

00001 /*
00002 ** string.c - String class
00003 **
00004 ** See Copyright Notice in mruby.h
00005 */
00006 
00007 #include <ctype.h>
00008 #include <float.h>
00009 #include <limits.h>
00010 #include <stddef.h>
00011 #include <stdlib.h>
00012 #include <string.h>
00013 #include "mruby.h"
00014 #include "mruby/array.h"
00015 #include "mruby/class.h"
00016 #include "mruby/range.h"
00017 #include "mruby/string.h"
00018 #include "mruby/re.h"
00019 
00020 const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";  /* 36 digit characters; index i holds the character for digit value i (presumably used for radix conversion -- confirm at the call sites) */
00021 
00022 typedef struct mrb_shared_string {  /* reference-counted buffer record used by strings that carry the shared flag */
00023   mrb_bool nofree : 1;  /* when set, str_decref() leaves ptr alone instead of freeing it */
00024   int refcnt;  /* decremented by str_decref(); the record is released when it reaches 0 */
00025   char *ptr;  /* start of the shared byte buffer */
00026   mrb_int len;  /* length recorded by str_make_shared(); reused as the capacity in mrb_str_modify() */
00027 } mrb_shared_string;
00028 
00029 static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);  /* forward declaration: called by mrb_str_dup() */
00030 static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);  /* forward declaration: called by mrb_str_aref() for Range indexes */
00031 
00032 MRB_API mrb_int
00033 mrb_str_strlen(mrb_state *mrb, struct RString *s)
00034 {
00035   mrb_int i, max = RSTR_LEN(s);
00036   char *p = RSTR_PTR(s);
00037 
00038   if (!p) return 0;
00039   for (i=0; i<max; i++) {
00040     if (p[i] == '\0') {
00041       mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
00042     }
00043   }
00044   return max;
00045 }
00046 
00047 static inline void
00048 resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
00049 {
00050   if (RSTR_EMBED_P(s)) {
00051     if (RSTRING_EMBED_LEN_MAX < capacity) {
00052       char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
00053       const mrb_int len = RSTR_EMBED_LEN(s);
00054       memcpy(tmp, s->as.ary, len);
00055       RSTR_UNSET_EMBED_FLAG(s);
00056       s->as.heap.ptr = tmp;
00057       s->as.heap.len = len;
00058       s->as.heap.aux.capa = capacity;
00059     }
00060   }
00061   else {
00062     s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
00063     s->as.heap.aux.capa = capacity;
00064   }
00065 }
00066 
00067 static void
00068 str_decref(mrb_state *mrb, mrb_shared_string *shared)
00069 {
00070   shared->refcnt--;
00071   if (shared->refcnt == 0) {
00072     if (!shared->nofree) {
00073       mrb_free(mrb, shared->ptr);
00074     }
00075     mrb_free(mrb, shared);
00076   }
00077 }
00078 
00079 MRB_API void
00080 mrb_str_modify(mrb_state *mrb, struct RString *s)
00081 {
00082   if (RSTR_SHARED_P(s)) {
00083     mrb_shared_string *shared = s->as.heap.aux.shared;
00084 
00085     if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
00086       s->as.heap.ptr = shared->ptr;
00087       s->as.heap.aux.capa = shared->len;
00088       RSTR_PTR(s)[s->as.heap.len] = '\0';
00089       mrb_free(mrb, shared);
00090     }
00091     else {
00092       char *ptr, *p;
00093       mrb_int len;
00094 
00095       p = RSTR_PTR(s);
00096       len = s->as.heap.len;
00097       ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
00098       if (p) {
00099         memcpy(ptr, p, len);
00100       }
00101       ptr[len] = '\0';
00102       s->as.heap.ptr = ptr;
00103       s->as.heap.aux.capa = len;
00104       str_decref(mrb, shared);
00105     }
00106     RSTR_UNSET_SHARED_FLAG(s);
00107     return;
00108   }
00109   if (RSTR_NOFREE_P(s)) {
00110     char *p = s->as.heap.ptr;
00111 
00112     s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
00113     if (p) {
00114       memcpy(RSTR_PTR(s), p, s->as.heap.len);
00115     }
00116     RSTR_PTR(s)[s->as.heap.len] = '\0';
00117     s->as.heap.aux.capa = s->as.heap.len;
00118     RSTR_UNSET_NOFREE_FLAG(s);
00119     return;
00120   }
00121 }
00122 
00123 MRB_API mrb_value
00124 mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
00125 {
00126   mrb_int slen;
00127   struct RString *s = mrb_str_ptr(str);
00128 
00129   mrb_str_modify(mrb, s);
00130   slen = RSTR_LEN(s);
00131   if (len != slen) {
00132     if (slen < len || slen - len > 256) {
00133       resize_capa(mrb, s, len);
00134     }
00135     RSTR_SET_LEN(s, len);
00136     RSTR_PTR(s)[len] = '\0';   /* sentinel */
00137   }
00138   return str;
00139 }
00140 
00141 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
00142 
00143 static struct RString*
00144 str_new_static(mrb_state *mrb, const char *p, size_t len)
00145 {
00146   struct RString *s;
00147 
00148   if (len >= MRB_INT_MAX) {
00149     mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
00150   }
00151   s = mrb_obj_alloc_string(mrb);
00152   s->as.heap.len = len;
00153   s->as.heap.aux.capa = 0;             /* nofree */
00154   s->as.heap.ptr = (char *)p;
00155   s->flags = MRB_STR_NOFREE;
00156 
00157   return s;
00158 }
00159 
00160 static struct RString*
00161 str_new(mrb_state *mrb, const char *p, size_t len)
00162 {
00163   struct RString *s;
00164 
00165   if (mrb_ro_data_p(p)) {
00166     return str_new_static(mrb, p, len);
00167   }
00168   s = mrb_obj_alloc_string(mrb);
00169   if (len < RSTRING_EMBED_LEN_MAX) {
00170     RSTR_SET_EMBED_FLAG(s);
00171     RSTR_SET_EMBED_LEN(s, len);
00172     if (p) {
00173       memcpy(s->as.ary, p, len);
00174     }
00175   } else {
00176     if (len >= MRB_INT_MAX) {
00177       mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
00178     }
00179     s->as.heap.len = len;
00180     s->as.heap.aux.capa = len;
00181     s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1);
00182     if (p) {
00183       memcpy(s->as.heap.ptr, p, len);
00184     }
00185   }
00186   RSTR_PTR(s)[len] = '\0';
00187   return s;
00188 }
00189 
00190 static inline void
00191 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
00192 {
00193   s->c = mrb_str_ptr(obj)->c;
00194 }
00195 
00196 static mrb_value
00197 mrb_str_new_empty(mrb_state *mrb, mrb_value str)
00198 {
00199   struct RString *s = str_new(mrb, 0, 0);
00200 
00201   str_with_class(mrb, s, str);
00202   return mrb_obj_value(s);
00203 }
00204 
00205 #ifndef MRB_STR_BUF_MIN_SIZE
00206 # define MRB_STR_BUF_MIN_SIZE 128
00207 #endif
00208 
00209 MRB_API mrb_value
00210 mrb_str_buf_new(mrb_state *mrb, size_t capa)
00211 {
00212   struct RString *s;
00213 
00214   s = mrb_obj_alloc_string(mrb);
00215 
00216   if (capa >= MRB_INT_MAX) {
00217     mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
00218   }
00219   if (capa < MRB_STR_BUF_MIN_SIZE) {
00220     capa = MRB_STR_BUF_MIN_SIZE;
00221   }
00222   s->as.heap.len = 0;
00223   s->as.heap.aux.capa = capa;
00224   s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1);
00225   RSTR_PTR(s)[0] = '\0';
00226 
00227   return mrb_obj_value(s);
00228 }
00229 
00230 static void
00231 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
00232 {
00233   size_t capa;
00234   size_t total;
00235   ptrdiff_t off = -1;
00236 
00237   if (len == 0) return;
00238   mrb_str_modify(mrb, s);
00239   if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
00240       off = ptr - RSTR_PTR(s);
00241   }
00242 
00243   if (RSTR_EMBED_P(s))
00244     capa = RSTRING_EMBED_LEN_MAX;
00245   else
00246     capa = s->as.heap.aux.capa;
00247 
00248   if (RSTR_LEN(s) >= MRB_INT_MAX - (mrb_int)len) {
00249     mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
00250   }
00251   total = RSTR_LEN(s)+len;
00252   if (capa <= total) {
00253     while (total > capa) {
00254       if (capa + 1 >= MRB_INT_MAX / 2) {
00255         capa = (total + 4095) / 4096;
00256         break;
00257       }
00258       capa = (capa + 1) * 2;
00259     }
00260     resize_capa(mrb, s, capa);
00261   }
00262   if (off != -1) {
00263       ptr = RSTR_PTR(s) + off;
00264   }
00265   memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
00266   mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX);
00267   RSTR_SET_LEN(s, total);
00268   RSTR_PTR(s)[total] = '\0';   /* sentinel */
00269 }
00270 
00271 MRB_API mrb_value
00272 mrb_str_new(mrb_state *mrb, const char *p, size_t len)
00273 {
00274   return mrb_obj_value(str_new(mrb, p, len));
00275 }
00276 
00277 /*
00278  *  call-seq: (Caution! NULL string)
00279  *     String.new(str="")   => new_str
00280  *
00281  *  Returns a new string object containing a copy of <i>str</i>.
00282  */
00283 
00284 MRB_API mrb_value
00285 mrb_str_new_cstr(mrb_state *mrb, const char *p)
00286 {
00287   struct RString *s;
00288   size_t len;
00289 
00290   if (p) {
00291     len = strlen(p);
00292   }
00293   else {
00294     len = 0;
00295   }
00296 
00297   s = str_new(mrb, p, len);
00298 
00299   return mrb_obj_value(s);
00300 }
00301 
00302 MRB_API mrb_value
00303 mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
00304 {
00305   struct RString *s = str_new_static(mrb, p, len);
00306   return mrb_obj_value(s);
00307 }
00308 
00309 void
00310 mrb_gc_free_str(mrb_state *mrb, struct RString *str)
00311 {
00312   if (RSTR_EMBED_P(str))
00313     /* no code */;
00314   else if (RSTR_SHARED_P(str))
00315     str_decref(mrb, str->as.heap.aux.shared);
00316   else if (!RSTR_NOFREE_P(str))
00317     mrb_free(mrb, str->as.heap.ptr);
00318 }
00319 
00320 MRB_API char*
00321 mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
00322 {
00323   struct RString *s;
00324 
00325   if (!mrb_string_p(str0)) {
00326     mrb_raise(mrb, E_TYPE_ERROR, "expected String");
00327   }
00328 
00329   s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
00330   if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
00331     mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
00332   }
00333   return RSTR_PTR(s);
00334 }
00335 
00336 static void
00337 str_make_shared(mrb_state *mrb, struct RString *s)
00338 {
00339   if (!RSTR_SHARED_P(s)) {
00340     mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
00341 
00342     shared->refcnt = 1;
00343     if (RSTR_EMBED_P(s)) {
00344       const mrb_int len = RSTR_EMBED_LEN(s);
00345       char *const tmp = (char *)mrb_malloc(mrb, len+1);
00346       memcpy(tmp, s->as.ary, len);
00347       tmp[len] = '\0';
00348       RSTR_UNSET_EMBED_FLAG(s);
00349       s->as.heap.ptr = tmp;
00350       s->as.heap.len = len;
00351       shared->nofree = FALSE;
00352       shared->ptr = s->as.heap.ptr;
00353     }
00354     else if (RSTR_NOFREE_P(s)) {
00355       shared->nofree = TRUE;
00356       shared->ptr = s->as.heap.ptr;
00357       RSTR_UNSET_NOFREE_FLAG(s);
00358     }
00359     else {
00360       shared->nofree = FALSE;
00361       if (s->as.heap.aux.capa > s->as.heap.len) {
00362         s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
00363       }
00364       else {
00365         shared->ptr = s->as.heap.ptr;
00366       }
00367     }
00368     shared->len = s->as.heap.len;
00369     s->as.heap.aux.shared = shared;
00370     RSTR_SET_SHARED_FLAG(s);
00371   }
00372 }
00373 
00374 /*
00375  *  call-seq: (Caution! String("abcd") change)
00376  *     String("abcdefg") = String("abcd") + String("efg")
00377  *
00378  *  Returns a new string object containing a copy of <i>str</i>.
00379  */
00380 MRB_API void
00381 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
00382 {
00383   struct RString *s1 = mrb_str_ptr(self), *s2;
00384   mrb_int len;
00385 
00386   mrb_str_modify(mrb, s1);
00387   if (!mrb_string_p(other)) {
00388     other = mrb_str_to_str(mrb, other);
00389   }
00390   s2 = mrb_str_ptr(other);
00391   len = RSTR_LEN(s1) + RSTR_LEN(s2);
00392 
00393   if (RSTRING_CAPA(self) < len) {
00394     resize_capa(mrb, s1, len);
00395   }
00396   memcpy(RSTR_PTR(s1)+RSTR_LEN(s1), RSTR_PTR(s2), RSTR_LEN(s2));
00397   RSTR_SET_LEN(s1, len);
00398   RSTR_PTR(s1)[len] = '\0';
00399 }
00400 
00401 /*
00402  *  call-seq: (Caution! String("abcd") remain)
00403  *     String("abcdefg") = String("abcd") + String("efg")
00404  *
00405  *  Returns a new string object containing a copy of <i>str</i>.
00406  */
00407 MRB_API mrb_value
00408 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
00409 {
00410   struct RString *s = mrb_str_ptr(a);
00411   struct RString *s2 = mrb_str_ptr(b);
00412   struct RString *t;
00413 
00414   t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2));
00415   memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s));
00416   memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2));
00417 
00418   return mrb_obj_value(t);
00419 }
00420 
00421 /* 15.2.10.5.2  */
00422 
00423 /*
00424  *  call-seq: (Caution! String("abcd") remain) for stack_argument
00425  *     String("abcdefg") = String("abcd") + String("efg")
00426  *
00427  *  Returns a new string object containing a copy of <i>str</i>.
00428  */
00429 static mrb_value
00430 mrb_str_plus_m(mrb_state *mrb, mrb_value self)
00431 {
00432   mrb_value str;
00433 
00434   mrb_get_args(mrb, "S", &str);
00435   return mrb_str_plus(mrb, self, str);
00436 }
00437 
00438 /* 15.2.10.5.26 */
00439 /* 15.2.10.5.33 */
00440 /*
00441  *  call-seq:
00442  *     len = strlen(String("abcd"))
00443  *
00444  *  Returns the length of string.
00445  */
00446 static mrb_value
00447 mrb_str_size(mrb_state *mrb, mrb_value self)
00448 {
00449   struct RString *s = mrb_str_ptr(self);
00450   return mrb_fixnum_value(RSTR_LEN(s));
00451 }
00452 
00453 /* 15.2.10.5.1  */
00454 /*
00455  *  call-seq:
00456  *     str * integer   => new_str
00457  *
00458  *  Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
00459  *  the receiver.
00460  *
00461  *     "Ho! " * 3   #=> "Ho! Ho! Ho! "
00462  */
00463 static mrb_value
00464 mrb_str_times(mrb_state *mrb, mrb_value self)
00465 {
00466   mrb_int n,len,times;
00467   struct RString *str2;
00468   char *p;
00469 
00470   mrb_get_args(mrb, "i", &times);
00471   if (times < 0) {
00472     mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
00473   }
00474   if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
00475     mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
00476   }
00477 
00478   len = RSTRING_LEN(self)*times;
00479   str2 = str_new(mrb, 0, len);
00480   str_with_class(mrb, str2, self);
00481   p = RSTR_PTR(str2);
00482   if (len > 0) {
00483     n = RSTRING_LEN(self);
00484     memcpy(p, RSTRING_PTR(self), n);
00485     while (n <= len/2) {
00486       memcpy(p + n, p, n);
00487       n *= 2;
00488     }
00489     memcpy(p + n, p, len-n);
00490   }
00491   p[RSTR_LEN(str2)] = '\0';
00492 
00493   return mrb_obj_value(str2);
00494 }
00495 /* -------------------------------------------------------------- */
00496 
00497 #define lesser(a,b) (((a)>(b))?(b):(a))
00498 
00499 /* ---------------------------*/
00500 /*
00501  *  call-seq:
00502  *     mrb_value str1 <=> mrb_value str2   => int
00503  *                     >  1
00504  *                     =  0
00505  *                     <  -1
00506  */
00507 MRB_API int
00508 mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
00509 {
00510   mrb_int len;
00511   mrb_int retval;
00512   struct RString *s1 = mrb_str_ptr(str1);
00513   struct RString *s2 = mrb_str_ptr(str2);
00514 
00515   len = lesser(RSTR_LEN(s1), RSTR_LEN(s2));
00516   retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len);
00517   if (retval == 0) {
00518     if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0;
00519     if (RSTR_LEN(s1) > RSTR_LEN(s2))  return 1;
00520     return -1;
00521   }
00522   if (retval > 0) return 1;
00523   return -1;
00524 }
00525 
00526 /* 15.2.10.5.3  */
00527 
00528 /*
00529  *  call-seq:
00530  *     str <=> other_str   => -1, 0, +1
00531  *
00532  *  Comparison---Returns -1 if <i>str</i> is less than, 0 if
00533  *  <i>str</i> is equal to, and +1 if <i>str</i> is greater than
00534  *  <i>other_str</i>. If the strings are of different lengths, and the strings are
00535  *  equal when compared up to the shortest length, then the longer string is
00536  *  considered greater than the shorter one. If the variable <code>$=</code> is
00537  *  <code>false</code>, the comparison is based on comparing the binary values
00538  *  of each character in the string. In older versions of Ruby, setting
00539  *  <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
00540  *  in favor of using <code>String#casecmp</code>.
00541  *
00542  *  <code><=></code> is the basis for the methods <code><</code>,
00543  *  <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
00544  *  included from module <code>Comparable</code>.  The method
00545  *  <code>String#==</code> does not use <code>Comparable#==</code>.
00546  *
00547  *     "abcdef" <=> "abcde"     #=> 1
00548  *     "abcdef" <=> "abcdef"    #=> 0
00549  *     "abcdef" <=> "abcdefg"   #=> -1
00550  *     "abcdef" <=> "ABCDEF"    #=> 1
00551  */
00552 static mrb_value
00553 mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
00554 {
00555   mrb_value str2;
00556   mrb_int result;
00557 
00558   mrb_get_args(mrb, "o", &str2);
00559   if (!mrb_string_p(str2)) {
00560     if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
00561       return mrb_nil_value();
00562     }
00563     else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
00564       return mrb_nil_value();
00565     }
00566     else {
00567       mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
00568 
00569       if (mrb_nil_p(tmp)) return mrb_nil_value();
00570       if (!mrb_fixnum(tmp)) {
00571         return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
00572       }
00573       result = -mrb_fixnum(tmp);
00574     }
00575   }
00576   else {
00577     result = mrb_str_cmp(mrb, str1, str2);
00578   }
00579   return mrb_fixnum_value(result);
00580 }
00581 
00582 static mrb_bool
00583 str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
00584 {
00585   const mrb_int len = RSTRING_LEN(str1);
00586 
00587   if (len != RSTRING_LEN(str2)) return FALSE;
00588   if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
00589     return TRUE;
00590   return FALSE;
00591 }
00592 
00593 MRB_API mrb_bool
00594 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
00595 {
00596   if (mrb_immediate_p(str2)) return FALSE;
00597   if (!mrb_string_p(str2)) {
00598     if (mrb_nil_p(str2)) return FALSE;
00599     if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
00600       return FALSE;
00601     }
00602     str2 = mrb_funcall(mrb, str2, "to_str", 0);
00603     return mrb_equal(mrb, str2, str1);
00604   }
00605   return str_eql(mrb, str1, str2);
00606 }
00607 
00608 /* 15.2.10.5.4  */
00609 /*
00610  *  call-seq:
00611  *     str == obj   => true or false
00612  *
00613  *  Equality---
00614  *  If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
00615  *  Otherwise, returns <code>false</code> or <code>true</code>
00616  *
00617  *   caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
00618  */
00619 static mrb_value
00620 mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
00621 {
00622   mrb_value str2;
00623 
00624   mrb_get_args(mrb, "o", &str2);
00625 
00626   return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
00627 }
00628 /* ---------------------------------- */
00629 MRB_API mrb_value
00630 mrb_str_to_str(mrb_state *mrb, mrb_value str)
00631 {
00632   mrb_value s;
00633 
00634   if (!mrb_string_p(str)) {
00635     s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
00636     if (mrb_nil_p(s)) {
00637       s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
00638     }
00639     return s;
00640   }
00641   return str;
00642 }
00643 
00644 MRB_API const char*
00645 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
00646 {
00647   mrb_value str = mrb_str_to_str(mrb, ptr);
00648   return RSTRING_PTR(str);
00649 }
00650 
00651 void
00652 mrb_noregexp(mrb_state *mrb, mrb_value self)
00653 {
00654   mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
00655 }
00656 
00657 void
00658 mrb_regexp_check(mrb_state *mrb, mrb_value obj)
00659 {
00660   if (mrb_regexp_p(mrb, obj)) {
00661     mrb_noregexp(mrb, obj);
00662   }
00663 }
00664 
/*
 *  Quick Search for the m-byte pattern xs inside the n-byte text ys.
 *
 *  Returns the offset of the first match, or -1 when there is none.
 *  The caller is expected to pass m >= 1 and m <= n (mrb_memsearch()
 *  only calls this with 2 <= m < n).
 *
 *  The shift table uses mrb_int so that shifts are not truncated when
 *  mrb_int is wider than int.  The byte that follows the current window
 *  is only read while it is still inside the text, so no access is made
 *  at or beyond ys + n and the cursor never moves past the end.
 */
static inline mrb_int
mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
  const unsigned char *const xe = xs + m;
  const unsigned char *const ye = ys + n;
  const unsigned char *x;
  const unsigned char *y = ys;
  mrb_int qstable[256];
  int i;

  /* Preprocessing: shift for each byte value that may follow a window */
  for (i = 0; i < 256; ++i)
    qstable[i] = m + 1;
  for (x = xs; x < xe; ++x)
    qstable[*x] = (mrb_int)(xe - x);

  /* Searching */
  while (ye - y >= m) {
    if (*xs == *y && memcmp(xs, y, (size_t)m) == 0)
      return (mrb_int)(y - ys);
    if (ye - y == m)
      break;                    /* last window: there is no following byte */
    y += qstable[y[m]];
  }
  return -1;
}
00684 
00685 static mrb_int
00686 mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
00687 {
00688   const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
00689 
00690   if (m > n) return -1;
00691   else if (m == n) {
00692     return memcmp(x0, y0, m) == 0 ? 0 : -1;
00693   }
00694   else if (m < 1) {
00695     return 0;
00696   }
00697   else if (m == 1) {
00698     const unsigned char *ys = y, *ye = ys + n;
00699     for (; y < ye; ++y) {
00700       if (*x == *y)
00701         return y - ys;
00702     }
00703     return -1;
00704   }
00705   return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
00706 }
00707 
00708 static mrb_int
00709 mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
00710 {
00711   mrb_int pos;
00712   char *s, *sptr;
00713   mrb_int len, slen;
00714 
00715   len = RSTRING_LEN(str);
00716   slen = RSTRING_LEN(sub);
00717   if (offset < 0) {
00718     offset += len;
00719     if (offset < 0) return -1;
00720   }
00721   if (len - offset < slen) return -1;
00722   s = RSTRING_PTR(str);
00723   if (offset) {
00724     s += offset;
00725   }
00726   if (slen == 0) return offset;
00727   /* need proceed one character at a time */
00728   sptr = RSTRING_PTR(sub);
00729   slen = RSTRING_LEN(sub);
00730   len = RSTRING_LEN(str) - offset;
00731   pos = mrb_memsearch(sptr, slen, s, len);
00732   if (pos < 0) return pos;
00733   return pos + offset;
00734 }
00735 
00736 MRB_API mrb_value
00737 mrb_str_dup(mrb_state *mrb, mrb_value str)
00738 {
00739   struct RString *s = mrb_str_ptr(str);
00740   struct RString *dup = str_new(mrb, 0, 0);
00741 
00742   str_with_class(mrb, dup, str);
00743   return str_replace(mrb, dup, s);
00744 }
00745 
00746 static mrb_value
00747 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
00748 {
00749   mrb_int idx;
00750 
00751   mrb_regexp_check(mrb, indx);
00752   switch (mrb_type(indx)) {
00753     case MRB_TT_FIXNUM:
00754       idx = mrb_fixnum(indx);
00755 
00756 num_index:
00757       str = mrb_str_substr(mrb, str, idx, 1);
00758       if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
00759       return str;
00760 
00761     case MRB_TT_STRING:
00762       if (mrb_str_index(mrb, str, indx, 0) != -1)
00763         return mrb_str_dup(mrb, indx);
00764       return mrb_nil_value();
00765 
00766     case MRB_TT_RANGE:
00767       /* check if indx is Range */
00768       {
00769         mrb_int beg, len;
00770 
00771         len = RSTRING_LEN(str);
00772         if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
00773           return mrb_str_subseq(mrb, str, beg, len);
00774         }
00775         else {
00776           return mrb_nil_value();
00777         }
00778       }
00779     default:
00780       idx = mrb_fixnum(indx);
00781       goto num_index;
00782   }
00783   return mrb_nil_value();    /* not reached */
00784 }
00785 
00786 /* 15.2.10.5.6  */
00787 /* 15.2.10.5.34 */
00788 /*
00789  *  call-seq:
00790  *     str[fixnum]                 => new_str or nil
00791  *     str[fixnum, fixnum]         => new_str or nil
00792  *     str[range]                  => new_str or nil
00793  *     str[regexp]                 => new_str or nil
00794  *     str[regexp, fixnum]         => new_str or nil
00795  *     str[other_str]              => new_str or nil
00796  *     str.slice(fixnum)           => new_str or nil
00797  *     str.slice(fixnum, fixnum)   => new_str or nil
00798  *     str.slice(range)            => new_str or nil
00799  *     str.slice(other_str)        => new_str or nil
00800  *
00801  *  Element Reference---If passed a single <code>Fixnum</code>, returns a
00802  *  one-character substring at that position. If passed two <code>Fixnum</code>
00803  *  objects, returns a substring starting at the offset given by the first, and
00804  *  a length given by the second. If given a range, a substring containing
00805  *  characters at offsets given by the range is returned. In all three cases, if
00806  *  an offset is negative, it is counted from the end of <i>str</i>. Returns
00807  *  <code>nil</code> if the initial offset falls outside the string, the length
00808  *  is negative, or the beginning of the range is greater than the end.
00809  *
00810  *  If a <code>String</code> is given, that string is returned if it occurs in
00811  *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
00812  *  match.
00813  *
00814  *     a = "hello there"
00815  *     a[1]                   #=> 101(1.8.7) "e"(1.9.2)
00816  *     a[1,3]                 #=> "ell"
00817  *     a[1..3]                #=> "ell"
00818  *     a[-3,2]                #=> "er"
00819  *     a[-4..-2]              #=> "her"
00820  *     a[12..-1]              #=> nil
00821  *     a[-2..-4]              #=> ""
00822  *     a["lo"]                #=> "lo"
00823  *     a["bye"]               #=> nil
00824  */
00825 static mrb_value
00826 mrb_str_aref_m(mrb_state *mrb, mrb_value str)
00827 {
00828   mrb_value a1, a2;
00829   int argc;
00830 
00831   argc = mrb_get_args(mrb, "o|o", &a1, &a2);
00832   if (argc == 2) {
00833     mrb_regexp_check(mrb, a1);
00834     return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
00835   }
00836   if (argc != 1) {
00837     mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
00838   }
00839   return mrb_str_aref(mrb, str, a1);
00840 }
00841 
00842 /* 15.2.10.5.8  */
00843 /*
00844  *  call-seq:
00845  *     str.capitalize!   => str or nil
00846  *
00847  *  Modifies <i>str</i> by converting the first character to uppercase and the
00848  *  remainder to lowercase. Returns <code>nil</code> if no changes are made.
00849  *
00850  *     a = "hello"
00851  *     a.capitalize!   #=> "Hello"
00852  *     a               #=> "Hello"
00853  *     a.capitalize!   #=> nil
00854  */
00855 static mrb_value
00856 mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
00857 {
00858   char *p, *pend;
00859   mrb_bool modify = FALSE;
00860   struct RString *s = mrb_str_ptr(str);
00861 
00862   mrb_str_modify(mrb, s);
00863   if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
00864   p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
00865   if (ISLOWER(*p)) {
00866     *p = TOUPPER(*p);
00867     modify = TRUE;
00868   }
00869   while (++p < pend) {
00870     if (ISUPPER(*p)) {
00871       *p = TOLOWER(*p);
00872       modify = TRUE;
00873     }
00874   }
00875   if (modify) return str;
00876   return mrb_nil_value();
00877 }
00878 
00879 /* 15.2.10.5.7  */
00880 /*
00881  *  call-seq:
00882  *     str.capitalize   => new_str
00883  *
00884  *  Returns a copy of <i>str</i> with the first character converted to uppercase
00885  *  and the remainder to lowercase.
00886  *
00887  *     "hello".capitalize    #=> "Hello"
00888  *     "HELLO".capitalize    #=> "Hello"
00889  *     "123ABC".capitalize   #=> "123abc"
00890  */
00891 static mrb_value
00892 mrb_str_capitalize(mrb_state *mrb, mrb_value self)
00893 {
00894   mrb_value str;
00895 
00896   str = mrb_str_dup(mrb, self);
00897   mrb_str_capitalize_bang(mrb, str);
00898   return str;
00899 }
00900 
00901 /* 15.2.10.5.10  */
00902 /*
00903  *  call-seq:
00904  *     str.chomp!(separator=$/)   => str or nil
00905  *
00906  *  Modifies <i>str</i> in place as described for <code>String#chomp</code>,
00907  *  returning <i>str</i>, or <code>nil</code> if no modifications were made.
00908  */
00909 static mrb_value
00910 mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
00911 {
00912   mrb_value rs;
00913   mrb_int newline;
00914   char *p, *pp;
00915   mrb_int rslen;
00916   mrb_int len;
00917   struct RString *s = mrb_str_ptr(str);
00918 
00919   mrb_str_modify(mrb, s);
00920   len = RSTR_LEN(s);
00921   if (mrb_get_args(mrb, "|S", &rs) == 0) {
00922     if (len == 0) return mrb_nil_value();
00923   smart_chomp:
00924     if (RSTR_PTR(s)[len-1] == '\n') {
00925       RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
00926       if (RSTR_LEN(s) > 0 &&
00927           RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') {
00928         RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
00929       }
00930     }
00931     else if (RSTR_PTR(s)[len-1] == '\r') {
00932       RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
00933     }
00934     else {
00935       return mrb_nil_value();
00936     }
00937     RSTR_PTR(s)[RSTR_LEN(s)] = '\0';
00938     return str;
00939   }
00940 
00941   if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
00942   p = RSTR_PTR(s);
00943   rslen = RSTRING_LEN(rs);
00944   if (rslen == 0) {
00945     while (len>0 && p[len-1] == '\n') {
00946       len--;
00947       if (len>0 && p[len-1] == '\r')
00948         len--;
00949     }
00950     if (len < RSTR_LEN(s)) {
00951       RSTR_SET_LEN(s, len);
00952       p[len] = '\0';
00953       return str;
00954     }
00955     return mrb_nil_value();
00956   }
00957   if (rslen > len) return mrb_nil_value();
00958   newline = RSTRING_PTR(rs)[rslen-1];
00959   if (rslen == 1 && newline == '\n')
00960     newline = RSTRING_PTR(rs)[rslen-1];
00961   if (rslen == 1 && newline == '\n')
00962     goto smart_chomp;
00963 
00964   pp = p + len - rslen;
00965   if (p[len-1] == newline &&
00966      (rslen <= 1 ||
00967      memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
00968     RSTR_SET_LEN(s, len - rslen);
00969     p[RSTR_LEN(s)] = '\0';
00970     return str;
00971   }
00972   return mrb_nil_value();
00973 }
00974 
00975 /* 15.2.10.5.9  */
00976 /*
00977  *  call-seq:
00978  *     str.chomp(separator=$/)   => new_str
00979  *
00980  *  Returns a new <code>String</code> with the given record separator removed
00981  *  from the end of <i>str</i> (if present). If <code>$/</code> has not been
00982  *  changed from the default Ruby record separator, then <code>chomp</code> also
00983  *  removes carriage return characters (that is it will remove <code>\n</code>,
00984  *  <code>\r</code>, and <code>\r\n</code>).
00985  *
00986  *     "hello".chomp            #=> "hello"
00987  *     "hello\n".chomp          #=> "hello"
00988  *     "hello\r\n".chomp        #=> "hello"
00989  *     "hello\n\r".chomp        #=> "hello\n"
00990  *     "hello\r".chomp          #=> "hello"
00991  *     "hello \n there".chomp   #=> "hello \n there"
00992  *     "hello".chomp("llo")     #=> "he"
00993  */
00994 static mrb_value
00995 mrb_str_chomp(mrb_state *mrb, mrb_value self)
00996 {
00997   mrb_value str;
00998 
00999   str = mrb_str_dup(mrb, self);
01000   mrb_str_chomp_bang(mrb, str);
01001   return str;
01002 }
01003 
01004 /* 15.2.10.5.12 */
01005 /*
01006  *  call-seq:
01007  *     str.chop!   => str or nil
01008  *
01009  *  Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
01010  *  or <code>nil</code> if <i>str</i> is the empty string.  See also
01011  *  <code>String#chomp!</code>.
01012  */
01013 static mrb_value
01014 mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
01015 {
01016   struct RString *s = mrb_str_ptr(str);
01017 
01018   mrb_str_modify(mrb, s);
01019   if (RSTR_LEN(s) > 0) {
01020     mrb_int len;
01021     len = RSTR_LEN(s) - 1;
01022     if (RSTR_PTR(s)[len] == '\n') {
01023       if (len > 0 &&
01024           RSTR_PTR(s)[len-1] == '\r') {
01025         len--;
01026       }
01027     }
01028     RSTR_SET_LEN(s, len);
01029     RSTR_PTR(s)[len] = '\0';
01030     return str;
01031   }
01032   return mrb_nil_value();
01033 }
01034 
01035 /* 15.2.10.5.11 */
01036 /*
01037  *  call-seq:
01038  *     str.chop   => new_str
01039  *
01040  *  Returns a new <code>String</code> with the last character removed.  If the
01041  *  string ends with <code>\r\n</code>, both characters are removed. Applying
01042  *  <code>chop</code> to an empty string returns an empty
01043  *  string. <code>String#chomp</code> is often a safer alternative, as it leaves
01044  *  the string unchanged if it doesn't end in a record separator.
01045  *
01046  *     "string\r\n".chop   #=> "string"
01047  *     "string\n\r".chop   #=> "string\n"
01048  *     "string\n".chop     #=> "string"
01049  *     "string".chop       #=> "strin"
01050  *     "x".chop            #=> ""
01051  */
01052 static mrb_value
01053 mrb_str_chop(mrb_state *mrb, mrb_value self)
01054 {
01055   mrb_value str;
01056   str = mrb_str_dup(mrb, self);
01057   mrb_str_chop_bang(mrb, str);
01058   return str;
01059 }
01060 
01061 /* 15.2.10.5.14 */
01062 /*
01063  *  call-seq:
01064  *     str.downcase!   => str or nil
01065  *
01066  *  Downcases the contents of <i>str</i>, returning <code>nil</code> if no
01067  *  changes were made.
01068  */
01069 static mrb_value
01070 mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
01071 {
01072   char *p, *pend;
01073   mrb_bool modify = FALSE;
01074   struct RString *s = mrb_str_ptr(str);
01075 
01076   mrb_str_modify(mrb, s);
01077   p = RSTR_PTR(s);
01078   pend = RSTR_PTR(s) + RSTR_LEN(s);
01079   while (p < pend) {
01080     if (ISUPPER(*p)) {
01081       *p = TOLOWER(*p);
01082       modify = TRUE;
01083     }
01084     p++;
01085   }
01086 
01087   if (modify) return str;
01088   return mrb_nil_value();
01089 }
01090 
01091 /* 15.2.10.5.13 */
01092 /*
01093  *  call-seq:
01094  *     str.downcase   => new_str
01095  *
01096  *  Returns a copy of <i>str</i> with all uppercase letters replaced with their
01097  *  lowercase counterparts. The operation is locale insensitive---only
01098  *  characters ``A'' to ``Z'' are affected.
01099  *
01100  *     "hEllO".downcase   #=> "hello"
01101  */
01102 static mrb_value
01103 mrb_str_downcase(mrb_state *mrb, mrb_value self)
01104 {
01105   mrb_value str;
01106 
01107   str = mrb_str_dup(mrb, self);
01108   mrb_str_downcase_bang(mrb, str);
01109   return str;
01110 }
01111 
01112 /* 15.2.10.5.16 */
01113 /*
01114  *  call-seq:
01115  *     str.empty?   => true or false
01116  *
01117  *  Returns <code>true</code> if <i>str</i> has a length of zero.
01118  *
01119  *     "hello".empty?   #=> false
01120  *     "".empty?        #=> true
01121  */
01122 static mrb_value
01123 mrb_str_empty_p(mrb_state *mrb, mrb_value self)
01124 {
01125   struct RString *s = mrb_str_ptr(self);
01126 
01127   return mrb_bool_value(RSTR_LEN(s) == 0);
01128 }
01129 
01130 /* 15.2.10.5.17 */
01131 /*
01132  * call-seq:
01133  *   str.eql?(other)   => true or false
01134  *
01135  * Two strings are equal if they have the same length and content.
01136  */
01137 static mrb_value
01138 mrb_str_eql(mrb_state *mrb, mrb_value self)
01139 {
01140   mrb_value str2;
01141   mrb_bool eql_p;
01142 
01143   mrb_get_args(mrb, "o", &str2);
01144   eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
01145 
01146   return mrb_bool_value(eql_p);
01147 }
01148 
01149 static mrb_value
01150 mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
01151 {
01152   struct RString *orig, *s;
01153   mrb_shared_string *shared;
01154 
01155   orig = mrb_str_ptr(str);
01156   if (RSTR_EMBED_P(orig)) {
01157     s = str_new(mrb, orig->as.ary+beg, len);
01158   } else {
01159     str_make_shared(mrb, orig);
01160     shared = orig->as.heap.aux.shared;
01161     s = mrb_obj_alloc_string(mrb);
01162     s->as.heap.ptr = orig->as.heap.ptr + beg;
01163     s->as.heap.len = len;
01164     s->as.heap.aux.shared = shared;
01165     RSTR_SET_SHARED_FLAG(s);
01166     shared->refcnt++;
01167   }
01168 
01169   return mrb_obj_value(s);
01170 }
01171 
01172 MRB_API mrb_value
01173 mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
01174 {
01175   if (len < 0) return mrb_nil_value();
01176   if (!RSTRING_LEN(str)) {
01177     len = 0;
01178   }
01179   if (beg > RSTRING_LEN(str)) return mrb_nil_value();
01180   if (beg < 0) {
01181     beg += RSTRING_LEN(str);
01182     if (beg < 0) return mrb_nil_value();
01183   }
01184   if (beg + len > RSTRING_LEN(str))
01185     len = RSTRING_LEN(str) - beg;
01186   if (len <= 0) {
01187     len = 0;
01188   }
01189   return mrb_str_subseq(mrb, str, beg, len);
01190 }
01191 
01192 mrb_int
01193 mrb_str_hash(mrb_state *mrb, mrb_value str)
01194 {
01195   /* 1-8-7 */
01196   struct RString *s = mrb_str_ptr(str);
01197   mrb_int len = RSTR_LEN(s);
01198   char *p = RSTR_PTR(s);
01199   mrb_int key = 0;
01200 
01201   while (len--) {
01202     key = key*65599 + *p;
01203     p++;
01204   }
01205   return key + (key>>5);
01206 }
01207 
01208 /* 15.2.10.5.20 */
01209 /*
01210  * call-seq:
01211  *    str.hash   => fixnum
01212  *
01213  * Return a hash based on the string's length and content.
01214  */
01215 static mrb_value
01216 mrb_str_hash_m(mrb_state *mrb, mrb_value self)
01217 {
01218   mrb_int key = mrb_str_hash(mrb, self);
01219   return mrb_fixnum_value(key);
01220 }
01221 
01222 /* 15.2.10.5.21 */
01223 /*
01224  *  call-seq:
01225  *     str.include? other_str   => true or false
01226  *     str.include? fixnum      => true or false
01227  *
01228  *  Returns <code>true</code> if <i>str</i> contains the given string or
01229  *  character.
01230  *
01231  *     "hello".include? "lo"   #=> true
01232  *     "hello".include? "ol"   #=> false
01233  *     "hello".include? ?h     #=> true
01234  */
01235 static mrb_value
01236 mrb_str_include(mrb_state *mrb, mrb_value self)
01237 {
01238   mrb_int i;
01239   mrb_value str2;
01240   mrb_bool include_p;
01241 
01242   mrb_get_args(mrb, "o", &str2);
01243   if (mrb_fixnum_p(str2)) {
01244     include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
01245   }
01246   else {
01247     str2 = mrb_str_to_str(mrb, str2);
01248     i = mrb_str_index(mrb, self, str2, 0);
01249 
01250     include_p = (i != -1);
01251   }
01252 
01253   return mrb_bool_value(include_p);
01254 }
01255 
01256 /* 15.2.10.5.22 */
01257 /*
01258  *  call-seq:
01259  *     str.index(substring [, offset])   => fixnum or nil
01260  *     str.index(fixnum [, offset])      => fixnum or nil
01261  *     str.index(regexp [, offset])      => fixnum or nil
01262  *
01263  *  Returns the index of the first occurrence of the given
01264  *  <i>substring</i>,
01265  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
01266  *  Returns
01267  *  <code>nil</code> if not found.
01268  *  If the second parameter is present, it
01269  *  specifies the position in the string to begin the search.
01270  *
01271  *     "hello".index('e')             #=> 1
01272  *     "hello".index('lo')            #=> 3
01273  *     "hello".index('a')             #=> nil
01274  *     "hello".index(101)             #=> 1(101=0x65='e')
01275  *     "hello".index(/[aeiou]/, -3)   #=> 4
01276  */
01277 static mrb_value
01278 mrb_str_index_m(mrb_state *mrb, mrb_value str)
01279 {
01280   mrb_value *argv;
01281   mrb_int argc;
01282   mrb_value sub;
01283   mrb_int pos;
01284 
01285   mrb_get_args(mrb, "*", &argv, &argc);
01286   if (argc == 2) {
01287     pos = mrb_fixnum(argv[1]);
01288     sub = argv[0];
01289   }
01290   else {
01291     pos = 0;
01292     if (argc > 0)
01293       sub = argv[0];
01294     else
01295       sub = mrb_nil_value();
01296   }
01297   mrb_regexp_check(mrb, sub);
01298   if (pos < 0) {
01299     pos += RSTRING_LEN(str);
01300     if (pos < 0) {
01301       return mrb_nil_value();
01302     }
01303   }
01304 
01305   switch (mrb_type(sub)) {
01306     case MRB_TT_FIXNUM: {
01307       int c = mrb_fixnum(sub);
01308       mrb_int len = RSTRING_LEN(str);
01309       unsigned char *p = (unsigned char*)RSTRING_PTR(str);
01310 
01311       for (;pos<len;pos++) {
01312         if (p[pos] == c) return mrb_fixnum_value(pos);
01313       }
01314       return mrb_nil_value();
01315     }
01316 
01317     default: {
01318       mrb_value tmp;
01319 
01320       tmp = mrb_check_string_type(mrb, sub);
01321       if (mrb_nil_p(tmp)) {
01322         mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
01323       }
01324       sub = tmp;
01325     }
01326     /* fall through */
01327     case MRB_TT_STRING:
01328       pos = mrb_str_index(mrb, str, sub, pos);
01329       break;
01330   }
01331 
01332   if (pos == -1) return mrb_nil_value();
01333   return mrb_fixnum_value(pos);
01334 }
01335 
01336 #define STR_REPLACE_SHARED_MIN 10
01337 
01338 static mrb_value
01339 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
01340 {
01341   long len;
01342 
01343   len = RSTR_LEN(s2);
01344   if (RSTR_SHARED_P(s1)) {
01345     str_decref(mrb, s1->as.heap.aux.shared);
01346   }
01347   else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
01348     mrb_free(mrb, s1->as.heap.ptr);
01349   }
01350 
01351   RSTR_UNSET_NOFREE_FLAG(s1);
01352 
01353   if (RSTR_SHARED_P(s2)) {
01354 L_SHARE:
01355     RSTR_UNSET_EMBED_FLAG(s1);
01356     s1->as.heap.ptr = s2->as.heap.ptr;
01357     s1->as.heap.len = len;
01358     s1->as.heap.aux.shared = s2->as.heap.aux.shared;
01359     RSTR_SET_SHARED_FLAG(s1);
01360     s1->as.heap.aux.shared->refcnt++;
01361   }
01362   else {
01363     if (len <= RSTRING_EMBED_LEN_MAX) {
01364       RSTR_UNSET_SHARED_FLAG(s1);
01365       RSTR_SET_EMBED_FLAG(s1);
01366       memcpy(s1->as.ary, RSTR_PTR(s2), len);
01367       RSTR_SET_EMBED_LEN(s1, len);
01368     }
01369     else {
01370       str_make_shared(mrb, s2);
01371       goto L_SHARE;
01372     }
01373   }
01374 
01375   return mrb_obj_value(s1);
01376 }
01377 
01378 /* 15.2.10.5.24 */
01379 /* 15.2.10.5.28 */
01380 /*
01381  *  call-seq:
01382  *     str.replace(other_str)   => str
01383  *
01384  *     s = "hello"         #=> "hello"
01385  *     s.replace "world"   #=> "world"
01386  */
01387 static mrb_value
01388 mrb_str_replace(mrb_state *mrb, mrb_value str)
01389 {
01390   mrb_value str2;
01391 
01392   mrb_get_args(mrb, "S", &str2);
01393   return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
01394 }
01395 
01396 /* 15.2.10.5.23 */
01397 /*
01398  *  call-seq:
01399  *     String.new(str="")   => new_str
01400  *
01401  *  Returns a new string object containing a copy of <i>str</i>.
01402  */
01403 static mrb_value
01404 mrb_str_init(mrb_state *mrb, mrb_value self)
01405 {
01406   mrb_value str2;
01407 
01408   if (mrb_get_args(mrb, "|S", &str2) == 1) {
01409     str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
01410   }
01411   return self;
01412 }
01413 
01414 /* 15.2.10.5.25 */
01415 /* 15.2.10.5.41 */
01416 /*
01417  *  call-seq:
01418  *     str.intern   => symbol
01419  *     str.to_sym   => symbol
01420  *
01421  *  Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
01422  *  symbol if it did not previously exist. See <code>Symbol#id2name</code>.
01423  *
01424  *     "Koala".intern         #=> :Koala
01425  *     s = 'cat'.to_sym       #=> :cat
01426  *     s == :cat              #=> true
01427  *     s = '@cat'.to_sym      #=> :@cat
01428  *     s == :@cat             #=> true
01429  *
01430  *  This can also be used to create symbols that cannot be represented using the
01431  *  <code>:xxx</code> notation.
01432  *
01433  *     'cat and dog'.to_sym   #=> :"cat and dog"
01434  */
01435 MRB_API mrb_value
01436 mrb_str_intern(mrb_state *mrb, mrb_value self)
01437 {
01438   return mrb_symbol_value(mrb_intern_str(mrb, self));
01439 }
01440 /* ---------------------------------- */
01441 MRB_API mrb_value
01442 mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
01443 {
01444   mrb_value str;
01445 
01446   if (mrb_string_p(obj)) {
01447     return obj;
01448   }
01449   str = mrb_funcall(mrb, obj, "to_s", 0);
01450   if (!mrb_string_p(str))
01451     return mrb_any_to_s(mrb, obj);
01452   return str;
01453 }
01454 
01455 MRB_API mrb_value
01456 mrb_ptr_to_str(mrb_state *mrb, void *p)
01457 {
01458   struct RString *p_str;
01459   char *p1;
01460   char *p2;
01461   uintptr_t n = (uintptr_t)p;
01462 
01463   p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
01464   p1 = RSTR_PTR(p_str);
01465   *p1++ = '0';
01466   *p1++ = 'x';
01467   p2 = p1;
01468 
01469   do {
01470     *p2++ = mrb_digitmap[n % 16];
01471     n /= 16;
01472   } while (n > 0);
01473   *p2 = '\0';
01474   RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str)));
01475 
01476   while (p1 < p2) {
01477     const char  c = *p1;
01478     *p1++ = *--p2;
01479     *p2 = c;
01480   }
01481 
01482   return mrb_obj_value(p_str);
01483 }
01484 
01485 MRB_API mrb_value
01486 mrb_string_type(mrb_state *mrb, mrb_value str)
01487 {
01488   return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
01489 }
01490 
01491 MRB_API mrb_value
01492 mrb_check_string_type(mrb_state *mrb, mrb_value str)
01493 {
01494   return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
01495 }
01496 
01497 /* ---------------------------------- */
01498 /* 15.2.10.5.29 */
01499 /*
01500  *  call-seq:
01501  *     str.reverse   => new_str
01502  *
01503  *  Returns a new string with the characters from <i>str</i> in reverse order.
01504  *
01505  *     "stressed".reverse   #=> "desserts"
01506  */
01507 static mrb_value
01508 mrb_str_reverse(mrb_state *mrb, mrb_value str)
01509 {
01510   struct RString *s2;
01511   char *s, *e, *p;
01512 
01513   if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str);
01514 
01515   s2 = str_new(mrb, 0, RSTRING_LEN(str));
01516   str_with_class(mrb, s2, str);
01517   s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
01518   p = RSTR_PTR(s2);
01519 
01520   while (e >= s) {
01521     *p++ = *e--;
01522   }
01523   return mrb_obj_value(s2);
01524 }
01525 
01526 /* 15.2.10.5.30 */
01527 /*
01528  *  call-seq:
01529  *     str.reverse!   => str
01530  *
01531  *  Reverses <i>str</i> in place.
01532  */
01533 static mrb_value
01534 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
01535 {
01536   struct RString *s = mrb_str_ptr(str);
01537   char *p, *e;
01538   char c;
01539 
01540   mrb_str_modify(mrb, s);
01541   if (RSTR_LEN(s) > 1) {
01542     p = RSTR_PTR(s);
01543     e = p + RSTR_LEN(s) - 1;
01544     while (p < e) {
01545       c = *p;
01546       *p++ = *e;
01547       *e-- = c;
01548     }
01549   }
01550   return str;
01551 }
01552 
01553 /*
01554  *  call-seq:
01555  *     str.rindex(substring [, fixnum])   => fixnum or nil
01556  *     str.rindex(fixnum [, fixnum])   => fixnum or nil
01557  *     str.rindex(regexp [, fixnum])   => fixnum or nil
01558  *
01559  *  Returns the index of the last occurrence of the given <i>substring</i>,
01560  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01561  *  <code>nil</code> if not found. If the second parameter is present, it
01562  *  specifies the position in the string to end the search---characters beyond
01563  *  this point will not be considered.
01564  *
01565  *     "hello".rindex('e')             #=> 1
01566  *     "hello".rindex('l')             #=> 3
01567  *     "hello".rindex('a')             #=> nil
01568  *     "hello".rindex(101)             #=> 1
01569  *     "hello".rindex(/[aeiou]/, -2)   #=> 1
01570  */
01571 static mrb_int
01572 mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
01573 {
01574   char *s, *sbeg, *t;
01575   struct RString *ps = mrb_str_ptr(str);
01576   mrb_int len = RSTRING_LEN(sub);
01577 
01578   /* substring longer than string */
01579   if (RSTR_LEN(ps) < len) return -1;
01580   if (RSTR_LEN(ps) - pos < len) {
01581     pos = RSTR_LEN(ps) - len;
01582   }
01583   sbeg = RSTR_PTR(ps);
01584   s = RSTR_PTR(ps) + pos;
01585   t = RSTRING_PTR(sub);
01586   if (len) {
01587     while (sbeg <= s) {
01588       if (memcmp(s, t, len) == 0) {
01589         return s - RSTR_PTR(ps);
01590       }
01591       s--;
01592     }
01593     return -1;
01594   }
01595   else {
01596     return pos;
01597   }
01598 }
01599 
01600 /* 15.2.10.5.31 */
01601 /*
01602  *  call-seq:
01603  *     str.rindex(substring [, fixnum])   => fixnum or nil
01604  *     str.rindex(fixnum [, fixnum])   => fixnum or nil
01605  *     str.rindex(regexp [, fixnum])   => fixnum or nil
01606  *
01607  *  Returns the index of the last occurrence of the given <i>substring</i>,
01608  *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
01609  *  <code>nil</code> if not found. If the second parameter is present, it
01610  *  specifies the position in the string to end the search---characters beyond
01611  *  this point will not be considered.
01612  *
01613  *     "hello".rindex('e')             #=> 1
01614  *     "hello".rindex('l')             #=> 3
01615  *     "hello".rindex('a')             #=> nil
01616  *     "hello".rindex(101)             #=> 1
01617  *     "hello".rindex(/[aeiou]/, -2)   #=> 1
01618  */
01619 static mrb_value
01620 mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
01621 {
01622   mrb_value *argv;
01623   mrb_int argc;
01624   mrb_value sub;
01625   mrb_value vpos;
01626   mrb_int pos, len = RSTRING_LEN(str);
01627 
01628   mrb_get_args(mrb, "*", &argv, &argc);
01629   if (argc == 2) {
01630     sub = argv[0];
01631     vpos = argv[1];
01632     pos = mrb_fixnum(vpos);
01633     if (pos < 0) {
01634       pos += len;
01635       if (pos < 0) {
01636         mrb_regexp_check(mrb, sub);
01637         return mrb_nil_value();
01638       }
01639     }
01640     if (pos > len) pos = len;
01641   }
01642   else {
01643     pos = len;
01644     if (argc > 0)
01645       sub = argv[0];
01646     else
01647       sub = mrb_nil_value();
01648   }
01649   mrb_regexp_check(mrb, sub);
01650 
01651   switch (mrb_type(sub)) {
01652     case MRB_TT_FIXNUM: {
01653       int c = mrb_fixnum(sub);
01654       unsigned char *p = (unsigned char*)RSTRING_PTR(str);
01655 
01656       for (pos=len-1;pos>=0;pos--) {
01657         if (p[pos] == c) return mrb_fixnum_value(pos);
01658       }
01659       return mrb_nil_value();
01660     }
01661 
01662     default: {
01663       mrb_value tmp;
01664 
01665       tmp = mrb_check_string_type(mrb, sub);
01666       if (mrb_nil_p(tmp)) {
01667         mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
01668       }
01669       sub = tmp;
01670     }
01671      /* fall through */
01672     case MRB_TT_STRING:
01673       pos = mrb_str_rindex(mrb, str, sub, pos);
01674       if (pos >= 0) return mrb_fixnum_value(pos);
01675       break;
01676 
01677   } /* end of switch (TYPE(sub)) */
01678   return mrb_nil_value();
01679 }
01680 
01681 /* 15.2.10.5.35 */
01682 
01683 /*
01684  *  call-seq:
01685  *     str.split(pattern=$;, [limit])   => anArray
01686  *
01687  *  Divides <i>str</i> into substrings based on a delimiter, returning an array
01688  *  of these substrings.
01689  *
01690  *  If <i>pattern</i> is a <code>String</code>, then its contents are used as
01691  *  the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
01692  *  space, <i>str</i> is split on whitespace, with leading whitespace and runs
01693  *  of contiguous whitespace characters ignored.
01694  *
01695  *  If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
01696  *  pattern matches. Whenever the pattern matches a zero-length string,
01697  *  <i>str</i> is split into individual characters.
01698  *
01699  *  If <i>pattern</i> is omitted, the value of <code>$;</code> is used.  If
01700  *  <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
01701  *  split on whitespace as if ` ' were specified.
01702  *
01703  *  If the <i>limit</i> parameter is omitted, trailing null fields are
01704  *  suppressed. If <i>limit</i> is a positive number, at most that number of
01705  *  fields will be returned (if <i>limit</i> is <code>1</code>, the entire
01706  *  string is returned as the only entry in an array). If negative, there is no
01707  *  limit to the number of fields returned, and trailing null fields are not
01708  *  suppressed.
01709  *
01710  *     " now's  the time".split        #=> ["now's", "the", "time"]
01711  *     " now's  the time".split(' ')   #=> ["now's", "the", "time"]
01712  *     " now's  the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
01713  *     "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
01714  *     "hello".split(//)               #=> ["h", "e", "l", "l", "o"]
01715  *     "hello".split(//, 3)            #=> ["h", "e", "llo"]
01716  *     "hi mom".split(%r{\s*})         #=> ["h", "i", "m", "o", "m"]
01717  *
01718  *     "mellow yellow".split("ello")   #=> ["m", "w y", "w"]
01719  *     "1,2,,3,4,,".split(',')         #=> ["1", "2", "", "3", "4"]
01720  *     "1,2,,3,4,,".split(',', 4)      #=> ["1", "2", "", "3,4,,"]
01721  *     "1,2,,3,4,,".split(',', -4)     #=> ["1", "2", "", "3", "4", "", ""]
01722  */
01723 
01724 static mrb_value
01725 mrb_str_split_m(mrb_state *mrb, mrb_value str)
01726 {
01727   int argc;
01728   mrb_value spat = mrb_nil_value();
01729   enum {awk, string, regexp} split_type = string;
01730   long i = 0, lim_p;
01731   mrb_int beg;
01732   mrb_int end;
01733   mrb_int lim = 0;
01734   mrb_value result, tmp;
01735 
01736   argc = mrb_get_args(mrb, "|oi", &spat, &lim);
01737   lim_p = (lim > 0 && argc == 2);
01738   if (argc == 2) {
01739     if (lim == 1) {
01740       if (RSTRING_LEN(str) == 0)
01741         return mrb_ary_new_capa(mrb, 0);
01742       return mrb_ary_new_from_values(mrb, 1, &str);
01743     }
01744     i = 1;
01745   }
01746 
01747   if (argc == 0 || mrb_nil_p(spat)) {
01748     split_type = awk;
01749   }
01750   else {
01751     if (mrb_string_p(spat)) {
01752       split_type = string;
01753       if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
01754           split_type = awk;
01755       }
01756     }
01757     else {
01758       mrb_noregexp(mrb, str);
01759     }
01760   }
01761 
01762   result = mrb_ary_new(mrb);
01763   beg = 0;
01764   if (split_type == awk) {
01765     char *ptr = RSTRING_PTR(str);
01766     char *eptr = RSTRING_END(str);
01767     char *bptr = ptr;
01768     mrb_bool skip = TRUE;
01769     unsigned int c;
01770 
01771     end = beg;
01772     while (ptr < eptr) {
01773       int ai = mrb_gc_arena_save(mrb);
01774       c = (unsigned char)*ptr++;
01775       if (skip) {
01776         if (ISSPACE(c)) {
01777           beg = ptr - bptr;
01778         }
01779         else {
01780           end = ptr - bptr;
01781           skip = FALSE;
01782           if (lim_p && lim <= i) break;
01783         }
01784       }
01785       else if (ISSPACE(c)) {
01786         mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
01787         mrb_gc_arena_restore(mrb, ai);
01788         skip = TRUE;
01789         beg = ptr - bptr;
01790         if (lim_p) ++i;
01791       }
01792       else {
01793         end = ptr - bptr;
01794       }
01795     }
01796   }
01797   else if (split_type == string) {
01798     char *ptr = RSTRING_PTR(str); /* s->as.ary */
01799     char *temp = ptr;
01800     char *eptr = RSTRING_END(str);
01801     mrb_int slen = RSTRING_LEN(spat);
01802 
01803     if (slen == 0) {
01804       int ai = mrb_gc_arena_save(mrb);
01805       while (ptr < eptr) {
01806         mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
01807         mrb_gc_arena_restore(mrb, ai);
01808         ptr++;
01809         if (lim_p && lim <= ++i) break;
01810       }
01811     }
01812     else {
01813       char *sptr = RSTRING_PTR(spat);
01814       int ai = mrb_gc_arena_save(mrb);
01815 
01816       while (ptr < eptr &&
01817         (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
01818         mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
01819         mrb_gc_arena_restore(mrb, ai);
01820         ptr += end + slen;
01821         if (lim_p && lim <= ++i) break;
01822       }
01823     }
01824     beg = ptr - temp;
01825   }
01826   else {
01827     mrb_noregexp(mrb, str);
01828   }
01829   if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
01830     if (RSTRING_LEN(str) == beg) {
01831       tmp = mrb_str_new_empty(mrb, str);
01832     }
01833     else {
01834       tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
01835     }
01836     mrb_ary_push(mrb, result, tmp);
01837   }
01838   if (!lim_p && lim == 0) {
01839     mrb_int len;
01840     while ((len = RARRAY_LEN(result)) > 0 &&
01841            (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
01842       mrb_ary_pop(mrb, result);
01843   }
01844 
01845   return result;
01846 }
01847 
01848 MRB_API mrb_value
01849 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
01850 {
01851   const char *p;
01852   char sign = 1;
01853   int c, uscore;
01854   unsigned long n = 0;
01855   mrb_int val;
01856 
01857 #define conv_digit(c) \
01858     (ISDIGIT(c) ? ((c) - '0') : \
01859      ISLOWER(c) ? ((c) - 'a' + 10) : \
01860      ISUPPER(c) ? ((c) - 'A' + 10) : \
01861      -1)
01862 
01863   if (!str) {
01864     if (badcheck) goto bad;
01865     return mrb_fixnum_value(0);
01866   }
01867   while (ISSPACE(*str)) str++;
01868 
01869   if (str[0] == '+') {
01870     str++;
01871   }
01872   else if (str[0] == '-') {
01873     str++;
01874     sign = 0;
01875   }
01876   if (str[0] == '+' || str[0] == '-') {
01877     if (badcheck) goto bad;
01878     return mrb_fixnum_value(0);
01879   }
01880   if (base <= 0) {
01881     if (str[0] == '0') {
01882       switch (str[1]) {
01883         case 'x': case 'X':
01884           base = 16;
01885           break;
01886         case 'b': case 'B':
01887           base = 2;
01888           break;
01889         case 'o': case 'O':
01890           base = 8;
01891           break;
01892         case 'd': case 'D':
01893           base = 10;
01894           break;
01895         default:
01896           base = 8;
01897       }
01898     }
01899     else if (base < -1) {
01900       base = -base;
01901     }
01902     else {
01903       base = 10;
01904     }
01905   }
01906   switch (base) {
01907     case 2:
01908       if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
01909         str += 2;
01910       }
01911       break;
01912     case 3:
01913       break;
01914     case 8:
01915       if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
01916         str += 2;
01917       }
01918     case 4: case 5: case 6: case 7:
01919       break;
01920     case 10:
01921       if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
01922         str += 2;
01923       }
01924     case 9: case 11: case 12: case 13: case 14: case 15:
01925       break;
01926     case 16:
01927       if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
01928         str += 2;
01929       }
01930       break;
01931     default:
01932       if (base < 2 || 36 < base) {
01933         mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
01934       }
01935       break;
01936   } /* end of switch (base) { */
01937   if (*str == '0') {    /* squeeze preceeding 0s */
01938     uscore = 0;
01939     while ((c = *++str) == '0' || c == '_') {
01940       if (c == '_') {
01941         if (++uscore >= 2)
01942           break;
01943       }
01944       else
01945         uscore = 0;
01946     }
01947     if (!(c = *str) || ISSPACE(c)) --str;
01948   }
01949   c = *str;
01950   c = conv_digit(c);
01951   if (c < 0 || c >= base) {
01952     if (badcheck) goto bad;
01953     return mrb_fixnum_value(0);
01954   }
01955 
01956   uscore = 0;
01957   for (p=str;*p;p++) {
01958     if (*p == '_') {
01959       if (uscore == 0) {
01960         uscore++;
01961         continue;
01962       }
01963       if (badcheck) goto bad;
01964       break;
01965     }
01966     uscore = 0;
01967     c = conv_digit(*p);
01968     if (c < 0 || c >= base) {
01969       if (badcheck) goto bad;
01970       break;
01971     }
01972     n *= base;
01973     n += c;
01974   }
01975   if (n > MRB_INT_MAX) {
01976     mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
01977   }
01978   val = n;
01979   if (badcheck) {
01980     if (p == str) goto bad; /* no number */
01981     while (*p && ISSPACE(*p)) p++;
01982     if (*p) goto bad;           /* trailing garbage */
01983   }
01984 
01985   return mrb_fixnum_value(sign ? val : -val);
01986 bad:
01987   mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
01988   /* not reached */
01989   return mrb_fixnum_value(0);
01990 }
01991 
01992 MRB_API const char*
01993 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
01994 {
01995   struct RString *ps = mrb_str_ptr(*ptr);
01996   mrb_int len = mrb_str_strlen(mrb, ps);
01997   char *p = RSTR_PTR(ps);
01998 
01999   if (!p || p[len] != '\0') {
02000     mrb_str_modify(mrb, ps);
02001     return RSTR_PTR(ps);
02002   }
02003   return p;
02004 }
02005 
02006 MRB_API mrb_value
02007 mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
02008 {
02009   const char *s;
02010   mrb_int len;
02011 
02012   str = mrb_str_to_str(mrb, str);
02013   if (badcheck) {
02014     s = mrb_string_value_cstr(mrb, &str);
02015   }
02016   else {
02017     s = RSTRING_PTR(str);
02018   }
02019   if (s) {
02020     len = RSTRING_LEN(str);
02021     if (s[len]) {    /* no sentinel somehow */
02022       struct RString *temp_str = str_new(mrb, s, len);
02023       s = RSTR_PTR(temp_str);
02024     }
02025   }
02026   return mrb_cstr_to_inum(mrb, s, base, badcheck);
02027 }
02028 
02029 /* 15.2.10.5.38 */
02030 /*
02031  *  call-seq:
02032  *     str.to_i(base=10)   => integer
02033  *
02034  *  Returns the result of interpreting leading characters in <i>str</i> as an
02035  *  integer base <i>base</i> (between 2 and 36). Extraneous characters past the
02036  *  end of a valid number are ignored. If there is not a valid number at the
02037  *  start of <i>str</i>, <code>0</code> is returned. This method never raises an
02038  *  exception.
02039  *
02040  *     "12345".to_i             #=> 12345
02041  *     "99 red balloons".to_i   #=> 99
02042  *     "0a".to_i                #=> 0
02043  *     "0a".to_i(16)            #=> 10
02044  *     "hello".to_i             #=> 0
02045  *     "1100101".to_i(2)        #=> 101
02046  *     "1100101".to_i(8)        #=> 294977
02047  *     "1100101".to_i(10)       #=> 1100101
02048  *     "1100101".to_i(16)       #=> 17826049
02049  */
02050 static mrb_value
02051 mrb_str_to_i(mrb_state *mrb, mrb_value self)
02052 {
02053   mrb_int base = 10;
02054 
02055   mrb_get_args(mrb, "|i", &base);
02056   if (base < 0) {
02057     mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
02058   }
02059   return mrb_str_to_inum(mrb, self, base, FALSE);
02060 }
02061 
02062 MRB_API double
02063 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
02064 {
02065   char *end;
02066   double d;
02067 
02068   enum {max_width = 20};
02069 
02070   if (!p) return 0.0;
02071   while (ISSPACE(*p)) p++;
02072 
02073   if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
02074     return 0.0;
02075   }
02076   d = strtod(p, &end);
02077   if (p == end) {
02078     if (badcheck) {
02079 bad:
02080       mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
02081       /* not reached */
02082     }
02083     return d;
02084   }
02085   if (*end) {
02086     char buf[DBL_DIG * 4 + 10];
02087     char *n = buf;
02088     char *e = buf + sizeof(buf) - 1;
02089     char prev = 0;
02090 
02091     while (p < end && n < e) prev = *n++ = *p++;
02092     while (*p) {
02093       if (*p == '_') {
02094         /* remove underscores between digits */
02095         if (badcheck) {
02096           if (n == buf || !ISDIGIT(prev)) goto bad;
02097           ++p;
02098           if (!ISDIGIT(*p)) goto bad;
02099         }
02100         else {
02101           while (*++p == '_');
02102           continue;
02103         }
02104       }
02105       prev = *p++;
02106       if (n < e) *n++ = prev;
02107     }
02108     *n = '\0';
02109     p = buf;
02110 
02111     if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
02112       return 0.0;
02113     }
02114 
02115     d = strtod(p, &end);
02116     if (badcheck) {
02117       if (!end || p == end) goto bad;
02118       while (*end && ISSPACE(*end)) end++;
02119       if (*end) goto bad;
02120     }
02121   }
02122   return d;
02123 }
02124 
02125 MRB_API double
02126 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
02127 {
02128   char *s;
02129   mrb_int len;
02130 
02131   str = mrb_str_to_str(mrb, str);
02132   s = RSTRING_PTR(str);
02133   len = RSTRING_LEN(str);
02134   if (s) {
02135     if (badcheck && memchr(s, '\0', len)) {
02136       mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
02137     }
02138     if (s[len]) {    /* no sentinel somehow */
02139       struct RString *temp_str = str_new(mrb, s, len);
02140       s = RSTR_PTR(temp_str);
02141     }
02142   }
02143   return mrb_cstr_to_dbl(mrb, s, badcheck);
02144 }
02145 
02146 /* 15.2.10.5.39 */
02147 /*
02148  *  call-seq:
02149  *     str.to_f   => float
02150  *
02151  *  Returns the result of interpreting leading characters in <i>str</i> as a
02152  *  floating point number. Extraneous characters past the end of a valid number
02153  *  are ignored. If there is not a valid number at the start of <i>str</i>,
02154  *  <code>0.0</code> is returned. This method never raises an exception.
02155  *
02156  *     "123.45e1".to_f        #=> 1234.5
02157  *     "45.67 degrees".to_f   #=> 45.67
02158  *     "thx1138".to_f         #=> 0.0
02159  */
02160 static mrb_value
02161 mrb_str_to_f(mrb_state *mrb, mrb_value self)
02162 {
02163   return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
02164 }
02165 
02166 /* 15.2.10.5.40 */
02167 /*
02168  *  call-seq:
02169  *     str.to_s     => str
02170  *     str.to_str   => str
02171  *
02172  *  Returns the receiver.
02173  */
02174 static mrb_value
02175 mrb_str_to_s(mrb_state *mrb, mrb_value self)
02176 {
02177   if (mrb_obj_class(mrb, self) != mrb->string_class) {
02178     return mrb_str_dup(mrb, self);
02179   }
02180   return self;
02181 }
02182 
02183 /* 15.2.10.5.43 */
02184 /*
02185  *  call-seq:
02186  *     str.upcase!   => str or nil
02187  *
02188  *  Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
02189  *  were made.
02190  */
02191 static mrb_value
02192 mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
02193 {
02194   struct RString *s = mrb_str_ptr(str);
02195   char *p, *pend;
02196   mrb_bool modify = FALSE;
02197 
02198   mrb_str_modify(mrb, s);
02199   p = RSTRING_PTR(str);
02200   pend = RSTRING_END(str);
02201   while (p < pend) {
02202     if (ISLOWER(*p)) {
02203       *p = TOUPPER(*p);
02204       modify = TRUE;
02205     }
02206     p++;
02207   }
02208 
02209   if (modify) return str;
02210   return mrb_nil_value();
02211 }
02212 
02213 /* 15.2.10.5.42 */
02214 /*
02215  *  call-seq:
02216  *     str.upcase   => new_str
02217  *
02218  *  Returns a copy of <i>str</i> with all lowercase letters replaced with their
02219  *  uppercase counterparts. The operation is locale insensitive---only
02220  *  characters ``a'' to ``z'' are affected.
02221  *
02222  *     "hEllO".upcase   #=> "HELLO"
02223  */
02224 static mrb_value
02225 mrb_str_upcase(mrb_state *mrb, mrb_value self)
02226 {
02227   mrb_value str;
02228 
02229   str = mrb_str_dup(mrb, self);
02230   mrb_str_upcase_bang(mrb, str);
02231   return str;
02232 }
02233 
02234 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
02235 
02236 /*
02237  *  call-seq:
02238  *     str.dump   -> new_str
02239  *
02240  *  Produces a version of <i>str</i> with all nonprinting characters replaced by
02241  *  <code>\nnn</code> notation and all special characters escaped.
02242  */
02243 mrb_value
02244 mrb_str_dump(mrb_state *mrb, mrb_value str)
02245 {
02246   mrb_int len;
02247   const char *p, *pend;
02248   char *q;
02249   struct RString *result;
02250 
02251   len = 2;                  /* "" */
02252   p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
02253   while (p < pend) {
02254     unsigned char c = *p++;
02255     switch (c) {
02256       case '"':  case '\\':
02257       case '\n': case '\r':
02258       case '\t': case '\f':
02259       case '\013': case '\010': case '\007': case '\033':
02260         len += 2;
02261         break;
02262 
02263       case '#':
02264         len += IS_EVSTR(p, pend) ? 2 : 1;
02265         break;
02266 
02267       default:
02268         if (ISPRINT(c)) {
02269           len++;
02270         }
02271         else {
02272           len += 4;                /* \NNN */
02273         }
02274         break;
02275     }
02276   }
02277 
02278   result = str_new(mrb, 0, len);
02279   str_with_class(mrb, result, str);
02280   p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
02281   q = RSTR_PTR(result);
02282   *q++ = '"';
02283   while (p < pend) {
02284     unsigned char c = *p++;
02285 
02286     switch (c) {
02287       case '"':
02288       case '\\':
02289         *q++ = '\\';
02290         *q++ = c;
02291         break;
02292 
02293       case '\n':
02294         *q++ = '\\';
02295         *q++ = 'n';
02296         break;
02297 
02298       case '\r':
02299         *q++ = '\\';
02300         *q++ = 'r';
02301         break;
02302 
02303       case '\t':
02304         *q++ = '\\';
02305         *q++ = 't';
02306         break;
02307 
02308       case '\f':
02309         *q++ = '\\';
02310         *q++ = 'f';
02311         break;
02312 
02313       case '\013':
02314         *q++ = '\\';
02315         *q++ = 'v';
02316         break;
02317 
02318       case '\010':
02319         *q++ = '\\';
02320         *q++ = 'b';
02321         break;
02322 
02323       case '\007':
02324         *q++ = '\\';
02325         *q++ = 'a';
02326         break;
02327 
02328       case '\033':
02329         *q++ = '\\';
02330         *q++ = 'e';
02331         break;
02332 
02333       case '#':
02334         if (IS_EVSTR(p, pend)) *q++ = '\\';
02335         *q++ = '#';
02336         break;
02337 
02338       default:
02339         if (ISPRINT(c)) {
02340           *q++ = c;
02341         }
02342         else {
02343           *q++ = '\\';
02344           q[2] = '0' + c % 8; c /= 8;
02345           q[1] = '0' + c % 8; c /= 8;
02346           q[0] = '0' + c % 8;
02347           q += 3;
02348         }
02349     }
02350   }
02351   *q = '"';
02352   return mrb_obj_value(result);
02353 }
02354 
02355 MRB_API mrb_value
02356 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
02357 {
02358   str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
02359   return str;
02360 }
02361 
02362 MRB_API mrb_value
02363 mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
02364 {
02365   return mrb_str_cat(mrb, str, ptr, strlen(ptr));
02366 }
02367 
02368 MRB_API mrb_value
02369 mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
02370 {
02371   return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
02372 }
02373 
02374 MRB_API mrb_value
02375 mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
02376 {
02377   str2 = mrb_str_to_str(mrb, str2);
02378   return mrb_str_cat_str(mrb, str, str2);
02379 }
02380 
02381 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
02382 
02383 /*
02384  * call-seq:
02385  *   str.inspect   -> string
02386  *
02387  * Returns a printable version of _str_, surrounded by quote marks,
02388  * with special characters escaped.
02389  *
02390  *    str = "hello"
02391  *    str[3] = "\b"
02392  *    str.inspect       #=> "\"hel\\bo\""
02393  */
02394 mrb_value
02395 mrb_str_inspect(mrb_state *mrb, mrb_value str)
02396 {
02397   const char *p, *pend;
02398   char buf[CHAR_ESC_LEN + 1];
02399   mrb_value result = mrb_str_new_lit(mrb, "\"");
02400 
02401   p = RSTRING_PTR(str); pend = RSTRING_END(str);
02402   for (;p < pend; p++) {
02403     unsigned char c, cc;
02404 
02405     c = *p;
02406     if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
02407       buf[0] = '\\'; buf[1] = c;
02408       mrb_str_cat(mrb, result, buf, 2);
02409       continue;
02410     }
02411     if (ISPRINT(c)) {
02412       buf[0] = c;
02413       mrb_str_cat(mrb, result, buf, 1);
02414       continue;
02415     }
02416     switch (c) {
02417       case '\n': cc = 'n'; break;
02418       case '\r': cc = 'r'; break;
02419       case '\t': cc = 't'; break;
02420       case '\f': cc = 'f'; break;
02421       case '\013': cc = 'v'; break;
02422       case '\010': cc = 'b'; break;
02423       case '\007': cc = 'a'; break;
02424       case 033: cc = 'e'; break;
02425       default: cc = 0; break;
02426     }
02427     if (cc) {
02428       buf[0] = '\\';
02429       buf[1] = (char)cc;
02430       mrb_str_cat(mrb, result, buf, 2);
02431       continue;
02432     }
02433     else {
02434       buf[0] = '\\';
02435       buf[3] = '0' + c % 8; c /= 8;
02436       buf[2] = '0' + c % 8; c /= 8;
02437       buf[1] = '0' + c % 8;
02438       mrb_str_cat(mrb, result, buf, 4);
02439       continue;
02440     }
02441   }
02442   mrb_str_cat_lit(mrb, result, "\"");
02443 
02444   return result;
02445 }
02446 
02447 /*
02448  * call-seq:
02449  *   str.bytes   -> array of fixnums
02450  *
02451  * Returns an array of bytes in _str_.
02452  *
02453  *    str = "hello"
02454  *    str.bytes       #=> [104, 101, 108, 108, 111]
02455  */
02456 static mrb_value
02457 mrb_str_bytes(mrb_state *mrb, mrb_value str)
02458 {
02459   struct RString *s = mrb_str_ptr(str);
02460   mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s));
02461   unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s);
02462 
02463   while (p < pend) {
02464     mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
02465     p++;
02466   }
02467   return a;
02468 }
02469 
02470 /* ---------------------------*/
02471 void
02472 mrb_init_string(mrb_state *mrb)
02473 {
02474   struct RClass *s;
02475 
02476   mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
02477 
02478   s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);             /* 15.2.10 */
02479   MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
02480 
02481   mrb_define_method(mrb, s, "bytesize",        mrb_str_size,            MRB_ARGS_NONE());
02482 
02483   mrb_define_method(mrb, s, "<=>",             mrb_str_cmp_m,           MRB_ARGS_REQ(1)); /* 15.2.10.5.1  */
02484   mrb_define_method(mrb, s, "==",              mrb_str_equal_m,         MRB_ARGS_REQ(1)); /* 15.2.10.5.2  */
02485   mrb_define_method(mrb, s, "+",               mrb_str_plus_m,          MRB_ARGS_REQ(1)); /* 15.2.10.5.4  */
02486   mrb_define_method(mrb, s, "*",               mrb_str_times,           MRB_ARGS_REQ(1)); /* 15.2.10.5.5  */
02487   mrb_define_method(mrb, s, "[]",              mrb_str_aref_m,          MRB_ARGS_ANY());  /* 15.2.10.5.6  */
02488   mrb_define_method(mrb, s, "capitalize",      mrb_str_capitalize,      MRB_ARGS_NONE()); /* 15.2.10.5.7  */
02489   mrb_define_method(mrb, s, "capitalize!",     mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8  */
02490   mrb_define_method(mrb, s, "chomp",           mrb_str_chomp,           MRB_ARGS_ANY());  /* 15.2.10.5.9  */
02491   mrb_define_method(mrb, s, "chomp!",          mrb_str_chomp_bang,      MRB_ARGS_ANY());  /* 15.2.10.5.10 */
02492   mrb_define_method(mrb, s, "chop",            mrb_str_chop,            MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
02493   mrb_define_method(mrb, s, "chop!",           mrb_str_chop_bang,       MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
02494   mrb_define_method(mrb, s, "downcase",        mrb_str_downcase,        MRB_ARGS_NONE()); /* 15.2.10.5.13 */
02495   mrb_define_method(mrb, s, "downcase!",       mrb_str_downcase_bang,   MRB_ARGS_NONE()); /* 15.2.10.5.14 */
02496   mrb_define_method(mrb, s, "empty?",          mrb_str_empty_p,         MRB_ARGS_NONE()); /* 15.2.10.5.16 */
02497   mrb_define_method(mrb, s, "eql?",            mrb_str_eql,             MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
02498 
02499   mrb_define_method(mrb, s, "hash",            mrb_str_hash_m,          MRB_ARGS_NONE()); /* 15.2.10.5.20 */
02500   mrb_define_method(mrb, s, "include?",        mrb_str_include,         MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
02501   mrb_define_method(mrb, s, "index",           mrb_str_index_m,         MRB_ARGS_ANY());  /* 15.2.10.5.22 */
02502   mrb_define_method(mrb, s, "initialize",      mrb_str_init,            MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
02503   mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace,         MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
02504   mrb_define_method(mrb, s, "intern",          mrb_str_intern,          MRB_ARGS_NONE()); /* 15.2.10.5.25 */
02505   mrb_define_method(mrb, s, "length",          mrb_str_size,            MRB_ARGS_NONE()); /* 15.2.10.5.26 */
02506   mrb_define_method(mrb, s, "replace",         mrb_str_replace,         MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
02507   mrb_define_method(mrb, s, "reverse",         mrb_str_reverse,         MRB_ARGS_NONE()); /* 15.2.10.5.29 */
02508   mrb_define_method(mrb, s, "reverse!",        mrb_str_reverse_bang,    MRB_ARGS_NONE()); /* 15.2.10.5.30 */
02509   mrb_define_method(mrb, s, "rindex",          mrb_str_rindex_m,        MRB_ARGS_ANY());  /* 15.2.10.5.31 */
02510   mrb_define_method(mrb, s, "size",            mrb_str_size,            MRB_ARGS_NONE()); /* 15.2.10.5.33 */
02511   mrb_define_method(mrb, s, "slice",           mrb_str_aref_m,          MRB_ARGS_ANY());  /* 15.2.10.5.34 */
02512   mrb_define_method(mrb, s, "split",           mrb_str_split_m,         MRB_ARGS_ANY());  /* 15.2.10.5.35 */
02513 
02514   mrb_define_method(mrb, s, "to_f",            mrb_str_to_f,            MRB_ARGS_NONE()); /* 15.2.10.5.38 */
02515   mrb_define_method(mrb, s, "to_i",            mrb_str_to_i,            MRB_ARGS_ANY());  /* 15.2.10.5.39 */
02516   mrb_define_method(mrb, s, "to_s",            mrb_str_to_s,            MRB_ARGS_NONE()); /* 15.2.10.5.40 */
02517   mrb_define_method(mrb, s, "to_str",          mrb_str_to_s,            MRB_ARGS_NONE());
02518   mrb_define_method(mrb, s, "to_sym",          mrb_str_intern,          MRB_ARGS_NONE()); /* 15.2.10.5.41 */
02519   mrb_define_method(mrb, s, "upcase",          mrb_str_upcase,          MRB_ARGS_NONE()); /* 15.2.10.5.42 */
02520   mrb_define_method(mrb, s, "upcase!",         mrb_str_upcase_bang,     MRB_ARGS_NONE()); /* 15.2.10.5.43 */
02521   mrb_define_method(mrb, s, "inspect",         mrb_str_inspect,         MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
02522   mrb_define_method(mrb, s, "bytes",           mrb_str_bytes,           MRB_ARGS_NONE());
02523 }
02524