core/vul/vul_string.cxx
Go to the documentation of this file.
00001 // This is core/vul/vul_string.cxx
00002 #include "vul_string.h"
00003 //:
00004 // \file
00005 
00006 #include <vcl_cassert.h>
00007 #include <vcl_cstdlib.h>
00008 #include <vcl_cstring.h>
00009 #include <vcl_cctype.h>
00010 #include <vcl_algorithm.h>
00011 #include <vcl_sstream.h>
00012 #include <vcl_cmath.h>
00013 
00014 #ifndef END_OF_STRING                           // If END_OF_STRING not defined
00015 #define END_OF_STRING (0)
00016 #endif
00017 
00018 // Converts all alphabetical characters to uppercase.
00019 char* vul_string_c_upcase(char* s)  // Convert entire string to upper case
00020 {
00021   char* p = s;                   // Point to beginning of string
00022   while (*p) {                   // While there are still valid characters
00023     if (vcl_islower(*p))         // if this is lower case
00024       *p = (char)vcl_toupper(*p);// convert to uppercase
00025     p++;                         // Advance pointer
00026   }
00027   return s;                      // Return reference to modified string
00028 }
00029 
00030 // Converts all alphabetical characters to lowercase.
00031 char* vul_string_c_downcase(char* s)  // Convert entire string to lower case
00032 {
00033   char* p = s;                   // Point to beginning of string
00034   while (*p) {                   // While there are still valid characters
00035     if (vcl_isupper(*p))         // if this is upper case
00036       *p = (char)vcl_tolower(*p);// convert to lowercase
00037     p++;                         // Advance pointer
00038   }
00039   return s;                      // Return reference to modified string
00040 }
00041 
00042 // Capitalizes all words in a string. A word is defined as
00043 // a sequence of characters separated by non-alphanumerics.
00044 char* vul_string_c_capitalize(char* s)  // Capitalize each word in string
00045 {
00046   char* p = s;                           // Point to beginning of string
00047   while (true) {                         // Infinite loop
00048     for (; *p && !vcl_isalnum(*p); p++) ;// Skip to first alphanumeric
00049     if (*p == END_OF_STRING)             // If end of string
00050       return s;                          // Return string
00051     *p = (char)vcl_toupper(*p);          // Convert character
00052     while (*++p && vcl_isalnum(*p)) ;    // Search for next word
00053   }
00054 }
00055 
// Removes from str, in place, every character that occurs anywhere in rem.
// rem is treated as a set of characters, not as a substring.
// \param str  writable, NUL-terminated string to filter.
// \param rem  NUL-terminated set of characters to drop.
// \returns str, to allow call chaining.
char* vul_string_c_trim(char* str, const char* rem)
{
  char* out = str;                          // next position to write a kept char
  for (const char* in = str; *in != '\0'; ++in) {
    const char* r = rem;
    while (*r != '\0' && *r != *in)         // look for *in among rem
      ++r;
    if (*r == '\0')                         // not listed in rem: keep it
      *out++ = *in;
  }
  *out = '\0';                              // terminate the compacted string
  return str;
}
00073 
// Strips from the front of str, in place, the longest leading run of
// characters that all occur in rem.  rem is treated as a set of characters,
// not as a substring.
// \param str  writable, NUL-terminated string to trim.
// \param rem  NUL-terminated set of characters to strip.
// \returns str, to allow call chaining.
char* vul_string_c_left_trim(char* str, const char* rem)
{
  // Find the first character that is not listed in rem.
  const char* keep = str;
  for (; *keep != '\0'; ++keep) {
    const char* r = rem;
    while (*r != '\0' && *r != *keep)       // look for *keep among rem
      ++r;
    if (*r == '\0')                         // not in rem: trimming stops here
      break;
  }

  if (keep != str) {                        // something was trimmed:
    char* out = str;                        // shift the tail (and its
    while ((*out++ = *keep++) != '\0') ;    // terminator) down to the front
  }
  return str;
}
00092 
00093 // Removes any suffix occurrence of the string rem
00094 // from the first string str, and returns the modified string str.
00095 char* vul_string_c_right_trim(char* str, const char* rem) // Trim suffix from string
00096 {
00097   char* s = str + vcl_strlen(str) - 1;            // last character of str
00098   for (; s >= str; s--) {
00099     register const char* r = rem;
00100     register char t;
00101     register char c = *s;
00102     while ((t=*r++) != END_OF_STRING && t != c) ; // Scan for match
00103     if (t == END_OF_STRING)                       // If no match found
00104       break;
00105   }
00106   *(s+1) = END_OF_STRING;
00107   return str;                                     // Return pointer to string
00108 }
00109 
00110 // Reverses the order of the characters in char*.
00111 char* vul_string_c_reverse(char* c)     // Reverse the order of characters
00112 {
00113   int length = vcl_strlen(c);           // Number of characters in string
00114   char temp;
00115 
00116   for (int i = 0, j = length-1;         // Counting from front and rear
00117        i < j; ++i, --j)                 // until we reach the middle
00118   {
00119     temp = c[i];                        // Save front character
00120     c[i] = c[j];                        // Switch with rear character
00121     c[j] = temp;                        // Copy new rear character
00122   }
00123   return c;
00124 }
00125 
00126 // Reverses the order of the characters in string
00127 vcl_string& vul_string_reverse(vcl_string& s)
00128 {
00129   for (int i=0, j=vcl_strlen(s.c_str())-1; i<j; ++i,--j)
00130   {
00131     char c = s[i]; s[i] = s[j]; s[j] = c;
00132   }
00133   return s;
00134 }
00135 
00136 // In some implementations of <cctype>, toupper and tolower are macros
00137 // instead of functions.  In that case, they cannot be passed as 4th argument
00138 // to std::transform.  Hence it's easier to "inline" std::transform here,
00139 // instead of using it explicitly. - PVr.
00140 
00141 // Converts all alphabetical characters in string s to uppercase.
00142 vcl_string& vul_string_upcase(vcl_string& s)
00143 {
00144   for (vcl_string::iterator i=s.begin(); i != s.end(); ++i)
00145     *i = (char)vcl_toupper(*i);
00146   return s;
00147 }
00148 
00149 // Converts all alphabetical characters in string s to lowercase.
00150 vcl_string& vul_string_downcase(vcl_string& s)
00151 {
00152   for (vcl_string::iterator i=s.begin(); i != s.end(); ++i)
00153     *i = (char)vcl_tolower(*i);
00154   return s;
00155 }
00156 
00157 // Capitalizes all words in string s.
00158 vcl_string& vul_string_capitalize(vcl_string& s)
00159 {
00160   // Word beginnings are defined as the transition from
00161   // non-alphanumeric to alphanumeric, and word endings as the reverse
00162   // transition.
00163   vcl_string::iterator si;
00164   bool in_word = false;
00165   for ( si = s.begin(); si != s.end(); ++si ) {
00166     if ( !in_word && vcl_isalnum( *si ) ) {
00167       *si = (char)vcl_toupper( *si );
00168       in_word = true;
00169     } else if ( in_word && !vcl_isalnum( *si ) ) {
00170       in_word = false;
00171     }
00172   }
00173   return s;
00174 }
00175 
00176 // Removes any occurrence of the character string rem
00177 // from the string sr, and returns the modified string sr.
00178 vcl_string& vul_string_trim(vcl_string& sr, const char* rem)
00179 {
00180   int l = vcl_strlen(rem);
00181   for (;;) {
00182     vcl_string::size_type loc = sr.find(rem);
00183     if (loc == vcl_string::npos)
00184       break;
00185     sr.erase(loc, l);
00186   }
00187   return sr;
00188 }
00189 
00190 // Removes any prefix occurrence of the character string rem
00191 // from the string sr, and returns the modified string sr.
00192 vcl_string& vul_string_left_trim(vcl_string& sr, const char* rem)
00193 {
00194   int l = vcl_strlen(rem);
00195   if (vcl_strncmp(sr.c_str(), rem, l) == 0)
00196     sr.erase(0, l);
00197   return sr;
00198 }
00199 
00200 // Removes any suffix occurrence of the character string rem
00201 // from the string sr, and returns the modified string sr.
00202 vcl_string& vul_string_right_trim(vcl_string& sr, const char* rem)
00203 {
00204   int l = vcl_strlen(rem);
00205   int lsr = sr.length();
00206   if (vcl_strncmp(sr.c_str() + lsr - l, rem, l) == 0)
00207     sr.erase(lsr - l, l);
00208   return sr;
00209 }
00210 
00211 int vul_string_atoi(vcl_string const& s)
00212 {
00213   return vcl_atoi(s.c_str());
00214 }
00215 
00216 double vul_string_atof(vcl_string const& s)
00217 {
00218   return vcl_atof(s.c_str());
00219 }
00220 
00221 
00222 
00223 //: Reads an double from a string, with k, kb, M, etc suffix.
00224 // No space is allowed between the number and the suffix.
00225 // k=10^3, kb=2^10, M=10^6, Mb=2^20, G=10^9, Gb=2^30, T=10^12, Tb=2^40
00226 // If parse fails, return 0.0;
00227 double vul_string_atof_withsuffix(vcl_string const& s)
00228 {
00229   vcl_istringstream ss(s);
00230   double d;
00231   ss >> d;
00232   if (!ss) return 0.0;
00233   if (ss.eof()) return d;
00234 
00235   char c='A';
00236   ss >> c;
00237   if (ss.eof()) return d;
00238 
00239   double e=0;
00240   switch (c)
00241   {
00242     case 'k': e=1; break;
00243     case 'M': e=2; break;
00244     case 'G': e=3; break;
00245     case 'T': e=4; break;
00246     default: return 0.0;
00247   }
00248   if (ss.eof()) return d*vcl_pow(10.0,3.0*e);
00249 
00250   c='A';
00251   ss >> c;
00252   if (ss.eof()) return d*vcl_pow(10.0,3.0*e);
00253   if (!ss || c!='i') return 0.0;
00254 
00255   ss >> c;
00256   if (!ss.eof()) return 0.0;
00257 
00258   return d*vcl_pow(2.0,10.0*e);
00259 }
00260 
00261 static bool NotSpace(char a)
00262 {
00263   return !vcl_isspace(a);
00264 }
00265 
00266 template <class IT>
00267 static bool myequals(IT b1, IT e1,
00268                      const char * b2, const char * e2)
00269 {
00270   for (;b1 != e1 && b2 != e2; ++b1, ++b2)
00271     if (vcl_toupper(*b1) != *b2) return false;
00272   return b1 == e1
00273       && b2 == e2;
00274 }
00275 
00276 bool vul_string_to_bool(const vcl_string &str)
00277 {
00278   vcl_string::const_iterator begin = vcl_find_if(str.begin(), str.end(), NotSpace);
00279   const vcl_string::const_reverse_iterator rend(begin);
00280   vcl_string::const_iterator end = vcl_find_if(str.rbegin(), rend, NotSpace).base();
00281   const char *syes = "YES";
00282   const char *strue = "TRUE";
00283   const char *s1 = "1";
00284   const char *son = "ON";
00285   return myequals(begin, end, syes, syes+3)
00286      ||  myequals(begin, end, strue, strue+4)
00287      ||  myequals(begin, end, s1, s1+1)
00288      ||  myequals(begin, end, son, son+2);
00289 }
00290 
00291 //Leave verbatim in to avoid $->LaTeX munging.
00292 
00293 //: Expand any environment variables in the string.
00294 // Expands "foo$VARfoo" to "foobarfoo" when $VAR=bar. If both $VAR and $VARfoo
00295 // exist, an arbitrary choice will be made of which variable to use.
00296 // This problem can be avoided by using the syntax "foo${VAR}foo." "$(VAR)"
00297 // and "$[VAR]" can also be used.
00298 // There are no inbuilt variables like in shell scripting, and variable names
00299 // cannot contain whitespace or "$"s.
00300 // "$$" can be used to insert a literal "$" into the output.
00301 // \returns false if a matching variable could not be found.
00302 bool vul_string_expand_var(vcl_string &str)
00303 {
00304   vcl_string::size_type i = 0; // index to current char.
00305   const vcl_string::size_type npos = vcl_string::npos;
00306 
00307   // If there is a problem, carry on trying to convert rest
00308   bool success=true; //  of string, but remember failure.
00309 
00310   enum {not_in_var, start_var, in_var, in_bracket_var} state = not_in_var;
00311   vcl_string::size_type var_begin = 0;
00312 
00313   vcl_string::size_type bracket_type = npos; //index into open_brackets.
00314   const vcl_string  open_brackets("{([");
00315   const vcl_string close_brackets("})]");
00316 
00317   while (i<str.size())
00318   {
00319     switch (state)
00320     {
00321      case not_in_var: // not currently in a variable
00322       if (str[i] == '$')
00323       {
00324         state = start_var;
00325         var_begin = i;
00326       }
00327       break;
00328      case start_var: // just started a variable
00329       if (str[i] == '$')
00330       {
00331         str.erase(i,1);
00332         state=not_in_var;
00333         continue;
00334       }
00335       else if ((bracket_type = open_brackets.find_first_of(str[i])) != npos)
00336       {
00337         state=in_bracket_var;
00338         break;
00339       }
00340       else // or this is the first letter of the variable, in which case go through
00341         state=in_var;
00342      case in_var:  // in a non-bracketed variable
00343       assert(var_begin+1 < str.size());
00344       assert(i > var_begin);
00345       if (str[i] == '$')
00346       { // no dollars allowed - assume we missed last variable and this is a new one.
00347         success=false;
00348         state = start_var;
00349         var_begin = i;
00350         break;
00351       }
00352       else
00353       {
00354         const char * value= vcl_getenv(str.substr(var_begin+1, i-var_begin).c_str());
00355         if (value)
00356         {
00357           str.replace(var_begin, i+1-var_begin, value);
00358           i = var_begin + vcl_strlen(value);
00359           state=not_in_var;
00360           continue;
00361         }
00362       }
00363       break;
00364      case in_bracket_var:  // in a bracketed variable
00365       if (str[i] == close_brackets[bracket_type])
00366       {
00367         assert(var_begin+2 < str.size());
00368         assert(i > var_begin+1);
00369         state=not_in_var;
00370         if (i==var_begin+2) // empty variable name
00371         {
00372           success=false;
00373           break;
00374         }
00375         else
00376         {
00377           const char * value= vcl_getenv(str.substr(var_begin+2, i-var_begin-2).c_str());
00378           if (value)
00379           {
00380             str.replace(var_begin, i+1-var_begin, value);
00381             i = var_begin + vcl_strlen(value);
00382             continue;
00383           }
00384           else
00385             success=false;
00386         }
00387       }
00388       break;
00389      default: // do nothing (silently ignore invalid state)
00390       break;
00391     }
00392     ++i;
00393   }
00394   return success;
00395 }
00396 
00397 //: replaces instances "find_str" in "full_str" with "replace_str" a given "num_times".
00398 //  \returns true iff at least one replacement took place.
00399 bool vul_string_replace(vcl_string& full_str,
00400                         const vcl_string& find_str,
00401                         const vcl_string& replace_str,
00402                         int num_times)
00403 {
00404   bool rep=false;
00405   for (int i = 0; i<num_times; i++)
00406   {
00407     int loc = full_str.find( find_str,0);
00408     if (loc >= 0)
00409     {
00410       full_str.replace( loc, find_str.length(), replace_str );
00411       rep=true;
00412     }
00413     else
00414     {
00415       return rep;
00416     }
00417   }
00418   return rep;
00419 }