src/utils/StringTokenizer.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 *
 * SUPPORT OPEN SCIENCE!  If you use OpenMD or its source code in your
 * research, please cite the appropriate papers when you publish your
 * work.  Good starting points are:
 *                                                                      
 * [1]  Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).             
 * [2]  Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).          
 * [3]  Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).          
 * [4]  Kuang & Gezelter,  J. Chem. Phys. 133, 164101 (2010).
 * [5]  Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
 */
 
#include <climits>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <sstream>
#include "utils/StringTokenizer.hpp"

namespace OpenMD {


  /**
   * Creates a tokenizer over a copy of @p str. Delimiter characters are
   * skipped and never handed back as tokens (returnTokens_ is false).
   *
   * @param str   text to be split into tokens
   * @param delim set of characters that separate tokens
   */
  StringTokenizer::StringTokenizer(const std::string& str,
                                   const std::string& delim)
      : tokenString_(str),
        delim_(delim),
        returnTokens_(false),
        // The iterators refer to the member copy, not to the argument.
        // NOTE(review): this relies on tokenString_ being declared before
        // currentPos_ and end_ in the header - confirm.
        currentPos_(tokenString_.begin()),
        end_(tokenString_.end()) {
  }

  /**
   * Creates a tokenizer over a copy of the character range [first, last).
   * Delimiter characters are skipped and never handed back as tokens
   * (returnTokens_ is false).
   *
   * @param first iterator to the first character of the text
   * @param last  iterator one past the final character of the text
   * @param delim set of characters that separate tokens
   */
  StringTokenizer::StringTokenizer(std::string::const_iterator& first,
                                   std::string::const_iterator& last,
                                   const std::string& delim)
      : tokenString_(first, last),
        delim_(delim),
        returnTokens_(false),
        // The iterators refer to the member copy, not to the caller's range.
        // NOTE(review): this relies on tokenString_ being declared before
        // currentPos_ and end_ in the header - confirm.
        currentPos_(tokenString_.begin()),
        end_(tokenString_.end()) {
  }

  /**
   * Creates a tokenizer over a copy of @p str with explicit control over
   * whether delimiter characters are reported.
   *
   * @param str          text to be split into tokens
   * @param delim        set of characters that separate tokens
   * @param returnTokens when true, every delimiter character is handed back
   *                     as its own one-character token; when false,
   *                     delimiters are skipped
   */
  StringTokenizer::StringTokenizer(const std::string& str,
                                   const std::string& delim,
                                   bool returnTokens)
      : tokenString_(str),
        delim_(delim),
        returnTokens_(returnTokens),
        // The iterators refer to the member copy, not to the argument.
        // NOTE(review): this relies on tokenString_ being declared before
        // currentPos_ and end_ in the header - confirm.
        currentPos_(tokenString_.begin()),
        end_(tokenString_.end()) {
  }

  /**
   * Tests whether a character belongs to the delimiter set.
   *
   * @param c character to classify
   * @return true if @p c occurs in delim_, false otherwise
   */
  bool StringTokenizer::isDelimiter(const char c) {
    return delim_.find(c) != std::string::npos;
  }

  /**
   * Counts the tokens that remain between the current position and the end
   * of the text, without moving the current position.
   *
   * Each maximal run of non-delimiter characters counts as one token. When
   * returnTokens_ is set, every individual delimiter character counts as
   * one additional token.
   *
   * @return number of tokens still available
   */
  int StringTokenizer::countTokens() {
    int count = 0;
    std::string::const_iterator cursor = currentPos_;

    while (cursor != end_) {
      if (isDelimiter(*cursor)) {
        // A delimiter is a token in its own right only in returnTokens_ mode.
        if (returnTokens_) {
          ++count;
        }
        ++cursor;
      } else {
        // Swallow the whole run of non-delimiter characters as one token.
        do {
          ++cursor;
        } while (cursor != end_ && !isDelimiter(*cursor));
        ++count;
      }
    }

    return count;
  }

  /**
   * Reports whether a further call to nextToken() can yield a token.
   *
   * @return false when the text is exhausted; true when returnTokens_ is set
   *         and any character remains; otherwise true only if at least one
   *         non-delimiter character remains
   */
  bool StringTokenizer::hasMoreTokens() {
    if (currentPos_ == end_) {
      return false;
    }

    // In returnTokens_ mode every remaining character belongs to some token.
    if (returnTokens_) {
      return true;
    }

    // Otherwise a token exists only if something other than delimiters is left.
    for (std::string::const_iterator probe = currentPos_; probe != end_; ++probe) {
      if (!isDelimiter(*probe)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Extracts the next token and advances the current position past it.
   *
   * When returnTokens_ is set and the current character is a delimiter, that
   * single delimiter character is returned as the token. Otherwise leading
   * delimiters are skipped and the following run of non-delimiter characters
   * is returned.
   *
   * @return the next token, or an empty string if no token is left
   */
  std::string StringTokenizer::nextToken() {
    std::string token;

    // Deal with any delimiters sitting in front of the next token.
    while (currentPos_ != end_ && isDelimiter(*currentPos_)) {
      const char delimChar = *currentPos_;
      ++currentPos_;

      if (returnTokens_) {
        token.push_back(delimChar);
        return token;
      }
    }

    // Collect the run of non-delimiter characters (possibly empty at the end).
    const std::string::const_iterator tokenStart = currentPos_;
    while (currentPos_ != end_ && !isDelimiter(*currentPos_)) {
      ++currentPos_;
    }
    token.assign(tokenStart, currentPos_);

    return token;
  }

  /**
   * Consumes the next token and interprets it as a bool using stream
   * extraction with default formatting flags. std::boolalpha is not set, so
   * the numeric forms "0" and "1" are accepted, but words such as "true" are
   * not.
   *
   * @return the parsed value; false if the token cannot be converted, in
   *         which case a diagnostic is also written to std::cerr
   */
  bool StringTokenizer::nextTokenAsBool() {
    const std::string text = nextToken();
    std::istringstream parser(text);

    bool value = false;
    parser >> value;

    if (parser.fail()) {
      std::cerr << "unable to convert " << text << " to a bool" << std::endl;
      return false;
    }

    return value;
  }
 
  /**
   * Consumes the next token and converts it to an int.
   *
   * The C library is used rather than iostream extraction because
   * libstdc++ (GCC 3.2) has an i/ostream::operator>>/<<(streambuf*) bug
   * (Bug 9318).
   *
   * std::strtol is used in place of atoi: atoi has undefined behavior when
   * the value does not fit in an int, whereas here out-of-range input
   * saturates to INT_MIN or INT_MAX. For in-range input the result is the
   * same as atoi: leading whitespace is skipped, an optional sign and
   * decimal digits are read, and trailing characters are ignored.
   *
   * @return the parsed value; 0 if the token does not start with an integer
   *         (including an empty token); INT_MIN or INT_MAX if the value is
   *         out of range for int
   */
  int StringTokenizer::nextTokenAsInt() {
    const std::string token = nextToken();
    const long parsed = std::strtol(token.c_str(), NULL, 10);

    // strtol already saturates to LONG_MIN/LONG_MAX; narrow that to int range.
    if (parsed > INT_MAX) {
      return INT_MAX;
    }
    if (parsed < INT_MIN) {
      return INT_MIN;
    }

    return static_cast<int>(parsed);
  }

  /**
   * Consumes the next token and converts it to a float. Fortran-style
   * exponent markers ('d' or 'D') are rewritten to 'E' before the conversion.
   *
   * @return the value produced by atof for the token, narrowed to float
   */
  float StringTokenizer::nextTokenAsFloat() {
    std::string text = nextToken();
    convertFortranNumber(text);

    const double wide = atof(text.c_str());
    return static_cast<float>(wide);
  }

  /**
   * Consumes the next token and converts it to a RealType. Fortran-style
   * exponent markers ('d' or 'D') are rewritten to 'E' before the conversion.
   *
   * @return the value produced by atof for the token, implicitly converted
   *         to RealType (a project-defined type not visible in this file)
   */
  RealType StringTokenizer::nextTokenAsDouble() {
    std::string text = nextToken();
    convertFortranNumber(text);

    const char* digits = text.c_str();
    return atof(digits);
  }

  /**
   * Returns the token that nextToken() would produce, without advancing the
   * current position. The scan runs on a local copy of the position.
   *
   * @return the upcoming token, or an empty string if no token is left
   */
  std::string StringTokenizer::peekNextToken() {
    std::string token;
    std::string::const_iterator cursor = currentPos_;

    // Deal with any delimiters sitting in front of the next token.
    while (cursor != end_ && isDelimiter(*cursor)) {
      if (returnTokens_) {
        // In returnTokens_ mode the delimiter itself is the token.
        token.push_back(*cursor);
        return token;
      }

      ++cursor;
    }

    // Collect the run of non-delimiter characters (possibly empty at the end).
    const std::string::const_iterator tokenStart = cursor;
    while (cursor != end_ && !isDelimiter(*cursor)) {
      ++cursor;
    }
    token.assign(tokenStart, cursor);

    return token;
  }

 /**
  * Consumes every remaining token and returns them in order of appearance.
  * After the call hasMoreTokens() returns false.
  *
  * @return the remaining tokens; empty if none are left
  */
 std::vector<std::string> StringTokenizer::getAllTokens() {
   std::vector<std::string> collected;

   for (; hasMoreTokens();) {
     collected.push_back(nextToken());
   }

   return collected;
 }
  /**
   * Rewrites, in place, every 'd' or 'D' in the given string to 'E', so that
   * a Fortran-style exponent such as "1.0d3" becomes "1.0E3".
   *
   * @param fortranNumber text to modify; every 'd'/'D' is replaced,
   *                      wherever it occurs
   */
  void StringTokenizer::convertFortranNumber(std::string& fortranNumber) {
    for (std::string::size_type pos = 0; pos < fortranNumber.size(); ++pos) {
      char& ch = fortranNumber[pos];

      if (ch == 'd' || ch == 'D') {
        ch = 'E';
      }
    }
  }

}//end namespace OpenMD

Revision:	1879
Committed:	Sun Jun 16 15:15:42 2013 UTC (12 years, 11 months ago) by gezelter
File size:	6563 byte(s)
Log Message:	MERGE OpenMD development 1783:1878 into trunk
#	Content
1	/*
2	* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3	*
4	* The University of Notre Dame grants you ("Licensee") a
5	* non-exclusive, royalty free, license to use, modify and
6	* redistribute this software in source and binary code form, provided
7	* that the following conditions are met:
8	*
9	* 1. Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	*
12	* 2. Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the
15	* distribution.
16	*
17	* This software is provided "AS IS," without a warranty of any
18	* kind. All express or implied conditions, representations and
19	* warranties, including any implied warranty of merchantability,
20	* fitness for a particular purpose or non-infringement, are hereby
21	* excluded. The University of Notre Dame and its licensors shall not
22	* be liable for any damages suffered by licensee as a result of
23	* using, modifying or distributing the software or its
24	* derivatives. In no event will the University of Notre Dame or its
25	* licensors be liable for any lost revenue, profit or data, or for
26	* direct, indirect, special, consequential, incidental or punitive
27	* damages, however caused and regardless of the theory of liability,
28	* arising out of the use of or inability to use software, even if the
29	* University of Notre Dame has been advised of the possibility of
30	* such damages.
31	*
32	* SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33	* research, please cite the appropriate papers when you publish your
34	* work. Good starting points are:
35	*
36	* [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37	* [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38	* [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).
39	* [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40	* [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41	*/
42
43	#include <iostream>
44	#include <iterator>
45	#include <sstream>
46	#include "utils/StringTokenizer.hpp"
47
48	namespace OpenMD {
49
50
51	StringTokenizer::StringTokenizer(const std::string & str, const std::string & delim)
52	: tokenString_(str), delim_(delim), returnTokens_(false),
53	currentPos_(tokenString_.begin()), end_(tokenString_.end()){
54
55	}
56
57	StringTokenizer::StringTokenizer(std::string::const_iterator& first, std::string::const_iterator& last,
58	const std::string & delim)
59	: tokenString_(first, last) , delim_(delim), returnTokens_(false),
60	currentPos_(tokenString_.begin()), end_(tokenString_.end()) {
61
62	}
63
64	StringTokenizer::StringTokenizer(const std::string&str, const std::string&delim,
65	bool returnTokens)
66	: tokenString_(str), delim_(delim), returnTokens_(returnTokens),
67	currentPos_(tokenString_.begin()), end_(tokenString_.end()) {
68
69	}
70
71	bool StringTokenizer::isDelimiter(const char c) {
72	return delim_.find(c) == std::string::npos ? false : true;
73	}
74
75	int StringTokenizer::countTokens() {
76
77	std::string::const_iterator tmpIter = currentPos_;
78	int numToken = 0;
79
80	while (true) {
81
82	//skip delimiter first
83	while( tmpIter != end_ && isDelimiter(*tmpIter)) {
84	++tmpIter;
85
86	if (returnTokens_) {
87	//if delimiter is consider as token
88	++numToken;
89	}
90	}
91
92	if (tmpIter == end_) {
93	break;
94	}
95
96	//encount a token here
97	while ( tmpIter != end_ && !isDelimiter(*tmpIter) ) {
98	++tmpIter;
99	}
100
101	++numToken;
102
103	}
104
105	return numToken;
106	}
107
108	bool StringTokenizer::hasMoreTokens() {
109
110	if (currentPos_ == end_) {
111	return false;
112	} else if (returnTokens_) {
113	return true;
114	} else {
115	std::string::const_iterator i = currentPos_;
116
117	//walk through the remaining string to check whether it contains non-delimeter or not
118	while(i != end_ && isDelimiter(*i)) {
119	++i;
120	}
121
122	return i != end_ ? true : false;
123	}
124	}
125
126	std::string StringTokenizer::nextToken() {
127	std::string result;
128
129	if(currentPos_ != end_) {
130	std::insert_iterator<std::string> insertIter(result, result.begin());
131
132	while( currentPos_ != end_ && isDelimiter(*currentPos_)) {
133
134	if (returnTokens_) {
135	*insertIter++ = *currentPos_++;
136	return result;
137	}
138
139	++currentPos_;
140	}
141
142	while (currentPos_ != end_ && !isDelimiter(*currentPos_)) {
143	*insertIter++ = *currentPos_++;
144	}
145
146	}
147
148	return result;
149	}
150
151	bool StringTokenizer::nextTokenAsBool() {
152	std::string token = nextToken();
153	std::istringstream iss(token);
154	bool result;
155
156	if (iss >> result) {
157	return result;
158	} else {
159	std::cerr << "unable to convert " << token << " to a bool" << std::endl;
160	return false;
161	}
162	}
163
164	//Since libstdc++(GCC 3.2) has an i/ostream::operator>>/<<(streambuf*) bug (Bug 9318)
165	//Instead of using iostream facility, we use C library
166	int StringTokenizer::nextTokenAsInt() {
167	std::string token = nextToken();
168
169	return atoi(token.c_str());
170	}
171
172	float StringTokenizer::nextTokenAsFloat() {
173	std::string token = nextToken();
174	convertFortranNumber(token);
175	return (float) (atof(token.c_str()));
176	}
177
178	RealType StringTokenizer::nextTokenAsDouble() {
179	std::string token = nextToken();
180	convertFortranNumber(token);
181	return atof(token.c_str());
182	}
183
184	std::string StringTokenizer::peekNextToken() {
185	std::string result;
186	std::string::const_iterator tmpIter = currentPos_;
187
188	if(tmpIter != end_) {
189	std::insert_iterator<std::string> insertIter(result, result.begin());
190
191	while(tmpIter != end_ && isDelimiter(*tmpIter)) {
192
193	if (returnTokens_) {
194	*insertIter++ = *tmpIter++;
195	return result;
196	}
197
198	++tmpIter;
199	}
200
201	while (tmpIter != end_ && !isDelimiter(*tmpIter)) {
202	*insertIter++ = *tmpIter++;
203	}
204	}
205
206	return result;
207	}
208
209	std::vector<std::string> StringTokenizer::getAllTokens() {
210	std::vector<std::string> tokens;
211	while (hasMoreTokens()) {
212	tokens.push_back(nextToken());
213	}
214	return tokens;
215	}
216	void StringTokenizer::convertFortranNumber(std::string& fortranNumber) {
217	std::string::iterator i;
218	for(i = fortranNumber.begin(); i != fortranNumber.end(); ++i) {
219	if (*i == 'd' || *i == 'D') {
220	*i = 'E';
221	}
222	}
223	}
224
225	}//end namespace OpenMD
226