| 1 | 
tim | 
2440 | 
/********************************************************************** | 
| 2 | 
  | 
  | 
tokenst.cpp - Tokenize a string. | 
| 3 | 
  | 
  | 
  | 
| 4 | 
  | 
  | 
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. | 
| 5 | 
  | 
  | 
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison | 
| 6 | 
  | 
  | 
  | 
| 7 | 
  | 
  | 
This file is part of the Open Babel project. | 
| 8 | 
  | 
  | 
For more information, see <http://openbabel.sourceforge.net/> | 
| 9 | 
  | 
  | 
  | 
| 10 | 
  | 
  | 
This program is free software; you can redistribute it and/or modify | 
| 11 | 
  | 
  | 
it under the terms of the GNU General Public License as published by | 
| 12 | 
  | 
  | 
the Free Software Foundation version 2 of the License. | 
| 13 | 
  | 
  | 
  | 
| 14 | 
  | 
  | 
This program is distributed in the hope that it will be useful, | 
| 15 | 
  | 
  | 
but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 | 
  | 
  | 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 17 | 
  | 
  | 
GNU General Public License for more details. | 
| 18 | 
  | 
  | 
***********************************************************************/ | 
| 19 | 
  | 
  | 
 | 
| 20 | 
  | 
  | 
#ifdef WIN32 | 
| 21 | 
  | 
  | 
#pragma warning (disable : 4786) | 
| 22 | 
  | 
  | 
#endif | 
| 23 | 
  | 
  | 
 | 
| 24 | 
  | 
  | 
#include <algorithm> | 
| 25 | 
  | 
  | 
#include <vector> | 
| 26 | 
  | 
  | 
#include <string> | 
| 27 | 
  | 
  | 
 | 
| 28 | 
gezelter | 
2450 | 
#include "config.h" | 
| 29 | 
tim | 
2440 | 
 | 
| 30 | 
  | 
  | 
using namespace std; | 
| 31 | 
  | 
  | 
/* | 
| 32 | 
  | 
  | 
OBAPI bool tokenize(vector<string> &, const char *, const char *); | 
| 33 | 
  | 
  | 
OBAPI char *trim_spaces(char *string); | 
| 34 | 
  | 
  | 
OBAPI bool tokenize(vector<string> &vcr, string &s, const char *delimstr,int limit=-1); | 
| 35 | 
  | 
  | 
*/ | 
| 36 | 
  | 
  | 
namespace OpenBabel | 
| 37 | 
  | 
  | 
{ | 
| 38 | 
  | 
  | 
 | 
| 39 | 
  | 
  | 
  //! Break a C string into tokens.
  //!
  //! A run of consecutive delimiter characters counts as one separator, so
  //! empty tokens are never produced. The last token is terminated by the end
  //! of the input; it does not need to be followed by a delimiter.
  //!
  //! \param vcr      Receives the tokens; its previous contents are discarded.
  //! \param buf      NUL-terminated text to split.
  //! \param delimstr NUL-terminated set of delimiter characters
  //!                 (for example " \t\n" to split on whitespace).
  //! \return false if \p buf or \p delimstr is a null pointer (\p vcr is left
  //!         empty in that case), true otherwise.
OBAPI bool tokenize(vector<string> &vcr, const char *buf, const char *delimstr)
{
    vcr.clear();
    if (!buf || !delimstr)
        return(false);

    const string s(buf);
    string::size_type startpos = 0;

    for (;;)
    {
        // Skip the delimiters in front of the next token; npos means only
        // delimiters (or nothing at all) are left.
        startpos = s.find_first_not_of(delimstr, startpos);
        if (startpos == string::npos)
            break;

        const string::size_type endpos = s.find_first_of(delimstr, startpos);
        if (endpos == string::npos)
        {
            // No delimiter after this token: it runs to the end of the input.
            // Handling this case explicitly (rather than appending a '\n'
            // sentinel) keeps the final token even when '\n' is not one of
            // the delimiters.
            vcr.push_back(s.substr(startpos));
            break;
        }

        vcr.push_back(s.substr(startpos, endpos - startpos));
        startpos = endpos + 1;
    }

    return(true);
}
| 64 | 
  | 
  | 
 | 
| 65 | 
  | 
  | 
  //! Strip leading and trailing space characters from a C string, in place.
  //!
  //! Only the space character ' ' is removed; tabs, newlines and other
  //! whitespace are left alone. Leading spaces are skipped by advancing the
  //! returned pointer, and trailing spaces are overwritten with NUL bytes in
  //! the caller's buffer.
  //!
  //! \param string Modifiable NUL-terminated buffer; may be a null pointer.
  //! \return Pointer to the first non-space character inside \p string (or to
  //!         its terminating NUL if it holds only spaces). This can differ
  //!         from the pointer that was passed in. A null pointer is returned
  //!         unchanged.
OBAPI char *trim_spaces(char *string)
{
    if (!string)
        return string;

    // Leading spaces: step over them. The terminating NUL stops the scan.
    while (*string == ' ')
        ++string;

    // Trailing spaces: cut them off by moving the terminator backwards.
    // size_t matches strlen's return type, so long strings are not narrowed.
    size_t length = strlen(string);
    while (length > 0 && string[length - 1] == ' ')
        string[--length] = '\0';

    return(string);
}
| 91 | 
  | 
  | 
 | 
| 92 | 
  | 
  | 
//! Break a string into tokens, stopping after at most \p limit tokens.
//!
//! A run of consecutive delimiter characters counts as one separator. Once
//! \p limit tokens have been extracted, everything after the delimiter that
//! ended the last of them is appended to \p vcr unsplit as one extra element
//! (it may be empty, and it may itself contain delimiters). A \p limit below
//! 1 never matches the running token count, so the whole string is split.
//!
//! \param vcr      Receives the tokens; its previous contents are discarded.
//! \param s        Text to split. It is not modified.
//! \param delimstr NUL-terminated set of delimiter characters.
//! \param limit    Number of tokens to split off before the remainder is
//!                 stored verbatim.
//! \return false if \p delimstr is a null pointer (\p vcr is left empty in
//!         that case), true otherwise.
OBAPI bool tokenize(vector<string> &vcr, string &s, const char *delimstr, int limit)
{
    vcr.clear();
    if (!delimstr)
        return(false);

    string::size_type startpos = 0;
    int matched = 0;

    for (;;)
    {
        startpos = s.find_first_not_of(delimstr, startpos);
        // When startpos is npos this search also yields npos.
        const string::size_type endpos = s.find_first_of(delimstr, startpos);

        // Positions are compared against npos directly instead of against a
        // copy of s.size() held in an unsigned int, which would truncate for
        // strings longer than UINT_MAX characters.
        if (endpos == string::npos)
        {
            // No delimiter left: whatever follows startpos is the last token.
            if (startpos != string::npos)
                vcr.push_back(s.substr(startpos));
            break;
        }

        vcr.push_back(s.substr(startpos, endpos - startpos));

        ++matched;
        if (matched == limit)
        {
            // Limit reached: keep the rest of the text as a single element.
            // endpos + 1 is at most s.size(), which substr accepts.
            vcr.push_back(s.substr(endpos + 1));
            break;
        }

        startpos = endpos + 1;
    }
    return(true);
}
| 131 | 
  | 
  | 
 | 
| 132 | 
  | 
  | 
//! Remove leading and trailing whitespace (space, tab, newline, carriage
//! return) from \p txt in place. A string made up only of those characters
//! becomes empty.
OBAPI void Trim(string& txt)
{
    const char blanks[] = " \t\n\r";

    // Trailing side first: find the last character worth keeping.
    const string::size_type lastKept = txt.find_last_not_of(blanks);
    if (lastKept == string::npos)
    {
        // Nothing but whitespace (or already empty).
        txt.erase();
        return;
    }
    txt.erase(lastKept + 1);

    // At least one non-blank character survives, so this search cannot fail.
    const string::size_type firstKept = txt.find_first_not_of(blanks);
    txt.erase(0, firstKept);
}
| 146 | 
  | 
  | 
 | 
| 147 | 
  | 
  | 
} // end namespace OpenBabel | 
| 148 | 
  | 
  | 
 | 
| 149 | 
  | 
  | 
//! \file tokenst.cpp | 
| 150 | 
  | 
  | 
//! \brief Tokenize a string. |