| 1 | header | 
| 2 | { | 
| 3 |  | 
| 4 | #include "antlr/CharScanner.hpp" | 
| 5 | #include "utils/StringUtils.hpp" | 
| 6 | #include "mdParser/FilenameObserver.hpp" | 
| 7 | } | 
| 8 | /* File-level option: selects C++ as the code-generation target for both grammar classes below. */ | 
| 9 | options | 
| 10 | { | 
| 11 | language = "Cpp"; | 
| 12 | } | 
| 13 | /* MDParser: token-stream parser for the MD input format; AST construction is enabled (buildAST = true). */ | 
| 14 | class MDParser extends Parser; | 
| 15 | /* k: lookahead depth in tokens. exportVocab: name under which the token vocabulary is exported. The two codeGen* thresholds tune when the generator emits switch statements and bitset tests. */ | 
| 16 | options | 
| 17 | { | 
| 18 | k = 3; | 
| 19 | exportVocab = MD; | 
| 20 | buildAST = true; | 
| 21 | codeGenMakeSwitchThreshold = 2; | 
| 22 | codeGenBitsetTestThreshold = 3; | 
| 23 |  | 
| 24 | } | 
| 25 | /* Keyword tokens, each bound to its literal text, plus ENDBLOCK, which has no text: the block rules below assign that type to the closing RCURLY node of every block. */ | 
| 26 | tokens | 
| 27 | { | 
| 28 | COMPONENT   = "component"; | 
| 29 | MOLECULE    = "molecule"; | 
| 30 | ZCONSTRAINT = "zconstraint"; | 
| 31 | ATOM        = "atom"; | 
| 32 | BOND        = "bond"; | 
| 33 | BEND        = "bend"; | 
| 34 | TORSION     = "torsion"; | 
| 35 | INVERSION   = "inversion"; | 
| 36 | RIGIDBODY   = "rigidBody"; | 
| 37 | CUTOFFGROUP = "cutoffGroup"; | 
| 38 | FRAGMENT    = "fragment"; | 
| 39 | MEMBERS     = "members"; | 
| 40 | CENTER      = "center"; | 
| 41 | POSITION    = "position"; | 
| 42 | ORIENTATION = "orientation"; | 
| 43 | ENDBLOCK; | 
| 44 | } | 
| 45 |  | 
| 46 | /* mdfile: a sequence of zero or more statements. */ | 
| 47 | mdfile  : (statement)* | 
| 48 | ; | 
| 49 | /* statement: an assignment, or a component, molecule, or zconstraint block. */ | 
| 50 | statement : assignment | 
| 51 | | componentblock | 
| 52 | | moleculeblock | 
| 53 | | zconstraintblock | 
| 54 | ; | 
| 55 | /* assignment: ID = constant ; -- ASSIGNEQUAL becomes the subtree root (^) and the semicolon is matched but left out of the AST (!). */ | 
| 56 | assignment  : ID ASSIGNEQUAL^ constant SEMICOLON! | 
| 57 | ; | 
| 58 | /* constant: right-hand side of an assignment -- an integer, a floating-point number, an identifier, or a string literal. */ | 
| 59 | constant    : intConst | 
| 60 | | floatConst | 
| 61 | | ID | 
| 62 | | StringLiteral | 
| 63 | ; | 
| 64 | /* componentblock: component { assignment* }. COMPONENT is the subtree root, the opening brace is dropped, and the closing brace stays in the AST with its type changed to ENDBLOCK. */ | 
| 65 | componentblock  : COMPONENT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 66 | ; | 
| 67 | /* zconstraintblock: zconstraint { assignment* }. ZCONSTRAINT is the subtree root, the opening brace is dropped, and the closing brace is retyped to ENDBLOCK. */ | 
| 68 | zconstraintblock  : ZCONSTRAINT^ LCURLY! (assignment)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 69 | ; | 
| 70 | /* moleculeblock: molecule { moleculestatement* }. MOLECULE is the subtree root, the opening brace is dropped, and the closing brace is retyped to ENDBLOCK. */ | 
| 71 | moleculeblock : MOLECULE^ LCURLY! (moleculestatement)*  RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 72 | ; | 
| 73 | /* moleculestatement: what may appear inside a molecule block -- an assignment or any of the nested atom, bond, bend, torsion, inversion, rigidBody, cutoffGroup, or fragment blocks. */ | 
| 74 | moleculestatement : assignment | 
| 75 | | atomblock | 
| 76 | | bondblock | 
| 77 | | bendblock | 
| 78 | | torsionblock | 
| 79 | | inversionblock | 
| 80 | | rigidbodyblock | 
| 81 | | cutoffgroupblock | 
| 82 | | fragmentblock | 
| 83 | ; | 
| 84 | /* atomblock: atom[intConst] { atomstatement* }. The bracketed integer is required and kept as a child of the ATOM root; brackets and the opening brace are dropped, and the closing brace is retyped to ENDBLOCK. */ | 
| 85 | atomblock : ATOM^ LBRACKET! intConst RBRACKET! LCURLY! (atomstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 86 | ; | 
| 87 | /* atomstatement: an assignment, or position(...) / orientation(...) followed by a semicolon, each taking a comma-separated tuple of integer or floating-point numbers. */ | 
| 88 | atomstatement : assignment | 
| 89 | | POSITION^ LPAREN! doubleNumberTuple RPAREN! SEMICOLON! | 
| 90 | | ORIENTATION^  LPAREN! doubleNumberTuple RPAREN! SEMICOLON! | 
| 91 | ; | 
| 92 |  | 
| 93 | /* bondblock: bond { bondstatement* } with an optional [intConst] in front of the brace. Unlike atomblock, the bracketed integer is matched but excluded from the AST (intConst!). */ | 
| 94 | bondblock : BOND^ (LBRACKET! intConst! RBRACKET!)?  LCURLY!(bondstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 95 | ; | 
| 96 | /* bondstatement: an assignment, or members(...) with a comma-separated list of integers, terminated by a semicolon. */ | 
| 97 | bondstatement : assignment | 
| 98 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 99 | ; | 
| 100 | /* bendblock: bend { bendstatement* } with an optional [intConst]; the bracketed integer is matched but excluded from the AST. The closing brace is retyped to ENDBLOCK. */ | 
| 101 | bendblock : BEND^ (LBRACKET! intConst! RBRACKET!)? LCURLY!  (bendstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 102 | ; | 
| 103 | /* bendstatement: an assignment, or members(...) with a comma-separated list of integers, terminated by a semicolon. */ | 
| 104 | bendstatement : assignment | 
| 105 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 106 | ; | 
| 107 | /* torsionblock: torsion { torsionstatement* } with an optional [intConst]; the bracketed integer is matched but excluded from the AST. The closing brace is retyped to ENDBLOCK. */ | 
| 108 | torsionblock  : TORSION^ (LBRACKET! intConst! RBRACKET!)?  LCURLY!(torsionstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 109 | ; | 
| 110 | /* torsionstatement: an assignment, or members(...) with a comma-separated list of integers, terminated by a semicolon. */ | 
| 111 | torsionstatement  : assignment | 
| 112 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 113 | ; | 
| 114 | /* inversionblock: inversion { inversionstatement* } with an optional [intConst]; the bracketed integer is matched but excluded from the AST. The closing brace is retyped to ENDBLOCK. */ | 
| 115 | inversionblock  : INVERSION^ (LBRACKET! intConst! RBRACKET!)?  LCURLY!(inversionstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 116 | ; | 
| 117 | /* inversionstatement: an assignment, or center(intConst) taking exactly one integer, terminated by a semicolon. */ | 
| 118 | inversionstatement  : assignment | 
| 119 | | CENTER^ LPAREN! intConst RPAREN! SEMICOLON! | 
| 120 | ; | 
| 121 | /* rigidbodyblock: rigidBody[intConst] { rigidbodystatement* }. The bracketed integer is required and kept in the AST; brackets and the opening brace are dropped, and the closing brace is retyped to ENDBLOCK. */ | 
| 122 | rigidbodyblock  : RIGIDBODY^  LBRACKET! intConst RBRACKET! LCURLY!(rigidbodystatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 123 | ; | 
| 124 | /* rigidbodystatement: an assignment, or members(...) with a comma-separated list of integers, terminated by a semicolon. */ | 
| 125 | rigidbodystatement  : assignment | 
| 126 | | MEMBERS^ LPAREN!  inttuple  RPAREN! SEMICOLON! | 
| 127 | ; | 
| 128 | /* cutoffgroupblock: cutoffGroup { cutoffgroupstatement* } with an optional [intConst]; the bracketed integer is matched but excluded from the AST. The closing brace is retyped to ENDBLOCK. */ | 
| 129 | cutoffgroupblock  : CUTOFFGROUP^ (LBRACKET! intConst! RBRACKET!)? LCURLY! (cutoffgroupstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 130 | ; | 
| 131 | /* cutoffgroupstatement: an assignment, or members(...) with a comma-separated list of integers, terminated by a semicolon. */ | 
| 132 | cutoffgroupstatement  : assignment | 
| 133 | | MEMBERS^ LPAREN! inttuple RPAREN! SEMICOLON! | 
| 134 | ; | 
| 135 | /* fragmentblock: fragment[intConst] { fragmentstatement* }. The bracketed integer is required and kept in the AST; the closing brace is retyped to ENDBLOCK. */ | 
| 136 | fragmentblock : FRAGMENT^ LBRACKET! intConst RBRACKET! LCURLY! (fragmentstatement)* RCURLY {#RCURLY->setType(ENDBLOCK);} | 
| 137 | ; | 
| 138 | /* fragmentstatement: currently only an assignment. */ | 
| 139 | fragmentstatement : assignment | 
| 140 | ; | 
| 141 |  | 
| 142 |  | 
| 143 | /* doubleNumberTuple: one or more numbers (integer or floating-point) separated by commas; the commas are left out of the AST. */ | 
| 144 | doubleNumberTuple   : doubleNumber (COMMA! doubleNumber)* | 
| 145 | ; | 
| 146 | /* inttuple: one or more integers separated by commas; the commas are left out of the AST. */ | 
| 147 | inttuple      : intConst (COMMA! intConst)* | 
| 148 | ; | 
| 149 | /* intConst: an integer token, either NUM_INT or NUM_LONG. */ | 
| 150 | protected | 
| 151 | intConst | 
| 152 | :  NUM_INT | NUM_LONG | 
| 153 | ; | 
| 154 | /* doubleNumber: any numeric constant, integer or floating-point. */ | 
| 155 | protected | 
| 156 | doubleNumber  : | 
| 157 | (intConst | floatConst) | 
| 158 | ; | 
| 159 | /* floatConst: a floating-point token, either NUM_FLOAT or NUM_DOUBLE. */ | 
| 160 | protected | 
| 161 | floatConst | 
| 162 | : | 
| 163 | NUM_FLOAT | NUM_DOUBLE | 
| 164 | ; | 
| 165 |  | 
| 166 |  | 
| 167 | /* MDLexer: character-level lexer producing the MD token vocabulary consumed by MDParser. */ | 
| 168 | class MDLexer extends Lexer; | 
| 169 | /* k: lookahead depth in characters. testLiterals = false: rules do not check the literals table unless they opt in (the ID rule does). */ | 
| 170 | options | 
| 171 | { | 
| 172 | k = 3; | 
| 173 | exportVocab = MD; | 
| 174 | testLiterals = false; | 
| 175 | } | 
| 176 | /* DOT has no rule of its own; NUM_INT assigns this type when it matches a '.'. */ | 
| 177 | tokens { | 
| 178 | DOT; | 
| 179 | } | 
| 180 | /* Members added to the generated lexer class: line-count bookkeeping and the observer notified of file names found in line directives. */ | 
| 181 | { | 
| 182 | /* deferredLineCount: line breaks consumed inside a token (block comment, continuation line, string literal) that have not yet been passed on to CharScanner::newline(). */ | 
| 183 | /* observer: receives the file name of every line directive. NOTE(review): neither member is initialized in this block; presumably setObserver() and initDeferredLineCount() are called before lexing starts -- confirm. */ | 
| 184 | int deferredLineCount; | 
| 185 | FilenameObserver* observer; | 
| 186 |  | 
| 187 | public: | 
| 188 | void setObserver(FilenameObserver* osv) {observer = osv;} | 
| 189 | void initDeferredLineCount() { deferredLineCount = 0;} | 
| 190 | void deferredNewline() { | 
| 191 | deferredLineCount++; | 
| 192 | } | 
| 193 |  | 
| 194 | /* newline(): first reports every deferred line break, resetting the counter to zero, then reports the line break that triggered this call. */ | 
| 195 | virtual void newline() { | 
| 196 | for (;deferredLineCount>0;deferredLineCount--) { | 
| 197 | CharScanner::newline(); | 
| 198 | } | 
| 199 | CharScanner::newline(); | 
| 200 | } | 
| 201 |  | 
| 202 | } | 
| 203 |  | 
| 204 | /* Single-character punctuation tokens. COLON and QUESTIONMARK are not referenced by the parser rules visible in this file. */ | 
| 205 | // Operators: | 
| 206 |  | 
| 207 | ASSIGNEQUAL     : '=' ; | 
| 208 | COLON           : ':' ; | 
| 209 | COMMA           : ',' ; | 
| 210 | QUESTIONMARK    : '?' ; | 
| 211 | SEMICOLON       : ';' ; | 
| 212 |  | 
| 213 | LPAREN          : '(' ; | 
| 214 | RPAREN          : ')' ; | 
| 215 | LBRACKET        : '[' ; | 
| 216 | RBRACKET        : ']' ; | 
| 217 | LCURLY          : '{' ; | 
| 218 | RCURLY          : '}' ; | 
| 219 | /* Whitespace: blanks, tabs, form feeds, line breaks, and backslash-newline continuations; everything matched here is skipped (Token::SKIP). A plain line break is reported at once through newline(); a continuation only records a deferred line break, which the lexer's newline() override reports on its next call. */ | 
| 220 | Whitespace | 
| 221 | : | 
| 222 | ( // whitespace ignored | 
| 223 | (' ' |'\t' | '\f') | 
| 224 | | // handle newlines | 
| 225 | ( '\r' '\n' // MS | 
| 226 | | '\r'    // Mac | 
| 227 | | '\n'    // Unix | 
| 228 | ) { newline(); } | 
| 229 | | // handle continuation lines | 
| 230 | ( '\\' '\r' '\n'  // MS | 
| 231 | | '\\' '\r'   // Mac | 
| 232 | | '\\' '\n'   // Unix | 
| 233 | ) { /* only defer the line count; the lexer does not write diagnostics to stdout */ | 
| 234 | deferredNewline();} | 
| 235 | ) | 
| 236 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;} | 
| 237 | ; | 
| 238 | /* Comment: a C-style block comment, skipped. Inside the body a '*' is consumed only when the character after it is not '/', each line break is recorded with deferredNewline(), and any other character is consumed as is. */ | 
| 239 | Comment | 
| 240 | : | 
| 241 | "/*" | 
| 242 | ( {LA(2) != '/'}? '*' | 
| 243 | | EndOfLine {deferredNewline();} | 
| 244 | | ~('*'| '\r' | '\n') | 
| 245 | )* | 
| 246 | "*/" {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;} | 
| 247 | ; | 
| 248 | /* CPPComment: a double-slash comment running to the end of the line; skipped, and newline() is called for the terminating line break. NOTE(review): EndOfLine is mandatory here, so a comment on a final line with no line break would not match -- confirm inputs always end with a line break. */ | 
| 249 | CPPComment | 
| 250 | : | 
| 251 | "//" (~('\n' | '\r'))* EndOfLine | 
| 252 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();} | 
| 253 | ; | 
| 254 | /* PREPROC_DIRECTIVE: a '#' followed by a line directive. The token is skipped, and newline() is called after LineDirective has set the line number. */ | 
| 255 | PREPROC_DIRECTIVE | 
| 256 | options{paraphrase = "a line directive";} | 
| 257 | : | 
| 258 | '#' LineDirective | 
| 259 | {_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP; newline();} | 
| 260 | ; | 
| 261 | /* LineDirective: body of a line directive -- optional "line" keyword, a decimal line number, a quoted file name, optional trailing decimal flags, then a line break. Clears the deferred line count, sets the current line to (number - 1), strips the surrounding quotes from the file name and passes it to observer->notify(). NOTE(review): observer is dereferenced unconditionally; presumably setObserver() was called with a valid pointer beforehand -- confirm. */ | 
| 262 | protected | 
| 263 | LineDirective | 
| 264 | : | 
| 265 | { | 
| 266 | deferredLineCount = 0; | 
| 267 | } | 
| 268 | ("line")?  // this would be for if the directive started "#line" | 
| 269 | (Space)+ | 
| 270 | n:Decimal { setLine(oopse::lexi_cast<int>(n->getText()) - 1); } | 
| 271 | (Space)+ | 
| 272 | (sl:StringLiteral) {std::string filename = sl->getText().substr(1,sl->getText().length()-2); observer->notify(filename);} | 
| 273 | ((Space)+ Decimal)* // To support cpp flags (GNU) | 
| 274 | EndOfLine | 
| 275 | ; | 
| 276 | /* Space: one blank, tab, or form feed; a helper used by LineDirective. */ | 
| 277 | protected | 
| 278 | Space | 
| 279 | : | 
| 280 | (' '|'\t'|'\f') | 
| 281 | ; | 
| 282 |  | 
| 283 |  | 
| 284 | // Literals: | 
| 285 |  | 
| 286 | /* | 
| 287 | * Note that we do NOT handle tri-graphs nor multi-byte sequences. | 
| 288 | */ | 
| 289 | /* CharLiteral: a single-quoted character constant holding exactly one Escape sequence or one character other than a single quote. */ | 
| 290 | /* | 
| 291 | * Note that we can't have empty character constants (even though we | 
| 292 | * can have empty strings :-). | 
| 293 | */ | 
| 294 | CharLiteral | 
| 295 | : | 
| 296 | '\'' (Escape | ~('\'')) '\'' | 
| 297 | ; | 
| 298 | /* StringLiteral: a double-quoted string. The body may contain Escape sequences, backslash-newline continuations (each recorded with deferredNewline()), and any character other than a double quote, carriage return, line feed, or backslash. The token text includes both quotes. */ | 
| 299 | /* | 
| 300 | * Can't have raw imbedded newlines in string constants.  Strict reading of | 
| 301 | * the standard gives odd dichotomy between newlines & carriage returns. | 
| 302 | * Go figure. | 
| 303 | */ | 
| 304 | StringLiteral | 
| 305 | : | 
| 306 | '"' | 
| 307 | ( Escape | 
| 308 | | | 
| 309 | ( "\\\r\n"   // MS | 
| 310 | | "\\\r"     // MAC | 
| 311 | | "\\\n"     // Unix | 
| 312 | ) {deferredNewline();} | 
| 313 | | | 
| 314 | ~('"'|'\r'|'\n'|'\\') | 
| 315 | )* | 
| 316 | '"' | 
| 317 | ; | 
| 318 | /* EndOfLine: one line break in any of the three conventions (CR LF, CR, LF). Ambiguity warnings between the CR LF and bare CR alternatives are switched off. This rule does not touch the line counter itself; callers do. */ | 
| 319 | protected | 
| 320 | EndOfLine | 
| 321 | : | 
| 322 | ( options{generateAmbigWarnings = false;}: | 
| 323 | "\r\n"  // MS | 
| 324 | | '\r'    // Mac | 
| 325 | | '\n'    // Unix | 
| 326 | ) | 
| 327 | ; | 
| 328 |  | 
| 329 | /* | 
| 330 | * Handle the various escape sequences. | 
| 331 | * | 
| 332 | * Note carefully that these numeric escape *sequences* are *not* of the | 
| 333 | * same form as the C language numeric *constants*. | 
| 334 | * | 
| 335 | * There is no such thing as a binary numeric escape sequence. | 
| 336 | * | 
| 337 | * Octal escape sequences are either 1, 2, or 3 octal digits exactly. | 
| 338 | * | 
| 339 | * There is no such thing as a decimal escape sequence. | 
| 340 | * | 
| 341 | * Hexadecimal escape sequences are begun with a leading \x and continue | 
| 342 | * until a non-hexadecimal character is found. | 
| 343 | * | 
| 344 | * No real handling of tri-graph sequences, yet. | 
| 345 | */ | 
| 346 | /* Escape: a backslash followed by a single-character escape, an octal escape, or an 'x' hexadecimal escape of one or more hex digits. NOTE(review): the digits after the first octal digit are matched with Digit ('0'..'9'), which is wider than octal -- confirm this is intended. */ | 
| 347 | protected | 
| 348 | Escape | 
| 349 | : | 
| 350 | '\\' | 
| 351 | ( options{warnWhenFollowAmbig=false;}: | 
| 352 | 'a' | 
| 353 | | 'b' | 
| 354 | | 'f' | 
| 355 | | 'n' | 
| 356 | | 'r' | 
| 357 | | 't' | 
| 358 | | 'v' | 
| 359 | | '"' | 
| 360 | | '\'' | 
| 361 | | '\\' | 
| 362 | | '?' | 
| 363 | | ('0'..'3') (options{warnWhenFollowAmbig=false;}: Digit (options{warnWhenFollowAmbig=false;}: Digit)? )? | 
| 364 | | ('4'..'7') (options{warnWhenFollowAmbig=false;}: Digit)? | 
| 365 | | 'x' (options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F')+ | 
| 366 | ) | 
| 367 | ; | 
| 368 |  | 
| 369 | /* Vocabulary: matches any single character in the range '\3'..'\377'. Protected, and not referenced by any other rule visible in this file. */ | 
| 370 | protected | 
| 371 | Vocabulary | 
| 372 | : | 
| 373 | '\3'..'\377' | 
| 374 | ; | 
| 375 |  | 
| 376 | /* ID: a letter or underscore followed by any number of letters, digits, or underscores. testLiterals = true makes the matched text be checked against the literals table, which is how keywords declared with literal text receive their own token types. */ | 
| 377 | ID | 
| 378 | options {testLiterals = true;} | 
| 379 | : | 
| 380 | ('a'..'z'|'A'..'Z'|'_') | 
| 381 | ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* | 
| 382 | ; | 
| 383 |  | 
| 384 | /* Digit: a single decimal digit; used by Escape. */ | 
| 385 | protected | 
| 386 | Digit | 
| 387 | : | 
| 388 | '0'..'9' | 
| 389 | ; | 
| 390 | /* Decimal: one or more decimal digits; used by LineDirective for the line number and trailing flags. */ | 
| 391 | protected | 
| 392 | Decimal | 
| 393 | : | 
| 394 | ('0'..'9')+ | 
| 395 | ; | 
| 396 |  | 
| 397 | // hexadecimal digit (again, note it's protected!) | 
| 398 | protected | 
| 399 | HEX_DIGIT | 
| 400 | :       ('0'..'9'|'A'..'F'|'a'..'f') | 
| 401 | ; | 
| 402 |  | 
| 403 | /* NUM_INT: despite its name, lexes every numeric form and a lone '.'. An optional sign is accepted first. The actions override _ttype: DOT for a '.' with no digits after it, NUM_LONG for an 'l'/'L' suffix, NUM_FLOAT when the float suffix contains 'f' or 'F', and NUM_DOUBLE for every other floating-point form; otherwise the type stays NUM_INT. */ | 
| 404 | // a numeric literal | 
| 405 | NUM_INT | 
| 406 | { | 
| 407 | bool isDecimal = false; | 
| 408 | ANTLR_USE_NAMESPACE(antlr)RefToken t = ANTLR_USE_NAMESPACE(antlr)nullToken; | 
| 409 | } | 
| 410 | : ('+' | '-')? | 
| 411 | ( | 
| 412 | '.' {_ttype = DOT;} | 
| 413 | (   ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})? | 
| 414 | { | 
| 415 | if ( t && | 
| 416 | (t->getText().find('f') != ANTLR_USE_NAMESPACE(std)string::npos || | 
| 417 | t->getText().find('F') != ANTLR_USE_NAMESPACE(std)string::npos ) ) { | 
| 418 | _ttype = NUM_FLOAT; | 
| 419 | } | 
| 420 | else { | 
| 421 | _ttype = NUM_DOUBLE; // assume double | 
| 422 | } | 
| 423 | } | 
| 424 | )? | 
| 425 | /* Integer part: a leading '0' (alone, hex, float with leading zero, or octal) or a non-zero decimal. */ | 
| 426 | |       (       '0' {isDecimal = true;} // special case for just '0' | 
| 427 | (       ('x'|'X') | 
| 428 | (                                                                                       // hex | 
| 429 | // the 'e'|'E' and float suffix stuff look | 
| 430 | // like hex digits, hence the (...)+ doesn't | 
| 431 | // know when to stop: ambig.  ANTLR resolves | 
| 432 | // it correctly by matching immediately.  It | 
| 433 | // is therefore ok to hush warning. | 
| 434 | options { | 
| 435 | warnWhenFollowAmbig=false; | 
| 436 | } | 
| 437 | :       HEX_DIGIT | 
| 438 | )+ | 
| 439 | |       //float or double with leading zero | 
| 440 | (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+ | 
| 441 | |       ('0'..'7')+                                                                     // octal | 
| 442 | )? | 
| 443 | |       ('1'..'9') ('0'..'9')*  {isDecimal=true;}               // non-zero decimal | 
| 444 | ) | 
| 445 | (       ('l'|'L') { _ttype = NUM_LONG; } | 
| 446 | /* NOTE(review): isDecimal is set for a leading '0' and never cleared, so the float tail below is also attempted after the hex and octal alternatives -- confirm intended. */ | 
| 447 | // only check to see if it's a float if looks like decimal so far | 
| 448 | |       {isDecimal}? | 
| 449 | (   '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})? | 
| 450 | |   EXPONENT (f3:FLOAT_SUFFIX {t=f3;})? | 
| 451 | |   f4:FLOAT_SUFFIX {t=f4;} | 
| 452 | ) | 
| 453 | { | 
| 454 | if ( t && | 
| 455 | (t->getText().find('f') != ANTLR_USE_NAMESPACE(std)string::npos || | 
| 456 | t->getText().find('F') != ANTLR_USE_NAMESPACE(std)string::npos ) ) { | 
| 457 | _ttype = NUM_FLOAT; | 
| 458 | } | 
| 459 | else { | 
| 460 | _ttype = NUM_DOUBLE; // assume double | 
| 461 | } | 
| 462 | } | 
| 463 | )? | 
| 464 | ) | 
| 465 | ; | 
| 466 | /* EXPONENT: an exponent marker ('e', 'E', 'd', or 'D'), an optional sign, and one or more digits. */ | 
| 467 | // a couple protected methods to assist in matching floating point numbers | 
| 468 | protected | 
| 469 | EXPONENT | 
| 470 | :       ('e'|'E'|'d'|'D') ('+'|'-')? ('0'..'9')+ | 
| 471 | ; | 
| 472 | /* FLOAT_SUFFIX: a single type-suffix letter; NUM_INT yields NUM_FLOAT for 'f'/'F' and NUM_DOUBLE for 'd'/'D'. */ | 
| 473 | protected | 
| 474 | FLOAT_SUFFIX | 
| 475 | :       'f'|'F'|'d'|'D' | 
| 476 | ; | 