module hunt.http.codec.http.model.MultipartParser;

import hunt.http.codec.http.model.BadMessageException;

import hunt.container.ByteBuffer;
import hunt.container.BufferUtils;
import hunt.string;
import hunt.lang.exception;
import hunt.logging;
import hunt.util.SearchPattern;

import std.algorithm;
import std.conv;
import std.format;


/* ------------------------------------------------------------ */

/**
 * A parser for MultiPart content type.
 *
 * The parser is an incremental state machine: feed it buffers through
 * $(D parse) and it reports what it finds through the callbacks of the
 * supplied $(D MultipartParserHandler).
 *
 * @see <a href="https://tools.ietf.org/html/rfc2046#section-5.1">https://tools.ietf.org/html/rfc2046#section-5.1</a>
 * @see <a href="https://tools.ietf.org/html/rfc2045">https://tools.ietf.org/html/rfc2045</a>
 */
class MultipartParser {
    enum byte COLON = ':';
    enum byte TAB = 0x09;
    enum byte LINE_FEED = 0x0A;
    enum byte CARRIAGE_RETURN = 0x0D;
    enum byte SPACE = 0x20;
    enum byte[] CRLF = [CARRIAGE_RETURN, LINE_FEED];
    enum byte SEMI_COLON = ';';

    /// States of the per-part header (field) sub-parser.
    enum FieldState {
        FIELD,
        IN_NAME,
        AFTER_NAME,
        VALUE,
        IN_VALUE
    }

    /// States of the overall multipart message parser.
    enum State {
        PREAMBLE,
        DELIMITER,
        DELIMITER_PADDING,
        DELIMITER_CLOSE,
        BODY_PART,
        FIRST_OCTETS,
        OCTETS,
        EPILOGUE,
        END
    }

    // The states handled by parseDelimiter(). Declared `static immutable`
    // rather than as an `enum` manifest constant so that the array is not
    // re-created every time it is referenced in the per-byte loop.
    private static immutable State[] __delimiterStates =
        [State.DELIMITER, State.DELIMITER_CLOSE, State.DELIMITER_PADDING];

    private MultipartParserHandler _handler;
    private SearchPattern _delimiterSearch;

    private string _fieldName;
    private string _fieldValue;

    private State _state = State.PREAMBLE;
    private FieldState _fieldState = FieldState.FIELD;
    // Passed to SearchPattern.startsWith() as the count of delimiter bytes
    // treated as already matched. Starts at 2 (the leading CRLF of the
    // delimiter pattern): No CRLF if no preamble.
    private int _partialBoundary = 2;
    // True once a CR has been consumed and the matching LF is still pending.
    private bool _cr;
    // The bytes of "\r\n--" ~ boundary; also used to replay a partially
    // matched delimiter that turned out to be ordinary content.
    private ByteBuffer _patternBuffer;

    // Accumulator for the header name/value currently being read.
    private StringBuilder _string;
    // Length of _string up to the last significant character, used by
    // takeString() to drop trailing whitespace. Note that this is unsigned:
    // the `-1` assignments below store size_t.max as a "not set" marker.
    private size_t _length;

    private int _totalHeaderLineLength = -1;
    private int _maxHeaderLineLength = 998;

    /* ------------------------------------------------------------------------------- */
    /**
     * Creates a parser for the given boundary.
     *
     * Params:
     *   handler  = receiver of the parse events
     *   boundary = the multipart boundary, without the leading "--"
     */
    this(MultipartParserHandler handler, string boundary) {
        _handler = handler;
        _string = new StringBuilder();

        string delimiter = "\r\n--" ~ boundary;
        // Java original: delimiter.getBytes(StandardCharsets.US_ASCII)
        _patternBuffer = ByteBuffer.wrap(cast(byte[])delimiter.dup);
        _delimiterSearch = SearchPattern.compile(_patternBuffer.array());
    }

    /**
     * Returns the parser to its initial state so it can parse a new message
     * with the same boundary. Any partially parsed header and pending CR are
     * discarded so that nothing leaks from the previous message.
     */
    void reset() {
        _state = State.PREAMBLE;
        _fieldState = FieldState.FIELD;
        _partialBoundary = 2; // No CRLF if no preamble

        _cr = false;
        _fieldName = null;
        _fieldValue = null;
        _string.reset();
        _length = 0;
        _totalHeaderLineLength = -1;
    }

    /* ------------------------------------------------------------------------------- */
    /// Returns: the handler that receives the parse events.
    MultipartParserHandler getHandler() {
        return _handler;
    }

    /* ------------------------------------------------------------------------------- */
    /// Returns: the current parser state.
    State getState() {
        return _state;
    }

    /* ------------------------------------------------------------------------------- */
    /// Returns: true if the parser is currently in `state`.
    bool isState(State state) {
        return _state == state;
    }

    /* ------------------------------------------------------------------------------- */
    /// Classification of a byte as seen by getNextByte().
    enum CharState {
        ILLEGAL, CR, LF, LEGAL
    }

    // Lookup table indexed by unsigned byte value.
    private __gshared CharState[] __charState;

    shared static this() {
        // token = 1*tchar
        // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
        // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        // / DIGIT / ALPHA
        // ; any VCHAR, except delimiters
        // quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        // qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
        // obs-text = %x80-FF
        // comment = "(" *( ctext / quoted-pair / comment ) ")"
        // ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
        // quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )

        __charState = new CharState[256];
        __charState[0..$] = CharState.ILLEGAL;

        __charState[LINE_FEED] = CharState.LF;
        __charState[CARRIAGE_RETURN] = CharState.CR;
        __charState[TAB] = CharState.LEGAL;
        __charState[SPACE] = CharState.LEGAL;

        __charState['!'] = CharState.LEGAL;
        __charState['#'] = CharState.LEGAL;
        __charState['$'] = CharState.LEGAL;
        __charState['%'] = CharState.LEGAL;
        __charState['&'] = CharState.LEGAL;
        __charState['\''] = CharState.LEGAL;
        __charState['*'] = CharState.LEGAL;
        __charState['+'] = CharState.LEGAL;
        __charState['-'] = CharState.LEGAL;
        __charState['.'] = CharState.LEGAL;
        __charState['^'] = CharState.LEGAL;
        __charState['_'] = CharState.LEGAL;
        __charState['`'] = CharState.LEGAL;
        __charState['|'] = CharState.LEGAL;
        __charState['~'] = CharState.LEGAL;

        __charState['"'] = CharState.LEGAL;

        __charState['\\'] = CharState.LEGAL;
        __charState['('] = CharState.LEGAL;
        __charState[')'] = CharState.LEGAL;
        __charState[0x21 .. 0x27 + 1] = CharState.LEGAL;
        __charState[0x2A .. 0x5B + 1] = CharState.LEGAL;
        __charState[0x5D .. 0x7E + 1] = CharState.LEGAL;
        __charState[0x80 .. 0xFF + 1] = CharState.LEGAL;
    }

    /* ------------------------------------------------------------------------------- */
    private bool hasNextByte(ByteBuffer buffer) {
        return BufferUtils.hasContent(buffer);
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Reads the next significant byte, folding CRLF into a single LF.
     *
     * Returns: the byte read; LINE_FEED for a CRLF or bare LF; or 0 when the
     *          buffer ended right after a CR and more input is needed.
     * Throws: BadMessageException if a CR is followed by anything but LF;
     *         IllegalCharacterException for a byte classified as ILLEGAL.
     */
    private byte getNextByte(ByteBuffer buffer) {

        byte ch = buffer.get();

        CharState s = __charState[0xff & ch];
        switch (s) {
            case CharState.LF:
                _cr = false;
                return ch;

            case CharState.CR:
                if (_cr)
                    throw new BadMessageException("Bad EOL");

                _cr = true;
                if (buffer.hasRemaining())
                    return getNextByte(buffer);

                // Can return 0 here to indicate the need for more characters,
                // because a real 0 in the buffer would cause a BadMessage below
                return 0;

            case CharState.LEGAL:
                if (_cr)
                    throw new BadMessageException("Bad EOL");

                return ch;

            case CharState.ILLEGAL:
            default:
                throw new IllegalCharacterException(_state, ch, buffer);
        }
    }

    /* ------------------------------------------------------------------------------- */
    /// Replaces the accumulator content with `s` and marks all of it significant.
    private void setString(string s) {
        _string.reset();
        _string.append(s);
        _length = s.length;
    }

    /* ------------------------------------------------------------------------------- */
    /*
     * Mime Field strings are treated as UTF-8 as per https://tools.ietf.org/html/rfc7578#section-5.1
     */
    /// Returns the accumulated text cut back to `_length` and clears the accumulator.
    private string takeString() {
        string s = _string.toString();
        // trim trailing whitespace.
        if (s.length > _length)
            s = s.substring(0, _length);
        _string.reset();
        _length = -1;
        return s;
    }

    /* ------------------------------------------------------------------------------- */

    /**
     * Parse until next Event.
     *
     * Params:
     *   buffer = the buffer to parse
     *   last   = whether this buffer contains last bit of content
     *
     * Returns: true if a handler callback returned true, if the parser is in
     *          the END state, or if `last` was set and the input ended before
     *          the epilogue was reached (in which case `earlyEOF` is invoked);
     *          when `last` is set and the epilogue was reached, the result of
     *          `messageComplete` is returned.
     */
    bool parse(ByteBuffer buffer, bool last) {
        bool handle = false;
        while (handle == false && BufferUtils.hasContent(buffer)) {
            switch (_state) {
                case State.PREAMBLE:
                    parsePreamble(buffer);
                    continue;

                case State.DELIMITER:
                case State.DELIMITER_PADDING:
                case State.DELIMITER_CLOSE:
                    parseDelimiter(buffer);
                    continue;

                case State.BODY_PART:
                    handle = parseMimePartHeaders(buffer);
                    break;

                case State.FIRST_OCTETS:
                case State.OCTETS:
                    handle = parseOctetContent(buffer);
                    break;

                case State.EPILOGUE:
                    // Everything after the closing delimiter is discarded.
                    BufferUtils.clear(buffer);
                    break;

                case State.END:
                    handle = true;
                    break;

                default:
                    throw new IllegalStateException("");

            }
        }

        if (last && BufferUtils.isEmpty(buffer)) {
            if (_state == State.EPILOGUE) {
                _state = State.END;

                version(HUNT_DEBUG)
                    tracef("messageComplete %s", this);

                return _handler.messageComplete();
            } else {
                version(HUNT_DEBUG)
                    tracef("earlyEOF %s", this);

                _handler.earlyEOF();
                return true;
            }
        }

        return handle;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Skips preamble bytes until the first delimiter is found, remembering a
     * delimiter that is only partially present at the end of the buffer.
     * Consumes the whole buffer when no complete delimiter is found.
     */
    private void parsePreamble(ByteBuffer buffer) {
        if (_partialBoundary > 0) {
            // Try to continue a delimiter of which _partialBoundary bytes are
            // already accounted for.
            int partial = _delimiterSearch.startsWith(buffer.array(), buffer.arrayOffset() + buffer.position(),
                    buffer.remaining(), _partialBoundary);
            if (partial > 0) {
                if (partial == _delimiterSearch.getLength()) {
                    buffer.position(buffer.position() + partial - _partialBoundary);
                    _partialBoundary = 0;
                    setState(State.DELIMITER);
                    return;
                }

                _partialBoundary = partial;
                BufferUtils.clear(buffer);
                return;
            }

            _partialBoundary = 0;
        }

        int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (delimiter >= 0) {
            buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
            setState(State.DELIMITER);
            return;
        }

        _partialBoundary = _delimiterSearch.endsWith(buffer.array(),
                buffer.arrayOffset() + buffer.position(), buffer.remaining());
        BufferUtils.clear(buffer);

        return;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Consumes the remainder of a delimiter line. A line feed starts a new
     * part (`startPart` is called); two '-' characters directly after the
     * delimiter switch to the EPILOGUE state; anything else is padding.
     */
    private void parseDelimiter(ByteBuffer buffer) {
        while (__delimiterStates.canFind(_state) && hasNextByte(buffer)) {
            byte b = getNextByte(buffer);
            if (b == 0)
                return;

            if (b == '\n') {
                setState(State.BODY_PART);

                version(HUNT_DEBUG)
                    tracef("startPart %s", this);

                _handler.startPart();
                return;
            }

            switch (_state) {
                case State.DELIMITER:
                    if (b == '-')
                        setState(State.DELIMITER_CLOSE);
                    else
                        setState(State.DELIMITER_PADDING);
                    continue;

                case State.DELIMITER_CLOSE:
                    if (b == '-') {
                        setState(State.EPILOGUE);
                        return;
                    }
                    setState(State.DELIMITER_PADDING);
                    continue;

                case State.DELIMITER_PADDING:
                default:
                    continue;
            }
        }
    }

    /* ------------------------------------------------------------------------------- */
    /*
     * Parse the message headers and return true if the handler has signaled for a return
     */
    /**
     * Throws: IllegalStateException if a header line exceeds the maximum
     *         length or if the first header line is a folded continuation.
     */
    protected bool parseMimePartHeaders(ByteBuffer buffer) {
        // Process headers
        while (_state == State.BODY_PART && hasNextByte(buffer)) {
            // process each character
            byte b = getNextByte(buffer);
            if (b == 0)
                break;

            if (b != LINE_FEED)
                _totalHeaderLineLength++;

            if (_totalHeaderLineLength > _maxHeaderLineLength)
                throw new IllegalStateException("Header Line Exceeded Max Length");

            switch (_fieldState) {
                case FieldState.FIELD:
                    switch (b) {
                        case SPACE:
                        case TAB: {
                            // Folded field value!

                            if (_fieldName is null)
                                throw new IllegalStateException("First field folded");

                            if (_fieldValue.length == 0) {
                                // No value text so far (unset or empty):
                                // start the value from scratch.
                                _string.reset();
                                _length = 0;
                            } else {
                                // Continue the previous value, joined by a
                                // single space.
                                setString(_fieldValue);
                                _string.append(' ');
                                _length++;
                                _fieldValue = null;
                            }
                            setState(FieldState.VALUE);
                            break;
                        }

                        case LINE_FEED: {
                            // Blank line: end of this part's headers.
                            handleField();
                            setState(State.FIRST_OCTETS);
                            _partialBoundary = 2; // CRLF is option for empty parts

                            version(HUNT_DEBUG)
                                tracef("headerComplete %s", this);

                            if (_handler.headerComplete())
                                return true;
                            break;
                        }

                        default: {
                            // process previous header
                            handleField();

                            // New header
                            setState(FieldState.IN_NAME);
                            _string.reset();
                            _string.append(b);
                            _length = 1;
                        }
                    }
                    break;

                case FieldState.IN_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case SPACE:
                            // Ignore trailing whitespaces
                            setState(FieldState.AFTER_NAME);
                            break;

                        case LINE_FEED: {
                            version(HUNT_DEBUG)
                                tracef("Line Feed in Name %s", this);

                            handleField();
                            setState(FieldState.FIELD);
                            break;
                        }

                        default:
                            _string.append(b);
                            _length = _string.length;
                            break;
                    }
                    break;

                case FieldState.AFTER_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case LINE_FEED:
                            // NOTE(review): the field state is left at
                            // AFTER_NAME here rather than returning to
                            // FIELD — confirm whether that is intended.
                            _fieldName = takeString();
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;
                            break;

                        case SPACE:
                            break;

                        default:
                            throw new IllegalCharacterException(_state, b, buffer);
                    }
                    break;

                case FieldState.VALUE:
                    switch (b) {
                        case LINE_FEED:
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;

                            setState(FieldState.FIELD);
                            break;

                        case SPACE:
                        case TAB:
                            // Skip whitespace before the value.
                            break;

                        default:
                            _string.append(b);
                            _length = _string.length;
                            setState(FieldState.IN_VALUE);
                            break;
                    }
                    break;

                case FieldState.IN_VALUE:
                    switch (b) {
                        case SPACE:
                            // Kept in the accumulator but not counted in
                            // _length, so trailing spaces get trimmed.
                            _string.append(b);
                            break;

                        case LINE_FEED:
                            if (_length > 0) {
                                _fieldValue = takeString();
                                _length = -1;
                                _totalHeaderLineLength = -1;
                            }
                            setState(FieldState.FIELD);
                            break;

                        default:
                            _string.append(b);
                            if (b > SPACE || b < 0)
                                _length = _string.length;
                            break;
                    }
                    break;

                default:
                    throw new IllegalStateException(_state.to!string());

            }
        }
        return false;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Reports the pending header, if any, to the handler and clears it.
     *
     * The checks use `!is null` on purpose: comparing a D string to `null`
     * with `!=` compares contents, so an empty value ("") would be treated
     * as missing and a header such as "X-Empty:" would never be reported.
     */
    private void handleField() {
        version(HUNT_DEBUG)
            tracef("parsedField: _fieldName=%s _fieldValue=%s %s", _fieldName, _fieldValue, this);

        if (_fieldName !is null && _fieldValue !is null)
            _handler.parsedField(_fieldName, _fieldValue);
        _fieldName = _fieldValue = null;
    }

    /* ------------------------------------------------------------------------------- */

    /**
     * Passes part content to the handler until the next delimiter.
     *
     * Returns: the value returned by the last `content` callback invoked, or
     *          false if the buffer only extended a partially matched delimiter.
     */
    protected bool parseOctetContent(ByteBuffer buffer) {

        // Starts With
        if (_partialBoundary > 0) {
            int partial = _delimiterSearch.startsWith(buffer.array(),
                    buffer.arrayOffset() + buffer.position(), buffer.remaining(), _partialBoundary);
            if (partial > 0) {
                if (partial == _delimiterSearch.getLength()) {
                    buffer.position(buffer.position() + _delimiterSearch.getLength() - _partialBoundary);
                    setState(State.DELIMITER);
                    _partialBoundary = 0;

                    version(HUNT_DEBUG)
                        tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(BufferUtils.EMPTY_BUFFER), true, this);

                    return _handler.content(BufferUtils.EMPTY_BUFFER, true);
                }

                _partialBoundary = partial;
                BufferUtils.clear(buffer);
                return false;
            } else {
                // output up to _partialBoundary of the search pattern
                ByteBuffer content = _patternBuffer.slice();
                if (_state == State.FIRST_OCTETS) {
                    setState(State.OCTETS);
                    // Skip the pattern's leading CRLF for the first octets.
                    content.position(2);
                }
                content.limit(_partialBoundary);
                _partialBoundary = 0;

                version(HUNT_DEBUG)
                    tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);

                if (_handler.content(content, false))
                    return true;
            }
        }

        // Contains
        int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (delimiter >= 0) {
            ByteBuffer content = buffer.slice();
            content.limit(delimiter - buffer.arrayOffset() - buffer.position());

            buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
            setState(State.DELIMITER);

            version(HUNT_DEBUG)
                tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), true, this);

            return _handler.content(content, true);
        }

        // Ends With
        _partialBoundary = _delimiterSearch.endsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (_partialBoundary > 0) {
            // Hold back the bytes that may be the start of a delimiter.
            ByteBuffer content = buffer.slice();
            content.limit(content.limit() - _partialBoundary);

            version(HUNT_DEBUG)
                tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);

            BufferUtils.clear(buffer);
            return _handler.content(content, false);
        }

        // There is normal content with no delimiter
        ByteBuffer content = buffer.slice();

        version(HUNT_DEBUG)
            tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);

        BufferUtils.clear(buffer);
        return _handler.content(content, false);
    }

    /* ------------------------------------------------------------------------------- */
    private void setState(State state) {
        version(HUNT_DEBUG)
            tracef("%s --> %s", _state, state);
        _state = state;
    }

    /* ------------------------------------------------------------------------------- */
    private void setState(FieldState state) {
        // version(HUNT_DEBUG)
        //     tracef("%s:%s --> %s", _state, _fieldState, state);
        _fieldState = state;
    }

    /* ------------------------------------------------------------------------------- */
    override
    string toString() {
        return format("%s{s=%s}", typeof(this).stringof, _state);
    }

}



/* ------------------------------------------------------------------------------- */

/**
 * Thrown when the parser meets a byte that is not allowed at that point.
 * The exception message carries only the byte value; the buffer content is
 * written to the log instead.
 */
private class IllegalCharacterException : IllegalArgumentException {
    private this(MultipartParser.State state, byte ch, ByteBuffer buffer) {
        super(format("Illegal character 0x%X", ch));
        // Bug #460642 - don't reveal buffers to end user
        // The arguments are handed to warningf directly: pre-formatting the
        // message and passing it as the format string would misinterpret any
        // '%' contained in the buffer dump.
        warningf("Illegal character 0x%X in state=%s for buffer %s",
                ch, state, BufferUtils.toDetailString(buffer));
    }
}

/* ------------------------------------------------------------ */
/* ------------------------------------------------------------ */
/* ------------------------------------------------------------ */
/*
 * Event Handler interface. These methods return true if the caller should process the events so far received (e.g. return from parseNext and call
 * HttpChannel.handle). If multiple callbacks are called in sequence (e.g. headerComplete then messageComplete) from the same point in the parsing then it is
 * sufficient for the caller to process the events only once.
 */
class MultipartParserHandler {

    this() {

    }

    /// Called when the line feed ending a delimiter line has been read.
    void startPart() {
    }

    /// Called for each parsed part header with its name and value.
    void parsedField(string name, string value) {
    }

    /// Called at the blank line that ends a part's headers.
    bool headerComplete() {
        return false;
    }

    /// Called with a slice of part content; `last` is true for the final slice of the part.
    bool content(ByteBuffer item, bool last) {
        return false;
    }

    /// Called when the final input has been consumed after the closing delimiter.
    bool messageComplete() {
        return false;
    }

    /// Called when the final input ends before the closing delimiter was seen.
    void earlyEOF() {
    }
}