module hunt.http.codec.http.decode.MultipartParser;

import hunt.http.codec.http.model.BadMessageException;

import hunt.io.ByteBuffer;
import hunt.io.BufferUtils;
import hunt.Exceptions;
import hunt.logging;
import hunt.text.Common;
import hunt.util.StringBuilder;
import hunt.text.SearchPattern;

import std.algorithm;
import std.conv;
import std.format;


/* ------------------------------------------------------------ */

/**
 * A parser for MultiPart content type.
 *
 * The parser is a push-style state machine: the caller feeds it buffers via
 * {@link #parse(ByteBuffer, bool)} and the parser reports what it finds
 * (part start, header fields, part content, end of message) through the
 * {@link MultipartParserHandler} passed to the constructor.
 *
 * @see <a href="https://tools.ietf.org/html/rfc2046#section-5.1">https://tools.ietf.org/html/rfc2046#section-5.1</a>
 * @see <a href="https://tools.ietf.org/html/rfc2045">https://tools.ietf.org/html/rfc2045</a>
 */
class MultipartParser {
    enum byte COLON = ':';
    enum byte TAB = 0x09;
    enum byte LINE_FEED = 0x0A;
    enum byte CARRIAGE_RETURN = 0x0D;
    enum byte SPACE = 0x20;
    enum byte[] CRLF = [CARRIAGE_RETURN, LINE_FEED];
    enum byte SEMI_COLON = ';';

    /** States of the header-field sub-parser used while in State.BODY_PART. */
    enum FieldState {
        FIELD,
        IN_NAME,
        AFTER_NAME,
        VALUE,
        IN_VALUE
    }

    /** States of the overall multipart message parser. */
    enum State {
        PREAMBLE,
        DELIMITER,
        DELIMITER_PADDING,
        DELIMITER_CLOSE,
        BODY_PART,
        FIRST_OCTETS,
        OCTETS,
        EPILOGUE,
        END
    }

    // The states in which parseDelimiter() keeps consuming bytes.
    private enum State[] __delimiterStates = [State.DELIMITER, State.DELIMITER_CLOSE, State.DELIMITER_PADDING];

    private MultipartParserHandler _handler;
    private SearchPattern _delimiterSearch;

    // Name/value of the header field currently being assembled (null when unset).
    private string _fieldName;
    private string _fieldValue;

    private State _state = State.PREAMBLE;
    private FieldState _fieldState = FieldState.FIELD;
    private int _partialBoundary = 2; // No CRLF if no preamble
    private bool _cr; // true after a CR has been read and its LF is still pending
    private ByteBuffer _patternBuffer;

    // Accumulator for the header name/value being read; _length is the number
    // of leading characters of _string that are significant (see takeString()).
    private StringBuilder _string;
    private size_t _length;

    private int _totalHeaderLineLength = -1;
    private int _maxHeaderLineLength = 998;

    /* ------------------------------------------------------------------------------- */
    /**
     * Creates a parser for a multipart body.
     *
     * @param handler  receiver of the parse events
     * @param boundary the boundary string; the delimiter searched for is
     *                 CRLF followed by "--" followed by this boundary
     */
    this(MultipartParserHandler handler, string boundary) {
        _handler = handler;
        _string = new StringBuilder();

        string delimiter = "\r\n--" ~ boundary;
        // The delimiter's bytes are used as-is; .dup gives the buffer its own mutable copy.
        _patternBuffer = BufferUtils.toBuffer(cast(byte[])delimiter.dup);
        _delimiterSearch = SearchPattern.compile(_patternBuffer.array());
    }

    /**
     * Puts the message state, the field state and the partial-boundary
     * counter back to their initial values. Other members are left untouched.
     */
    void reset() {
        _state = State.PREAMBLE;
        _fieldState = FieldState.FIELD;
        _partialBoundary = 2; // No CRLF if no preamble
    }

    /* ------------------------------------------------------------------------------- */
    /** @return the handler that receives the parse events */
    MultipartParserHandler getHandler() {
        return _handler;
    }

    /* ------------------------------------------------------------------------------- */
    /** @return the current message-level state */
    State getState() {
        return _state;
    }

    /* ------------------------------------------------------------------------------- */
    /** @return true if the parser is currently in the given state */
    bool isState(State state) {
        return _state == state;
    }

    /* ------------------------------------------------------------------------------- */
    /** Classification of a byte as seen by getNextByte(). */
    enum CharState {
        ILLEGAL, CR, LF, LEGAL
    }

    // Lookup table indexed by the unsigned byte value.
    private __gshared CharState[] __charState;

    shared static this() {
        // token          = 1*tchar
        // tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
        //                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
        //                / DIGIT / ALPHA
        //                ; any VCHAR, except delimiters
        // quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
        // qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
        // obs-text       = %x80-FF
        // comment        = "(" *( ctext / quoted-pair / comment ) ")"
        // ctext          = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
        // quoted-pair    = "\" ( HTAB / SP / VCHAR / obs-text )

        __charState = new CharState[256];
        __charState[0..$] = CharState.ILLEGAL;

        __charState[LINE_FEED] = CharState.LF;
        __charState[CARRIAGE_RETURN] = CharState.CR;
        __charState[TAB] = CharState.LEGAL;
        __charState[SPACE] = CharState.LEGAL;

        __charState['!'] = CharState.LEGAL;
        __charState['#'] = CharState.LEGAL;
        __charState['$'] = CharState.LEGAL;
        __charState['%'] = CharState.LEGAL;
        __charState['&'] = CharState.LEGAL;
        __charState['\''] = CharState.LEGAL;
        __charState['*'] = CharState.LEGAL;
        __charState['+'] = CharState.LEGAL;
        __charState['-'] = CharState.LEGAL;
        __charState['.'] = CharState.LEGAL;
        __charState['^'] = CharState.LEGAL;
        __charState['_'] = CharState.LEGAL;
        __charState['`'] = CharState.LEGAL;
        __charState['|'] = CharState.LEGAL;
        __charState['~'] = CharState.LEGAL;

        __charState['"'] = CharState.LEGAL;

        __charState['\\'] = CharState.LEGAL;
        __charState['('] = CharState.LEGAL;
        __charState[')'] = CharState.LEGAL;
        __charState[0x21 .. 0x27 + 1] = CharState.LEGAL;
        __charState[0x2A .. 0x5B + 1] = CharState.LEGAL;
        __charState[0x5D .. 0x7E + 1] = CharState.LEGAL;
        __charState[0x80 .. 0xFF + 1] = CharState.LEGAL;
    }

    /* ------------------------------------------------------------------------------- */
    private bool hasNextByte(ByteBuffer buffer) {
        return BufferUtils.hasContent(buffer);
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Reads the next byte of a delimiter or header line, validating it and
     * folding CR LF into a single LF.
     *
     * @param buffer the buffer to read from
     * @return the byte read (never CR), or 0 if a CR was the last byte in the
     *         buffer and more input is needed to see what follows it
     * @throws BadMessageException if a CR is followed by anything but LF
     * @throws IllegalCharacterException if the byte is not allowed
     */
    private byte getNextByte(ByteBuffer buffer) {

        byte ch = buffer.get();

        // Mask to index the table with the unsigned value of the byte.
        CharState s = __charState[0xff & ch];
        switch (s) {
            case CharState.LF:
                _cr = false;
                return ch;

            case CharState.CR:
                if (_cr)
                    throw new BadMessageException("Bad EOL");

                _cr = true;
                if (buffer.hasRemaining())
                    return getNextByte(buffer);

                // Can return 0 here to indicate the need for more characters,
                // because a real 0 in the buffer would cause a BadMessage below
                return 0;

            case CharState.LEGAL:
                if (_cr)
                    throw new BadMessageException("Bad EOL");

                return ch;

            case CharState.ILLEGAL:
            default:
                throw new IllegalCharacterException(_state, ch, buffer);
        }
    }

    /* ------------------------------------------------------------------------------- */
    /** Replaces the accumulator content with s and marks all of it significant. */
    private void setString(string s) {
        _string.reset();
        _string.append(s);
        _length = s.length;
    }

    /* ------------------------------------------------------------------------------- */
    /*
     * Mime Field strings are treated as UTF-8 as per https://tools.ietf.org/html/rfc7578#section-5.1
     */
    /**
     * Returns the accumulated string cut down to its first _length characters
     * (which drops trailing whitespace that was appended without advancing
     * _length) and clears the accumulator.
     */
    private string takeString() {
        string s = _string.toString();
        // trim trailing whitespace.
        if (s.length > _length)
            s = s.substring(0, _length);
        _string.reset();
        _length = -1;
        return s;
    }

    /* ------------------------------------------------------------------------------- */

    /**
     * Parse until next Event.
     *
     * @param buffer the buffer to parse
     * @param last whether this buffer contains last bit of content
     * @return True if a {@link MultipartParserHandler} method was called and it returned true;
     *         also true when the input ended before the epilogue was reached
     *         (after earlyEOF() has been signalled), or when the parser is already in State.END
     */
    bool parse(ByteBuffer buffer, bool last) {
        bool handle = false;
        while (handle == false && BufferUtils.hasContent(buffer)) {
            switch (_state) {
                case State.PREAMBLE:
                    parsePreamble(buffer);
                    continue;

                case State.DELIMITER:
                case State.DELIMITER_PADDING:
                case State.DELIMITER_CLOSE:
                    parseDelimiter(buffer);
                    continue;

                case State.BODY_PART:
                    handle = parseMimePartHeaders(buffer);
                    break;

                case State.FIRST_OCTETS:
                case State.OCTETS:
                    handle = parseOctetContent(buffer);
                    break;

                case State.EPILOGUE:
                    // Everything after the closing delimiter is discarded.
                    BufferUtils.clear(buffer);
                    break;

                case State.END:
                    handle = true;
                    break;

                default:
                    throw new IllegalStateException("");

            }
        }

        if (last && BufferUtils.isEmpty(buffer)) {
            if (_state == State.EPILOGUE) {
                _state = State.END;

                version(HUNT_HTTP_DEBUG)
                    tracef("messageComplete %s", this);

                return _handler.messageComplete();
            } else {
                version(HUNT_HTTP_DEBUG)
                    tracef("earlyEOF %s", this);

                _handler.earlyEOF();
                return true;
            }
        }

        return handle;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Skips the preamble: consumes input until the first delimiter has been
     * matched, then switches to State.DELIMITER. If no complete delimiter is
     * found the buffer is cleared and any partial match at its end is
     * remembered in _partialBoundary for the next call.
     */
    private void parsePreamble(ByteBuffer buffer) {
        if (_partialBoundary > 0) {
            // Try to complete a delimiter that was partially matched already.
            int partial = _delimiterSearch.startsWith(buffer.array(), buffer.arrayOffset() + buffer.position(),
                    buffer.remaining(), _partialBoundary);
            if (partial > 0) {
                if (partial == _delimiterSearch.getLength()) {
                    buffer.position(buffer.position() + partial - _partialBoundary);
                    _partialBoundary = 0;
                    setState(State.DELIMITER);
                    return;
                }

                _partialBoundary = partial;
                BufferUtils.clear(buffer);
                return;
            }

            _partialBoundary = 0;
        }

        int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (delimiter >= 0) {
            buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
            setState(State.DELIMITER);
            return;
        }

        _partialBoundary = _delimiterSearch.endsWith(buffer.array(),
                buffer.arrayOffset() + buffer.position(), buffer.remaining());
        BufferUtils.clear(buffer);

        return;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Consumes the remainder of a delimiter line (the bytes following the
     * boundary itself). A line feed starts a new part and calls
     * startPart(); "--" directly after the boundary marks the closing
     * delimiter and moves to State.EPILOGUE; any other byte is padding.
     */
    private void parseDelimiter(ByteBuffer buffer) {
        while (__delimiterStates.canFind(_state) && hasNextByte(buffer)) {
            byte b = getNextByte(buffer);
            if (b == 0)
                return; // a CR ended the buffer; wait for more input

            if (b == '\n') {
                setState(State.BODY_PART);

                version(HUNT_HTTP_DEBUG)
                    tracef("startPart %s", this);

                _handler.startPart();
                return;
            }

            switch (_state) {
                case State.DELIMITER:
                    if (b == '-')
                        setState(State.DELIMITER_CLOSE);
                    else
                        setState(State.DELIMITER_PADDING);
                    continue;

                case State.DELIMITER_CLOSE:
                    if (b == '-') {
                        setState(State.EPILOGUE);
                        return;
                    }
                    setState(State.DELIMITER_PADDING);
                    continue;

                case State.DELIMITER_PADDING:
                default:
                    continue;
            }
        }
    }

    /* ------------------------------------------------------------------------------- */
    /*
     * Parse the message headers and return true if the handler has signaled for a return
     */
    /**
     * Parses the header fields of the current part. Each completed field is
     * reported through parsedField(); the empty line that ends the headers
     * triggers headerComplete() and moves the parser to State.FIRST_OCTETS.
     *
     * @param buffer the buffer to parse
     * @return true if headerComplete() returned true
     * @throws IllegalStateException if a header line exceeds the maximum
     *         length or the very first line is a folded continuation
     */
    protected bool parseMimePartHeaders(ByteBuffer buffer) {
        // Process headers
        while (_state == State.BODY_PART && hasNextByte(buffer)) {
            // process each character
            byte b = getNextByte(buffer);
            if (b == 0)
                break; // a CR ended the buffer; wait for more input

            if (b != LINE_FEED)
                _totalHeaderLineLength++;

            if (_totalHeaderLineLength > _maxHeaderLineLength)
                throw new IllegalStateException("Header Line Exceeded Max Length");

            switch (_fieldState) {
                case FieldState.FIELD:
                    switch (b) {
                        case SPACE:
                        case TAB: {
                            // Folded field value!

                            if (_fieldName == null)
                                throw new IllegalStateException("First field folded");

                            // `== null` is true for both a null and an empty string in D,
                            // so an empty previous value restarts the value from scratch
                            // instead of prefixing the continuation with a space.
                            if (_fieldValue == null) {
                                _string.reset();
                                _length = 0;
                            } else {
                                setString(_fieldValue);
                                _string.append(' ');
                                _length++;
                                _fieldValue = null;
                            }
                            setState(FieldState.VALUE);
                            break;
                        }

                        case LINE_FEED: {
                            // Empty line: end of this part's headers.
                            handleField();
                            setState(State.FIRST_OCTETS);
                            _partialBoundary = 2; // CRLF is option for empty parts

                            version(HUNT_HTTP_DEBUG)
                                tracef("headerComplete %s", this);

                            if (_handler.headerComplete())
                                return true;
                            break;
                        }

                        default: {
                            // process previous header
                            handleField();

                            // New header
                            setState(FieldState.IN_NAME);
                            _string.reset();
                            _string.append(b);
                            _length = 1;
                        }
                    }
                    break;

                case FieldState.IN_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case SPACE:
                            // Ignore trailing whitespaces
                            setState(FieldState.AFTER_NAME);
                            break;

                        case LINE_FEED: {
                            version(HUNT_HTTP_DEBUG)
                                tracef("Line Feed in Name %s", this);

                            handleField();
                            setState(FieldState.FIELD);
                            break;
                        }

                        default:
                            _string.append(b);
                            _length = _string.length;
                            break;
                    }
                    break;

                case FieldState.AFTER_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case LINE_FEED:
                            _fieldName = takeString();
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;
                            // The line is finished: go back to FIELD as the VALUE branch
                            // does. Staying in AFTER_NAME would reject the first byte of
                            // the next header and swallow the blank line ending the headers.
                            setState(FieldState.FIELD);
                            break;

                        case SPACE:
                            break;

                        default:
                            throw new IllegalCharacterException(_state, b, buffer);
                    }
                    break;

                case FieldState.VALUE:
                    switch (b) {
                        case LINE_FEED:
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;

                            setState(FieldState.FIELD);
                            break;

                        case SPACE:
                        case TAB:
                            // Skip leading whitespace of the value.
                            break;

                        default:
                            _string.append(b);
                            _length = _string.length;
                            setState(FieldState.IN_VALUE);
                            break;
                    }
                    break;

                case FieldState.IN_VALUE:
                    switch (b) {
                        case SPACE:
                            // Kept in the accumulator, but _length is not advanced so that
                            // trailing spaces are trimmed by takeString().
                            _string.append(b);
                            break;

                        case LINE_FEED:
                            if (_length > 0) {
                                _fieldValue = takeString();
                                _length = -1;
                                _totalHeaderLineLength = -1;
                            }
                            setState(FieldState.FIELD);
                            break;

                        default:
                            _string.append(b);
                            // Only bytes above SPACE (or with the high bit set, which are
                            // negative as a signed byte) extend the significant length.
                            if (b > SPACE || b < 0)
                                _length = _string.length;
                            break;
                    }
                    break;

                default:
                    throw new IllegalStateException(_state.to!string());

            }
        }
        return false;
    }

    /* ------------------------------------------------------------------------------- */
    /**
     * Reports the pending header field, if any, to the handler and clears it.
     */
    private void handleField() {
        version(HUNT_HTTP_DEBUG)
            tracef("parsedField: fieldName=%s fieldValue=%s %s", _fieldName, _fieldValue, this);

        // Identity test on purpose: in D `"" != null` is false (== / != compare
        // array contents), which would silently drop fields whose value is the
        // empty string assigned by the header parser.
        if (_fieldName !is null && _fieldValue !is null)
            _handler.parsedField(_fieldName, _fieldValue);
        _fieldName = _fieldValue = null;
    }

    /* ------------------------------------------------------------------------------- */

    /**
     * Parses the content of the current part, passing it to
     * content(ByteBuffer, bool) and watching for the next delimiter. When a
     * delimiter is found the content is flagged as last and the parser moves
     * to State.DELIMITER.
     *
     * @param buffer the buffer to parse
     * @return the value returned by the handler's content() call, or false if
     *         the buffer was consumed as a still-incomplete delimiter match
     */
    protected bool parseOctetContent(ByteBuffer buffer) {

        // Starts With
        if (_partialBoundary > 0) {
            int partial = _delimiterSearch.startsWith(buffer.array(),
                    buffer.arrayOffset() + buffer.position(), buffer.remaining(), _partialBoundary);
            if (partial > 0) {
                if (partial == _delimiterSearch.getLength()) {
                    buffer.position(buffer.position() + _delimiterSearch.getLength() - _partialBoundary);
                    setState(State.DELIMITER);
                    _partialBoundary = 0;

                    version(HUNT_HTTP_DEBUG)
                        tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(BufferUtils.EMPTY_BUFFER), true, this);

                    return _handler.content(BufferUtils.EMPTY_BUFFER, true);
                }

                _partialBoundary = partial;
                BufferUtils.clear(buffer);
                return false;
            } else {
                // output up to _partialBoundary of the search pattern
                ByteBuffer content = _patternBuffer.slice();
                if (_state == State.FIRST_OCTETS) {
                    setState(State.OCTETS);
                    // Skip the pattern's leading CRLF for the first octets of a part.
                    content.position(2);
                }
                content.limit(_partialBoundary);
                _partialBoundary = 0;

                version(HUNT_HTTP_DEBUG)
                    tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);

                if (_handler.content(content, false))
                    return true;
            }
        }

        // Contains
        int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (delimiter >= 0) {
            ByteBuffer content = buffer.slice();
            content.limit(delimiter - buffer.arrayOffset() - buffer.position());

            buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
            setState(State.DELIMITER);

            version(HUNT_HTTP_DEBUG)
                tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), true, this);

            return _handler.content(content, true);
        }

        // Ends With
        _partialBoundary = _delimiterSearch.endsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        if (_partialBoundary > 0) {
            // Hold back the bytes that may be the start of a delimiter.
            ByteBuffer content = buffer.slice();
            content.limit(content.limit() - _partialBoundary);

            version(HUNT_HTTP_DEBUG)
                tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);

            BufferUtils.clear(buffer);
            return _handler.content(content, false);
        }

        // There is normal content with no delimiter
        ByteBuffer content = buffer.slice();

        BufferUtils.clear(buffer);
        return _handler.content(content, false);
    }

    /* ------------------------------------------------------------------------------- */
    private void setState(State state) {
        _state = state;
    }

    /* ------------------------------------------------------------------------------- */
    private void setState(FieldState state) {
        _fieldState = state;
    }

    /* ------------------------------------------------------------------------------- */
    override
    string toString() {
        return format("%s{s=%s}", typeof(this).stringof, _state);
    }

}



/* ------------------------------------------------------------------------------- */

/**
 * Thrown when the parser meets a byte that is not allowed at the current
 * position. The exception message carries only the byte value; the buffer
 * details are written to the log.
 */
private class IllegalCharacterException : IllegalArgumentException {
    private this(MultipartParser.State state, byte ch, ByteBuffer buffer) {
        super(format("Illegal character 0x%X", ch));
        // Bug #460642 - don't reveal buffers to end user
        // Pass the arguments to warningf directly: formatting first and then
        // handing the result over as the format string would re-interpret any
        // '%' coming from the buffer contents.
        warningf("Illegal character 0x%X in state=%s for buffer %s",
                ch, state, BufferUtils.toDetailString(buffer));
    }
}


/*
 * Event Handler interface These methods return true if the caller should process the events so far received (eg return from parseNext and call
 * HttpChannel.handle). If multiple callbacks are called in sequence (eg headerComplete then messageComplete) from the same point in the parsing then it is
 * sufficient for the caller to process the events only once.
 */
/**
 * Receiver of MultipartParser events. Every method has a do-nothing default
 * (returning false where a value is expected), so subclasses override only
 * the events they care about.
 */
class MultipartParserHandler {

    this() {

    }

    /** Called when the delimiter line that opens a part has been consumed. */
    void startPart() {
    }

    /** Called for each header field of the current part. */
    void parsedField(string name, string value) {
    }

    /**
     * Called when the empty line ending a part's headers has been read.
     *
     * @return true to make MultipartParser.parse return to its caller
     */
    bool headerComplete() {
        return false;
    }

    /**
     * Called with a piece of the current part's content.
     *
     * @param item the content
     * @param last true if this is the final piece of the part
     * @return true to make MultipartParser.parse return to its caller
     */
    bool content(ByteBuffer item, bool last) {
        return false;
    }

    /**
     * Called when the input ended after the closing delimiter was seen.
     *
     * @return the value MultipartParser.parse should return
     */
    bool messageComplete() {
        return false;
    }

    /** Called when the input ended before the closing delimiter was seen. */
    void earlyEOF() {
    }
}