1 module hunt.http.codec.http.model.MultipartParser;
2 
3 import hunt.http.codec.http.model.BadMessageException;
4 
5 import hunt.container.ByteBuffer;
6 import hunt.container.BufferUtils;
7 import hunt.string;
8 import hunt.lang.exception;
9 import hunt.logging;
10 import hunt.util.SearchPattern;
11 
12 import std.algorithm;
13 import std.conv;
14 import std.format;
15 
16 
17 /* ------------------------------------------------------------ */
18 
19 /**
20  * A parser for MultiPart content type.
21  *
22  * @see <a href="https://tools.ietf.org/html/rfc2046#section-5.1">https://tools.ietf.org/html/rfc2046#section-5.1</a>
23  * @see <a href="https://tools.ietf.org/html/rfc2045">https://tools.ietf.org/html/rfc2045</a>
24  */
25 class MultipartParser {
    // Byte values the parser compares incoming data against.

    // Terminates a header field name.
    enum byte COLON = ':';
    // Horizontal tab; accepted as header whitespace.
    enum byte TAB = 0x09;
    // End-of-line byte; all line-based decisions are made when it is seen.
    enum byte LINE_FEED = 0x0A;
    // Must be directly followed by LINE_FEED (enforced in getNextByte).
    enum byte CARRIAGE_RETURN = 0x0D;
    // Space; accepted as header whitespace.
    enum byte SPACE = 0x20;
    // The CR LF pair as a byte sequence.
    enum byte[] CRLF = [CARRIAGE_RETURN, LINE_FEED];
    enum byte SEMI_COLON = ';';
33 
    // States of the header-field parser that runs while the parser is in State.BODY_PART.
    enum FieldState {
        // At the start of a header line: whitespace means a folded value,
        // a line feed ends the headers, anything else starts a new field name.
        FIELD,
        // Accumulating the bytes of a field name.
        IN_NAME,
        // A space followed the field name; waiting for the colon.
        AFTER_NAME,
        // After the colon (or a fold); skipping whitespace before the value starts.
        VALUE,
        // Accumulating the bytes of a field value until the line feed.
        IN_VALUE
    }
42 
    // Top-level states of the multipart parser.
    enum State {
        // Before the first delimiter; data is discarded while searching for it.
        PREAMBLE,
        // A delimiter has just been matched.
        DELIMITER,
        // Skipping the bytes that follow a delimiter up to the line feed.
        DELIMITER_PADDING,
        // One '-' has been seen directly after a delimiter.
        DELIMITER_CLOSE,
        // Parsing the header fields of a part.
        BODY_PART,
        // Headers of the part are complete; entered before any part content is read.
        FIRST_OCTETS,
        // Inside the content of a part.
        OCTETS,
        // The closing delimiter ("--" after the delimiter) was seen; remaining data is discarded.
        EPILOGUE,
        // Set by parse() when the last buffer has been consumed in EPILOGUE.
        END
    }
55 
56     private enum State[] __delimiterStates = [State.DELIMITER, State.DELIMITER_CLOSE, State.DELIMITER_PADDING];
57 
58     private MultipartParserHandler _handler;
59     private SearchPattern _delimiterSearch;
60 
61     private string _fieldName;
62     private string _fieldValue;
63 
64     private State _state = State.PREAMBLE;
65     private FieldState _fieldState = FieldState.FIELD;
66     private int _partialBoundary = 2; // No CRLF if no preamble
67     private bool _cr;
68     private ByteBuffer _patternBuffer;
69 
70     private StringBuilder _string;
71     private size_t _length;
72 
73     private int _totalHeaderLineLength = -1;
74     private int _maxHeaderLineLength = 998;
75 
76     /* ------------------------------------------------------------------------------- */
77     this(MultipartParserHandler handler, string boundary) {
78         _handler = handler;
79         _string = new StringBuilder();
80 
81         string delimiter = "\r\n--" ~ boundary;
82         //delimiter.getBytes(StandardCharsets.US_ASCII)
83         _patternBuffer = ByteBuffer.wrap(cast(byte[])delimiter.dup); 
84         _delimiterSearch = SearchPattern.compile(_patternBuffer.array());
85     }
86 
87     void reset() {
88         _state = State.PREAMBLE;
89         _fieldState = FieldState.FIELD;
90         _partialBoundary = 2; // No CRLF if no preamble
91     }
92 
93     /* ------------------------------------------------------------------------------- */
    /**
     * @return the handler passed to the constructor
     */
    MultipartParserHandler getHandler() {
        return _handler;
    }
97 
98     /* ------------------------------------------------------------------------------- */
    /**
     * @return the current top-level parser state
     */
    State getState() {
        return _state;
    }
102 
103     /* ------------------------------------------------------------------------------- */
    /**
     * @param state the state to compare against
     * @return true if the parser is currently in the given state
     */
    bool isState(State state) {
        return _state == state;
    }
107 
108     /* ------------------------------------------------------------------------------- */
    // Classification of a single input byte, as used by getNextByte:
    // ILLEGAL bytes are rejected, CR and LF drive end-of-line handling,
    // LEGAL bytes are passed through.
    enum CharState {
        ILLEGAL, CR, LF, LEGAL
    }

    // Lookup table indexed by the unsigned byte value; filled in by the module's
    // shared static constructor.
    private __gshared CharState[] __charState;
114 
115     shared static this() {
116         // token = 1*tchar
117         // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
118         // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
119         // / DIGIT / ALPHA
120         // ; any VCHAR, except delimiters
121         // quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
122         // qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
123         // obs-text = %x80-FF
124         // comment = "(" *( ctext / quoted-pair / comment ) ")"
125         // ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
126         // quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
127 
128         __charState = new CharState[256];
129         __charState[0..$] = CharState.ILLEGAL;
130 
131         __charState[LINE_FEED] = CharState.LF;
132         __charState[CARRIAGE_RETURN] = CharState.CR;
133         __charState[TAB] = CharState.LEGAL;
134         __charState[SPACE] = CharState.LEGAL;
135 
136         __charState['!'] = CharState.LEGAL;
137         __charState['#'] = CharState.LEGAL;
138         __charState['$'] = CharState.LEGAL;
139         __charState['%'] = CharState.LEGAL;
140         __charState['&'] = CharState.LEGAL;
141         __charState['\''] = CharState.LEGAL;
142         __charState['*'] = CharState.LEGAL;
143         __charState['+'] = CharState.LEGAL;
144         __charState['-'] = CharState.LEGAL;
145         __charState['.'] = CharState.LEGAL;
146         __charState['^'] = CharState.LEGAL;
147         __charState['_'] = CharState.LEGAL;
148         __charState['`'] = CharState.LEGAL;
149         __charState['|'] = CharState.LEGAL;
150         __charState['~'] = CharState.LEGAL;
151 
152         __charState['"'] = CharState.LEGAL;
153 
154         __charState['\\'] = CharState.LEGAL;
155         __charState['('] = CharState.LEGAL;
156         __charState[')'] = CharState.LEGAL;
157         __charState[0x21 .. 0x27 + 1] = CharState.LEGAL;
158         __charState[0x2A .. 0x5B + 1] = CharState.LEGAL;
159         __charState[0x5D .. 0x7E + 1] = CharState.LEGAL;
160         __charState[0x80 .. 0xFF + 1] = CharState.LEGAL;
161 
162         // Arrays.fill(__charState, 0x21, 0x27 + 1, CharState.LEGAL);
163         // Arrays.fill(__charState, 0x2A, 0x5B + 1, CharState.LEGAL);
164         // Arrays.fill(__charState, 0x5D, 0x7E + 1, CharState.LEGAL);
165         // Arrays.fill(__charState, 0x80, 0xFF + 1, CharState.LEGAL);
166 
167     }
168 
169     /* ------------------------------------------------------------------------------- */
    /**
     * @param buffer the buffer being parsed
     * @return the result of BufferUtils.hasContent for the buffer
     */
    private bool hasNextByte(ByteBuffer buffer) {
        return BufferUtils.hasContent(buffer);
    }
173 
174     /* ------------------------------------------------------------------------------- */
175     private byte getNextByte(ByteBuffer buffer) {
176 
177         byte ch = buffer.get();
178 
179         CharState s = __charState[0xff & ch];
180         switch (s) {
181             case CharState.LF:
182                 _cr = false;
183                 return ch;
184 
185             case CharState.CR:
186                 if (_cr)
187                     throw new BadMessageException("Bad EOL");
188 
189                 _cr = true;
190                 if (buffer.hasRemaining())
191                     return getNextByte(buffer);
192 
193                 // Can return 0 here to indicate the need for more characters,
194                 // because a real 0 in the buffer would cause a BadMessage below
195                 return 0;
196 
197             case CharState.LEGAL:
198                 if (_cr)
199                     throw new BadMessageException("Bad EOL");
200 
201                 return ch;
202 
203             case CharState.ILLEGAL:
204             default:
205                 throw new IllegalCharacterException(_state, ch, buffer);
206         }
207     }
208 
209     /* ------------------------------------------------------------------------------- */
210     private void setString(string s) {
211         _string.reset();
212         _string.append(s);
213         _length = s.length;
214     }
215 
216     /* ------------------------------------------------------------------------------- */
217     /*
218      * Mime Field strings are treated as UTF-8 as per https://tools.ietf.org/html/rfc7578#section-5.1
219      */
220     private string takeString() {
221         string s = _string.toString();
222         // trim trailing whitespace.
223         if (s.length > _length)
224             s = s.substring(0, _length);
225         _string.reset();
226         _length = -1;
227         return s;
228     }
229 
230     /* ------------------------------------------------------------------------------- */
231 
232     /**
233      * Parse until next Event.
234      *
235      * @param buffer the buffer to parse
236      * @param last   whether this buffer contains last bit of content
237      * @return True if an {@link hunt.http.codec.http.decode.HttpParser.RequestHandler} method was called and it returned true;
238      */
239     bool parse(ByteBuffer buffer, bool last) {
240         bool handle = false;
241         while (handle == false && BufferUtils.hasContent(buffer)) {
242             switch (_state) {
243                 case State.PREAMBLE:
244                     parsePreamble(buffer);
245                     continue;
246 
247                 case State.DELIMITER:
248                 case State.DELIMITER_PADDING:
249                 case State.DELIMITER_CLOSE:
250                     parseDelimiter(buffer);
251                     continue;
252 
253                 case State.BODY_PART:
254                     handle = parseMimePartHeaders(buffer);
255                     break;
256 
257                 case State.FIRST_OCTETS:
258                 case State.OCTETS:
259                     handle = parseOctetContent(buffer);
260                     break;
261 
262                 case State.EPILOGUE:
263                     BufferUtils.clear(buffer);
264                     break;
265 
266                 case State.END:
267                     handle = true;
268                     break;
269 
270                 default:
271                     throw new IllegalStateException("");
272 
273             }
274         }
275 
276         if (last && BufferUtils.isEmpty(buffer)) {
277             if (_state == State.EPILOGUE) {
278                 _state = State.END;
279 
280                 version(HUNT_DEBUG)
281                     tracef("messageComplete %s", this);
282 
283                 return _handler.messageComplete();
284             } else {
285                 version(HUNT_DEBUG)
286                     tracef("earlyEOF %s", this);
287 
288                 _handler.earlyEOF();
289                 return true;
290             }
291         }
292 
293         return handle;
294     }
295 
296     /* ------------------------------------------------------------------------------- */
297     private void parsePreamble(ByteBuffer buffer) {
298         if (_partialBoundary > 0) {
299             int partial = _delimiterSearch.startsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), 
300                 buffer.remaining(), _partialBoundary);
301             if (partial > 0) {
302                 if (partial == _delimiterSearch.getLength()) {
303                     buffer.position(buffer.position() + partial - _partialBoundary);
304                     _partialBoundary = 0;
305                     setState(State.DELIMITER);
306                     return;
307                 }
308 
309                 _partialBoundary = partial;
310                 BufferUtils.clear(buffer);
311                 return;
312             }
313 
314             _partialBoundary = 0;
315         }
316 
317         int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
318         if (delimiter >= 0) {
319             buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
320             setState(State.DELIMITER);
321             return;
322         }
323 
324         _partialBoundary = _delimiterSearch.endsWith(buffer.array(), 
325             buffer.arrayOffset() + buffer.position(), buffer.remaining());
326         BufferUtils.clear(buffer);
327 
328         return;
329     }
330 
331     /* ------------------------------------------------------------------------------- */
332     private void parseDelimiter(ByteBuffer buffer) {
333         while (__delimiterStates.canFind(_state) && hasNextByte(buffer)) {
334             byte b = getNextByte(buffer);
335             if (b == 0)
336                 return;
337 
338             if (b == '\n') {
339                 setState(State.BODY_PART);
340 
341                 version(HUNT_DEBUG)
342                     tracef("startPart %s", this);
343 
344                 _handler.startPart();
345                 return;
346             }
347 
348             switch (_state) {
349                 case State.DELIMITER:
350                     if (b == '-')
351                         setState(State.DELIMITER_CLOSE);
352                     else
353                         setState(State.DELIMITER_PADDING);
354                     continue;
355 
356                 case State.DELIMITER_CLOSE:
357                     if (b == '-') {
358                         setState(State.EPILOGUE);
359                         return;
360                     }
361                     setState(State.DELIMITER_PADDING);
362                     continue;
363 
364                 case State.DELIMITER_PADDING:
365                 default:
366                     continue;
367             }
368         }
369     }
370 
371     /* ------------------------------------------------------------------------------- */
    /*
     * Parse the header fields of a part, one byte at a time, driven by _fieldState.
     *
     * Completed fields are reported through handleField(). The blank line that ends
     * the headers moves the parser to State.FIRST_OCTETS and signals headerComplete.
     *
     * Returns true if the handler's headerComplete() asked for a return, false when
     * the buffer is exhausted or more data is needed.
     *
     * Throws IllegalStateException when the header length limit is exceeded or the
     * first line is a folded continuation, and IllegalCharacterException for an
     * unexpected byte after a field name.
     */
    protected bool parseMimePartHeaders(ByteBuffer buffer) {
        // Process headers
        while (_state == State.BODY_PART && hasNextByte(buffer)) {
            // process each character
            byte b = getNextByte(buffer);
            // 0 means a CR ended the buffer; wait for more data.
            if (b == 0)
                break;

            // Count every byte except the line feed. The counter is only reset when
            // a line with a non-empty value ends (see IN_VALUE below).
            if (b != LINE_FEED)
                _totalHeaderLineLength++;

            if (_totalHeaderLineLength > _maxHeaderLineLength)
                throw new IllegalStateException("Header Line Exceeded Max Length");

            switch (_fieldState) {
                case FieldState.FIELD:
                    // Start of a header line.
                    switch (b) {
                        case SPACE:
                        case TAB: {
                            // Folded field value!

                            if (_fieldName == null)
                                throw new IllegalStateException("First field folded");

                            // NOTE(review): in D an empty string compares equal to null,
                            // so an empty _fieldValue also takes the first branch.
                            if (_fieldValue == null) {
                                _string.reset();
                                _length = 0;
                            } else {
                                // Continue the previous value, joined by a single space.
                                setString(_fieldValue);
                                _string.append(' ');
                                _length++;
                                _fieldValue = null;
                            }
                            setState(FieldState.VALUE);
                            break;
                        }

                        case LINE_FEED: {
                            // Blank line: report the last field and finish the headers.
                            handleField();
                            setState(State.FIRST_OCTETS);
                            _partialBoundary = 2; // CRLF is option for empty parts

                            version(HUNT_DEBUG)
                                tracef("headerComplete %s", this);

                            if (_handler.headerComplete())
                                return true;
                            break;
                        }

                        default: {
                            // process previous header
                            handleField();

                            // New header
                            setState(FieldState.IN_NAME);
                            _string.reset();
                            _string.append(b);
                            _length = 1;
                        }
                    }
                    break;

                case FieldState.IN_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case SPACE:
                            // Ignore trailing whitespaces
                            setState(FieldState.AFTER_NAME);
                            break;

                        case LINE_FEED: {
                            // The line ended without a colon; the accumulated text is not
                            // taken and is overwritten when the next field starts.
                            version(HUNT_DEBUG)
                                tracef("Line Feed in Name %s", this);

                            handleField();
                            setState(FieldState.FIELD);
                            break;
                        }

                        default:
                            _string.append(b);
                            _length = _string.length;
                            break;
                    }
                    break;

                case FieldState.AFTER_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case LINE_FEED:
                            // Name without colon: record it with an empty value.
                            // The field state is left unchanged here.
                            _fieldName = takeString();
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;
                            break;

                        case SPACE:
                            break;

                        default:
                            throw new IllegalCharacterException(_state, b, buffer);
                    }
                    break;

                case FieldState.VALUE:
                    switch (b) {
                        case LINE_FEED:
                            // Line ended before any value byte: the value is empty.
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;

                            setState(FieldState.FIELD);
                            break;

                        case SPACE:
                        case TAB:
                            // Skip whitespace in front of the value.
                            break;

                        default:
                            _string.append(b);
                            _length = _string.length;
                            setState(FieldState.IN_VALUE);
                            break;
                    }
                    break;

                case FieldState.IN_VALUE:
                    switch (b) {
                        case SPACE:
                            // Kept in the accumulator but not counted in _length, so it is
                            // trimmed by takeString() unless more value bytes follow.
                            _string.append(b);
                            break;

                        case LINE_FEED:
                            if (_length > 0) {
                                _fieldValue = takeString();
                                _length = -1;
                                _totalHeaderLineLength = -1;
                            }
                            setState(FieldState.FIELD);
                            break;

                        default:
                            _string.append(b);
                            // Only bytes above SPACE (or with the high bit set) extend the
                            // length that is kept.
                            if (b > SPACE || b < 0)
                                _length = _string.length;
                            break;
                    }
                    break;

                default:
                    throw new IllegalStateException(_state.to!string());

            }
        }
        return false;
    }
542 
543     /* ------------------------------------------------------------------------------- */
544     private void handleField() {
545         version(HUNT_DEBUG)
546             tracef("parsedField:  _fieldName=%s _fieldValue=%s %s", _fieldName, _fieldValue, this);
547 
548         if (_fieldName != null && _fieldValue != null)
549             _handler.parsedField(_fieldName, _fieldValue);
550         _fieldName = _fieldValue = null;
551     }
552 
553     /* ------------------------------------------------------------------------------- */
554 
555     protected bool parseOctetContent(ByteBuffer buffer) {
556 
557         // Starts With
558         if (_partialBoundary > 0) {
559             int partial = _delimiterSearch.startsWith(buffer.array(), 
560                 buffer.arrayOffset() + buffer.position(), buffer.remaining(), _partialBoundary);
561             if (partial > 0) {
562                 if (partial == _delimiterSearch.getLength()) {
563                     buffer.position(buffer.position() + _delimiterSearch.getLength() - _partialBoundary);
564                     setState(State.DELIMITER);
565                     _partialBoundary = 0;
566 
567                     version(HUNT_DEBUG)
568                         tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(BufferUtils.EMPTY_BUFFER), true, this);
569 
570                     return _handler.content(BufferUtils.EMPTY_BUFFER, true);
571                 }
572 
573                 _partialBoundary = partial;
574                 BufferUtils.clear(buffer);
575                 return false;
576             } else {
577                 // output up to _partialBoundary of the search pattern
578                 ByteBuffer content = _patternBuffer.slice();
579                 if (_state == State.FIRST_OCTETS) {
580                     setState(State.OCTETS);
581                     content.position(2);
582                 }
583                 content.limit(_partialBoundary);
584                 _partialBoundary = 0;
585 
586                 version(HUNT_DEBUG)
587                     tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
588 
589                 if (_handler.content(content, false))
590                     return true;
591             }
592         }
593 
594         // Contains
595         int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
596         if (delimiter >= 0) {
597             ByteBuffer content = buffer.slice();
598             content.limit(delimiter - buffer.arrayOffset() - buffer.position());
599 
600             buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
601             setState(State.DELIMITER);
602 
603             version(HUNT_DEBUG)
604                 tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), true, this);
605 
606             return _handler.content(content, true);
607         }
608 
609         // Ends With
610         _partialBoundary = _delimiterSearch.endsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
611         if (_partialBoundary > 0) {
612             ByteBuffer content = buffer.slice();
613             content.limit(content.limit() - _partialBoundary);
614 
615             version(HUNT_DEBUG)
616                 tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
617 
618             BufferUtils.clear(buffer);
619             return _handler.content(content, false);
620         }
621 
622         // There is normal content with no delimiter
623         ByteBuffer content = buffer.slice();
624 
625         version(HUNT_DEBUG)
626             tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
627 
628         BufferUtils.clear(buffer);
629         return _handler.content(content, false);
630     }
631 
632     /* ------------------------------------------------------------------------------- */
    /**
     * Changes the top-level parser state; the transition is traced in HUNT_DEBUG builds.
     *
     * @param state the new state
     */
    private void setState(State state) {
        version(HUNT_DEBUG)
            tracef("%s --> %s", _state, state);
        _state = state;
    }
638 
639     /* ------------------------------------------------------------------------------- */
    /**
     * Changes the state of the header-field parser.
     *
     * @param state the new field state
     */
    private void setState(FieldState state) {
        // Tracing is disabled here.
        // version(HUNT_DEBUG)
        //     tracef("%s:%s --> %s", _state, _fieldState, state);
        _fieldState = state;
    }
645 
646     /* ------------------------------------------------------------------------------- */
    /**
     * @return the class name followed by the current state, in the form "Name{s=STATE}"
     */
    override
    string toString() {
        return format("%s{s=%s}", typeof(this).stringof, _state);
    }
651 
652 }
653 
654 
655 
656 /* ------------------------------------------------------------------------------- */
657 
/**
 * Raised when the parser reads a byte that is not permitted at the current position.
 *
 * The exception message contains only the offending byte; the parser state and
 * the buffer details are written to the log instead.
 */
private class IllegalCharacterException : IllegalArgumentException {
    /**
     * @param state  the parser state in which the byte was read
     * @param ch     the offending byte
     * @param buffer the buffer the byte was read from (logged, not put into the message)
     */
    private this(MultipartParser.State state, byte ch, ByteBuffer buffer) {
        super(format("Illegal character 0x%X", ch));
        // Bug #460642 - don't reveal buffers to end user
        // The values are passed as arguments instead of being pre-formatted: the
        // buffer details can contain '%', which must not be interpreted as a
        // format specifier by the logger.
        warningf("Illegal character 0x%X in state=%s for buffer %s",
            ch, state, BufferUtils.toDetailString(buffer));
    }
}
666 
667 /* ------------------------------------------------------------ */
668 /* ------------------------------------------------------------ */
669 /* ------------------------------------------------------------ */
/*
 * Event handler for MultipartParser. The methods that return bool return true if
 * the caller should stop and process the events received so far, which makes
 * MultipartParser.parse return true. If multiple callbacks are called in sequence
 * (eg headerComplete then messageComplete) from the same point in the parsing then
 * it is sufficient for the caller to process the events only once.
 *
 * All methods have empty default implementations; subclasses override the events
 * they are interested in.
 */
class MultipartParserHandler {

    this() {

    }

    // Called when the line feed ending a delimiter line has been read and a new part begins.
    void startPart() {
    }

    // Called for each completed header field of the current part.
    void parsedField(string name, string value) {
    }

    // Called after the blank line that ends the headers of a part.
    // Return true to make the parser return to its caller.
    bool headerComplete() {
        return false;
    }

    // Called with a chunk of part content; `last` is true for the final chunk of the
    // part (which may be empty). Return true to make the parser return to its caller.
    bool content(ByteBuffer item, bool last) {
        return false;
    }

    // Called when the last buffer has been consumed after the closing delimiter.
    // The return value is passed on as the result of MultipartParser.parse.
    bool messageComplete() {
        return false;
    }

    // Called when the last buffer has been consumed before the closing delimiter was seen.
    void earlyEOF() {
    }
}