1 module hunt.http.codec.http.decode.MultipartParser;
2 
3 import hunt.http.codec.http.model.BadMessageException;
4 
5 import hunt.io.ByteBuffer;
6 import hunt.io.BufferUtils;
7 import hunt.Exceptions;
8 import hunt.logging;
9 import hunt.text.Common;
10 import hunt.util.StringBuilder;
11 import hunt.text.SearchPattern;
12 
13 import std.algorithm;
14 import std.conv;
15 import std.format;
16 
17 
18 /* ------------------------------------------------------------ */
19 
20 /**
21  * A parser for MultiPart content type.
22  *
23  * @see <a href="https://tools.ietf.org/html/rfc2046#section-5.1">https://tools.ietf.org/html/rfc2046#section-5.1</a>
24  * @see <a href="https://tools.ietf.org/html/rfc2045">https://tools.ietf.org/html/rfc2045</a>
25  */
26 class MultipartParser {
    // Byte values of the ASCII characters the parser treats specially.
    enum byte COLON = ':';          // ends a header field name
    enum byte TAB = 0x09;           // horizontal tab
    enum byte LINE_FEED = 0x0A;     // LF, ends a line
    enum byte CARRIAGE_RETURN = 0x0D; // CR, must be followed by LF
    enum byte SPACE = 0x20;
    enum byte[] CRLF = [CARRIAGE_RETURN, LINE_FEED]; // CR LF pair as raw bytes
    enum byte SEMI_COLON = ';';
34 
    // Header field parsing states
    /**
     * Sub-state used by parseMimePartHeaders while the parser is in State.BODY_PART.
     */
    enum FieldState {
        FIELD,      // at the start of a header line
        IN_NAME,    // accumulating the field name
        AFTER_NAME, // a space followed the name; waiting for the colon
        VALUE,      // after the colon, skipping leading whitespace of the value
        IN_VALUE    // accumulating the field value
    }
43 
44     // States
    /**
     * Top-level parser state; parse() dispatches on it.
     */
    enum State {
        PREAMBLE,          // searching for the first delimiter
        DELIMITER,         // a delimiter was just matched
        DELIMITER_PADDING, // consuming the remainder of the delimiter line
        DELIMITER_CLOSE,   // one '-' seen directly after the delimiter
        BODY_PART,         // parsing the header fields of a part
        FIRST_OCTETS,      // headers complete, no content of the part handled yet
        OCTETS,            // inside the content of a part
        EPILOGUE,          // after the closing delimiter; remaining input is discarded
        END                // set by parse() once the last buffer was consumed in EPILOGUE
    }
56 
    // The states handled by parseDelimiter.
    private enum State[] __delimiterStates = [State.DELIMITER, State.DELIMITER_CLOSE, State.DELIMITER_PADDING];

    // Receiver of the parse events.
    private MultipartParserHandler _handler;
    // Compiled search pattern for the delimiter bytes held in _patternBuffer.
    private SearchPattern _delimiterSearch;

    // Name and value of the header field currently being assembled; handed to
    // the handler and cleared by handleField().
    private string _fieldName;
    private string _fieldValue;

    private State _state = State.PREAMBLE;
    private FieldState _fieldState = FieldState.FIELD;
    // Passed to SearchPattern.startsWith as the count of delimiter bytes already
    // matched (presumably at the end of the previous buffer -- confirm against
    // SearchPattern). Starts at 2, the length of the delimiter's leading CRLF.
    private int _partialBoundary = 2; // No CRLF if no preamble
    // Set by getNextByte when a CR was consumed; cleared when the LF arrives.
    private bool _cr;
    // The delimiter ("\r\n--" followed by the boundary) as bytes.
    private ByteBuffer _patternBuffer;

    // Accumulator for the field name/value being read, and the number of its
    // leading characters to keep when the string is taken (see takeString).
    private StringBuilder _string;
    private size_t _length;

    // Count of header bytes seen (line feeds excluded), checked against
    // _maxHeaderLineLength in parseMimePartHeaders; -1 is the reset value.
    private int _totalHeaderLineLength = -1;
    private int _maxHeaderLineLength = 998;
76 
77     /* ------------------------------------------------------------------------------- */
78     this(MultipartParserHandler handler, string boundary) {
79         _handler = handler;
80         _string = new StringBuilder();
81 
82         string delimiter = "\r\n--" ~ boundary;
83         //delimiter.getBytes(StandardCharsets.US_ASCII)
84         _patternBuffer = BufferUtils.toBuffer(cast(byte[])delimiter.dup); 
85         _delimiterSearch = SearchPattern.compile(_patternBuffer.array());
86     }
87 
88     void reset() {
89         _state = State.PREAMBLE;
90         _fieldState = FieldState.FIELD;
91         _partialBoundary = 2; // No CRLF if no preamble
92     }
93 
94     /* ------------------------------------------------------------------------------- */
    /**
     * @return the handler passed to the constructor
     */
    MultipartParserHandler getHandler() {
        return _handler;
    }
98 
99     /* ------------------------------------------------------------------------------- */
    /**
     * @return the current top-level parser state
     */
    State getState() {
        return _state;
    }
103 
104     /* ------------------------------------------------------------------------------- */
    /**
     * @param state the state to compare with
     * @return true if the parser is currently in the given state
     */
    bool isState(State state) {
        return _state == state;
    }
108 
109     /* ------------------------------------------------------------------------------- */
    /**
     * Classification of an input byte, as used by getNextByte:
     * ILLEGAL bytes are rejected, CR and LF mark the end of line,
     * LEGAL bytes are returned to the caller.
     */
    enum CharState {
        ILLEGAL, CR, LF, LEGAL
    }
113 
114     private __gshared CharState[] __charState;
115 
116     shared static this() {
117         // token = 1*tchar
118         // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
119         // / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
120         // / DIGIT / ALPHA
121         // ; any VCHAR, except delimiters
122         // quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
123         // qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
124         // obs-text = %x80-FF
125         // comment = "(" *( ctext / quoted-pair / comment ) ")"
126         // ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
127         // quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
128 
129         __charState = new CharState[256];
130         __charState[0..$] = CharState.ILLEGAL;
131 
132         __charState[LINE_FEED] = CharState.LF;
133         __charState[CARRIAGE_RETURN] = CharState.CR;
134         __charState[TAB] = CharState.LEGAL;
135         __charState[SPACE] = CharState.LEGAL;
136 
137         __charState['!'] = CharState.LEGAL;
138         __charState['#'] = CharState.LEGAL;
139         __charState['$'] = CharState.LEGAL;
140         __charState['%'] = CharState.LEGAL;
141         __charState['&'] = CharState.LEGAL;
142         __charState['\''] = CharState.LEGAL;
143         __charState['*'] = CharState.LEGAL;
144         __charState['+'] = CharState.LEGAL;
145         __charState['-'] = CharState.LEGAL;
146         __charState['.'] = CharState.LEGAL;
147         __charState['^'] = CharState.LEGAL;
148         __charState['_'] = CharState.LEGAL;
149         __charState['`'] = CharState.LEGAL;
150         __charState['|'] = CharState.LEGAL;
151         __charState['~'] = CharState.LEGAL;
152 
153         __charState['"'] = CharState.LEGAL;
154 
155         __charState['\\'] = CharState.LEGAL;
156         __charState['('] = CharState.LEGAL;
157         __charState[')'] = CharState.LEGAL;
158         __charState[0x21 .. 0x27 + 1] = CharState.LEGAL;
159         __charState[0x2A .. 0x5B + 1] = CharState.LEGAL;
160         __charState[0x5D .. 0x7E + 1] = CharState.LEGAL;
161         __charState[0x80 .. 0xFF + 1] = CharState.LEGAL;
162 
163         // Arrays.fill(__charState, 0x21, 0x27 + 1, CharState.LEGAL);
164         // Arrays.fill(__charState, 0x2A, 0x5B + 1, CharState.LEGAL);
165         // Arrays.fill(__charState, 0x5D, 0x7E + 1, CharState.LEGAL);
166         // Arrays.fill(__charState, 0x80, 0xFF + 1, CharState.LEGAL);
167 
168     }
169 
170     /* ------------------------------------------------------------------------------- */
    /**
     * @param buffer the buffer being parsed
     * @return the result of BufferUtils.hasContent for the buffer
     */
    private bool hasNextByte(ByteBuffer buffer) {
        return BufferUtils.hasContent(buffer);
    }
174 
175     /* ------------------------------------------------------------------------------- */
176     private byte getNextByte(ByteBuffer buffer) {
177 
178         byte ch = buffer.get();
179 
180         CharState s = __charState[0xff & ch];
181         switch (s) {
182             case CharState.LF:
183                 _cr = false;
184                 return ch;
185 
186             case CharState.CR:
187                 if (_cr)
188                     throw new BadMessageException("Bad EOL");
189 
190                 _cr = true;
191                 if (buffer.hasRemaining())
192                     return getNextByte(buffer);
193 
194                 // Can return 0 here to indicate the need for more characters,
195                 // because a real 0 in the buffer would cause a BadMessage below
196                 return 0;
197 
198             case CharState.LEGAL:
199                 if (_cr)
200                     throw new BadMessageException("Bad EOL");
201 
202                 return ch;
203 
204             case CharState.ILLEGAL:
205             default:
206                 throw new IllegalCharacterException(_state, ch, buffer);
207         }
208     }
209 
210     /* ------------------------------------------------------------------------------- */
211     private void setString(string s) {
212         _string.reset();
213         _string.append(s);
214         _length = s.length;
215     }
216 
217     /* ------------------------------------------------------------------------------- */
218     /*
219      * Mime Field strings are treated as UTF-8 as per https://tools.ietf.org/html/rfc7578#section-5.1
220      */
221     private string takeString() {
222         string s = _string.toString();
223         // trim trailing whitespace.
224         if (s.length > _length)
225             s = s.substring(0, _length);
226         _string.reset();
227         _length = -1;
228         return s;
229     }
230 
231     /* ------------------------------------------------------------------------------- */
232 
233     /**
234      * Parse until next Event.
235      *
236      * @param buffer the buffer to parse
237      * @param last   whether this buffer contains last bit of content
     * @return True if a {@link MultipartParserHandler} method was called and it returned true,
     *         or if the input ended before the message was complete
239      */
240     bool parse(ByteBuffer buffer, bool last) {
241         bool handle = false;
242         while (handle == false && BufferUtils.hasContent(buffer)) {
243             switch (_state) {
244                 case State.PREAMBLE:
245                     parsePreamble(buffer);
246                     continue;
247 
248                 case State.DELIMITER:
249                 case State.DELIMITER_PADDING:
250                 case State.DELIMITER_CLOSE:
251                     parseDelimiter(buffer);
252                     continue;
253 
254                 case State.BODY_PART:
255                     handle = parseMimePartHeaders(buffer);
256                     break;
257 
258                 case State.FIRST_OCTETS:
259                 case State.OCTETS:
260                     handle = parseOctetContent(buffer);
261                     break;
262 
263                 case State.EPILOGUE:
264                     BufferUtils.clear(buffer);
265                     break;
266 
267                 case State.END:
268                     handle = true;
269                     break;
270 
271                 default:
272                     throw new IllegalStateException("");
273 
274             }
275         }
276 
277         if (last && BufferUtils.isEmpty(buffer)) {
278             if (_state == State.EPILOGUE) {
279                 _state = State.END;
280 
281                 version(HUNT_HTTP_DEBUG)
282                     tracef("messageComplete %s", this);
283 
284                 return _handler.messageComplete();
285             } else {
286                 version(HUNT_HTTP_DEBUG)
287                     tracef("earlyEOF %s", this);
288 
289                 _handler.earlyEOF();
290                 return true;
291             }
292         }
293 
294         return handle;
295     }
296 
297     /* ------------------------------------------------------------------------------- */
298     private void parsePreamble(ByteBuffer buffer) {
299         if (_partialBoundary > 0) {
300             int partial = _delimiterSearch.startsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), 
301                 buffer.remaining(), _partialBoundary);
302             if (partial > 0) {
303                 if (partial == _delimiterSearch.getLength()) {
304                     buffer.position(buffer.position() + partial - _partialBoundary);
305                     _partialBoundary = 0;
306                     setState(State.DELIMITER);
307                     return;
308                 }
309 
310                 _partialBoundary = partial;
311                 BufferUtils.clear(buffer);
312                 return;
313             }
314 
315             _partialBoundary = 0;
316         }
317 
318         int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
319         if (delimiter >= 0) {
320             buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
321             setState(State.DELIMITER);
322             return;
323         }
324 
325         _partialBoundary = _delimiterSearch.endsWith(buffer.array(), 
326             buffer.arrayOffset() + buffer.position(), buffer.remaining());
327         BufferUtils.clear(buffer);
328 
329         return;
330     }
331 
332     /* ------------------------------------------------------------------------------- */
333     private void parseDelimiter(ByteBuffer buffer) {
334         while (__delimiterStates.canFind(_state) && hasNextByte(buffer)) {
335             byte b = getNextByte(buffer);
336             if (b == 0)
337                 return;
338 
339             if (b == '\n') {
340                 setState(State.BODY_PART);
341 
342                 version(HUNT_HTTP_DEBUG)
343                     tracef("startPart %s", this);
344 
345                 _handler.startPart();
346                 return;
347             }
348 
349             switch (_state) {
350                 case State.DELIMITER:
351                     if (b == '-')
352                         setState(State.DELIMITER_CLOSE);
353                     else
354                         setState(State.DELIMITER_PADDING);
355                     continue;
356 
357                 case State.DELIMITER_CLOSE:
358                     if (b == '-') {
359                         setState(State.EPILOGUE);
360                         return;
361                     }
362                     setState(State.DELIMITER_PADDING);
363                     continue;
364 
365                 case State.DELIMITER_PADDING:
366                 default:
367                     continue;
368             }
369         }
370     }
371 
372     /* ------------------------------------------------------------------------------- */
373     /*
374      * Parse the message headers and return true if the handler has signaled for a return
375      */
    /**
     * Parses the header fields of a part, byte by byte, driven by _fieldState.
     * Each completed field is reported through handleField(); the empty line
     * that ends the headers switches to State.FIRST_OCTETS and calls
     * headerComplete() on the handler.
     *
     * @param buffer the buffer to parse
     * @return true if headerComplete() returned true, false otherwise
     * @throws IllegalStateException if the header length limit is exceeded or
     *         the first line is a folded line
     */
    protected bool parseMimePartHeaders(ByteBuffer buffer) {
        // Process headers
        while (_state == State.BODY_PART && hasNextByte(buffer)) {
            // process each character
            byte b = getNextByte(buffer);
            // 0 means the buffer ended after a CR; wait for more input
            if (b == 0)
                break;

            // Line feeds are not counted towards the header length.
            // NOTE(review): the counter is only reset when a non-empty value
            // ends (IN_VALUE / LINE_FEED below), so lines ending in other
            // states keep accumulating -- confirm this is intended.
            if (b != LINE_FEED)
                _totalHeaderLineLength++;

            if (_totalHeaderLineLength > _maxHeaderLineLength)
                throw new IllegalStateException("Header Line Exceeded Max Length");

            switch (_fieldState) {
                // At the start of a line.
                case FieldState.FIELD:
                    switch (b) {
                        case SPACE:
                        case TAB: {
                            // Folded field value!

                            if (_fieldName == null)
                                throw new IllegalStateException("First field folded");

                            if (_fieldValue == null) {
                                _string.reset();
                                _length = 0;
                            } else {
                                // Continue the previous value, joined by a single space.
                                setString(_fieldValue);
                                _string.append(' ');
                                _length++;
                                _fieldValue = null;
                            }
                            setState(FieldState.VALUE);
                            break;
                        }

                        case LINE_FEED: {
                            // Empty line: report the last field, the headers are complete.
                            handleField();
                            setState(State.FIRST_OCTETS);
                            _partialBoundary = 2; // CRLF is optional for empty parts

                            version(HUNT_HTTP_DEBUG)
                                tracef("headerComplete %s", this);

                            if (_handler.headerComplete())
                                return true;
                            break;
                        }

                        default: {
                            // process previous header
                            handleField();

                            // New header
                            setState(FieldState.IN_NAME);
                            _string.reset();
                            _string.append(b);
                            _length = 1;
                        }
                    }
                    break;

                // Inside the field name.
                case FieldState.IN_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case SPACE:
                            // Ignore trailing whitespaces
                            setState(FieldState.AFTER_NAME);
                            break;

                        case LINE_FEED: {
                            version(HUNT_HTTP_DEBUG)
                                tracef("Line Feed in Name %s", this);

                            // A line without a colon: the name read so far is not stored.
                            handleField();
                            setState(FieldState.FIELD);
                            break;
                        }

                        default:
                            _string.append(b);
                            _length = _string.length;
                            break;
                    }
                    break;

                // Whitespace seen after the name; only a colon, more spaces
                // or a line feed may follow.
                case FieldState.AFTER_NAME:
                    switch (b) {
                        case COLON:
                            _fieldName = takeString();
                            _length = -1;
                            setState(FieldState.VALUE);
                            break;

                        case LINE_FEED:
                            // NOTE(review): unlike the IN_NAME line-feed case the
                            // field state is not set back to FIELD here -- confirm.
                            _fieldName = takeString();
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;
                            break;

                        case SPACE:
                            break;

                        default:
                            throw new IllegalCharacterException(_state, b, buffer);
                    }
                    break;

                // After the colon, before the first character of the value.
                case FieldState.VALUE:
                    switch (b) {
                        case LINE_FEED:
                            // The field has an empty value.
                            _string.reset();
                            _fieldValue = "";
                            _length = -1;

                            setState(FieldState.FIELD);
                            break;

                        case SPACE:
                        case TAB:
                            // Skip leading whitespace.
                            break;

                        default:
                            _string.append(b);
                            _length = _string.length;
                            setState(FieldState.IN_VALUE);
                            break;
                    }
                    break;

                // Inside the field value.
                case FieldState.IN_VALUE:
                    switch (b) {
                        case SPACE:
                            // Kept for now; _length is not advanced, so takeString()
                            // drops it if nothing else follows.
                            _string.append(b);
                            break;

                        case LINE_FEED:
                            if (_length > 0) {
                                _fieldValue = takeString();
                                _length = -1;
                                _totalHeaderLineLength = -1;
                            }
                            setState(FieldState.FIELD);
                            break;

                        default:
                            _string.append(b);
                            // Only bytes above SPACE (or with the high bit set,
                            // which are negative as byte) extend the kept length.
                            if (b > SPACE || b < 0)
                                _length = _string.length;
                            break;
                    }
                    break;

                default:
                    throw new IllegalStateException(_state.to!string());

            }
        }
        return false;
    }
543 
544     /* ------------------------------------------------------------------------------- */
545     private void handleField() {
546         version(HUNT_HTTP_DEBUG)
547             tracef("parsedField:  fieldName=%s fieldValue=%s %s", _fieldName, _fieldValue, this);
548 
549         if (_fieldName != null && _fieldValue != null)
550             _handler.parsedField(_fieldName, _fieldValue);
551         _fieldName = _fieldValue = null;
552     }
553 
554     /* ------------------------------------------------------------------------------- */
555 
556     protected bool parseOctetContent(ByteBuffer buffer) {
557 
558         // Starts With
559         if (_partialBoundary > 0) {
560             int partial = _delimiterSearch.startsWith(buffer.array(), 
561                 buffer.arrayOffset() + buffer.position(), buffer.remaining(), _partialBoundary);
562             if (partial > 0) {
563                 if (partial == _delimiterSearch.getLength()) {
564                     buffer.position(buffer.position() + _delimiterSearch.getLength() - _partialBoundary);
565                     setState(State.DELIMITER);
566                     _partialBoundary = 0;
567 
568                     version(HUNT_HTTP_DEBUG)
569                         tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(BufferUtils.EMPTY_BUFFER), true, this);
570 
571                     return _handler.content(BufferUtils.EMPTY_BUFFER, true);
572                 }
573 
574                 _partialBoundary = partial;
575                 BufferUtils.clear(buffer);
576                 return false;
577             } else {
578                 // output up to _partialBoundary of the search pattern
579                 ByteBuffer content = _patternBuffer.slice();
580                 if (_state == State.FIRST_OCTETS) {
581                     setState(State.OCTETS);
582                     content.position(2);
583                 }
584                 content.limit(_partialBoundary);
585                 _partialBoundary = 0;
586 
587                 version(HUNT_HTTP_DEBUG)
588                     tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
589 
590                 if (_handler.content(content, false))
591                     return true;
592             }
593         }
594 
595         // Contains
596         int delimiter = _delimiterSearch.match(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
597         if (delimiter >= 0) {
598             ByteBuffer content = buffer.slice();
599             content.limit(delimiter - buffer.arrayOffset() - buffer.position());
600 
601             buffer.position(delimiter - buffer.arrayOffset() + _delimiterSearch.getLength());
602             setState(State.DELIMITER);
603 
604             version(HUNT_HTTP_DEBUG)
605                 tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), true, this);
606 
607             return _handler.content(content, true);
608         }
609 
610         // Ends With
611         _partialBoundary = _delimiterSearch.endsWith(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
612         if (_partialBoundary > 0) {
613             ByteBuffer content = buffer.slice();
614             content.limit(content.limit() - _partialBoundary);
615 
616             version(HUNT_HTTP_DEBUG)
617                 tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
618 
619             BufferUtils.clear(buffer);
620             return _handler.content(content, false);
621         }
622 
623         // There is normal content with no delimiter
624         ByteBuffer content = buffer.slice();
625 
626         // version(HUNT_HTTP_DEBUG) {
627         //     tracef("Content=%s, Last=%s %s", BufferUtils.toDetailString(content), false, this);
628         // }
629 
630         BufferUtils.clear(buffer);
631         return _handler.content(content, false);
632     }
633 
634     /* ------------------------------------------------------------------------------- */
    /**
     * Sets the top-level parser state.
     *
     * @param state the new state
     */
    private void setState(State state) {
        // version(HUNT_HTTP_DEBUG)
        //     tracef("%s --> %s", _state, state);
        _state = state;
    }
640 
641     /* ------------------------------------------------------------------------------- */
    /**
     * Sets the header field parsing state.
     *
     * @param state the new field state
     */
    private void setState(FieldState state) {
        // version(HUNT_HTTP_DEBUG)
        //     tracef("%s:%s --> %s", _state, _fieldState, state);
        _fieldState = state;
    }
647 
648     /* ------------------------------------------------------------------------------- */
649     override
650     string toString() {
651         return format("%s{s=%s}", typeof(this).stringof, _state);
652     }
653 
654 }
655 
656 
657 
658 /* ------------------------------------------------------------------------------- */
659 
/**
 * Thrown when the input contains a byte that is not allowed at its position.
 * The exception message names only the byte; the buffer content is written
 * to the log instead.
 */
private class IllegalCharacterException : IllegalArgumentException {
    /**
     * @param state  the parser state in which the byte was read
     * @param ch     the offending byte
     * @param buffer the buffer the byte was read from (logged, not exposed)
     */
    private this(MultipartParser.State state, byte ch, ByteBuffer buffer) {
        super(format("Illegal character 0x%X", ch));
        // Bug #460642 - don't reveal buffers to end user
        // The buffer detail is passed as an argument, never as part of the
        // format string: it holds untrusted input and may contain '%'.
        warningf("Illegal character 0x%X in state=%s for buffer %s",
            ch, state, BufferUtils.toDetailString(buffer));
    }
}
668 
669 
670 /*
671  * Event Handler interface These methods return true if the caller should process the events so far received (eg return from parseNext and call
672  * HttpChannel.handle). If multiple callbacks are called in sequence (eg headerComplete then messageComplete) from the same point in the parsing then it is
673  * sufficient for the caller to process the events only once.
674  */
/**
 * Receiver of the events produced by MultipartParser. Every method has an
 * empty default implementation; the bool-returning ones return false.
 */
class MultipartParserHandler {

    this() {

    }
    
    /** Called when the line feed ending a delimiter line was read and a new part begins. */
    void startPart() {
    }

    /**
     * Called for each header field of a part.
     *
     * @param name  the field name
     * @param value the field value
     */
    void parsedField(string name, string value) {
    }

    /**
     * Called when the empty line ending the headers of a part was read.
     *
     * @return true to make the parser return to its caller
     */
    bool headerComplete() {
        return false;
    }

    /**
     * Called with content of the current part.
     *
     * @param item the content bytes; may be empty
     * @param last true if the delimiter ending the part was found
     * @return true to make the parser return to its caller
     */
    bool content(ByteBuffer item, bool last) {
        return false;
    }

    /**
     * Called when the last buffer was consumed after the closing delimiter.
     *
     * @return the value parse() returns to its caller
     */
    bool messageComplete() {
        return false;
    }

    /** Called when the last buffer was consumed before the closing delimiter was seen. */
    void earlyEOF() {
    }
}