1 module hunt.http.util.UrlEncoded;
2 
3 import hunt.container;
4 
5 import hunt.lang.Charset;
6 import hunt.lang.exception;
7 import hunt.string;
8 import hunt.util.TypeUtils;
9 
10 import hunt.logging;
11 
12 import std.conv;
13 import std.array;
14 
15 
/**
 * Handles coding of MIME "x-www-form-urlencoded".
 * <p>
 * This class handles the encoding and decoding for either the query string of a
 * URL or the content of a POST HTTP request. It is a MultiMap of string keys to
 * lists of string values.
 * </p>
 * <b>Notes</b>
 * <p>
 * UTF-8 is the default charset ({@code ENCODING}). The {@code charset}
 * parameters are accepted for API compatibility, but the current
 * implementation never transcodes: strings are processed as the raw bytes
 * they already contain.
 * </p>
 * <p>
 * This class is only partially synchronised. {@code decodeTo} locks the target
 * map while it adds entries; simple get operations are not protected from
 * concurrent updates.
 * </p>
 */
class UrlEncoded  : MultiMap!string {

    /** Charset name used whenever the caller does not supply one. */
    enum string ENCODING = StandardCharsets.UTF_8;


    /** Creates an empty parameter map. */
    this() {
    }

    /**
     * Creates a parameter map populated from an encoded query string.
     *
     * @param query the encoded parameters, e.g. <code>"a=1&amp;b=2"</code>
     */
    this(string query) {
        decodeTo(query, this, ENCODING);
    }

    /**
     * Decodes the given query string and adds its parameters to this map,
     * using the default charset.
     *
     * @param query the encoded parameters
     */
    void decode(string query) {
        decodeTo(query, this, ENCODING);
    }

    /**
     * Decodes the given query string and adds its parameters to this map.
     *
     * @param query   the encoded parameters
     * @param charset the charset name passed on to the decoder
     */
    void decode(string query, string charset) {
        decodeTo(query, this, charset);
    }

    /**
     * Encode MultiMap with % encoding for UTF8 sequences.
     *
     * @return the MultiMap as a string with % encoding
     */
    string encode() {
        return encode(ENCODING, false);
    }

    /**
     * Encode MultiMap with % encoding for arbitrary string sequences.
     *
     * @param charset the charset to use for encoding
     * @return the MultiMap as a string encoded with % encodings
     */
    string encode(string charset) {
        return encode(charset, false);
    }

    /**
     * Encode MultiMap with % encoding.
     *
     * @param charset            the charset to encode with
     * @param equalsForNullValue if True, then an '=' is always used, even
     *                           for parameters without a value. e.g. <code>"blah?a=&amp;b=&amp;c="</code>.
     * @return the MultiMap as a string encoded with % encodings
     */
    string encode(string charset, bool equalsForNullValue) {
        return encode(this, charset, equalsForNullValue);
    }

    /**
     * Encode MultiMap with % encoding.
     * <p>
     * Entries are joined with '&amp;'. A key with several values is emitted
     * once per value; a key with no values is emitted once on its own.
     * </p>
     *
     * @param map                the map to encode
     * @param charset            the charset to use for encoding (uses default encoding if null)
     * @param equalsForNullValue if True, then an '=' is always used, even
     *                           for parameters without a value. e.g. <code>"blah?a=&amp;b=&amp;c="</code>.
     * @return the MultiMap as a string encoded with % encodings.
     */
    static string encode(MultiMap!string map, string charset, bool equalsForNullValue) {
        if (charset is null)
            charset = ENCODING;

        StringBuilder result = new StringBuilder(128);

        bool firstEntry = true;
        foreach (string key, List!string list; map) {
            if (!firstEntry)
                result.append('&');
            firstEntry = false;

            // The encoded key is the same for every value, so compute it once.
            string encodedKey = encodeString(key, charset);
            int count = list.size();

            if (count == 0) {
                result.append(encodedKey);
                if (equalsForNullValue)
                    result.append('=');
                continue;
            }

            for (int i = 0; i < count; i++) {
                if (i > 0)
                    result.append('&');
                result.append(encodedKey);

                // A null string and an empty string are both "no value" here.
                string val = list.get(i);
                if (val.length > 0) {
                    result.append('=');
                    result.append(encodeString(val, charset));
                } else if (equalsForNullValue) {
                    result.append('=');
                }
            }
        }
        return result.toString();
    }

    /**
     * Decoded parameters to Map.
     * <p>
     * The content is split on '&amp;' into pairs and each pair on its first
     * '=' into key and value. A pair without '=' is added as a key with an
     * empty value. An empty key (e.g. <code>"=v"</code>) is treated as if no
     * key had been seen, so the text after the '=' becomes the key.
     * The target map is locked for the duration of the call.
     * </p>
     *
     * @param content the string containing the encoded parameters
     * @param map     the MultiMap to put parsed query parameters into
     * @param charset the charset to use for decoding (uses default encoding if empty)
     */
    static void decodeTo(string content, MultiMap!string map, string charset = ENCODING) {
        if (charset.empty)
            charset = ENCODING;

        int contentLen = cast(int) content.length;

        synchronized (map) {
            string key = null;
            int mark = -1;          // index of the last delimiter consumed
            bool encoded = false;   // current token contains '+' or '%'

            for (int i = 0; i < contentLen; i++) {
                switch (content[i]) {
                    case '&':
                        int l = i - mark - 1;
                        string value = l == 0 ? "" :
                                (encoded ? decodeString(content, mark + 1, l, charset)
                                         : content.substring(mark + 1, i));
                        mark = i;
                        encoded = false;
                        if (key.length > 0) {
                            map.add(key, value);
                        } else if (value.length > 0) {
                            // No key seen: the token itself is a key without a value.
                            map.add(value, "");
                        }
                        key = null;
                        break;
                    case '=':
                        // Only the first '=' of a pair separates key from value.
                        if (key.length > 0)
                            break;
                        key = encoded ? decodeString(content, mark + 1, i - mark - 1, charset)
                                      : content.substring(mark + 1, i);
                        mark = i;
                        encoded = false;
                        break;
                    case '+', '%':
                        encoded = true;
                        break;
                    default: break;
                }
            }

            // Handle the trailing token, which has no terminating '&'.
            if (key.length > 0) {
                int l = contentLen - mark - 1;
                string value = l == 0 ? "" :
                        (encoded ? decodeString(content, mark + 1, l, charset)
                                 : content.substring(mark + 1));
                version(HUNT_DEBUG) tracef("key=%s, value=%s", key, value);
                map.add(key, value);
            } else {
                // mark is always below contentLen here, so there is no further case.
                version(HUNT_DEBUG) tracef("empty value: content=%s, key=%s", content, key);
                key = encoded
                        ? decodeString(content, mark + 1, contentLen - mark - 1, charset)
                        : content.substring(mark + 1);
                if (!key.empty) {
                    map.add(key, "");
                }
            }
        }
    }

    /**
     * Decode string with % encoding.
     * This method makes the assumption that the majority of calls
     * will need no decoding.
     *
     * @param encoded the encoded string to decode
     * @return the decoded string
     */
    static string decodeString(string encoded) {
        return decodeString(encoded, 0, cast(int)encoded.length);
    }

    /**
     * Decode string with % encoding.
     * This method makes the assumption that the majority of calls
     * will need no decoding: nothing is allocated until the first '+' or '%'
     * is found, and an untouched segment is returned as-is.
     * <p>
     * '+' becomes a space and "%XX" becomes the byte with hex value XX. A '%'
     * with fewer than two characters after it inside the segment is replaced
     * by '?' and ends decoding. NOTE(review): behaviour for non-hex digits
     * after '%' depends on TypeUtils.parseInt - confirm whether it throws.
     * </p>
     *
     * @param encoded the encoded string to decode
     * @param offset  the offset in the encoded string to decode from
     * @param length  the length of characters in the encoded string to decode
     * @param charset the charset name; currently not consulted, decoded bytes
     *                are appended unchanged
     * @return the decoded string
     */
    static string decodeString(string encoded, int offset, int length, string charset = ENCODING) {
        StringBuffer buffer = null;

        for (int i = 0; i < length; i++) {
            char c = encoded.charAt(offset + i);

            if (c != '+' && c != '%') {
                // Plain character: only copied once a buffer has been started.
                if (buffer !is null)
                    buffer.append(c);
                continue;
            }

            if (buffer is null) {
                // First character that needs decoding: keep the untouched prefix.
                buffer = new StringBuffer(length);
                buffer.append(encoded, offset, offset + i);
            }

            if (c == '+') {
                buffer.append(' ');
                continue;
            }

            // c == '%': decode everything from here to the end of the segment
            // into raw bytes. Each step consumes at least one character and
            // emits exactly one byte, so length - i bytes are always enough.
            byte[] ba = new byte[length - i];
            int n = 0;
            while (i < length) {
                c = encoded.charAt(offset + i);
                if (c == '%') {
                    if (i + 2 < length) {
                        ba[n++] = cast(byte) TypeUtils.parseInt(encoded, offset + i + 1, 2, 16);
                        i += 3;
                    } else {
                        // Truncated escape at the end of the segment.
                        ba[n++] = cast(byte) '?';
                        i = length;
                    }
                } else {
                    ba[n++] = cast(byte) (c == '+' ? ' ' : c);
                    i++;
                }
            }
            buffer.append(cast(string)(ba[0 .. n]));
            break;
        }

        if (buffer is null) {
            if (offset == 0 && encoded.length == length)
                return encoded;
            return encoded.substring(offset, offset + length);
        }

        return buffer.toString();
    }


    /**
     * Perform URL encoding with the default charset.
     *
     * @param string the string to encode
     * @return encoded string.
     */
    static string encodeString(string string) {
        return encodeString(string, ENCODING);
    }

    /**
     * Perform URL encoding.
     * <p>
     * A space becomes '+', ASCII letters and digits are kept, and every other
     * byte is written as "%XX" with upper-case hex digits. If nothing needed
     * encoding the input string itself is returned.
     * </p>
     *
     * @param str     the string to encode
     * @param charset the charset name; currently not consulted, the bytes
     *                already held by {@code str} are encoded as they are
     * @return encoded string.
     */
    static string encodeString(string str, string charset) {
        enum string hexDigits = "0123456789ABCDEF";

        // Read-only byte view of the input; no transcoding is performed.
        const(ubyte)[] bytes = cast(const(ubyte)[]) str;

        // Worst case: every byte expands to three characters ("%XX").
        char[] encoded = new char[bytes.length * 3];
        int n = 0;
        bool noEncode = true;

        foreach (ubyte b; bytes) {
            if (b == ' ') {
                noEncode = false;
                encoded[n++] = '+';
            } else if ((b >= 'a' && b <= 'z') ||
                       (b >= 'A' && b <= 'Z') ||
                       (b >= '0' && b <= '9')) {
                encoded[n++] = cast(char) b;
            } else {
                noEncode = false;
                encoded[n++] = '%';
                encoded[n++] = hexDigits[b >> 4];
                encoded[n++] = hexDigits[b & 0x0f];
            }
        }

        if (noEncode)
            return str;

        return cast(string)(encoded[0 .. n]);
    }
}