HTTP v1.17.0
Loading...
Searching...
No Matches
HttpParser.cpp
Go to the documentation of this file.
1#include "HttpParser.h"
2
3#include <iterator>
4#include <format>
5#include <algorithm>
6#include <unordered_set>
7#include <functional>
8
9#include "HttpParserException.h"
10
11#ifndef __LINUX__
12#pragma warning(disable: 26800)
13#endif
14
// HTTP request methods accepted by HttpParser::parse; any other first token of a
// request line is rejected with an HttpParserException.
static const std::unordered_set<std::string> methods =
{
    // methods that usually carry no request body
    "GET", "HEAD", "DELETE", "OPTIONS", "TRACE", "CONNECT",
    // methods that usually carry a request body
    "POST", "PUT", "PATCH"
};
27
28namespace web
29{
30 const std::unordered_map<std::string_view, std::function<void(HttpParser&, std::string_view)>> HttpParser::contentTypeParsers =
31 {
32 { HttpParser::urlEncoded, [](HttpParser& parser, std::string_view data) { parser.parseQueryParameter(data); }},
33 { HttpParser::jsonEncoded, [](HttpParser& parser, std::string_view data) { parser.jsonParser.setJSONData(data); }},
34 { HttpParser::multipartEncoded, [](HttpParser& parser, std::string_view data) { parser.parseMultipart(data); }},
35 };
36
37 HttpParser::ReadOnlyBuffer::ReadOnlyBuffer(std::string_view view)
38 {
39 char* data = const_cast<char*>(view.data());
40
41 setg(data, data, data + view.size());
42 }
43
44 std::string HttpParser::mergeChunks() const
45 {
46 std::string result;
47
48 result.reserve(chunksSize);
49
50 std::ranges::for_each(chunks, [&result](const std::string& value) { result += value; });
51
52 return result;
53 }
54
55 void HttpParser::parseQueryParameter(std::string_view rawParameters)
56 {
57 std::string key;
58 std::string value;
59 bool equal = false;
60
61 if (rawParameters.find("HTTP") != std::string_view::npos)
62 {
63 rawParameters.remove_suffix(httpVersion.size());
64 }
65
66 std::string decodedParameters = web::decodeUrl(rawParameters);
67
68 for (size_t nextKeyValuePair = 0; nextKeyValuePair < decodedParameters.size(); nextKeyValuePair++)
69 {
70 if (decodedParameters[nextKeyValuePair] == '&')
71 {
72 equal = false;
73
74 queryParameters.try_emplace(move(key), move(value));
75
76 continue;
77 }
78
79 if (!equal)
80 {
81 equal = decodedParameters[nextKeyValuePair] == '=';
82
83 if (equal)
84 {
85 continue;
86 }
87 }
88
89 equal ? value += decodedParameters[nextKeyValuePair] : key += decodedParameters[nextKeyValuePair];
90 }
91
92 queryParameters.try_emplace(move(key), move(value));
93 }
94
/**
 * Splits a multipart body into parts and appends them to multiparts.
 *
 * The delimiter is "--" followed by the boundary parameter of the Content-Type header.
 * The boundary value ends at the next ';' (further header parameters) and may be quoted.
 * Every range between two consecutive delimiters becomes one part; data after the last
 * delimiter is ignored. A body that doesn't contain the delimiter produces no parts.
 *
 * @param data Multipart body
 * @exception std::runtime_error Content-Type header has no boundary parameter
 */
void HttpParser::parseMultipart(std::string_view data)
{
    constexpr std::string_view boundaryText = "boundary=";

    const std::string& contentType = headers["Content-Type"];
    const size_t index = contentType.find(boundaryText);

    if (index == std::string::npos)
    {
        throw std::runtime_error(std::format("Can't find {}", boundaryText));
    }

    std::string_view boundaryValue = std::string_view(contentType).substr(index + boundaryText.size());

    // Other header parameters may follow the boundary: boundary=abc; charset=utf-8
    if (const size_t parametersEnd = boundaryValue.find(';'); parametersEnd != std::string_view::npos)
    {
        boundaryValue = boundaryValue.substr(0, parametersEnd);
    }

    while (!boundaryValue.empty() && (boundaryValue.back() == ' ' || boundaryValue.back() == '\t'))
    {
        boundaryValue.remove_suffix(1);
    }

    // The boundary may be sent as a quoted string: boundary="abc"
    if (boundaryValue.size() >= 2 && boundaryValue.front() == '"' && boundaryValue.back() == '"')
    {
        boundaryValue.remove_prefix(1);
        boundaryValue.remove_suffix(1);
    }

    const std::string boundary = std::format("--{}", boundaryValue);
    const std::boyer_moore_horspool_searcher searcher(boundary.begin(), boundary.end());
    std::string_view::const_iterator current = std::search(data.begin(), data.end(), searcher);

    // current == data.end() means that no delimiter was found, there is nothing to step over
    while (current != data.end())
    {
        const std::string_view::const_iterator partStart = current + boundary.size();
        const std::string_view::const_iterator next = std::search(partStart, data.end(), searcher);

        if (next == data.end())
        {
            break;
        }

        multiparts.emplace_back(std::string_view(partStart, next));

        // next already points to a delimiter, no need to search for it again
        current = next;
    }
}
125
126 void HttpParser::parseContentType()
127 {
128 if (auto it = headers.find(contentTypeHeader); it != headers.end())
129 {
130 for (const auto& [encodeType, parser] : contentTypeParsers)
131 {
132 if (it->second.find(encodeType) != std::string::npos)
133 {
134 parser(*this, chunksSize ? this->mergeChunks() : body);
135
136 break;
137 }
138 }
139 }
140 }
141
/**
 * Decodes a chunked message body into chunks and accumulates its size in chunksSize.
 *
 * The chunk data is expected between the first and the last empty line (crlfcrlf) of the message.
 * Each chunk is a hexadecimal size line followed by that many bytes and a line break;
 * a chunk of size 0 ends the body. Previously stored chunks are discarded.
 *
 * @param httpMessage Whole HTTP message, headers included
 * @param isUTF8 Pass each chunk through json::utility::toUTF8JSON before storing it
 * @exception std::runtime_error Chunk data is missing, truncated or has a wrong size
 * @exception std::invalid_argument Chunk size line is not a hexadecimal number (from std::stol)
 * @exception std::out_of_range Chunk size doesn't fit into long (from std::stol)
 */
void HttpParser::parseChunkEncoded(std::string_view httpMessage, bool isUTF8)
{
    size_t chunksStart = httpMessage.find(crlfcrlf);

    if (chunksStart == std::string_view::npos)
    {
        throw std::runtime_error(std::format("Can't find chunks start in {}", httpMessage));
    }

    chunksStart += crlfcrlf.size();

    if (chunksStart >= httpMessage.size())
    {
        throw std::runtime_error(std::format("Wrong start chunks format in {}", httpMessage));
    }

    size_t chunksEnd = httpMessage.rfind(crlfcrlf);

    if (chunksEnd == std::string_view::npos)
    {
        throw std::runtime_error(std::format("Can't find chunks end in {}", httpMessage));
    }

    chunksEnd += crlfcrlf.size();

    // chunksEnd == chunksStart means that the message has a single empty line, so the terminating chunk is missing
    if (chunksEnd > httpMessage.size() || chunksEnd <= chunksStart)
    {
        throw std::runtime_error(std::format("Wrong end chunks format in {}", httpMessage));
    }

    const size_t chunksLength = chunksEnd - chunksStart;
    ReadOnlyBuffer buffer(std::string_view(httpMessage.data() + chunksStart, chunksLength));
    std::istringstream chunksData;

    // istringstream::rdbuf() has no setter, the buffer is replaced through the std::ios base
    static_cast<std::ios&>(chunksData).rdbuf(&buffer);

    // Keep chunks and chunksSize consistent with each other
    chunks.clear();
    chunksSize = 0;

    while (true)
    {
        std::string sizeLine;

        if (!std::getline(chunksData, sizeLine) || sizeLine.empty())
        {
            throw std::runtime_error(std::format("Can't find chunk size in {}", httpMessage));
        }

        if (sizeLine.back() == '\r') // \r symbol from \r\n
        {
            sizeLine.pop_back();
        }

        const long chunkSize = std::stol(sizeLine, nullptr, 16);

        // A chunk can't be larger than all chunk data, this also limits the allocation below
        if (chunkSize < 0 || static_cast<size_t>(chunkSize) > chunksLength)
        {
            throw std::runtime_error(std::format("Wrong chunk size {} in {}", sizeLine, httpMessage));
        }

        if (chunkSize == 0)
        {
            return;
        }

        std::string value(static_cast<size_t>(chunkSize), '\0');

        chunksData.read(value.data(), static_cast<std::streamsize>(value.size()));

        if (static_cast<size_t>(chunksData.gcount()) != value.size())
        {
            throw std::runtime_error(std::format("Chunk is shorter than its size {} in {}", sizeLine, httpMessage));
        }

        chunksData.ignore(constants::crlf.size());

        std::string& chunk = isUTF8 ?
            chunks.emplace_back(json::utility::toUTF8JSON(value, CP_UTF8)) :
            chunks.emplace_back(std::move(value));

        chunksSize += chunk.size();
    }
}
206
207 HttpParser::HttpParser() :
208 chunksSize(0),
209 parsed(false)
210 {
211
212 }
213
214 HttpParser::HttpParser(const std::string& httpMessage)
215 {
216 this->parse(httpMessage);
217 }
218
219 HttpParser::HttpParser(const std::vector<char>& httpMessage)
220 {
221 this->parse(std::string_view(httpMessage.data(), httpMessage.size()));
222 }
223
/**
 * Parses an HTTP request or response.
 *
 * Steps:
 *  1. Start line. If its first token contains "HTTP" the message is a response
 *     (version, code, reason phrase), otherwise a request (method, target, version).
 *  2. Header lines up to the first empty line; the first occurrence of a header wins.
 *  3. Body: chunked if Transfer-Encoding is present, otherwise everything after the
 *     empty line if Content-Length is present.
 *  4. Body interpretation according to Content-Type (see parseContentType).
 *
 * An empty message only marks the parser as not parsed.
 *
 * @param httpMessage Raw HTTP message
 * @exception exceptions::HttpParserException Unknown method, request target without '/', unsupported transfer encoding
 * @exception std::runtime_error Malformed start line, header line or body
 */
void HttpParser::parse(std::string_view httpMessage)
{
    if (httpMessage.empty())
    {
        parsed = false;

        return;
    }

    parsed = true;

    size_t prevString = 0;
    size_t nextString = httpMessage.find('\r');

    if (nextString == std::string_view::npos)
    {
        throw std::runtime_error(std::format("Can't find next string: {}", httpMessage));
    }

    std::string_view firstString(httpMessage.data(), nextString);

    if (std::string_view temp = firstString.substr(0, firstString.find(' ')); temp.find("HTTP") == std::string_view::npos)
    {
        method = temp;

        if (methods.find(method) == methods.end())
        {
            throw exceptions::HttpParserException(std::format("Wrong method: {}", method));
        }
    }
    else
    {
        // A method left from a previous parse call would make this response look like a request
        method.clear();
    }

    chunksSize = 0;

    rawData = httpMessage;

    if (method.empty())
    {
        ReadOnlyBuffer buffer(firstString);
        std::istringstream data;
        std::string responseCode;

        static_cast<std::ios&>(data).rdbuf(&buffer);

        data >> httpVersion >> responseCode;

        // The reason phrase may contain spaces ("Not Found"), so take the rest of the line
        response.second.clear();

        std::getline(data >> std::ws, response.second);

        response.first = std::stoi(responseCode);
    }
    else if (method != "CONNECT")
    {
        size_t startParameters = firstString.find('/');

        if (startParameters == std::string_view::npos)
        {
            throw exceptions::HttpParserException("Can't find /");
        }

        startParameters++;

        size_t endParameters = firstString.rfind(' ');

        if (endParameters == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find end parameters in: {}", firstString));
        }

        size_t httpStartIndex = firstString.find("HTTP");

        if (httpStartIndex == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find HTTP in: {}", firstString));
        }

        // The first '/' may belong to the HTTP version ("OPTIONS * HTTP/1.1"), then there is no valid target range
        if (startParameters > endParameters)
        {
            throw std::runtime_error(std::format("Wrong request line format: {}", firstString));
        }

        parameters = web::decodeUrl(firstString.substr(startParameters, endParameters - startParameters));
        httpVersion = std::string(firstString.begin() + httpStartIndex, firstString.end());

        // The query string is optional, a request without '?' is valid
        size_t queryStart = firstString.find('?', startParameters);

        if (queryStart != std::string_view::npos && queryStart < endParameters)
        {
            this->parseQueryParameter(firstString.substr(queryStart + 1, endParameters - queryStart - 1));
        }
    }
    else
    {
        size_t space = firstString.find(' ');

        if (space == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find first space in {}", firstString));
        }

        size_t lastSpace = firstString.rfind(' ');

        if (lastSpace == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find first last space in {}", firstString));
        }

        size_t httpStartIndex = firstString.find("HTTP");

        if (httpStartIndex == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find HTTP in: {}", firstString));
        }

        // With a single space there is no target between the method and the version
        if (space == lastSpace)
        {
            throw std::runtime_error(std::format("Wrong request line format: {}", firstString));
        }

        parameters = std::string(firstString.substr(space + 1, lastSpace - space - 1));
        httpVersion = std::string(firstString.begin() + httpStartIndex, firstString.end());
    }

    while (true)
    {
        prevString = nextString + constants::crlf.size();
        nextString = httpMessage.find('\r', prevString);

        // An empty line or the end of the message ends the header section
        if (prevString == nextString || nextString == std::string_view::npos)
        {
            break;
        }

        std::string_view next(httpMessage.data() + prevString, nextString - prevString);

        size_t colonIndex = next.find(':');

        if (colonIndex == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find ':' while parsing headers in {}", next));
        }

        size_t nonSpace = colonIndex + 1;
        std::string header(next.substr(0, colonIndex));

        // isspace requires a value representable as unsigned char
        while (next.size() > nonSpace && isspace(static_cast<unsigned char>(next[nonSpace])))
        {
            nonSpace++;
        }

        std::string value(next.substr(nonSpace));

        headers.try_emplace(std::move(header), std::move(value));
    }

    bool isUTF8 = httpMessage.find(utf8Encoded) != std::string_view::npos;

    if (auto it = headers.find(transferEncodingHeader); it != headers.end())
    {
        static const std::unordered_map<std::string, void (HttpParser::*)(std::string_view httpMessage, bool isUTF8)> transferTypeParsers =
        {
            { chunkEncoded, &HttpParser::parseChunkEncoded }
        };

        auto transferParser = transferTypeParsers.find(it->second);

        if (transferParser == transferTypeParsers.end())
        {
            throw exceptions::HttpParserException("Not supported transfer encoding: " + it->second);
        }

        std::invoke(transferParser->second, *this, httpMessage, isUTF8);
    }
    else if (headers.find(contentLengthHeader) != headers.end())
    {
        size_t bodyStart = httpMessage.find(crlfcrlf);

        // Without the empty line there is no position where the body starts
        if (bodyStart == std::string_view::npos)
        {
            throw std::runtime_error(std::format("Can't find body start in {}", httpMessage));
        }

        std::string_view rawBody = httpMessage.substr(bodyStart + crlfcrlf.size());

        if (isUTF8)
        {
            body = json::utility::toUTF8JSON(rawBody, CP_UTF8);
        }
        else
        {
            body = std::string(rawBody);
        }
    }

    this->parseContentType();
}
400
401 const std::string& HttpParser::getMethod() const
402 {
403 return method;
404 }
405
406 double HttpParser::getHTTPVersion() const
407 {
408 return std::stod(httpVersion.substr(5));
409 }
410
411 const std::string& HttpParser::getParameters() const
412 {
413 return parameters;
414 }
415
416 const std::unordered_map<std::string, std::string>& HttpParser::getQueryParameters() const
417 {
418 return queryParameters;
419 }
420
421 const std::pair<int, std::string>& HttpParser::getFullResponse() const
422 {
423 return response;
424 }
425
426 int HttpParser::getResponseCode() const
427 {
428 return response.first;
429 }
430
431 const std::string& HttpParser::getResponseMessage() const
432 {
433 return response.second;
434 }
435
436 const HeadersMap& HttpParser::getHeaders() const
437 {
438 return headers;
439 }
440
441 const std::string& HttpParser::getBody() const
442 {
443 return body;
444 }
445
446 const std::vector<std::string>& HttpParser::getChunks() const
447 {
448 return chunks;
449 }
450
451 const json::JsonParser& HttpParser::getJson() const
452 {
453 return jsonParser;
454 }
455
456 const std::string& HttpParser::getRawData() const
457 {
458 return rawData;
459 }
460
461 const std::vector<Multipart>& HttpParser::getMultiparts() const
462 {
463 return multiparts;
464 }
465
466 HttpParser::operator bool() const
467 {
468 return parsed;
469 }
470
/**
 * Writes parser back to outputStream as an HTTP message.
 *
 * Output: start line (request line if a method is stored, status line otherwise), all headers,
 * then the body or, if the body is empty, the stored chunks in chunked format.
 * The message always ends with an empty line.
 *
 * @return outputStream
 */
std::ostream& operator << (std::ostream& outputStream, const HttpParser& parser)
{
    std::string message;

    if (parser.method.empty())
    {
        const auto& [code, reason] = parser.getFullResponse();

        message = std::format("{} {} {}", parser.httpVersion, code, reason);
    }
    else if (parser.method == "CONNECT")
    {
        // The CONNECT target is stored as is, without a stripped leading '/'
        message = std::format("{} {} {}", parser.method, parser.parameters, parser.httpVersion);
    }
    else
    {
        message = std::format("{} /{} {}", parser.method, parser.parameters, parser.httpVersion);
    }

    message += constants::crlf;

    for (const auto& [name, value] : parser.headers)
    {
        std::format_to(std::back_inserter(message), "{}: {}{}", name, value, constants::crlf);
    }

    if (!parser.body.empty())
    {
        message += constants::crlf;
        message += parser.body;
    }
    else if (!parser.chunks.empty())
    {
        message += constants::crlf;

        for (const std::string& chunk : parser.chunks)
        {
            std::ostringstream sizeLine;

            sizeLine << std::hex << chunk.size() << constants::crlf;

            message += sizeLine.str();
            message += chunk;
            message += constants::crlf;
        }

        // Terminating chunk
        message += '0';
        message += HttpParser::crlfcrlf;
    }

    // NOTE(review): this also appends line breaks after a non-empty body - confirm that it is intended
    if (!message.ends_with(HttpParser::crlfcrlf))
    {
        if (message.ends_with(constants::crlf))
        {
            message += constants::crlf;
        }
        else
        {
            message += HttpParser::crlfcrlf;
        }
    }

    return outputStream << message;
}
530
531 std::istream& operator >> (std::istream& inputStream, HttpParser& parser)
532 {
533 std::istreambuf_iterator<char> it(inputStream);
534 std::string httpMessage(it, {});
535
536 if (httpMessage.size())
537 {
538 parser.parse(httpMessage);
539 }
540
541 return inputStream;
542 }
543}
std::string decodeUrl(std::string_view data)