3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "httpfetch.h"
21 #include "porting.h" // for sleep_ms(), get_sysinfo(), secure_rand_fill_buf()
28 #include "network/socket.h" // for select()
29 #include "threading/event.h"
31 #include "exceptions.h"
34 #include "util/container.h"
35 #include "util/thread.h"
// Guards g_httpfetch_results; the functions in this file take it through
// MutexAutoLock before reading or modifying the map.
40 std::mutex g_httpfetch_mutex;
// One queue of delivered fetch results per allocated caller ID.
41 std::map<unsigned long, std::queue<HTTPFetchResult>> g_httpfetch_results;
// Random source used by httpfetch_caller_alloc_secure(); (re)seeded in
// httpfetch_init() of the cURL implementation.
42 PcgRandom g_callerid_randomness;
// Builds a request with default settings: `timeout` is read from the
// "curl_timeout" setting, `connect_timeout` is initialized from `timeout`, and
// `useragent` is "<PROJECT_NAME_C>/<g_version_hash> (<porting::get_sysinfo()>)".
// NOTE(review): connect_timeout(timeout) is only well-defined if `timeout` is
// declared before `connect_timeout` in the class - confirm in httpfetch.h.
44 HTTPFetchRequest::HTTPFetchRequest() :
45 timeout(g_settings->getS32("curl_timeout")), connect_timeout(timeout),
46 useragent(std::string(PROJECT_NAME_C "/") + g_version_hash + " (" +
47 porting::get_sysinfo() + ")")
// Appends a finished fetch result to the result queue of the caller that
// requested it, so that httpfetch_async_get() can hand it out later.
// Results whose caller is HTTPFETCH_DISCARD are not stored.
51 static void httpfetch_deliver_result(const HTTPFetchResult &fetch_result)
53 unsigned long caller = fetch_result.caller;
54 if (caller != HTTPFETCH_DISCARD) {
55 MutexAutoLock lock(g_httpfetch_mutex);
// NOTE(review): operator[] creates the queue if the caller has no entry, so a
// result arriving for an already-freed caller would re-create its entry -
// confirm that cannot happen or is acceptable.
56 g_httpfetch_results[caller].push(fetch_result);
// Forward declaration: httpfetch_caller_free() below needs it, and the
// definitions appear later in this file (one in the cURL implementation, one
// in the dummy implementation).
60 static void httpfetch_request_clear(unsigned long caller);
// Allocates a caller ID: scans IDs starting at HTTPFETCH_DISCARD + 1 (wrapping
// through the unsigned range until HTTPFETCH_DISCARD is reached again) and
// picks the first one that has no entry in g_httpfetch_results, creating an
// empty result queue for it. Reports a fatal error if no ID is free.
// The whole scan runs with g_httpfetch_mutex held.
62 unsigned long httpfetch_caller_alloc()
64 MutexAutoLock lock(g_httpfetch_mutex);
66 // Check each caller ID except HTTPFETCH_DISCARD
67 const unsigned long discard = HTTPFETCH_DISCARD;
68 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
69 std::map<unsigned long, std::queue<HTTPFetchResult>>::iterator it =
70 g_httpfetch_results.find(caller);
71 if (it == g_httpfetch_results.end()) {
72 verbosestream << "httpfetch_caller_alloc: allocating " << caller
74 // Access element to create it
75 g_httpfetch_results[caller];
// Only reached when the loop found no unused ID.
80 FATAL_ERROR("httpfetch_caller_alloc: ran out of caller IDs");
// Allocates a caller ID that is drawn from g_callerid_randomness instead of
// being the next free number, and creates an empty result queue for it.
// The draw is repeated while the candidate already has an entry in
// g_httpfetch_results; when the retry budget is exhausted a fatal error is
// raised and HTTPFETCH_DISCARD is returned.
// Runs with g_httpfetch_mutex held.
84 unsigned long httpfetch_caller_alloc_secure()
86 MutexAutoLock lock(g_httpfetch_mutex);
88 // Generate random caller IDs and make sure they're not
89 // already used or equal to HTTPFETCH_DISCARD
90 // Give up after 100 tries to prevent infinite loop
// Two generator outputs are combined into one 64-bit value.
// NOTE(review): if `caller` has the function's return type (unsigned long),
// the upper half is lost on platforms where unsigned long is 32 bits - confirm.
95 caller = (((u64)g_callerid_randomness.next()) << 32) |
96 g_callerid_randomness.next();
99 FATAL_ERROR("httpfetch_caller_alloc_secure: ran out of caller "
101 return HTTPFETCH_DISCARD;
// NOTE(review): this condition only rejects IDs that are already in the map;
// no comparison against HTTPFETCH_DISCARD is visible here although the comment
// above promises one - confirm whether that check exists elsewhere.
103 } while (g_httpfetch_results.find(caller) != g_httpfetch_results.end());
105 verbosestream << "httpfetch_caller_alloc_secure: allocating " << caller
108 // Access element to create it
109 g_httpfetch_results[caller];
// Releases a caller ID: first asks for all of the caller's requests to be
// cleared, then removes the caller's result queue (and any results still in
// it). The queue removal is skipped for HTTPFETCH_DISCARD.
113 void httpfetch_caller_free(unsigned long caller)
115 verbosestream << "httpfetch_caller_free: freeing " << caller << std::endl;
// Called before g_httpfetch_mutex is taken below.
117 httpfetch_request_clear(caller);
118 if (caller != HTTPFETCH_DISCARD) {
119 MutexAutoLock lock(g_httpfetch_mutex);
120 g_httpfetch_results.erase(caller);
// Retrieves the oldest delivered result for `caller`, if there is one.
//   caller       - caller ID whose result queue is inspected
//   fetch_result - receives a copy of the front result, which is then removed
//                  from the queue
// The bool return presumably tells whether a result was written (caller known
// and queue non-empty) - TODO confirm against the full function.
// Runs with g_httpfetch_mutex held.
124 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetch_result)
126 MutexAutoLock lock(g_httpfetch_mutex);
128 // Check that caller exists
129 std::map<unsigned long, std::queue<HTTPFetchResult>>::iterator it =
130 g_httpfetch_results.find(caller);
131 if (it == g_httpfetch_results.end())
134 // Check that result queue is nonempty
135 std::queue<HTTPFetchResult> &caller_results = it->second;
136 if (caller_results.empty())
// Copy out the front element before popping it.
140 fetch_result = caller_results.front();
141 caller_results.pop();
146 #include <curl/curl.h>
149 USE_CURL is on: use cURL based httpfetch implementation
// Write callback installed via CURLOPT_WRITEFUNCTION for fetches whose data is
// kept. `userdata` is the std::ostringstream registered with CURLOPT_WRITEDATA;
// the size * nmemb bytes at `ptr` are appended to it.
152 static size_t httpfetch_writefunction(
153 char *ptr, size_t size, size_t nmemb, void *userdata)
155 std::ostringstream *stream = (std::ostringstream *)userdata;
156 size_t count = size * nmemb;
157 stream->write(ptr, count);
// Write callback installed via CURLOPT_WRITEFUNCTION when received data is not
// wanted (HTTPFETCH_DISCARD requests, and handles being returned to the pool).
// It is registered with a NULL CURLOPT_WRITEDATA, so `userdata` carries nothing.
161 static size_t httpfetch_discardfunction(
162 char *ptr, size_t size, size_t nmemb, void *userdata)
// Idle cURL easy handles that are available for reuse.
169 std::list<CURL *> handles;
172 CurlHandlePool() = default;
// Releases every pooled handle with curl_easy_cleanup().
176 for (std::list<CURL *>::iterator it = handles.begin();
177 it != handles.end(); ++it) {
178 curl_easy_cleanup(*it);
// Handing out a handle: a new one is created with curl_easy_init() when the
// pool is empty (a NULL result is logged), otherwise the front pooled handle
// is taken.
184 if (handles.empty()) {
185 curl = curl_easy_init();
187 errorstream << "curl_easy_init returned NULL"
191 curl = handles.front();
// Returns a handle to the pool so a later fetch can reuse it.
196 void free(CURL *handle)
199 handles.push_back(handle);
// One fetch in progress: owns a copy of the request, the result being built,
// the buffer that receives the response body, and the cURL resources attached
// to the easy handle for the duration of the fetch.
203 class HTTPFetchOngoing
// Takes an easy handle from `pool` and configures it from `request`.
206 HTTPFetchOngoing(const HTTPFetchRequest &request, CurlHandlePool *pool);
// Starts the transfer; with a multi handle the fetch is registered with it,
// otherwise it is performed directly with curl_easy_perform().
209 CURLcode start(CURLM *multi);
// Fills in and returns the result for a transfer that ended with code `res`.
// The returned pointer refers to the `result` member of this object.
210 const HTTPFetchResult *complete(CURLcode res);
212 const HTTPFetchRequest &getRequest() const { return request; };
213 const CURL *getEasyHandle() const { return curl; };
// Pool the easy handle was taken from.
216 CurlHandlePool *pool;
// Private copy of the request; cURL is handed pointers into its strings.
219 HTTPFetchRequest request;
220 HTTPFetchResult result;
// Response body sink used by httpfetch_writefunction.
221 std::ostringstream oss;
// Extra request headers in cURL's list form; freed in the destructor.
222 struct curl_slist *http_header;
// Prepares a fetch: copies the request, obtains an easy handle from the pool
// and sets every cURL option needed for the transfer (fixed options, options
// derived from settings, and options derived from the request). The transfer
// itself is not started here; see start().
// NOTE(review): curl_easy_setopt() is variadic and libcurl documents the
// numeric options used below as taking `long` (or curl_off_t). Several calls
// pass an int literal, a bool or a size_t instead - worth checking against the
// libcurl documentation for platforms where those types differ in size.
226 HTTPFetchOngoing::HTTPFetchOngoing(
227 const HTTPFetchRequest &request_, CurlHandlePool *pool_) :
229 curl(NULL), multi(NULL), request(request_), result(request_),
230 oss(std::ios::binary), http_header(NULL), post(NULL)
232 curl = pool->alloc();
237 // Set static cURL options
238 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
// With FAILONERROR an HTTP error status makes the transfer itself fail.
239 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
240 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
241 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 3);
242 curl_easy_setopt(curl, CURLOPT_ENCODING, "gzip");
// Optional outgoing interface/address taken from the "bind_address" setting.
244 std::string bind_address = g_settings->get("bind_address");
245 if (!bind_address.empty()) {
246 curl_easy_setopt(curl, CURLOPT_INTERFACE, bind_address.c_str());
// Resolve to IPv4 addresses only when IPv6 is disabled in the settings.
249 if (!g_settings->getBool("enable_ipv6")) {
250 curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
253 #if LIBCURL_VERSION_NUM >= 0x071304
254 // Restrict protocols so that curl vulnerabilities in
255 // other protocols don't affect us.
256 // These settings were introduced in curl 7.19.4.
258 CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FTP | CURLPROTO_FTPS;
// The same set applies to the initial URL and to redirect targets.
259 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
260 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
263 // Set cURL options based on HTTPFetchRequest
264 curl_easy_setopt(curl, CURLOPT_URL, request.url.c_str());
// Both timeouts are handed to cURL as milliseconds.
265 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, request.timeout);
266 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS, request.connect_timeout);
268 if (!request.useragent.empty())
269 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
271 // Set up a write callback that writes to the
272 // ostringstream ongoing->oss, unless the data
273 // is to be discarded
274 if (request.caller == HTTPFETCH_DISCARD) {
275 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, httpfetch_discardfunction);
276 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
278 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, httpfetch_writefunction);
279 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
// Request body selection:
//   no post_fields and no post_data -> plain GET
//   multipart                       -> multipart form built from post_fields
//   post_fields only                -> url-encoded form string
//   otherwise                       -> raw post_data
282 // Set POST (or GET) data
283 if (request.post_fields.empty() && request.post_data.empty()) {
284 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
285 } else if (request.multipart) {
286 curl_httppost *last = NULL;
287 for (StringMap::iterator it = request.post_fields.begin();
288 it != request.post_fields.end(); ++it) {
// PTRNAME/PTRCONTENTS pass pointers rather than copies, hence the lifetime
// remark after the loop.
289 curl_formadd(&post, &last, CURLFORM_NAMELENGTH, it->first.size(),
290 CURLFORM_PTRNAME, it->first.c_str(),
291 CURLFORM_CONTENTSLENGTH, it->second.size(),
292 CURLFORM_PTRCONTENTS, it->second.c_str(),
295 curl_easy_setopt(curl, CURLOPT_HTTPPOST, post);
296 // request.post_fields must now *never* be
297 // modified until CURLOPT_HTTPPOST is cleared
298 } else if (request.post_data.empty()) {
299 curl_easy_setopt(curl, CURLOPT_POST, 1);
// Builds the form body in `str` from the url-encoded names and values.
301 for (auto &post_field : request.post_fields) {
304 str += urlencode(post_field.first);
306 str += urlencode(post_field.second);
// COPYPOSTFIELDS makes cURL keep its own copy, so the local string may go away.
308 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, str.size());
309 curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, str.c_str());
311 curl_easy_setopt(curl, CURLOPT_POST, 1);
312 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, request.post_data.size());
313 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, request.post_data.c_str());
314 // request.post_data must now *never* be
315 // modified until CURLOPT_POSTFIELDS is cleared
317 // Set additional HTTP headers
318 for (const std::string &extra_header : request.extra_headers) {
319 http_header = curl_slist_append(http_header, extra_header.c_str());
321 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_header);
// Certificate verification is switched off only when the "curl_verify_cert"
// setting is false.
323 if (!g_settings->getBool("curl_verify_cert")) {
324 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
// Starts the transfer.
//   multi_ - multi handle to register the fetch with for asynchronous
//            operation; callers doing a synchronous fetch pass NULL, in which
//            case the transfer is run to completion with curl_easy_perform().
// Returns the cURL result code; CURLE_FAILED_INIT is used both for the early
// failure exit and when curl_multi_add_handle() reports an error.
328 CURLcode HTTPFetchOngoing::start(CURLM *multi_)
331 return CURLE_FAILED_INIT;
334 // Easy interface (sync)
335 return curl_easy_perform(curl);
338 // Multi interface (async)
339 CURLMcode mres = curl_multi_add_handle(multi_, curl);
340 if (mres != CURLM_OK) {
341 errorstream << "curl_multi_add_handle"
342 << " returned error code " << mres << std::endl;
343 return CURLE_FAILED_INIT;
// Remembered only after a successful add, so the destructor removes the
// handle from the multi handle exactly when it was added.
345 multi = multi_; // store for curl_multi_remove_handle
// Finalizes the result of a transfer that ended with code `res`:
// success/timeout flags, the collected response body and the response code
// (0 when it cannot be obtained). Failures are logged with the URL and cURL's
// error text.
// The return type is a pointer to HTTPFetchResult; callers dereference it
// immediately.
349 const HTTPFetchResult *HTTPFetchOngoing::complete(CURLcode res)
351 result.succeeded = (res == CURLE_OK);
352 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
// Stays empty for discarded fetches, which never write into oss.
353 result.data = oss.str();
355 // Get HTTP/FTP response code
356 result.response_code = 0;
357 if (curl && (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
358 &result.response_code) != CURLE_OK)) {
359 // We failed to get a return code, make sure it is still 0
360 result.response_code = 0;
// NOTE(review): the message says "not found" for every kind of failure
// (timeouts, connection errors, ...); the cURL error text that follows is the
// reliable part.
363 if (res != CURLE_OK) {
364 errorstream << request.url << " not found (" << curl_easy_strerror(res)
366 << " (response code " << result.response_code << ")"
// Tears the fetch down: detaches the easy handle from the multi handle,
// clears every option that points into this object (write target, POST data,
// header list, multipart form) and frees the header list, so the handle can be
// reused safely afterwards.
373 HTTPFetchOngoing::~HTTPFetchOngoing()
376 CURLMcode mres = curl_multi_remove_handle(multi, curl);
377 if (mres != CURLM_OK) {
378 errorstream << "curl_multi_remove_handle"
379 << " returned error code " << mres << std::endl;
383 // Set safe options for the reusable cURL handle
384 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, httpfetch_discardfunction);
385 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
386 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
// The option is cleared before the list it referred to is freed.
388 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
389 curl_slist_free_all(http_header);
392 curl_easy_setopt(curl, CURLOPT_HTTPPOST, NULL);
396 // Store the cURL handle for reuse
// Worker thread for asynchronous fetches. Other threads post Request objects
// into m_requests (requestFetch / requestClear / wake-up); the thread turns
// fetch requests into HTTPFetchOngoing objects, drives them through a cURL
// multi handle and passes finished results to httpfetch_deliver_result().
400 class CurlFetchThread : public Thread
// Payload of a Request: the fetch to start, or - for clear requests - only
// its `caller` field is filled in.
413 HTTPFetchRequest fetch_request;
// Inbox written by other threads and drained by this thread.
418 MutexedQueue<Request> m_requests;
// Upper bound on simultaneously ongoing fetches; the constructor makes it >= 1.
419 size_t m_parallel_limit;
421 // Variables exclusively used within thread
// Fetches currently registered with the multi handle.
422 std::vector<HTTPFetchOngoing *> m_all_ongoing;
// Fetches accepted but not yet started because of m_parallel_limit.
423 std::list<HTTPFetchRequest> m_queued_fetches;
// parallel_limit values below 1 are replaced by 1.
426 CurlFetchThread(int parallel_limit) : Thread("CurlFetch")
428 if (parallel_limit >= 1)
429 m_parallel_limit = parallel_limit;
431 m_parallel_limit = 1;
// Posts a fetch request (a copy of `fetch_request`) to the thread.
434 void requestFetch(const HTTPFetchRequest &fetch_request)
438 req.fetch_request = fetch_request;
440 m_requests.push_back(req);
// Posts a request to drop all fetches of `caller`. `event` may be NULL;
// processRequest() checks it once the request has been handled.
443 void requestClear(unsigned long caller, Event *event)
447 req.fetch_request.caller = caller;
449 m_requests.push_back(req);
// Posts an empty wake-up request so that a thread blocked waiting on the
// inbox returns.
455 req.type = RT_WAKEUP;
457 m_requests.push_back(req);
461 // Handle a request from some other thread
462 // E.g. new fetch; clear fetches for one caller; wake up
463 void processRequest(const Request &req)
465 if (req.type == RT_FETCH) {
466 // New fetch, queue until there are less
467 // than m_parallel_limit ongoing fetches
468 m_queued_fetches.push_back(req.fetch_request);
470 // see processQueued() for what happens next
472 } else if (req.type == RT_CLEAR) {
473 unsigned long caller = req.fetch_request.caller;
475 // Abort all ongoing fetches for the caller
// erase() returns the iterator to continue from, which is why the for
// statement itself has no increment.
476 for (std::vector<HTTPFetchOngoing *>::iterator it =
477 m_all_ongoing.begin();
478 it != m_all_ongoing.end();) {
479 if ((*it)->getRequest().caller == caller) {
481 it = m_all_ongoing.erase(it);
487 // Also abort all queued fetches for the caller
488 for (std::list<HTTPFetchRequest>::iterator it =
489 m_queued_fetches.begin();
490 it != m_queued_fetches.end();) {
491 if ((*it).caller == caller)
492 it = m_queued_fetches.erase(it);
496 } else if (req.type == RT_WAKEUP) {
497 // Wakeup: Nothing to do, thread is awake at this point
// The requester may have attached an event (see requestClear); it is handled
// only after the request has been fully processed.
500 if (req.event != NULL)
504 // Start new ongoing fetches if m_parallel_limit allows
505 void processQueued(CurlHandlePool *pool)
507 while (m_all_ongoing.size() < m_parallel_limit &&
508 !m_queued_fetches.empty()) {
509 HTTPFetchRequest request = m_queued_fetches.front();
510 m_queued_fetches.pop_front();
512 // Create ongoing fetch data and make a cURL handle
513 // Set cURL options based on HTTPFetchRequest
514 HTTPFetchOngoing *ongoing = new HTTPFetchOngoing(request, pool);
516 // Initiate the connection (curl_multi_add_handle)
517 CURLcode res = ongoing->start(m_multi);
518 if (res == CURLE_OK) {
519 m_all_ongoing.push_back(ongoing);
// A fetch that could not be started is reported to its caller right away as
// a failed result.
521 httpfetch_deliver_result(*ongoing->complete(res));
527 // Process CURLMsg (indicates completion of a fetch)
528 void processCurlMessage(CURLMsg *msg)
530 // Determine which ongoing fetch the message pertains to
// Linear search by easy handle over the ongoing fetches.
533 for (i = 0; i < m_all_ongoing.size(); ++i) {
534 if (m_all_ongoing[i]->getEasyHandle() == msg->easy_handle) {
539 if (msg->msg == CURLMSG_DONE && found) {
540 // m_all_ongoing[i] succeeded or failed.
541 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
542 httpfetch_deliver_result(*ongoing->complete(msg->data.result));
544 m_all_ongoing.erase(m_all_ongoing.begin() + i);
548 // Wait for a request from another thread, or timeout elapses
549 void waitForRequest(long timeout)
// Blocks on the inbox only when nothing is queued locally.
551 if (m_queued_fetches.empty()) {
553 Request req = m_requests.pop_front(timeout);
// Presumably raised by pop_front() when the timeout elapses without a
// request - confirm against MutexedQueue.
555 } catch (ItemNotFoundException &e) {
560 // Wait until some IO happens, or timeout elapses
561 void waitForIO(long timeout)
// -1 means "no timeout suggested"; replaced by `timeout` further down.
567 long select_timeout = -1;
568 struct timeval select_tv;
571 FD_ZERO(&read_fd_set);
572 FD_ZERO(&write_fd_set);
573 FD_ZERO(&exc_fd_set);
// Ask cURL which file descriptors it wants watched.
575 mres = curl_multi_fdset(m_multi, &read_fd_set, &write_fd_set, &exc_fd_set,
577 if (mres != CURLM_OK) {
578 errorstream << "curl_multi_fdset"
579 << " returned error code " << mres << std::endl;
// Ask cURL how long it is willing to wait before being called again.
583 mres = curl_multi_timeout(m_multi, &select_timeout);
584 if (mres != CURLM_OK) {
585 errorstream << "curl_multi_timeout"
586 << " returned error code " << mres << std::endl;
590 // Limit timeout so new requests get through
591 if (select_timeout < 0 || select_timeout > timeout)
592 select_timeout = timeout;
594 if (select_timeout > 0) {
595 // in Winsock it is forbidden to pass three empty
596 // fd_sets to select(), so in that case use sleep_ms
// select_timeout is in milliseconds; split into seconds and microseconds.
598 select_tv.tv_sec = select_timeout / 1000;
599 select_tv.tv_usec = (select_timeout % 1000) * 1000;
600 int retval = select(max_fd + 1, &read_fd_set,
601 &write_fd_set, &exc_fd_set, &select_tv);
// The error code is read from WSAGetLastError() in one variant and from
// errno in the other.
604 errorstream << "select returned error code "
605 << WSAGetLastError() << std::endl;
607 errorstream << "select returned error code "
608 << errno << std::endl;
612 sleep_ms(select_timeout);
// Thread body: creates the multi handle, then loops until a stop is
// requested, and finally releases everything that is still pending.
621 m_multi = curl_multi_init();
622 if (m_multi == NULL) {
623 errorstream << "curl_multi_init returned NULL\n";
627 FATAL_ERROR_IF(!m_all_ongoing.empty(), "Expected empty");
629 while (!stopRequested()) {
630 BEGIN_DEBUG_EXCEPTION_HANDLER
633 Handle new async requests
// Drain the inbox completely, then start as many queued fetches as the
// parallel limit allows.
636 while (!m_requests.empty()) {
637 Request req = m_requests.pop_frontNoEx();
640 processQueued(&pool);
643 Handle ongoing async requests
// Repeat for as long as cURL asks to be called again immediately.
646 int still_ongoing = 0;
647 while (curl_multi_perform(m_multi, &still_ongoing) ==
648 CURLM_CALL_MULTI_PERFORM)
652 Handle completed async requests
// Fewer running transfers than tracked fetches means at least one finished.
654 if (still_ongoing < (int)m_all_ongoing.size()) {
657 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
658 while (msg != NULL) {
659 processCurlMessage(msg);
660 msg = curl_multi_info_read(
661 m_multi, &msgs_in_queue);
666 If there are ongoing requests, wait for data
667 (with a timeout of 100ms so that new requests
670 If no ongoing requests, wait for a new request.
671 (Possibly an empty request that signals
672 that the thread should be stopped.)
674 if (m_all_ongoing.empty())
// NOTE(review): the unit of this timeout is defined by
// MutexedQueue::pop_front - confirm; the value is evidently meant as
// "effectively forever".
675 waitForRequest(100000000);
679 END_DEBUG_EXCEPTION_HANDLER
682 // Call curl_multi_remove_handle and cleanup easy handles
683 for (HTTPFetchOngoing *i : m_all_ongoing) {
686 m_all_ongoing.clear();
688 m_queued_fetches.clear();
690 CURLMcode mres = curl_multi_cleanup(m_multi);
691 if (mres != CURLM_OK) {
692 errorstream << "curl_multi_cleanup"
693 << " returned error code " << mres << std::endl;
// The single fetch thread; created in httpfetch_init() and deleted in
// httpfetch_cleanup(). NULL until httpfetch_init() has run.
700 CurlFetchThread *g_httpfetch_thread = NULL;
// Initializes the cURL-based implementation: global cURL init (fatal on
// failure), creation of the fetch thread object, and seeding of the caller-ID
// random generator from the platform's secure random source.
//   parallel_limit - maximum number of simultaneous fetches; the thread's
//                    constructor raises values below 1 to 1.
// The thread object is only created here; httpfetch_async() starts it on
// first use.
702 void httpfetch_init(int parallel_limit)
704 verbosestream << "httpfetch_init: parallel_limit=" << parallel_limit << std::endl;
706 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
707 FATAL_ERROR_IF(res != CURLE_OK, "CURL init failed");
709 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
711 // Initialize g_callerid_randomness for httpfetch_caller_alloc_secure
// Fills two u64 values and uses them as the generator's two seed arguments.
713 porting::secure_rand_fill_buf(randbuf, sizeof(u64) * 2);
714 g_callerid_randomness = PcgRandom(randbuf[0], randbuf[1]);
// Shuts the cURL-based implementation down: stops and joins the fetch thread,
// deletes it, then releases cURL's global state.
// NOTE(review): g_httpfetch_thread is dereferenced without a NULL check and is
// not reset after delete - this relies on httpfetch_init() having been called
// and on no further httpfetch_* calls afterwards.
717 void httpfetch_cleanup()
719 verbosestream << "httpfetch_cleanup: cleaning up" << std::endl;
// Order matters: request the stop first, then wake the thread in case it is
// blocked waiting for a request, then wait for it to finish.
721 g_httpfetch_thread->stop();
722 g_httpfetch_thread->requestWakeUp();
723 g_httpfetch_thread->wait();
724 delete g_httpfetch_thread;
726 curl_global_cleanup();
// Queues `fetch_request` for asynchronous processing. The fetch thread is
// started lazily here if it is not running yet; the request is posted first,
// so it is already in the thread's inbox when the thread begins.
729 void httpfetch_async(const HTTPFetchRequest &fetch_request)
731 g_httpfetch_thread->requestFetch(fetch_request);
732 if (!g_httpfetch_thread->isRunning())
733 g_httpfetch_thread->start();
// Asks the fetch thread to drop every queued and ongoing fetch of `caller`.
// When the thread is running, an event is passed along with the request;
// when it is not running, the request is only queued (with no event) and is
// handled once the thread runs.
736 static void httpfetch_request_clear(unsigned long caller)
738 if (g_httpfetch_thread->isRunning()) {
740 g_httpfetch_thread->requestClear(caller, &event);
743 g_httpfetch_thread->requestClear(caller, NULL);
// Performs `fetch_request` synchronously on the calling thread, without the
// fetch thread, and stores the outcome in `fetch_result`.
//   fetch_request - what to fetch
//   fetch_result  - receives a copy of the completed result (success or failure)
747 void httpfetch_sync(const HTTPFetchRequest &fetch_request, HTTPFetchResult &fetch_result)
749 // Create ongoing fetch data and make a cURL handle
750 // Set cURL options based on HTTPFetchRequest
752 HTTPFetchOngoing ongoing(fetch_request, &pool);
753 // Do the fetch (curl_easy_perform)
// A NULL multi handle selects the blocking curl_easy_perform() path in start().
754 CURLcode res = ongoing.start(NULL);
755 // Update fetch result
// Copied out because complete() returns a pointer into `ongoing`, which is
// destroyed when this function returns.
756 fetch_result = *ongoing.complete(res);
764 Dummy httpfetch implementation that always returns an error.
// Dummy-implementation counterpart of httpfetch_init().
// NOTE(review): presumably does nothing, since there is no cURL state to set
// up in this variant - confirm against the function body.
767 void httpfetch_init(int parallel_limit)
// Dummy-implementation counterpart of httpfetch_cleanup().
// NOTE(review): presumably does nothing - confirm against the function body.
771 void httpfetch_cleanup()
// Dummy-implementation variant: no fetch is attempted. Logs the URL that
// could not be fetched and immediately delivers a result constructed from the
// request alone to the request's caller, so code polling httpfetch_async_get()
// still receives an answer.
775 void httpfetch_async(const HTTPFetchRequest &fetch_request)
777 errorstream << "httpfetch_async: unable to fetch " << fetch_request.url
778 << " because USE_CURL=0" << std::endl;
780 HTTPFetchResult fetch_result(fetch_request); // sets succeeded = false etc.
781 httpfetch_deliver_result(fetch_result);
// Dummy-implementation counterpart of httpfetch_request_clear(); the dummy
// httpfetch_async() delivers its result immediately, so no request is ever
// left pending to be cleared.
// NOTE(review): presumably an empty body - confirm.
784 static void httpfetch_request_clear(unsigned long caller)
// Dummy-implementation variant: no fetch is attempted. Logs the URL that
// could not be fetched and overwrites `fetch_result` with a result constructed
// from the request alone.
788 void httpfetch_sync(const HTTPFetchRequest &fetch_request, HTTPFetchResult &fetch_result)
790 errorstream << "httpfetch_sync: unable to fetch " << fetch_request.url
791 << " because USE_CURL=0" << std::endl;
793 fetch_result = HTTPFetchResult(fetch_request); // sets succeeded = false etc.