3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "clientmedia.h"
21 #include "httpfetch.h"
23 #include "filecache.h"
29 #include "util/serialize.h"
30 #include "util/sha1.h"
31 #include "util/string.h"
33 static std::string getMediaCacheDir()
35 return porting::path_cache + DIR_DELIM + "media";
38 bool clientMediaUpdateCache(const std::string &raw_hash, const std::string &filedata)
40 FileCache media_cache(getMediaCacheDir());
41 std::string sha1_hex = hex_encode(raw_hash);
42 if (!media_cache.exists(sha1_hex))
43 return media_cache.update(sha1_hex, filedata);
51 ClientMediaDownloader::ClientMediaDownloader():
52 m_httpfetch_caller(HTTPFETCH_DISCARD)
56 ClientMediaDownloader::~ClientMediaDownloader()
58 if (m_httpfetch_caller != HTTPFETCH_DISCARD)
59 httpfetch_caller_free(m_httpfetch_caller);
61 for (auto &file_it : m_files)
62 delete file_it.second;
64 for (auto &remote : m_remotes)
68 bool ClientMediaDownloader::loadMedia(Client *client, const std::string &data,
69 const std::string &name)
71 return client->loadMedia(data, name);
74 void ClientMediaDownloader::addFile(const std::string &name, const std::string &sha1)
76 assert(!m_initial_step_done); // pre-condition
78 // if name was already announced, ignore the new announcement
79 if (m_files.count(name) != 0) {
80 errorstream << "Client: ignoring duplicate media announcement "
81 << "sent by server: \"" << name << "\""
86 // if name is empty or contains illegal characters, ignore the file
87 if (name.empty() || !string_allowed(name, TEXTURENAME_ALLOWED_CHARS)) {
88 errorstream << "Client: ignoring illegal file name "
89 << "sent by server: \"" << name << "\""
94 // length of sha1 must be exactly 20 (160 bits), else ignore the file
95 if (sha1.size() != 20) {
96 errorstream << "Client: ignoring illegal SHA1 sent by server: "
97 << hex_encode(sha1) << " \"" << name << "\""
102 FileStatus *filestatus = new FileStatus();
103 filestatus->received = false;
104 filestatus->sha1 = sha1;
105 filestatus->current_remote = -1;
106 m_files.insert(std::make_pair(name, filestatus));
109 void ClientMediaDownloader::addRemoteServer(const std::string &baseurl)
111 assert(!m_initial_step_done); // pre-condition
115 if (g_settings->getBool("enable_remote_media_server")) {
116 infostream << "Client: Adding remote server \""
117 << baseurl << "\" for media download" << std::endl;
119 RemoteServerStatus *remote = new RemoteServerStatus();
120 remote->baseurl = baseurl;
121 remote->active_count = 0;
122 m_remotes.push_back(remote);
127 infostream << "Client: Ignoring remote server \""
128 << baseurl << "\" because cURL support is not compiled in"
134 void ClientMediaDownloader::step(Client *client)
136 if (!m_initial_step_done) {
138 m_initial_step_done = true;
141 // Remote media: check for completion of fetches
142 if (m_httpfetch_active) {
143 bool fetched_something = false;
144 HTTPFetchResult fetch_result;
146 while (httpfetch_async_get(m_httpfetch_caller, fetch_result)) {
147 m_httpfetch_active--;
148 fetched_something = true;
150 // Is this a hashset (index.mth) or a media file?
151 if (fetch_result.request_id < m_remotes.size())
152 remoteHashSetReceived(fetch_result);
154 remoteMediaReceived(fetch_result, client);
157 if (fetched_something)
158 startRemoteMediaTransfers();
160 // Did all remote transfers end and no new ones can be started?
161 // If so, request still missing files from the minetest server
162 // (Or report that we have all files.)
163 if (m_httpfetch_active == 0) {
164 if (m_uncached_received_count < m_uncached_count) {
165 infostream << "Client: Failed to remote-fetch "
166 << (m_uncached_count-m_uncached_received_count)
167 << " files. Requesting them"
168 << " the usual way." << std::endl;
170 startConventionalTransfers(client);
175 void ClientMediaDownloader::initialStep(Client *client)
178 m_uncached_count = m_files.size();
179 for (auto &file_it : m_files) {
180 const std::string &name = file_it.first;
181 FileStatus *filestatus = file_it.second;
182 const std::string &sha1 = filestatus->sha1;
184 if (tryLoadFromCache(name, sha1, client)) {
185 filestatus->received = true;
190 assert(m_uncached_received_count == 0);
192 // Create the media cache dir if we are likely to write to it
193 if (m_uncached_count != 0)
196 // If we found all files in the cache, report this fact to the server.
197 // If the server reported no remote servers, immediately start
198 // conventional transfers. Note: if cURL support is not compiled in,
199 // m_remotes is always empty, so "!USE_CURL" is redundant but may
200 // reduce the size of the compiled code
201 if (!USE_CURL || m_uncached_count == 0 || m_remotes.empty()) {
202 startConventionalTransfers(client);
205 // Otherwise start off by requesting each server's sha1 set
207 // This is the first time we use httpfetch, so alloc a caller ID
208 m_httpfetch_caller = httpfetch_caller_alloc();
210 // Set the active fetch limit to curl_parallel_limit or 84,
211 // whichever is greater. This gives us some leeway so that
212 // inefficiencies in communicating with the httpfetch thread
213 // don't slow down fetches too much. (We still want some limit
214 // so that when the first remote server returns its hash set,
215 // not all files are requested from that server immediately.)
216 // One such inefficiency is that ClientMediaDownloader::step()
217 // is only called a couple times per second, while httpfetch
218 // might return responses much faster than that.
219 // Note that httpfetch strictly enforces curl_parallel_limit
220 // but at no inter-thread communication cost. This however
221 // doesn't help with the aforementioned inefficiencies.
222 // The signifance of 84 is that it is 2*6*9 in base 13.
223 m_httpfetch_active_limit = g_settings->getS32("curl_parallel_limit");
224 m_httpfetch_active_limit = MYMAX(m_httpfetch_active_limit, 84);
226 // Write a list of hashes that we need. This will be POSTed
227 // to the server using Content-Type: application/octet-stream
228 std::string required_hash_set = serializeRequiredHashSet();
230 // minor fixme: this loop ignores m_httpfetch_active_limit
232 // another minor fixme, unlikely to matter in normal usage:
233 // these index.mth fetches do (however) count against
234 // m_httpfetch_active_limit when starting actual media file
235 // requests, so if there are lots of remote servers that are
236 // not responding, those will stall new media file transfers.
238 for (u32 i = 0; i < m_remotes.size(); ++i) {
239 assert(m_httpfetch_next_id == i);
241 RemoteServerStatus *remote = m_remotes[i];
242 actionstream << "Client: Contacting remote server \""
243 << remote->baseurl << "\"" << std::endl;
245 HTTPFetchRequest fetch_request;
247 remote->baseurl + MTHASHSET_FILE_NAME;
248 fetch_request.caller = m_httpfetch_caller;
249 fetch_request.request_id = m_httpfetch_next_id; // == i
250 fetch_request.method = HTTP_POST;
251 fetch_request.raw_data = required_hash_set;
252 fetch_request.extra_headers.emplace_back(
253 "Content-Type: application/octet-stream");
255 // Encapsulate possible IPv6 plain address in []
256 std::string addr = client->getAddressName();
257 if (addr.find(':', 0) != std::string::npos)
258 addr = '[' + addr + ']';
259 fetch_request.extra_headers.emplace_back(
260 std::string("Referer: minetest://") +
262 std::to_string(client->getServerAddress().getPort()));
264 httpfetch_async(fetch_request);
266 m_httpfetch_active++;
267 m_httpfetch_next_id++;
268 m_outstanding_hash_sets++;
273 void ClientMediaDownloader::remoteHashSetReceived(
274 const HTTPFetchResult &fetch_result)
276 u32 remote_id = fetch_result.request_id;
277 assert(remote_id < m_remotes.size());
278 RemoteServerStatus *remote = m_remotes[remote_id];
280 m_outstanding_hash_sets--;
282 if (fetch_result.succeeded) {
284 // Server sent a list of file hashes that are
285 // available on it, try to parse the list
287 std::set<std::string> sha1_set;
288 deSerializeHashSet(fetch_result.data, sha1_set);
290 // Parsing succeeded: For every file that is
291 // available on this server, add this server
292 // to the available_remotes array
294 for(auto it = m_files.upper_bound(m_name_bound);
295 it != m_files.end(); ++it) {
296 FileStatus *f = it->second;
297 if (!f->received && sha1_set.count(f->sha1))
298 f->available_remotes.push_back(remote_id);
301 catch (SerializationError &e) {
302 infostream << "Client: Remote server \""
303 << remote->baseurl << "\" sent invalid hash set: "
304 << e.what() << std::endl;
309 void ClientMediaDownloader::remoteMediaReceived(
310 const HTTPFetchResult &fetch_result,
313 // Some remote server sent us a file.
314 // -> decrement number of active fetches
315 // -> mark file as received if fetch succeeded
316 // -> try to load media
320 auto it = m_remote_file_transfers.find(fetch_result.request_id);
321 assert(it != m_remote_file_transfers.end());
323 m_remote_file_transfers.erase(it);
326 sanity_check(m_files.count(name) != 0);
328 FileStatus *filestatus = m_files[name];
329 sanity_check(!filestatus->received);
330 sanity_check(filestatus->current_remote >= 0);
332 RemoteServerStatus *remote = m_remotes[filestatus->current_remote];
334 filestatus->current_remote = -1;
335 remote->active_count--;
337 // If fetch succeeded, try to load media file
339 if (fetch_result.succeeded) {
340 bool success = checkAndLoad(name, filestatus->sha1,
341 fetch_result.data, false, client);
343 filestatus->received = true;
344 assert(m_uncached_received_count < m_uncached_count);
345 m_uncached_received_count++;
350 s32 ClientMediaDownloader::selectRemoteServer(FileStatus *filestatus)
353 assert(filestatus != NULL);
354 assert(!filestatus->received);
355 assert(filestatus->current_remote < 0);
357 if (filestatus->available_remotes.empty())
360 // Of all servers that claim to provide the file (and haven't
361 // been unsuccessfully tried before), find the one with the
362 // smallest number of currently active transfers
365 s32 best_remote_id = filestatus->available_remotes[best];
366 s32 best_active_count = m_remotes[best_remote_id]->active_count;
368 for (u32 i = 1; i < filestatus->available_remotes.size(); ++i) {
369 s32 remote_id = filestatus->available_remotes[i];
370 s32 active_count = m_remotes[remote_id]->active_count;
371 if (active_count < best_active_count) {
373 best_remote_id = remote_id;
374 best_active_count = active_count;
378 filestatus->available_remotes.erase(
379 filestatus->available_remotes.begin() + best);
381 return best_remote_id;
385 void ClientMediaDownloader::startRemoteMediaTransfers()
387 bool changing_name_bound = true;
389 for (auto files_iter = m_files.upper_bound(m_name_bound);
390 files_iter != m_files.end(); ++files_iter) {
392 // Abort if active fetch limit is exceeded
393 if (m_httpfetch_active >= m_httpfetch_active_limit)
396 const std::string &name = files_iter->first;
397 FileStatus *filestatus = files_iter->second;
399 if (!filestatus->received && filestatus->current_remote < 0) {
400 // File has not been received yet and is not currently
401 // being transferred. Choose a server for it.
402 s32 remote_id = selectRemoteServer(filestatus);
403 if (remote_id >= 0) {
404 // Found a server, so start fetching
405 RemoteServerStatus *remote =
406 m_remotes[remote_id];
408 std::string url = remote->baseurl +
409 hex_encode(filestatus->sha1);
410 verbosestream << "Client: "
411 << "Requesting remote media file "
412 << "\"" << name << "\" "
413 << "\"" << url << "\"" << std::endl;
415 HTTPFetchRequest fetch_request;
416 fetch_request.url = url;
417 fetch_request.caller = m_httpfetch_caller;
418 fetch_request.request_id = m_httpfetch_next_id;
419 fetch_request.timeout =
420 g_settings->getS32("curl_file_download_timeout");
421 httpfetch_async(fetch_request);
423 m_remote_file_transfers.insert(std::make_pair(
427 filestatus->current_remote = remote_id;
428 remote->active_count++;
429 m_httpfetch_active++;
430 m_httpfetch_next_id++;
434 if (filestatus->received ||
435 (filestatus->current_remote < 0 &&
436 !m_outstanding_hash_sets)) {
437 // If we arrive here, we conclusively know that we
438 // won't fetch this file from a remote server in the
439 // future. So update the name bound if possible.
440 if (changing_name_bound)
444 changing_name_bound = false;
449 void ClientMediaDownloader::startConventionalTransfers(Client *client)
451 assert(m_httpfetch_active == 0); // pre-condition
453 if (m_uncached_received_count != m_uncached_count) {
454 // Some media files have not been received yet, use the
455 // conventional slow method (minetest protocol) to get them
456 std::vector<std::string> file_requests;
457 for (auto &file : m_files) {
458 if (!file.second->received)
459 file_requests.push_back(file.first);
461 assert((s32) file_requests.size() ==
462 m_uncached_count - m_uncached_received_count);
463 client->request_media(file_requests);
467 bool ClientMediaDownloader::conventionalTransferDone(
468 const std::string &name,
469 const std::string &data,
472 // Check that file was announced
473 auto file_iter = m_files.find(name);
474 if (file_iter == m_files.end()) {
475 errorstream << "Client: server sent media file that was"
476 << "not announced, ignoring it: \"" << name << "\""
480 FileStatus *filestatus = file_iter->second;
481 assert(filestatus != NULL);
483 // Check that file hasn't already been received
484 if (filestatus->received) {
485 errorstream << "Client: server sent media file that we already"
486 << "received, ignoring it: \"" << name << "\""
491 // Mark file as received, regardless of whether loading it works and
492 // whether the checksum matches (because at this point there is no
493 // other server that could send a replacement)
494 filestatus->received = true;
495 assert(m_uncached_received_count < m_uncached_count);
496 m_uncached_received_count++;
498 // Check that received file matches announced checksum
500 checkAndLoad(name, filestatus->sha1, data, false, client);
506 IClientMediaDownloader
509 IClientMediaDownloader::IClientMediaDownloader():
510 m_media_cache(getMediaCacheDir()), m_write_to_cache(true)
514 void IClientMediaDownloader::createCacheDirs()
516 if (!m_write_to_cache)
519 std::string path = getMediaCacheDir();
520 if (!fs::CreateAllDirs(path)) {
521 errorstream << "Client: Could not create media cache directory: "
522 << path << std::endl;
526 bool IClientMediaDownloader::tryLoadFromCache(const std::string &name,
527 const std::string &sha1, Client *client)
529 std::ostringstream tmp_os(std::ios_base::binary);
530 bool found_in_cache = m_media_cache.load(hex_encode(sha1), tmp_os);
532 // If found in cache, try to load it from there
534 return checkAndLoad(name, sha1, tmp_os.str(), true, client);
539 bool IClientMediaDownloader::checkAndLoad(
540 const std::string &name, const std::string &sha1,
541 const std::string &data, bool is_from_cache, Client *client)
543 const char *cached_or_received = is_from_cache ? "cached" : "received";
544 const char *cached_or_received_uc = is_from_cache ? "Cached" : "Received";
545 std::string sha1_hex = hex_encode(sha1);
547 // Compute actual checksum of data
548 std::string data_sha1;
550 SHA1 data_sha1_calculator;
551 data_sha1_calculator.addBytes(data.c_str(), data.size());
552 unsigned char *data_tmpdigest = data_sha1_calculator.getDigest();
553 data_sha1.assign((char*) data_tmpdigest, 20);
554 free(data_tmpdigest);
557 // Check that received file matches announced checksum
558 if (data_sha1 != sha1) {
559 std::string data_sha1_hex = hex_encode(data_sha1);
560 infostream << "Client: "
561 << cached_or_received_uc << " media file "
562 << sha1_hex << " \"" << name << "\" "
563 << "mismatches actual checksum " << data_sha1_hex
568 // Checksum is ok, try loading the file
569 bool success = loadMedia(client, data, name);
571 infostream << "Client: "
572 << "Failed to load " << cached_or_received << " media: "
573 << sha1_hex << " \"" << name << "\""
578 verbosestream << "Client: "
579 << "Loaded " << cached_or_received << " media: "
580 << sha1_hex << " \"" << name << "\""
583 // Update cache (unless we just loaded the file from the cache)
584 if (!is_from_cache && m_write_to_cache)
585 m_media_cache.update(sha1_hex, data);
/*
	Minetest Hashset File Format

	All values are stored in big-endian byte order.
	[u32] signature: 'MTHS'
	[u16] version: 1
	For each hash in set:
		[u8*20] SHA1 hash
*/
603 std::string ClientMediaDownloader::serializeRequiredHashSet()
605 std::ostringstream os(std::ios::binary);
607 writeU32(os, MTHASHSET_FILE_SIGNATURE); // signature
608 writeU16(os, 1); // version
610 // Write list of hashes of files that have not been
611 // received (found in cache) yet
612 for (const auto &it : m_files) {
613 if (!it.second->received) {
614 FATAL_ERROR_IF(it.second->sha1.size() != 20, "Invalid SHA1 size");
615 os << it.second->sha1;
622 void ClientMediaDownloader::deSerializeHashSet(const std::string &data,
623 std::set<std::string> &result)
625 if (data.size() < 6 || data.size() % 20 != 6) {
626 throw SerializationError(
627 "ClientMediaDownloader::deSerializeHashSet: "
628 "invalid hash set file size");
631 const u8 *data_cstr = (const u8*) data.c_str();
633 u32 signature = readU32(&data_cstr[0]);
634 if (signature != MTHASHSET_FILE_SIGNATURE) {
635 throw SerializationError(
636 "ClientMediaDownloader::deSerializeHashSet: "
637 "invalid hash set file signature");
640 u16 version = readU16(&data_cstr[4]);
642 throw SerializationError(
643 "ClientMediaDownloader::deSerializeHashSet: "
644 "unsupported hash set file version");
647 for (u32 pos = 6; pos < data.size(); pos += 20) {
648 result.insert(data.substr(pos, 20));
653 SingleMediaDownloader
656 SingleMediaDownloader::SingleMediaDownloader(bool write_to_cache):
657 m_httpfetch_caller(HTTPFETCH_DISCARD)
659 m_write_to_cache = write_to_cache;
662 SingleMediaDownloader::~SingleMediaDownloader()
664 if (m_httpfetch_caller != HTTPFETCH_DISCARD)
665 httpfetch_caller_free(m_httpfetch_caller);
668 bool SingleMediaDownloader::loadMedia(Client *client, const std::string &data,
669 const std::string &name)
671 return client->loadMedia(data, name, true);
674 void SingleMediaDownloader::addFile(const std::string &name, const std::string &sha1)
676 assert(m_stage == STAGE_INIT); // pre-condition
678 assert(!name.empty());
679 assert(sha1.size() == 20);
681 FATAL_ERROR_IF(!m_file_name.empty(), "Cannot add a second file");
686 void SingleMediaDownloader::addRemoteServer(const std::string &baseurl)
688 assert(m_stage == STAGE_INIT); // pre-condition
690 if (g_settings->getBool("enable_remote_media_server"))
691 m_remotes.emplace_back(baseurl);
694 void SingleMediaDownloader::step(Client *client)
696 if (m_stage == STAGE_INIT) {
697 m_stage = STAGE_CACHE_CHECKED;
701 // Remote media: check for completion of fetches
702 if (m_httpfetch_caller != HTTPFETCH_DISCARD) {
703 HTTPFetchResult fetch_result;
704 while (httpfetch_async_get(m_httpfetch_caller, fetch_result)) {
705 remoteMediaReceived(fetch_result, client);
710 bool SingleMediaDownloader::conventionalTransferDone(const std::string &name,
711 const std::string &data, Client *client)
713 if (name != m_file_name)
716 // Mark file as received unconditionally and try to load it
717 m_stage = STAGE_DONE;
718 checkAndLoad(name, m_file_sha1, data, false, client);
722 void SingleMediaDownloader::initialStep(Client *client)
724 if (tryLoadFromCache(m_file_name, m_file_sha1, client))
725 m_stage = STAGE_DONE;
731 // If the server reported no remote servers, immediately fall back to
732 // conventional transfer.
733 if (!USE_CURL || m_remotes.empty()) {
734 startConventionalTransfer(client);
736 // Otherwise start by requesting the file from the first remote media server
737 m_httpfetch_caller = httpfetch_caller_alloc();
738 m_current_remote = 0;
739 startRemoteMediaTransfer();
743 void SingleMediaDownloader::remoteMediaReceived(
744 const HTTPFetchResult &fetch_result, Client *client)
746 sanity_check(!isDone());
747 sanity_check(m_current_remote >= 0);
749 // If fetch succeeded, try to load it
750 if (fetch_result.succeeded) {
751 bool success = checkAndLoad(m_file_name, m_file_sha1,
752 fetch_result.data, false, client);
754 m_stage = STAGE_DONE;
759 // Otherwise try the next remote server or fall back to conventional transfer
761 if (m_current_remote >= (int)m_remotes.size()) {
762 infostream << "Client: Failed to remote-fetch \"" << m_file_name
763 << "\". Requesting it the usual way." << std::endl;
764 m_current_remote = -1;
765 startConventionalTransfer(client);
767 startRemoteMediaTransfer();
771 void SingleMediaDownloader::startRemoteMediaTransfer()
773 std::string url = m_remotes.at(m_current_remote) + hex_encode(m_file_sha1);
774 verbosestream << "Client: Requesting remote media file "
775 << "\"" << m_file_name << "\" " << "\"" << url << "\"" << std::endl;
777 HTTPFetchRequest fetch_request;
778 fetch_request.url = url;
779 fetch_request.caller = m_httpfetch_caller;
780 fetch_request.request_id = m_httpfetch_next_id;
781 fetch_request.timeout = g_settings->getS32("curl_file_download_timeout");
782 httpfetch_async(fetch_request);
784 m_httpfetch_next_id++;
787 void SingleMediaDownloader::startConventionalTransfer(Client *client)
789 std::vector<std::string> requests;
790 requests.emplace_back(m_file_name);
791 client->request_media(requests);