/*
Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "clientmedia.h"
#include "httpfetch.h"
#include "filecache.h"
#include "util/serialize.h"
#include "util/sha1.h"
#include "util/string.h"
// NOTE(review): the original line numbering has gaps here (22 and 24-28), so
// further #include lines were presumably lost in extraction. This file also
// uses Client, fs::CreateAllDirs, porting::path_cache, g_settings, the
// errorstream/infostream/actionstream/verbosestream log streams and
// hex_encode; restore the headers declaring them from version control.
33 static std::string getMediaCacheDir()
35 return porting::path_cache + DIR_DELIM + "media";
38 bool clientMediaUpdateCache(const std::string &raw_hash, const std::string &filedata)
40 FileCache media_cache(getMediaCacheDir());
41 std::string sha1_hex = hex_encode(raw_hash);
42 if (!media_cache.exists(sha1_hex))
43 return media_cache.update(sha1_hex, filedata);
51 ClientMediaDownloader::ClientMediaDownloader():
52 m_media_cache(getMediaCacheDir()),
53 m_httpfetch_caller(HTTPFETCH_DISCARD)
57 ClientMediaDownloader::~ClientMediaDownloader()
59 if (m_httpfetch_caller != HTTPFETCH_DISCARD)
60 httpfetch_caller_free(m_httpfetch_caller);
62 for (auto &file_it : m_files)
63 delete file_it.second;
65 for (auto &remote : m_remotes)
69 void ClientMediaDownloader::addFile(const std::string &name, const std::string &sha1)
71 assert(!m_initial_step_done); // pre-condition
73 // if name was already announced, ignore the new announcement
74 if (m_files.count(name) != 0) {
75 errorstream << "Client: ignoring duplicate media announcement "
76 << "sent by server: \"" << name << "\""
81 // if name is empty or contains illegal characters, ignore the file
82 if (name.empty() || !string_allowed(name, TEXTURENAME_ALLOWED_CHARS)) {
83 errorstream << "Client: ignoring illegal file name "
84 << "sent by server: \"" << name << "\""
89 // length of sha1 must be exactly 20 (160 bits), else ignore the file
90 if (sha1.size() != 20) {
91 errorstream << "Client: ignoring illegal SHA1 sent by server: "
92 << hex_encode(sha1) << " \"" << name << "\""
97 FileStatus *filestatus = new FileStatus();
98 filestatus->received = false;
99 filestatus->sha1 = sha1;
100 filestatus->current_remote = -1;
101 m_files.insert(std::make_pair(name, filestatus));
104 void ClientMediaDownloader::addRemoteServer(const std::string &baseurl)
106 assert(!m_initial_step_done); // pre-condition
110 if (g_settings->getBool("enable_remote_media_server")) {
111 infostream << "Client: Adding remote server \""
112 << baseurl << "\" for media download" << std::endl;
114 RemoteServerStatus *remote = new RemoteServerStatus();
115 remote->baseurl = baseurl;
116 remote->active_count = 0;
117 m_remotes.push_back(remote);
122 infostream << "Client: Ignoring remote server \""
123 << baseurl << "\" because cURL support is not compiled in"
129 void ClientMediaDownloader::step(Client *client)
131 if (!m_initial_step_done) {
133 m_initial_step_done = true;
136 // Remote media: check for completion of fetches
137 if (m_httpfetch_active) {
138 bool fetched_something = false;
139 HTTPFetchResult fetch_result;
141 while (httpfetch_async_get(m_httpfetch_caller, fetch_result)) {
142 m_httpfetch_active--;
143 fetched_something = true;
145 // Is this a hashset (index.mth) or a media file?
146 if (fetch_result.request_id < m_remotes.size())
147 remoteHashSetReceived(fetch_result);
149 remoteMediaReceived(fetch_result, client);
152 if (fetched_something)
153 startRemoteMediaTransfers();
155 // Did all remote transfers end and no new ones can be started?
156 // If so, request still missing files from the minetest server
157 // (Or report that we have all files.)
158 if (m_httpfetch_active == 0) {
159 if (m_uncached_received_count < m_uncached_count) {
160 infostream << "Client: Failed to remote-fetch "
161 << (m_uncached_count-m_uncached_received_count)
162 << " files. Requesting them"
163 << " the usual way." << std::endl;
165 startConventionalTransfers(client);
170 void ClientMediaDownloader::initialStep(Client *client)
173 m_uncached_count = m_files.size();
174 for (auto &file_it : m_files) {
175 std::string name = file_it.first;
176 FileStatus *filestatus = file_it.second;
177 const std::string &sha1 = filestatus->sha1;
179 std::ostringstream tmp_os(std::ios_base::binary);
180 bool found_in_cache = m_media_cache.load(hex_encode(sha1), tmp_os);
182 // If found in cache, try to load it from there
183 if (found_in_cache) {
184 bool success = checkAndLoad(name, sha1,
185 tmp_os.str(), true, client);
187 filestatus->received = true;
193 assert(m_uncached_received_count == 0);
195 // Create the media cache dir if we are likely to write to it
196 if (m_uncached_count != 0) {
197 bool did = fs::CreateAllDirs(getMediaCacheDir());
199 errorstream << "Client: "
200 << "Could not create media cache directory: "
201 << getMediaCacheDir()
206 // If we found all files in the cache, report this fact to the server.
207 // If the server reported no remote servers, immediately start
208 // conventional transfers. Note: if cURL support is not compiled in,
209 // m_remotes is always empty, so "!USE_CURL" is redundant but may
210 // reduce the size of the compiled code
211 if (!USE_CURL || m_uncached_count == 0 || m_remotes.empty()) {
212 startConventionalTransfers(client);
215 // Otherwise start off by requesting each server's sha1 set
217 // This is the first time we use httpfetch, so alloc a caller ID
218 m_httpfetch_caller = httpfetch_caller_alloc();
219 m_httpfetch_timeout = g_settings->getS32("curl_timeout");
221 // Set the active fetch limit to curl_parallel_limit or 84,
222 // whichever is greater. This gives us some leeway so that
223 // inefficiencies in communicating with the httpfetch thread
224 // don't slow down fetches too much. (We still want some limit
225 // so that when the first remote server returns its hash set,
226 // not all files are requested from that server immediately.)
227 // One such inefficiency is that ClientMediaDownloader::step()
228 // is only called a couple times per second, while httpfetch
229 // might return responses much faster than that.
230 // Note that httpfetch strictly enforces curl_parallel_limit
231 // but at no inter-thread communication cost. This however
232 // doesn't help with the aforementioned inefficiencies.
233 // The signifance of 84 is that it is 2*6*9 in base 13.
234 m_httpfetch_active_limit = g_settings->getS32("curl_parallel_limit");
235 m_httpfetch_active_limit = MYMAX(m_httpfetch_active_limit, 84);
237 // Write a list of hashes that we need. This will be POSTed
238 // to the server using Content-Type: application/octet-stream
239 std::string required_hash_set = serializeRequiredHashSet();
241 // minor fixme: this loop ignores m_httpfetch_active_limit
243 // another minor fixme, unlikely to matter in normal usage:
244 // these index.mth fetches do (however) count against
245 // m_httpfetch_active_limit when starting actual media file
246 // requests, so if there are lots of remote servers that are
247 // not responding, those will stall new media file transfers.
249 for (u32 i = 0; i < m_remotes.size(); ++i) {
250 assert(m_httpfetch_next_id == i);
252 RemoteServerStatus *remote = m_remotes[i];
253 actionstream << "Client: Contacting remote server \""
254 << remote->baseurl << "\"" << std::endl;
256 HTTPFetchRequest fetch_request;
258 remote->baseurl + MTHASHSET_FILE_NAME;
259 fetch_request.caller = m_httpfetch_caller;
260 fetch_request.request_id = m_httpfetch_next_id; // == i
261 fetch_request.timeout = m_httpfetch_timeout;
262 fetch_request.connect_timeout = m_httpfetch_timeout;
263 fetch_request.method = HTTP_POST;
264 fetch_request.raw_data = required_hash_set;
265 fetch_request.extra_headers.emplace_back(
266 "Content-Type: application/octet-stream");
268 // Encapsulate possible IPv6 plain address in []
269 std::string addr = client->getAddressName();
270 if (addr.find(':', 0) != std::string::npos)
271 addr = '[' + addr + ']';
272 fetch_request.extra_headers.emplace_back(
273 std::string("Referer: minetest://") +
275 std::to_string(client->getServerAddress().getPort()));
277 httpfetch_async(fetch_request);
279 m_httpfetch_active++;
280 m_httpfetch_next_id++;
281 m_outstanding_hash_sets++;
286 void ClientMediaDownloader::remoteHashSetReceived(
287 const HTTPFetchResult &fetch_result)
289 u32 remote_id = fetch_result.request_id;
290 assert(remote_id < m_remotes.size());
291 RemoteServerStatus *remote = m_remotes[remote_id];
293 m_outstanding_hash_sets--;
295 if (fetch_result.succeeded) {
297 // Server sent a list of file hashes that are
298 // available on it, try to parse the list
300 std::set<std::string> sha1_set;
301 deSerializeHashSet(fetch_result.data, sha1_set);
303 // Parsing succeeded: For every file that is
304 // available on this server, add this server
305 // to the available_remotes array
307 for(std::map<std::string, FileStatus*>::iterator
308 it = m_files.upper_bound(m_name_bound);
309 it != m_files.end(); ++it) {
310 FileStatus *f = it->second;
311 if (!f->received && sha1_set.count(f->sha1))
312 f->available_remotes.push_back(remote_id);
315 catch (SerializationError &e) {
316 infostream << "Client: Remote server \""
317 << remote->baseurl << "\" sent invalid hash set: "
318 << e.what() << std::endl;
323 void ClientMediaDownloader::remoteMediaReceived(
324 const HTTPFetchResult &fetch_result,
327 // Some remote server sent us a file.
328 // -> decrement number of active fetches
329 // -> mark file as received if fetch succeeded
330 // -> try to load media
334 std::unordered_map<unsigned long, std::string>::iterator it =
335 m_remote_file_transfers.find(fetch_result.request_id);
336 assert(it != m_remote_file_transfers.end());
338 m_remote_file_transfers.erase(it);
341 sanity_check(m_files.count(name) != 0);
343 FileStatus *filestatus = m_files[name];
344 sanity_check(!filestatus->received);
345 sanity_check(filestatus->current_remote >= 0);
347 RemoteServerStatus *remote = m_remotes[filestatus->current_remote];
349 filestatus->current_remote = -1;
350 remote->active_count--;
352 // If fetch succeeded, try to load media file
354 if (fetch_result.succeeded) {
355 bool success = checkAndLoad(name, filestatus->sha1,
356 fetch_result.data, false, client);
358 filestatus->received = true;
359 assert(m_uncached_received_count < m_uncached_count);
360 m_uncached_received_count++;
365 s32 ClientMediaDownloader::selectRemoteServer(FileStatus *filestatus)
368 assert(filestatus != NULL);
369 assert(!filestatus->received);
370 assert(filestatus->current_remote < 0);
372 if (filestatus->available_remotes.empty())
375 // Of all servers that claim to provide the file (and haven't
376 // been unsuccessfully tried before), find the one with the
377 // smallest number of currently active transfers
380 s32 best_remote_id = filestatus->available_remotes[best];
381 s32 best_active_count = m_remotes[best_remote_id]->active_count;
383 for (u32 i = 1; i < filestatus->available_remotes.size(); ++i) {
384 s32 remote_id = filestatus->available_remotes[i];
385 s32 active_count = m_remotes[remote_id]->active_count;
386 if (active_count < best_active_count) {
388 best_remote_id = remote_id;
389 best_active_count = active_count;
393 filestatus->available_remotes.erase(
394 filestatus->available_remotes.begin() + best);
396 return best_remote_id;
400 void ClientMediaDownloader::startRemoteMediaTransfers()
402 bool changing_name_bound = true;
404 for (std::map<std::string, FileStatus*>::iterator
405 files_iter = m_files.upper_bound(m_name_bound);
406 files_iter != m_files.end(); ++files_iter) {
408 // Abort if active fetch limit is exceeded
409 if (m_httpfetch_active >= m_httpfetch_active_limit)
412 const std::string &name = files_iter->first;
413 FileStatus *filestatus = files_iter->second;
415 if (!filestatus->received && filestatus->current_remote < 0) {
416 // File has not been received yet and is not currently
417 // being transferred. Choose a server for it.
418 s32 remote_id = selectRemoteServer(filestatus);
419 if (remote_id >= 0) {
420 // Found a server, so start fetching
421 RemoteServerStatus *remote =
422 m_remotes[remote_id];
424 std::string url = remote->baseurl +
425 hex_encode(filestatus->sha1);
426 verbosestream << "Client: "
427 << "Requesting remote media file "
428 << "\"" << name << "\" "
429 << "\"" << url << "\"" << std::endl;
431 HTTPFetchRequest fetch_request;
432 fetch_request.url = url;
433 fetch_request.caller = m_httpfetch_caller;
434 fetch_request.request_id = m_httpfetch_next_id;
435 fetch_request.timeout = 0; // no data timeout!
436 fetch_request.connect_timeout =
438 httpfetch_async(fetch_request);
440 m_remote_file_transfers.insert(std::make_pair(
444 filestatus->current_remote = remote_id;
445 remote->active_count++;
446 m_httpfetch_active++;
447 m_httpfetch_next_id++;
451 if (filestatus->received ||
452 (filestatus->current_remote < 0 &&
453 !m_outstanding_hash_sets)) {
454 // If we arrive here, we conclusively know that we
455 // won't fetch this file from a remote server in the
456 // future. So update the name bound if possible.
457 if (changing_name_bound)
461 changing_name_bound = false;
466 void ClientMediaDownloader::startConventionalTransfers(Client *client)
468 assert(m_httpfetch_active == 0); // pre-condition
470 if (m_uncached_received_count != m_uncached_count) {
471 // Some media files have not been received yet, use the
472 // conventional slow method (minetest protocol) to get them
473 std::vector<std::string> file_requests;
474 for (auto &file : m_files) {
475 if (!file.second->received)
476 file_requests.push_back(file.first);
478 assert((s32) file_requests.size() ==
479 m_uncached_count - m_uncached_received_count);
480 client->request_media(file_requests);
484 void ClientMediaDownloader::conventionalTransferDone(
485 const std::string &name,
486 const std::string &data,
489 // Check that file was announced
490 std::map<std::string, FileStatus*>::iterator
491 file_iter = m_files.find(name);
492 if (file_iter == m_files.end()) {
493 errorstream << "Client: server sent media file that was"
494 << "not announced, ignoring it: \"" << name << "\""
498 FileStatus *filestatus = file_iter->second;
499 assert(filestatus != NULL);
501 // Check that file hasn't already been received
502 if (filestatus->received) {
503 errorstream << "Client: server sent media file that we already"
504 << "received, ignoring it: \"" << name << "\""
509 // Mark file as received, regardless of whether loading it works and
510 // whether the checksum matches (because at this point there is no
511 // other server that could send a replacement)
512 filestatus->received = true;
513 assert(m_uncached_received_count < m_uncached_count);
514 m_uncached_received_count++;
516 // Check that received file matches announced checksum
518 checkAndLoad(name, filestatus->sha1, data, false, client);
521 bool ClientMediaDownloader::checkAndLoad(
522 const std::string &name, const std::string &sha1,
523 const std::string &data, bool is_from_cache, Client *client)
525 const char *cached_or_received = is_from_cache ? "cached" : "received";
526 const char *cached_or_received_uc = is_from_cache ? "Cached" : "Received";
527 std::string sha1_hex = hex_encode(sha1);
529 // Compute actual checksum of data
530 std::string data_sha1;
532 SHA1 data_sha1_calculator;
533 data_sha1_calculator.addBytes(data.c_str(), data.size());
534 unsigned char *data_tmpdigest = data_sha1_calculator.getDigest();
535 data_sha1.assign((char*) data_tmpdigest, 20);
536 free(data_tmpdigest);
539 // Check that received file matches announced checksum
540 if (data_sha1 != sha1) {
541 std::string data_sha1_hex = hex_encode(data_sha1);
542 infostream << "Client: "
543 << cached_or_received_uc << " media file "
544 << sha1_hex << " \"" << name << "\" "
545 << "mismatches actual checksum " << data_sha1_hex
550 // Checksum is ok, try loading the file
551 bool success = client->loadMedia(data, name);
553 infostream << "Client: "
554 << "Failed to load " << cached_or_received << " media: "
555 << sha1_hex << " \"" << name << "\""
560 verbosestream << "Client: "
561 << "Loaded " << cached_or_received << " media: "
562 << sha1_hex << " \"" << name << "\""
565 // Update cache (unless we just loaded the file from the cache)
567 m_media_cache.update(sha1_hex, data);
/*
	Minetest Hashset File Format

	All values are stored in big-endian byte order.
	[u32] signature: 'MTHS'
	[u16] version: 1
	For each hash in set:
		[u8*20] SHA1 hash
*/
585 std::string ClientMediaDownloader::serializeRequiredHashSet()
587 std::ostringstream os(std::ios::binary);
589 writeU32(os, MTHASHSET_FILE_SIGNATURE); // signature
590 writeU16(os, 1); // version
592 // Write list of hashes of files that have not been
593 // received (found in cache) yet
594 for (std::map<std::string, FileStatus*>::iterator
595 it = m_files.begin();
596 it != m_files.end(); ++it) {
597 if (!it->second->received) {
598 FATAL_ERROR_IF(it->second->sha1.size() != 20, "Invalid SHA1 size");
599 os << it->second->sha1;
606 void ClientMediaDownloader::deSerializeHashSet(const std::string &data,
607 std::set<std::string> &result)
609 if (data.size() < 6 || data.size() % 20 != 6) {
610 throw SerializationError(
611 "ClientMediaDownloader::deSerializeHashSet: "
612 "invalid hash set file size");
615 const u8 *data_cstr = (const u8*) data.c_str();
617 u32 signature = readU32(&data_cstr[0]);
618 if (signature != MTHASHSET_FILE_SIGNATURE) {
619 throw SerializationError(
620 "ClientMediaDownloader::deSerializeHashSet: "
621 "invalid hash set file signature");
624 u16 version = readU16(&data_cstr[4]);
626 throw SerializationError(
627 "ClientMediaDownloader::deSerializeHashSet: "
628 "unsupported hash set file version");
631 for (u32 pos = 6; pos < data.size(); pos += 20) {
632 result.insert(data.substr(pos, 20));