]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/importtasks.py
Fix unexpected crash on bad Github URL
[cheatdb.git] / app / tasks / importtasks.py
1 # Content DB
2 # Copyright (C) 2018  rubenwardy
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17
18 import flask, json, os, git, tempfile, shutil
19 from git import GitCommandError
20 from flask_sqlalchemy import SQLAlchemy
21 from urllib.error import HTTPError
22 import urllib.request
23 from urllib.parse import urlparse, quote_plus, urlsplit
24 from app import app
25 from app.models import *
26 from app.tasks import celery, TaskError
27 from app.utils import randomString
28
29
class GithubURLMaker:
        """Builds GitHub web, raw-content and API URLs for one repository.

        The repository is identified from the path of an already parsed URL,
        which must look like ``/<user>/<repo>`` (an optional trailing slash
        and an optional ``.git`` suffix are accepted). If the path does not
        have that shape the instance is left invalid: `baseUrl`, `user` and
        `repo` stay None and `isValid()` returns False.
        """

        def __init__(self, url):
                """Extract the user and repository name from `url.path`.

                url -- object with a `path` attribute, e.g. the result of
                       `urllib.parse.urlparse`.
                """
                self.baseUrl = None
                self.user = None
                self.repo = None

                # Rewrite path
                import re
                m = re.search(r"^/([^/]+)/([^/]+)/?$", url.path)
                if m is None:
                        return

                user = m.group(1)
                repo = m.group(2)

                # Only strip a trailing ".git". A plain replace() would also
                # mangle names that merely contain it, e.g. "my.github.mod".
                if repo.endswith(".git"):
                        repo = repo[:-len(".git")]

                # A path such as "/user/.git" leaves no repository name at all.
                if not repo:
                        return

                self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
                                .format(user, repo)
                self.user = user
                self.repo = repo

        def isValid(self):
                """Return True if a user and repository were found in the URL."""
                return self.baseUrl is not None

        def getRepoURL(self):
                """Return the repository's github.com page URL."""
                return "https://github.com/{}/{}".format(self.user, self.repo)

        def getScreenshotURL(self):
                """Return the raw URL of screenshot.png on the master branch."""
                return self.baseUrl + "/screenshot.png"

        def getModConfURL(self):
                """Return the raw URL of mod.conf on the master branch."""
                return self.baseUrl + "/mod.conf"

        def getDependsURL(self):
                """Return the raw URL of depends.txt on the master branch."""
                return self.baseUrl + "/depends.txt"

        def getCommitsURL(self, branch):
                """Return the GitHub API URL listing commits of `branch`.

                The branch name is URL-quoted before being used as the `sha`
                query parameter.
                """
                return "https://api.github.com/repos/{}/{}/commits?sha={}" \
                                .format(self.user, self.repo, urllib.parse.quote_plus(branch))

        def getCommitDownload(self, commit):
                """Return the URL of the zip archive for `commit`."""
                return "https://github.com/{}/{}/archive/{}.zip" \
                                .format(self.user, self.repo, commit)
65
# Module-level cache of Krock's mod list, filled on the first call to
# getKrockList() and reused afterwards.
krock_list_cache = None
krock_list_cache_by_name = None
def getKrockList():
        """Return Krock's mod list as a tuple ``(entries, entries_by_name)``.

        `entries` is a list of dicts with the keys "title", "author", "name",
        "topicId" and "link". `entries_by_name` maps each "name" to the list
        of entries carrying that name.

        An item of the downloaded list is kept only if it has all of "title",
        "author", "topicId" and a non-empty "link", and if its title contains
        a ``[name]`` tag; the text inside the brackets becomes "name".

        The list is downloaded once and then served from the module-level
        cache. Errors raised by the download or by JSON decoding are not
        caught here.
        """
        global krock_list_cache
        global krock_list_cache_by_name

        if krock_list_cache is None:
                import re
                name_pattern = re.compile(r"\[([A-Za-z0-9_]+)\]")
                required_keys = ("title", "author", "topicId", "link")

                # Close the HTTP response as soon as the body has been read.
                with urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php") as response:
                        contents = response.read().decode("utf-8")

                entries = []
                by_name = {}
                for raw in json.loads(contents):
                        if any(key not in raw for key in required_keys) or raw["link"] == "":
                                continue

                        m = name_pattern.search(raw["title"])
                        if m is None:
                                continue

                        entry = {
                                "title":   raw["title"],
                                "author":  raw["author"],
                                "name":    m.group(1),
                                "topicId": raw["topicId"],
                                "link":    raw["link"],
                        }
                        entries.append(entry)
                        by_name.setdefault(entry["name"], []).append(entry)

                # Publish both caches only once they are completely built.
                krock_list_cache_by_name = by_name
                krock_list_cache = entries

        return krock_list_cache, krock_list_cache_by_name
107
def findModInfo(author, name, link):
        """Look up a mod in Krock's list and return its entry, or None.

        Matching is attempted in this order:
          1. by `name`: if exactly one entry has that name it is returned,
             otherwise the first entry with that name whose "author" equals
             `author`;
          2. by `link`: the first entry whose "link" contains `link`, tried
             only when `link` is longer than 15 characters.
        """
        entries, by_name = getKrockList()

        if name is not None:
                candidates = by_name.get(name, [])
                if len(candidates) == 1:
                        return candidates[0]

                same_author = next((c for c in candidates if c["author"] == author), None)
                if same_author is not None:
                        return same_author

        if link is None or len(link) <= 15:
                return None

        return next((e for e in entries if link in e["link"]), None)
125
126
def parseConf(string):
        """Parse ``key = value`` lines into a dict.

        Each line is split at its first "=". Lines without "=", or with "="
        as their very first character, are ignored. Keys and values are
        stripped of surrounding whitespace; a later line overrides an earlier
        one with the same key.
        """
        settings = {}
        for raw_line in string.split("\n"):
                key, sep, value = raw_line.partition("=")
                # Skip lines with no "=" and lines with nothing before it.
                if not sep or not key:
                        continue
                settings[key.strip()] = value.strip()

        return settings
137
138
class PackageTreeNode:
        """One directory of a cloned repository, interpreted as a package.

        Construction detects the package type from marker files, reads the
        metadata files found in the directory and, for games and modpacks,
        builds one child node per contained directory.

        Attributes:
          baseDir   -- directory this node was built from
          author    -- author given by the caller (passed to findModInfo)
          repo      -- repository URL given by the caller (passed to findModInfo)
          name      -- name given by the caller, replaced by mod.conf's `name`
                       when that file provides one
          type      -- detected PackageType, or None for an empty directory
          meta      -- dict of metadata, or None for an empty directory
          children  -- list of child PackageTreeNode objects
        """

        def __init__(self, baseDir, author=None, repo=None, name=None):
                """Scan `baseDir`.

                Raises TaskError if the directory is not empty and none of the
                known marker files or directories is present.
                """
                print("Scanning " + baseDir)
                self.baseDir  = baseDir
                self.author   = author
                self.name     = name
                self.repo     = repo
                self.meta     = None
                self.children = []
                # Defined up front so the attribute also exists when the
                # directory is empty and the constructor returns early.
                self.type     = None

                # Detect type
                is_modpack = False
                if os.path.isfile(baseDir + "/game.conf"):
                        self.type = PackageType.GAME
                elif os.path.isfile(baseDir + "/init.lua"):
                        self.type = PackageType.MOD
                elif os.path.isfile(baseDir + "/modpack.txt") or \
                                os.path.isfile(baseDir + "/modpack.conf"):
                        self.type = PackageType.MOD
                        is_modpack = True
                elif os.path.isdir(baseDir + "/mods"):
                        self.type = PackageType.GAME
                elif not os.listdir(baseDir):
                        # probably a submodule
                        return
                else:
                        raise TaskError("Unable to detect package type!")

                self.readMetaFiles()

                if self.type == PackageType.GAME:
                        self.addChildrenFromModDir(baseDir + "/mods")
                elif is_modpack:
                        self.addChildrenFromModDir(baseDir)


        @staticmethod
        def _splitDepends(value):
                """Split a comma separated dependency string, dropping blanks."""
                names = (part.strip() for part in value.split(","))
                return [name for name in names if name]


        def readMetaFiles(self):
                """Read mod.conf, description.txt and depends.txt into `self.meta`.

                mod.conf takes precedence. description.txt is read only when
                mod.conf gave no description, and depends.txt only when
                mod.conf gave neither `depends` nor `optional_depends`. Also
                derives a title and a short description, looks up the forum
                topic via findModInfo, and moves a configured `name` from the
                metadata into `self.name`.
                """
                result = {}

                # .conf file
                try:
                        with open(self.baseDir + "/mod.conf", "r") as myfile:
                                conf = parseConf(myfile.read())
                except IOError:
                        print("mod.conf does not exist!")
                else:
                        for key in ["name", "description", "title", "depends", "optional_depends"]:
                                if key in conf:
                                        result[key] = conf[key]

                # description.txt
                if "description" not in result:
                        try:
                                with open(self.baseDir + "/description.txt", "r") as myfile:
                                        result["description"] = myfile.read()
                        except IOError:
                                print("description.txt does not exist!")

                # Dependencies: comma separated values from mod.conf, otherwise
                # one name per line in depends.txt ("?" suffix marks optional).
                if "depends" in result or "optional_depends" in result:
                        for key in ["depends", "optional_depends"]:
                                if key in result:
                                        result[key] = self._splitDepends(result[key])
                else:
                        import re
                        pattern = re.compile(r"^([a-z0-9_]+)\??$")
                        try:
                                with open(self.baseDir + "/depends.txt", "r") as myfile:
                                        contents = myfile.read()
                        except IOError:
                                print("depends.txt does not exist!")
                        else:
                                soft = []
                                hard = []
                                for line in contents.split("\n"):
                                        line = line.strip()
                                        if not pattern.match(line):
                                                continue
                                        if line.endswith("?"):
                                                soft.append(line[:-1])
                                        else:
                                                hard.append(line)

                                result["depends"] = hard
                                result["optional_depends"] = soft

                # Calculate Title
                if "name" in result and "title" not in result:
                        result["title"] = result["name"].replace("_", " ").title()

                # Calculate short description: cut after the first full stop,
                # or at 200 characters when there is no full stop or it occurs
                # within the first four characters.
                if "description" in result:
                        desc = result["description"]
                        idx = desc.find(".") + 1
                        cutIdx = min(len(desc), 200 if idx < 5 else idx)
                        result["short_description"] = desc[:cutIdx]

                # Get forum ID
                info = findModInfo(self.author, result.get("name"), self.repo)
                if info is not None:
                        result["forumId"] = info.get("topicId")

                # The configured name lives on the node, not in the metadata.
                if "name" in result:
                        self.name = result.pop("name")

                self.meta = result

        def addChildrenFromModDir(self, dir):
                """Add a child node for every non-hidden subdirectory of `dir`.

                Does nothing when `dir` does not exist, e.g. for a game that
                has a game.conf but no mods directory.
                """
                if not os.path.isdir(dir):
                        return

                for entry in sorted(os.listdir(dir)):
                        path = dir + "/" + entry
                        if not entry.startswith('.') and os.path.isdir(path):
                                self.children.append(PackageTreeNode(path, name=entry))


        def fold(self, attr, key=None, acc=None):
                """Collect a value from this node and all descendants into a set.

                attr -- name of the node attribute to read
                key  -- if given, the attribute is treated as a dict and
                        `attribute.get(key)` is collected instead
                acc  -- set to add to; a new set is created when omitted

                List values are merged element by element, None values are
                skipped. A node without metadata contributes nothing. Returns
                the accumulator.
                """
                if acc is None:
                        acc = set()

                if self.meta is None:
                        return acc

                at = getattr(self, attr)
                value = at if key is None else at.get(key)

                if isinstance(value, list):
                        acc |= set(value)
                elif value is not None:
                        acc.add(value)

                for child in self.children:
                        child.fold(attr, key, acc)

                return acc

        def get(self, key):
                """Return the metadata value for `key`, or None if unavailable."""
                if self.meta is None:
                        return None
                return self.meta.get(key)
281
def generateGitURL(urlstr):
        """Rewrite a repository URL into the form handed to git.

        The result always uses the http scheme with empty credentials
        (``http://:@``) followed by the host and path of `urlstr`. A query
        string is kept, joined with its "?" separator; the fragment is
        dropped.
        """
        parts = urlsplit(urlstr)

        gitUrl = "http://:@" + parts.netloc + parts.path
        if parts.query:
                gitUrl += "?" + parts.query

        return gitUrl
286
def cloneRepo(urlstr, ref=None, recursive=False):
        """Clone a repository from an unvalidated URL into a temporary directory.

        urlstr    -- repository URL; it is rewritten by generateGitURL first
        ref       -- if given, a head named "myhead" is created at this ref
                     and checked out after cloning
        recursive -- passed through to the clone

        Returns a tuple of (path, repo) on success; the caller is responsible
        for deleting the returned directory.
        Raises `TaskError`, carrying git's cleaned-up stderr output, when a
        git command fails. On any failure the temporary directory is removed
        here, because the caller never learns its path.
        """
        gitDir = tempfile.gettempdir() + "/" + randomString(10)

        err = None
        try:
                gitUrl = generateGitURL(urlstr)
                print("Cloning from " + gitUrl)
                repo = git.Repo.clone_from(gitUrl, gitDir, \
                                progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)

                if ref is not None:
                        repo.create_head("myhead", ref).checkout()
                return gitDir, repo
        except GitCommandError as e:
                # The TaskError is raised outside of this handler; this is
                # needed to stop the backtrace being weird.
                err = e.stderr
                shutil.rmtree(gitDir, ignore_errors=True)
        except Exception:
                # Do not leave a partial clone behind for unexpected errors.
                shutil.rmtree(gitDir, ignore_errors=True)
                raise

        raise TaskError(err.replace("stderr: ", "") \
                        .replace("Cloning into '" + gitDir + "'...", "") \
                        .strip())
311
@celery.task()
def getMeta(urlstr, author):
        """Clone the repository at `urlstr` and return its package metadata.

        Returns a dict with the keys "name", "provides", "type", "depends",
        "optional_depends", "title", "repo", "issueTracker", "forumId",
        "description" and "short_description". Names the package provides
        itself are removed from both dependency lists, and all sets are
        converted to lists.

        Raises TaskError if cloning fails or the package type cannot be
        detected. The temporary clone is always deleted.
        """
        gitDir, _ = cloneRepo(urlstr, recursive=True)
        try:
                tree = PackageTreeNode(gitDir, author=author, repo=urlstr)
        finally:
                # Also clean up when scanning the tree raises.
                shutil.rmtree(gitDir)

        # An empty checkout yields a node without a detected type.
        if getattr(tree, "type", None) is None:
                raise TaskError("Unable to detect package type!")

        result = {}
        result["name"] = tree.name
        result["provides"] = tree.fold("name")
        result["type"] = tree.type.name

        for key in ["depends", "optional_depends"]:
                result[key] = tree.fold("meta", key)

        for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
                result[key] = tree.get(key)

        # A package does not depend on the mods it ships itself.
        for mod in result["provides"]:
                result["depends"].discard(mod)
                result["optional_depends"].discard(mod)

        # Sets are converted to lists for the returned result.
        return {key: list(value) if isinstance(value, set) else value
                        for key, value in result.items()}
338
339
def makeVCSReleaseFromGithub(id, branch, release, url):
        """Point `release` at GitHub's zip archive of the newest commit on `branch`.

        id      -- release id (not used here)
        branch  -- branch name to look up
        release -- release object to update; it is approved and committed
        url     -- parsed repository URL, as accepted by GithubURLMaker

        Returns the download URL stored on the release.
        Raises TaskError if the URL is not a GitHub repository URL, if the
        GitHub API answers with an HTTP error, or if no commit is returned.
        """
        urlmaker = GithubURLMaker(url)
        if not urlmaker.isValid():
                raise TaskError("Invalid github repo URL")

        commitsURL = urlmaker.getCommitsURL(branch)

        httpCode = None
        contents = None
        try:
                with urllib.request.urlopen(commitsURL) as response:
                        contents = response.read().decode("utf-8")
        except HTTPError as e:
                # Raised below, outside the handler, like cloneRepo does.
                httpCode = e.code

        if httpCode is not None:
                raise TaskError("Unable to get commits from Github (HTTP {})".format(httpCode))

        commits = json.loads(contents)

        # Be defensive about the payload: anything but a non-empty list whose
        # first item has a "sha" means there is no commit to use.
        if not isinstance(commits, list) or len(commits) == 0 or not "sha" in commits[0]:
                raise TaskError("No commits found")

        release.url          = urlmaker.getCommitDownload(commits[0]["sha"])
        release.task_id     = None
        release.commit_hash = commits[0]["sha"]
        release.approve(release.package.author)
        print(release.url)
        db.session.commit()

        return release.url
360
361
362
@celery.task()
def makeVCSRelease(id, branch):
        """Create the downloadable file for release `id` from its package's repo.

        GitHub repositories are delegated to makeVCSReleaseFromGithub. Any
        other repository is cloned at `branch`, archived as a zip file under
        app/public/uploads, and the release is updated, approved and
        committed. The temporary clone is always removed.

        Returns the release URL. Raises TaskError if the release or its
        package does not exist.
        """
        release = PackageRelease.query.get(id)
        if release is None:
                raise TaskError("No such release!")
        if release.package is None:
                raise TaskError("No package attached to release")

        parsed = urlparse(release.package.repo)
        if parsed.netloc == "github.com":
                return makeVCSReleaseFromGithub(id, branch, release, parsed)

        cloneDir, clone = cloneRepo(release.package.repo, ref=branch, recursive=True)
        try:
                zipName = randomString(10) + ".zip"
                zipPath = os.path.join("app/public/uploads", zipName)
                with open(zipPath, "wb") as archiveFile:
                        clone.archive(archiveFile, format="zip")

                release.url         = "/uploads/" + zipName
                release.task_id     = None
                release.commit_hash = clone.head.object.hexsha
                release.approve(release.package.author)
                print(release.url)
                db.session.commit()

                return release.url
        finally:
                shutil.rmtree(cloneDir)
394
@celery.task()
def importRepoScreenshot(id):
        """Import screenshot.png/.jpg/.jpeg from the repository of package `id`.

        The first existing file, tried in that order, is copied to
        app/public/uploads and stored as an approved PackageScreenshot.

        Returns the upload URL, or None if cloning failed or the repository
        has no screenshot. Raises Exception if the package does not exist or
        is soft-deleted. The temporary clone is always removed.
        """
        package = Package.query.get(id)
        if package is None or package.soft_deleted:
                raise Exception("Unexpected none package")

        # Clone the repository
        try:
                cloneDir, _ = cloneRepo(package.repo)
        except TaskError as e:
                # ignore download errors
                print(e)
                return None

        # Find and import screenshot
        try:
                found = next((ext for ext in ["png", "jpg", "jpeg"]
                                if os.path.isfile(cloneDir + "/screenshot." + ext)), None)
                if found is not None:
                        uploadName = randomString(10) + "." + found
                        shutil.copyfile(cloneDir + "/screenshot." + found,
                                        os.path.join("app/public/uploads", uploadName))

                        shot = PackageScreenshot()
                        shot.approved = True
                        shot.package = package
                        shot.title   = "screenshot.png"
                        shot.url     = "/uploads/" + uploadName
                        db.session.add(shot)
                        db.session.commit()

                        return "/uploads/" + uploadName
        finally:
                shutil.rmtree(cloneDir)

        print("screenshot.png does not exist")
        return None
432
433
434
def getDepends(package):
        """Fetch the dependency specification of `package` from GitHub.

        Only repositories hosted on github.com are supported; for any other
        repository, or an unrecognised GitHub URL, an empty dict is returned.

        mod.conf is tried first. If it provides `depends` and/or
        `optional_depends`, those values are returned as found. Otherwise
        depends.txt is parsed (one name per line, a trailing "?" marks an
        optional dependency) and both keys are returned as comma separated
        strings. A file that cannot be fetched because of an HTTP error is
        treated as absent.
        """
        url = urlparse(package.repo)
        if url.netloc != "github.com":
                return {}

        urlmaker = GithubURLMaker(url)
        if not urlmaker.isValid():
                return {}

        result = {}

        #
        # Try getting depends on mod.conf
        #
        # Raw-file URLs are derived from baseUrl, the same way
        # GithubURLMaker.getScreenshotURL() builds its URL.
        try:
                with urllib.request.urlopen(urlmaker.baseUrl + "/mod.conf") as response:
                        contents = response.read().decode("utf-8")
        except HTTPError:
                print("mod.conf does not exist")
        else:
                conf = parseConf(contents)
                for key in ["depends", "optional_depends"]:
                        if key in conf:
                                result[key] = conf[key]

        if "depends" in result or "optional_depends" in result:
                return result


        #
        # Try depends.txt
        #
        import re
        pattern = re.compile(r"^([a-z0-9_]+)\??$")
        try:
                with urllib.request.urlopen(urlmaker.baseUrl + "/depends.txt") as response:
                        contents = response.read().decode("utf-8")
        except HTTPError:
                print("depends.txt does not exist")
        else:
                soft = []
                hard = []
                for line in contents.split("\n"):
                        line = line.strip()
                        if not pattern.match(line):
                                continue
                        if line.endswith("?"):
                                soft.append(line[:-1])
                        else:
                                hard.append(line)

                result["depends"] = ",".join(hard)
                result["optional_depends"] = ",".join(soft)

        return result
489
490
def importDependencies(package, mpackage_cache):
        """Add Dependency rows for `package` to the session from its repo files.

        Nothing is done if the package already has dependencies. Hard
        dependencies are added with `optional = False`, optional ones with
        `optional = True`. The session is not committed here.
        """
        already_imported = Dependency.query.filter_by(depender=package).count() != 0
        if already_imported:
                return

        result = getDepends(package)

        # (key in result, log line format, value for Dependency.optional)
        kinds = (
                ("depends",          "{} hard: {}", False),
                ("optional_depends", "{} soft: {}", True),
        )
        for key, logFormat, isOptional in kinds:
                if key not in result:
                        continue

                deps = Dependency.SpecToList(package, result[key], mpackage_cache)
                print(logFormat.format(len(deps), result[key]))
                for dep in deps:
                        dep.optional = isOptional
                        db.session.add(dep)
510
@celery.task()
def importAllDependencies():
        """Delete all dependencies and re-import them for every mod package.

        Prints a progress header per package and commits once at the end.
        """
        Dependency.query.delete()
        mpackage_cache = {}
        packages = Package.query.filter_by(type=PackageType.MOD).all()
        # Count from 1 so that the progress reads 1/N up to N/N.
        for i, p in enumerate(packages, start=1):
                print("============= {} ({}/{}) =============".format(p.name, i, len(packages)))
                importDependencies(p, mpackage_cache)

        db.session.commit()