]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/importtasks.py
Fix release validation for repos with submodules
[cheatdb.git] / app / tasks / importtasks.py
1 # Content DB
2 # Copyright (C) 2018  rubenwardy
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17
18 import flask, json, os, git, tempfile, shutil, gitdb
19 from git import GitCommandError
20 from git_archive_all import GitArchiver
21 from flask_sqlalchemy import SQLAlchemy
22 from urllib.error import HTTPError
23 import urllib.request
24 from urllib.parse import urlparse, quote_plus, urlsplit
25 from zipfile import ZipFile
26
27 from app import app
28 from app.models import *
29 from app.tasks import celery, TaskError
30 from app.utils import randomString
31 from .minetestcheck import build_tree, MinetestCheckError, ContentType
32 from .minetestcheck.config import parse_conf
33
class GithubURLMaker:
    """Builds GitHub-related URLs from a parsed repository URL.

    ``url`` must expose a ``path`` attribute (for example the result of
    ``urllib.parse.urlparse``) shaped like ``/<user>/<repo>[.git][/]``.
    When the path does not match, ``baseUrl``, ``user`` and ``repo`` stay
    ``None`` and ``isValid()`` returns ``False``; the URL getters must not be
    used on an invalid instance.
    """

    def __init__(self, url):
        self.baseUrl = None
        self.user = None
        self.repo = None

        # Imported locally: this module has no top-level ``import re``.
        import re
        m = re.search(r"^/([^/]+)/([^/]+)/?$", url.path)
        if m is None:
            return

        user = m.group(1)
        repo = m.group(2)
        # Only strip a trailing ".git"; a plain replace() would also mangle
        # names that merely contain it (e.g. "my.github.repo").
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        # Raw file URLs are always built against the "master" branch.
        self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
                .format(user, repo)
        self.user = user
        self.repo = repo

    def isValid(self):
        """Return True when the constructor recognised the repository path."""
        return self.baseUrl is not None

    def getRepoURL(self):
        """Return the canonical https://github.com/<user>/<repo> URL."""
        return "https://github.com/{}/{}".format(self.user, self.repo)

    def getScreenshotURL(self):
        """Return the raw URL of ``screenshot.png`` in the repository root."""
        return self.baseUrl + "/screenshot.png"

    def getModConfURL(self):
        """Return the raw URL of ``mod.conf`` in the repository root."""
        return self.baseUrl + "/mod.conf"

    def getDependsURL(self):
        """Return the raw URL of ``depends.txt`` in the repository root."""
        return self.baseUrl + "/depends.txt"

    def getCommitsURL(self, branch):
        """Return the GitHub API URL listing commits for ``branch``."""
        return "https://api.github.com/repos/{}/{}/commits?sha={}" \
                .format(self.user, self.repo, quote_plus(branch))

    def getCommitDownload(self, commit):
        """Return the zip archive download URL for the given commit."""
        return "https://github.com/{}/{}/archive/{}.zip" \
                .format(self.user, self.repo, commit)
72
# Process-wide cache of the mod list: filled on the first getKrockList() call
# and never refreshed afterwards.
krock_list_cache = None
krock_list_cache_by_name = None
def getKrockList():
    """Return the cached mod list and its by-name index.

    On the first call the list is downloaded as JSON from krock-works.uk.to.
    Entries are kept only if they have ``title``, ``author``, ``topicId`` and
    a non-empty ``link``, and if the title contains a ``[name]`` tag; the tag
    becomes the entry's ``name``.

    Returns a tuple ``(entries, by_name)`` where ``entries`` is a list of
    dicts with keys title/author/name/topicId/link and ``by_name`` maps each
    name to the list of entries sharing it.
    """
    global krock_list_cache
    global krock_list_cache_by_name

    if krock_list_cache is None:
        # Imported locally: this module has no top-level ``import re``.
        import re
        namePattern = re.compile(r"\[([A-Za-z0-9_]+)\]")
        requiredKeys = ("title", "author", "topicId", "link")

        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php") as response:
            contents = response.read().decode("utf-8")
        rawEntries = json.loads(contents)

        mods = []
        modsByName = {}
        for x in rawEntries:
            if not all(key in x for key in requiredKeys) or x["link"] == "":
                continue

            m = namePattern.search(x["title"])
            if m is None:
                continue

            mod = {
                "title":   x["title"],
                "author":  x["author"],
                "name":    m.group(1),
                "topicId": x["topicId"],
                "link":    x["link"],
            }
            mods.append(mod)
            modsByName.setdefault(mod["name"], []).append(mod)

        # Publish both caches together so a failure above never leaves the
        # list cached without its index.
        krock_list_cache = mods
        krock_list_cache_by_name = modsByName

    return krock_list_cache, krock_list_cache_by_name
114
def findModInfo(author, name, link):
    """Look up a mod list entry by name (and author), falling back to link.

    Lookup order:
      1. If ``name`` is known and only one entry has it, return that entry.
      2. If several entries share the name, return the first whose author
         equals ``author``.
      3. Otherwise, if ``link`` is longer than 15 characters, return the
         first entry whose link contains it.

    Returns the entry dict, or ``None`` when nothing matches.
    """
    entries, byName = getKrockList()

    if name is not None and name in byName:
        sameName = byName[name]
        if len(sameName) == 1:
            return sameName[0]

        byAuthor = next((e for e in sameName if e["author"] == author), None)
        if byAuthor is not None:
            return byAuthor

    if link is None or len(link) <= 15:
        return None

    return next((e for e in entries if link in e["link"]), None)
132
def generateGitURL(urlstr):
    """Rewrite a repository URL into the form passed to git.

    The original scheme and fragment are dropped: the result always uses
    ``http://`` with an empty ``:@`` user-info part in front of the host
    (presumably so git never prompts for credentials -- confirm), followed by
    the path and, when present, the query string.
    """
    parts = urlsplit(urlstr)

    gitUrl = "http://:@" + parts.netloc + parts.path
    # urlsplit() strips the "?" separator, so it has to be restored;
    # otherwise the query would be glued onto the path.
    if parts.query:
        gitUrl += "?" + parts.query

    return gitUrl
137
138
def getTempDir():
    """Return a random path inside the system temporary directory.

    Only the path is computed here; nothing is created on disk.
    """
    baseDir = tempfile.gettempdir()
    leaf = randomString(10)
    return os.path.join(baseDir, leaf)
141
142
def cloneRepo(urlstr, ref=None, recursive=False):
    """Clone a repo from an unvalidated URL into a fresh temp directory.

    With ``ref=None`` a shallow (depth 1) clone is made and ``recursive`` is
    forwarded to git. With a ``ref`` an empty repo is initialised, the
    remote is fetched, ``ref`` is pulled and every submodule is updated
    (``recursive`` is not consulted on this path).

    Returns a tuple ``(path, repo)`` on success; the caller is responsible
    for deleting the returned directory.
    Raises ``TaskError`` on git failure; in that case the temp directory is
    removed here because the caller never learns its path.
    """
    gitDir = getTempDir()

    err = None
    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        if ref is None:
            repo = git.Repo.clone_from(gitUrl, gitDir, \
                    progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
        else:
            repo = git.Repo.init(gitDir)
            origin = repo.create_remote("origin", url=gitUrl)
            assert origin.exists()
            origin.fetch()
            origin.pull(ref)

            for submodule in repo.submodules:
                submodule.update(init=True)

        return gitDir, repo

    except GitCommandError as e:
        # The error is only recorded here and raised after the handler:
        # this is needed to stop the backtrace being weird.
        # Fall back to the full message when git produced no stderr output.
        err = e.stderr or str(e)

    except gitdb.exc.BadName as e:
        # BadName is not documented to carry a ``stderr`` attribute; use its message.
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    # Failure path only: do not leak the partially created clone.
    shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "") \
            .replace("Cloning into '" + gitDir + "'...", "") \
            .strip())
180
@celery.task()
def getMeta(urlstr, author):
    """Clone ``urlstr`` and extract package metadata from its content tree.

    Returns a dict with ``name``, ``provides``, ``type``, ``depends``,
    ``optional_depends`` and the keys title/repo/issueTracker/forumId/
    description/short_description. Names the package itself provides are
    removed from both dependency collections, and any ``set`` value is
    converted to a list before returning (presumably so the task result can
    be serialised -- confirm).

    Raises ``TaskError`` when cloning fails or the tree fails validation.
    """
    gitDir, _ = cloneRepo(urlstr, recursive=True)

    try:
        tree = build_tree(gitDir, author=author, repo=urlstr)
    except MinetestCheckError as err:
        raise TaskError(str(err))
    finally:
        # Remove the clone on success and on validation failure alike.
        shutil.rmtree(gitDir)

    result = {}
    result["name"] = tree.name
    result["provides"] = tree.fold("name")
    result["type"] = tree.type.name

    for key in ["depends", "optional_depends"]:
        result[key] = tree.fold("meta", key)

    for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
        result[key] = tree.get(key)

    # A package does not depend on mods it ships itself.
    for mod in result["provides"]:
        result["depends"].discard(mod)
        result["optional_depends"].discard(mod)

    # Only existing keys are reassigned, so mutating during iteration is safe.
    for key, value in result.items():
        if isinstance(value, set):
            result[key] = list(value)

    return result
212
213
def makeVCSReleaseFromGithub(id, branch, release, url):
    """Point ``release`` at GitHub's zip archive of the newest commit.

    ``url`` is the parsed repository URL handed to ``GithubURLMaker``. The
    first commit returned by the GitHub commits API for ``branch`` is used:
    its archive URL and hash are stored on the release, the task id is
    cleared, and the release is approved and committed. ``id`` is not used
    by this function.

    Returns the release URL.
    Raises ``TaskError`` for an unrecognised repo URL, an HTTP error from
    the API, or a response without a usable commit.
    """
    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        raise TaskError("Invalid github repo URL")

    commitsURL = urlmaker.getCommitsURL(branch)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(commitsURL) as response:
            contents = response.read().decode("utf-8")
        commits = json.loads(contents)
    except HTTPError:
        raise TaskError("Unable to get commits for Github repository. Either the repository or reference doesn't exist.")

    # Guard against a non-list payload as well as an empty commit list.
    if not isinstance(commits, list) or len(commits) == 0 or "sha" not in commits[0]:
        raise TaskError("No commits found")

    latestSha = commits[0]["sha"]
    release.url         = urlmaker.getCommitDownload(latestSha)
    release.task_id     = None
    release.commit_hash = latestSha
    release.approve(release.package.author)
    db.session.commit()

    return release.url
236
237
@celery.task(bind=True)
def checkZipRelease(self, id, path):
    """Validate an uploaded zip release and approve it when it passes.

    The archive at ``path`` is extracted into a temporary directory and
    checked with ``build_tree`` against the package's type, author and name.
    On success the task id is cleared and the release is approved. On a
    validation failure the title gets a " (Fails validation)" suffix (once),
    the release is marked unapproved and keeps this task's id, and
    ``TaskError`` is raised.

    Raises ``TaskError`` when the release or its package is missing, or when
    validation fails.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    temp = getTempDir()
    try:
        with ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(temp)

        try:
            tree = build_tree(temp, expected_type=ContentType[release.package.type.name], \
                author=release.package.author.username, name=release.package.name)
        except MinetestCheckError as err:
            if "Fails validation" not in release.title:
                release.title += " (Fails validation)"

            release.task_id = self.request.id
            release.approved = False
            db.session.commit()

            raise TaskError(str(err))

        release.task_id = None
        release.approve(release.package.author)
        db.session.commit()

    finally:
        # ``temp`` may not exist if the archive could not be opened; do not
        # let the cleanup raise and hide the original error.
        shutil.rmtree(temp, ignore_errors=True)
270
271
@celery.task()
def makeVCSRelease(id, branch):
    """Build a zip release from the package's git repository at ``branch``.

    The repository is cloned, validated with ``build_tree`` against the
    package's type, author and name, and archived (submodules included) into
    a randomly named zip under ``UPLOAD_DIR``. The release then gets its
    URL and commit hash, has its task id cleared, and is approved and
    committed.

    Returns the release URL ("/uploads/<file>.zip").
    Raises ``TaskError`` when the release or its package is missing, the
    clone fails, or validation fails.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    # GitHub shortcut, currently disabled:
    # url = urlparse(release.package.repo)
    # if url.netloc == "github.com":
    #       return makeVCSReleaseFromGithub(id, branch, release, url)

    gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)

    # One try/finally around validation and archiving so the clone is
    # removed on every exit path, including a validation failure.
    try:
        try:
            tree = build_tree(gitDir, expected_type=ContentType[release.package.type.name], \
                author=release.package.author.username, name=release.package.name)
        except MinetestCheckError as err:
            raise TaskError(str(err))

        filename = randomString(10) + ".zip"
        destPath = os.path.join(app.config["UPLOAD_DIR"], filename)

        assert(not os.path.isfile(destPath))
        archiver = GitArchiver(force_sub=True, main_repo_abspath=gitDir)
        archiver.create(destPath)
        assert(os.path.isfile(destPath))

        release.url         = "/uploads/" + filename
        release.task_id     = None
        release.commit_hash = repo.head.object.hexsha
        release.approve(release.package.author)
        print(release.url)
        db.session.commit()

        return release.url
    finally:
        shutil.rmtree(gitDir)
311
@celery.task()
def importRepoScreenshot(id):
    """Import ``screenshot.{png,jpg,jpeg}`` from the package's repository.

    The repository is cloned and the first existing screenshot file (tried
    in the order png, jpg, jpeg) is copied into ``UPLOAD_DIR`` under a random
    name and stored as an approved ``PackageScreenshot``.

    Returns the "/uploads/..." URL of the imported file, or ``None`` when
    the clone fails or no screenshot exists.
    Raises ``Exception`` when the package is missing or soft-deleted.
    """
    package = Package.query.get(id)
    if package is None or package.soft_deleted:
        raise Exception("Unexpected none package")

    # Download errors are deliberately non-fatal: log and give up.
    try:
        gitDir, _ = cloneRepo(package.repo)
    except TaskError as e:
        print(e)
        return None

    try:
        for ext in ("png", "jpg", "jpeg"):
            sourcePath = gitDir + "/screenshot." + ext
            if not os.path.isfile(sourcePath):
                continue

            filename = randomString(10) + "." + ext
            uploadUrl = "/uploads/" + filename
            shutil.copyfile(sourcePath, os.path.join(app.config["UPLOAD_DIR"], filename))

            screenshot = PackageScreenshot()
            screenshot.approved = True
            screenshot.package = package
            screenshot.title   = "screenshot.png"
            screenshot.url     = uploadUrl
            db.session.add(screenshot)
            db.session.commit()

            return uploadUrl
    finally:
        # The clone is removed whether or not a screenshot was found.
        shutil.rmtree(gitDir)

    print("screenshot.png does not exist")
    return None
349
350
351
def getDepends(package):
    """Fetch a package's dependency specs from its GitHub repository.

    Only repositories hosted on github.com are supported; anything else, or
    an unrecognised GitHub path, yields ``{}``. ``mod.conf`` is tried first:
    if it provides ``depends`` and/or ``optional_depends`` those values are
    returned as-is. Otherwise ``depends.txt`` is parsed, one name per line
    with a trailing ``?`` marking an optional dependency, and both keys are
    returned as comma-joined strings.

    Returns a dict with zero or more of the keys ``depends`` and
    ``optional_depends``. A missing file (HTTP error) is logged, not raised.
    """
    url = urlparse(package.repo)
    if url.netloc != "github.com":
        return {}

    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        return {}

    result = {}

    #
    # Try getting depends on mod.conf
    #
    try:
        with urllib.request.urlopen(urlmaker.getModConfURL()) as response:
            contents = response.read().decode("utf-8")
        conf = parse_conf(contents)
        for key in ["depends", "optional_depends"]:
            try:
                result[key] = conf[key]
            except KeyError:
                pass

    except HTTPError:
        print("mod.conf does not exist")

    if "depends" in result or "optional_depends" in result:
        return result

    #
    # Try depends.txt
    #
    # Imported locally: this module has no top-level ``import re``.
    import re
    pattern = re.compile(r"^([a-z0-9_]+)\??$")
    # Built from baseUrl directly: the URL maker defines no depends.txt getter.
    dependsURL = urlmaker.baseUrl + "/depends.txt"
    try:
        with urllib.request.urlopen(dependsURL) as response:
            contents = response.read().decode("utf-8")
        soft = []
        hard = []
        for line in contents.split("\n"):
            line = line.strip()
            if pattern.match(line):
                if line.endswith("?"):
                    soft.append(line[:-1])
                else:
                    hard.append(line)

        result["depends"] = ",".join(hard)
        result["optional_depends"] = ",".join(soft)
    except HTTPError:
        print("depends.txt does not exist")

    return result
406
407
def importDependencies(package, mpackage_cache):
    """Add ``Dependency`` rows for ``package`` from its repository metadata.

    Does nothing when the package already has at least one dependency row.
    Otherwise the specs from ``getDepends`` are converted with
    ``Dependency.SpecToList`` and added to the session: hard dependencies
    first, then optional ones. The session is not committed here.
    """
    existingCount = Dependency.query.filter_by(depender=package).count()
    if existingCount != 0:
        return

    specs = getDepends(package)

    # (key in the spec dict, value for dep.optional, log line template)
    kinds = (
        ("depends",          False, "{} hard: {}"),
        ("optional_depends", True,  "{} soft: {}"),
    )
    for key, isOptional, template in kinds:
        if key not in specs:
            continue

        deps = Dependency.SpecToList(package, specs[key], mpackage_cache)
        print(template.format(len(deps), specs[key]))
        for dep in deps:
            dep.optional = isOptional
            db.session.add(dep)
427
@celery.task()
def importAllDependencies():
    """Rebuild the dependency table for every MOD package.

    All existing ``Dependency`` rows are deleted, each package of type MOD
    is re-imported through ``importDependencies`` (sharing one lookup cache),
    and the session is committed once at the end.
    """
    Dependency.query.delete()

    cache = {}
    mods = Package.query.filter_by(type=PackageType.MOD).all()
    total = len(mods)
    for index, mod in enumerate(mods):
        # Progress banner; the index is zero-based.
        print("============= {} ({}/{}) =============".format(mod.name, index, total))
        importDependencies(mod, cache)

    db.session.commit()