]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/importtasks.py
Add validation to zip releases
[cheatdb.git] / app / tasks / importtasks.py
1 # Content DB
2 # Copyright (C) 2018  rubenwardy
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17
18 import flask, json, os, git, tempfile, shutil, gitdb
19 from git import GitCommandError
20 from git_archive_all import GitArchiver
21 from flask_sqlalchemy import SQLAlchemy
22 from urllib.error import HTTPError
23 import urllib.request
24 from urllib.parse import urlparse, quote_plus, urlsplit
25 from zipfile import ZipFile
26
27 from app import app
28 from app.models import *
29 from app.tasks import celery, TaskError
30 from app.utils import randomString
31 from .minetestcheck import build_tree, MinetestCheckError, ContentType
32 from .minetestcheck.config import parse_conf
33
class GithubURLMaker:
    """Builds GitHub URLs for a repository from its parsed URL.

    `url` must expose a `.path` attribute (e.g. the result of
    `urllib.parse.urlparse`). The path has to look like `/<user>/<repo>`,
    optionally followed by a trailing slash; a trailing `.git` on the repo
    name is dropped. If the path does not match, `baseUrl`, `user` and `repo`
    stay `None` and `isValid()` returns False.
    """

    def __init__(self, url):
        self.baseUrl = None
        self.user = None
        self.repo = None

        # Rewrite path
        import re
        m = re.search(r"^/([^/]+)/([^/]+)/?$", url.path)
        if m is None:
            return

        user = m.group(1)
        repo = m.group(2)
        # Strip only a trailing ".git": replacing every occurrence would
        # corrupt names that merely contain it (e.g. "my.github").
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        # Raw file URLs always point at the "master" branch.
        self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
                .format(user, repo)
        self.user = user
        self.repo = repo

    def isValid(self):
        """Return True when the URL passed to the constructor was recognised."""
        return self.baseUrl is not None

    def getRepoURL(self):
        """Return the canonical https://github.com/<user>/<repo> URL."""
        return "https://github.com/{}/{}".format(self.user, self.repo)

    def getScreenshotURL(self):
        """Return the raw URL of screenshot.png in the repository root."""
        return self.baseUrl + "/screenshot.png"

    def getModConfURL(self):
        """Return the raw URL of mod.conf in the repository root."""
        return self.baseUrl + "/mod.conf"

    def getDependsURL(self):
        """Return the raw URL of depends.txt in the repository root."""
        return self.baseUrl + "/depends.txt"

    def getCommitsURL(self, branch):
        """Return the GitHub API URL listing commits for `branch` (URL-quoted)."""
        return "https://api.github.com/repos/{}/{}/commits?sha={}" \
                .format(self.user, self.repo, urllib.parse.quote_plus(branch))

    def getCommitDownload(self, commit):
        """Return the URL of the zip archive for the given commit."""
        return "https://github.com/{}/{}/archive/{}.zip" \
                .format(self.user, self.repo, commit)
72
# Lazily-filled caches of the mod list; populated by getKrockList().
krock_list_cache = None
krock_list_cache_by_name = None


def getKrockList():
    """Return the cached mod list as `(entries, entries_by_name)`.

    On first use the list is downloaded from krock-works.uk.to and decoded
    as JSON. An entry is kept only if it has the keys "title", "author",
    "topicId" and a non-empty "link", and its title contains a
    `[technical_name]` tag; that tag becomes the entry's "name".

    `entries` is a list of dicts with the keys "title", "author", "name",
    "topicId" and "link". `entries_by_name` maps each name to the list of
    entries sharing it. Later calls return the cached objects without
    touching the network.
    """
    global krock_list_cache
    global krock_list_cache_by_name

    if krock_list_cache is None:
        import re
        name_pattern = re.compile(r"\[([A-Za-z0-9_]+)\]")

        # Close the HTTP response deterministically once it has been read.
        with urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php") as response:
            contents = response.read().decode("utf-8")
        raw_entries = json.loads(contents)

        entries = []
        for x in raw_entries:
            if not ("title" in x and "author" in x and
                    "topicId" in x and "link" in x and x["link"] != ""):
                continue

            m = name_pattern.search(x["title"])
            if m is None:
                continue

            entries.append({
                "title":   x["title"],
                "author":  x["author"],
                "name":    m.group(1),
                "topicId": x["topicId"],
                "link":    x["link"],
            })

        by_name = {}
        for entry in entries:
            by_name.setdefault(entry["name"], []).append(entry)

        # Publish the index first: the cache check above only looks at
        # krock_list_cache, so it must be assigned last.
        krock_list_cache_by_name = by_name
        krock_list_cache = entries

    return krock_list_cache, krock_list_cache_by_name
114
def findModInfo(author, name, link):
    """Look up a mod-list entry by name/author, falling back to its link.

    If `name` is known to the list, the entry is returned when it is the only
    one with that name; otherwise the first entry whose "author" equals
    `author` wins. If that fails and `link` is longer than 15 characters, the
    first entry whose "link" contains `link` as a substring is returned.
    Returns None when nothing matches.
    """
    topics, topics_by_name = getKrockList()

    if name is not None and name in topics_by_name:
        candidates = topics_by_name[name]
        if len(candidates) == 1:
            return candidates[0]

        same_author = next((t for t in candidates if t["author"] == author), None)
        if same_author is not None:
            return same_author

    # Links of 15 characters or fewer are never used for matching.
    if link is None or len(link) <= 15:
        return None

    for topic in topics:
        if link in topic["link"]:
            return topic

    return None
132
def generateGitURL(urlstr):
    """Rewrite `urlstr` into the URL that is handed to git for cloning.

    The scheme is replaced by `http://` and empty credentials (`:@`) are put
    in front of the host (presumably so git never prompts for a username or
    password -- TODO confirm). The path and query string are kept; the
    fragment is dropped.
    """
    parts = urlsplit(urlstr)

    url = "http://:@" + parts.netloc + parts.path
    if parts.query:
        # urlsplit() strips the "?" separator, so it has to be restored.
        url += "?" + parts.query
    return url
137
138
def getTempDir():
    """Return a path with a random 10-character name inside the system temp dir.

    Only the path is built here; this function does not create the directory.
    """
    unique_name = randomString(10)
    return os.path.join(tempfile.gettempdir(), unique_name)
141
142
# Clones a repo from an unvalidated URL.
# Returns a tuple of path and repo on success.
# Throws `TaskError` on failure.
# Caller is responsible for deleting returned directory.
def cloneRepo(urlstr, ref=None, recursive=False):
    """Shallow-clone `urlstr` into a fresh temporary directory.

    Args:
        urlstr: Repository URL; it is rewritten with generateGitURL() first.
        ref: Optional branch/reference passed to git as `b=<ref>`; when None
            the remote's default is cloned.
        recursive: Passed through to `git clone` as `recursive`.

    Returns:
        `(gitDir, repo)`: the clone's directory and the `git.Repo` object.
        The caller owns `gitDir` and must delete it.

    Raises:
        TaskError: the clone failed; the message is git's cleaned-up stderr.
            Any partially created directory is removed before raising.
    """
    gitDir = getTempDir()

    err = None
    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        options = {
            "progress": None,
            "env": None,
            "depth": 1,
            "recursive": recursive,
            "kill_after_timeout": 15,
        }
        if ref is not None:
            options["b"] = ref

        repo = git.Repo.clone_from(gitUrl, gitDir, **options)
        return gitDir, repo

    except GitCommandError as e:
        # This is needed to stop the backtrace being weird
        err = e.stderr or str(e)

    except gitdb.exc.BadName as e:
        # BadName carries no `stderr` attribute; use its message instead.
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    # The caller never learns about gitDir on failure, so clean it up here.
    shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "") \
            .replace("Cloning into '" + gitDir + "'...", "") \
            .strip())
174
@celery.task()
def getMeta(urlstr, author):
    """Clone `urlstr` and return the package metadata detected in it.

    The repository is cloned recursively, analysed with build_tree() and the
    clone is deleted again, whether or not the analysis succeeds.

    Returns:
        A dict with "name", "provides", "type", "depends",
        "optional_depends", "title", "repo", "issueTracker", "forumId",
        "description" and "short_description". Names listed in "provides"
        are removed from both dependency collections, and set values are
        converted to lists.

    Raises:
        TaskError: the clone failed or build_tree() rejected the content.
    """
    gitDir, _ = cloneRepo(urlstr, recursive=True)

    try:
        tree = build_tree(gitDir, author=author, repo=urlstr)
    except MinetestCheckError as err:
        raise TaskError(str(err))
    finally:
        # Remove the clone even when validation fails, so it is not leaked.
        shutil.rmtree(gitDir)

    result = {}
    result["name"] = tree.name
    result["provides"] = tree.fold("name")
    result["type"] = tree.type.name

    for key in ["depends", "optional_depends"]:
        result[key] = tree.fold("meta", key)

    for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
        result[key] = tree.get(key)

    # A package does not depend on the mods it ships itself.
    for mod in result["provides"]:
        result["depends"].discard(mod)
        result["optional_depends"].discard(mod)

    # Replace sets by lists in the returned dict.
    for key, value in result.items():
        if isinstance(value, set):
            result[key] = list(value)

    return result
206
207
def makeVCSReleaseFromGithub(id, branch, release, url):
    """Point `release` at GitHub's zip archive of the newest commit on `branch`.

    Args:
        id: Release id; not used by this function.
        branch: Branch/reference whose latest commit is looked up.
        release: Release object to update; it is approved on behalf of its
            package's author and the session is committed.
        url: Parsed repository URL, as accepted by GithubURLMaker.

    Returns:
        The download URL stored in `release.url`.

    Raises:
        TaskError: invalid repo URL, the GitHub API request failed, or the
            response contained no usable commit.
    """
    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        raise TaskError("Invalid github repo URL")

    commitsURL = urlmaker.getCommitsURL(branch)
    try:
        # Close the HTTP response deterministically once it has been read.
        with urllib.request.urlopen(commitsURL) as response:
            contents = response.read().decode("utf-8")
        commits = json.loads(contents)
    except HTTPError:
        raise TaskError("Unable to get commits for Github repository. Either the repository or reference doesn't exist.")

    # Anything other than a non-empty list of commit objects (for example a
    # JSON error object) means there is nothing to release.
    if not isinstance(commits, list) or len(commits) == 0 or not "sha" in commits[0]:
        raise TaskError("No commits found")

    latest_sha = commits[0]["sha"]

    release.url          = urlmaker.getCommitDownload(latest_sha)
    release.task_id     = None
    release.commit_hash = latest_sha
    release.approve(release.package.author)
    db.session.commit()

    return release.url
230
231
@celery.task()
def checkZIPRelease(id, path):
    """Validate an uploaded zip release and approve it when it passes.

    The archive at `path` is extracted into a temporary directory and checked
    with build_tree() against the package's type, author and name. On success
    the release's task id is cleared, the release is approved on behalf of
    the package author, and the session is committed. The temporary
    directory is removed in every case.

    Raises:
        TaskError: the release or its package is missing, or build_tree()
            rejected the archive's content.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    temp = getTempDir()
    try:
        with ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(temp)

        try:
            build_tree(temp, expected_type=ContentType[release.package.type.name], \
                author=release.package.author.username, name=release.package.name)
        except MinetestCheckError as err:
            raise TaskError(str(err))

        release.task_id = None
        release.approve(release.package.author)
        db.session.commit()

    finally:
        # `temp` may not exist yet if the archive could not be opened; do not
        # let the cleanup mask the real error.
        shutil.rmtree(temp, ignore_errors=True)
257
258
@celery.task()
def makeVCSRelease(id, branch):
    """Create a zip release for release `id` from the package's repository.

    The package's repo is cloned recursively at `branch`, validated with
    build_tree(), archived into UPLOAD_DIR under a random file name, and the
    release is updated (url, commit hash), approved and committed. The clone
    is deleted in every case.

    Returns:
        The release's new URL ("/uploads/<file>.zip").

    Raises:
        TaskError: the release or its package is missing, the clone failed,
            or build_tree() rejected the content.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    # url = urlparse(release.package.repo)
    # if url.netloc == "github.com":
    #       return makeVCSReleaseFromGithub(id, branch, release, url)

    gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)

    # Everything after the clone sits inside try/finally so the checkout is
    # removed even when validation fails.
    try:
        try:
            build_tree(gitDir, expected_type=ContentType[release.package.type.name], \
                author=release.package.author.username, name=release.package.name)
        except MinetestCheckError as err:
            raise TaskError(str(err))

        filename = randomString(10) + ".zip"
        destPath = os.path.join(app.config["UPLOAD_DIR"], filename)

        assert not os.path.isfile(destPath)
        archiver = GitArchiver(force_sub=True, main_repo_abspath=gitDir)
        archiver.create(destPath)
        assert os.path.isfile(destPath)

        release.url         = "/uploads/" + filename
        release.task_id     = None
        release.commit_hash = repo.head.object.hexsha
        release.approve(release.package.author)
        print(release.url)
        db.session.commit()

        return release.url
    finally:
        shutil.rmtree(gitDir)
298
@celery.task()
def importRepoScreenshot(id):
    """Import `screenshot.{png,jpg,jpeg}` from a package's repository.

    The repository of package `id` is cloned; the first existing screenshot
    file (checked in the order png, jpg, jpeg) is copied into UPLOAD_DIR
    under a random name and stored as an approved PackageScreenshot. The
    clone is always deleted afterwards.

    Returns:
        The "/uploads/..." URL of the imported screenshot, or None when the
        clone failed or no screenshot file exists.

    Raises:
        Exception: the package does not exist or is soft-deleted.
    """
    package = Package.query.get(id)
    if package is None or package.soft_deleted:
        raise Exception("Unexpected none package")

    # Clone the repository; a failed download is logged and ignored.
    try:
        gitDir, _ = cloneRepo(package.repo)
    except TaskError as e:
        print(e)
        return None

    # Find and import screenshot
    try:
        candidates = ((ext, gitDir + "/screenshot." + ext) for ext in ["png", "jpg", "jpeg"])
        found = next((pair for pair in candidates if os.path.isfile(pair[1])), None)

        if found is not None:
            ext, sourcePath = found
            filename = randomString(10) + "." + ext
            destPath = os.path.join(app.config["UPLOAD_DIR"], filename)
            shutil.copyfile(sourcePath, destPath)

            uploadUrl = "/uploads/" + filename

            ss = PackageScreenshot()
            ss.approved = True
            ss.package = package
            ss.title   = "screenshot.png"
            ss.url   = uploadUrl
            db.session.add(ss)
            db.session.commit()

            return uploadUrl
    finally:
        shutil.rmtree(gitDir)

    print("screenshot.png does not exist")
    return None
336
337
338
def getDepends(package):
    """Fetch a package's dependency specs from its GitHub repository.

    Only repositories hosted on github.com are supported. `mod.conf` is tried
    first; if it yields neither "depends" nor "optional_depends",
    `depends.txt` is parsed instead (one mod name per line, a trailing "?"
    marks an optional dependency).

    Returns:
        A dict that may contain "depends" and "optional_depends". Values
        read from depends.txt are comma-joined strings; values read from
        mod.conf are whatever parse_conf() returned. The dict is empty when
        the repo is not a valid GitHub URL or neither file could be fetched.
    """
    url = urlparse(package.repo)
    if url.netloc != "github.com":
        return {}

    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        return {}

    result = {}

    #
    # Try getting depends on mod.conf
    #
    try:
        with urllib.request.urlopen(urlmaker.getModConfURL()) as response:
            contents = response.read().decode("utf-8")
        conf = parse_conf(contents)
        for key in ["depends", "optional_depends"]:
            try:
                result[key] = conf[key]
            except KeyError:
                pass

    except HTTPError:
        print("mod.conf does not exist")

    if "depends" in result or "optional_depends" in result:
        return result

    #
    # Try depends.txt
    #
    import re
    pattern = re.compile(r"^([a-z0-9_]+)\??$")
    try:
        # The URL is built directly from baseUrl so this function does not
        # rely on a getDependsURL() helper being present on the URL maker.
        with urllib.request.urlopen(urlmaker.baseUrl + "/depends.txt") as response:
            contents = response.read().decode("utf-8")

        soft = []
        hard = []
        for line in contents.split("\n"):
            line = line.strip()
            if not pattern.match(line):
                continue

            if line.endswith("?"):
                soft.append(line[:-1])
            else:
                hard.append(line)

        result["depends"] = ",".join(hard)
        result["optional_depends"] = ",".join(soft)
    except HTTPError:
        print("depends.txt does not exist")

    return result
393
394
def importDependencies(package, mpackage_cache):
    """Add Dependency rows for `package` to the session, unless it has some.

    Nothing happens when the package already has dependencies recorded.
    Otherwise the specs returned by getDepends() are converted with
    Dependency.SpecToList(); hard dependencies are added with
    `optional = False`, then soft ones with `optional = True`. The session
    is not committed here.
    """
    existing = Dependency.query.filter_by(depender=package).count()
    if existing != 0:
        return

    specs = getDepends(package)

    # (key in the spec dict, log line template, value for Dependency.optional)
    kinds = (
        ("depends", "{} hard: {}", False),
        ("optional_depends", "{} soft: {}", True),
    )
    for key, template, is_optional in kinds:
        if key not in specs:
            continue

        parsed = Dependency.SpecToList(package, specs[key], mpackage_cache)
        print(template.format(len(parsed), specs[key]))
        for dependency in parsed:
            dependency.optional = is_optional
            db.session.add(dependency)
414
@celery.task()
def importAllDependencies():
    """Delete every Dependency row and re-import them for all MOD packages.

    A progress banner is printed for each package, and the session is
    committed once after the last package has been processed.
    """
    Dependency.query.delete()

    mpackage_cache = {}
    mods = Package.query.filter_by(type=PackageType.MOD).all()
    total = len(mods)

    for index, mod in enumerate(mods):
        banner = "============= {} ({}/{}) =============".format(mod.name, index, total)
        print(banner)
        importDependencies(mod, mpackage_cache)

    db.session.commit()