]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/importtasks.py
Fix release auto-approval
[cheatdb.git] / app / tasks / importtasks.py
1 # Content DB
2 # Copyright (C) 2018  rubenwardy
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17
18 import flask, json, os, git, tempfile, shutil
19 from git import GitCommandError
20 from flask_sqlalchemy import SQLAlchemy
21 from urllib.error import HTTPError
22 import urllib.request
23 from urllib.parse import urlparse, quote_plus, urlsplit
24 from app import app
25 from app.models import *
26 from app.tasks import celery, TaskError
27 from app.utils import randomString
28
29
class GithubURLMaker:
    """Builds GitHub-related URLs for one repository.

    `url` is a parsed URL object; only its `path` attribute is read. The path
    must have the form `/<user>/<repo>`, optionally followed by a trailing
    slash. A `.git` suffix on the repo name is dropped.

    When the path does not have that form the instance is still created, but
    `isValid()` returns False and the URL getters must not be used.
    """

    def __init__(self, url):
        import re

        # Define every attribute up front so isValid() works (instead of
        # raising AttributeError) when the path does not match.
        self.baseUrl = None
        self.user = None
        self.repo = None

        m = re.search(r"^/([^/]+)/([^/]+)/?$", url.path)
        if m is None:
            return

        user = m.group(1)
        repo = m.group(2)
        # Only strip ".git" as a suffix; a blanket replace would also mangle
        # names that merely contain ".git" (e.g. "my.github").
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
                .format(user, repo)
        self.user = user
        self.repo = repo

    def isValid(self):
        """Return True when the URL passed to the constructor was recognised."""
        return self.baseUrl is not None

    def getRepoURL(self):
        """Return the canonical https://github.com/<user>/<repo> URL."""
        return "https://github.com/{}/{}".format(self.user, self.repo)

    def getScreenshotURL(self):
        """Return the raw URL of screenshot.png on the master branch."""
        return self.baseUrl + "/screenshot.png"

    def getCommitsURL(self, branch):
        """Return the GitHub API URL listing commits for `branch`."""
        return "https://api.github.com/repos/{}/{}/commits?sha={}" \
                .format(self.user, self.repo, urllib.parse.quote_plus(branch))

    def getCommitDownload(self, commit):
        """Return the zip archive download URL for `commit`."""
        return "https://github.com/{}/{}/archive/{}.zip" \
                .format(self.user, self.repo, commit)
61
# Module-level cache of the remote mod list; filled on the first call to
# getKrockList() and reused for the lifetime of the process.
krock_list_cache = None
krock_list_cache_by_name = None


def getKrockList():
    """Return the Krock mod list as `(entries, entries_by_name)`.

    On first use the list is downloaded as JSON and filtered. An entry is kept
    only if it has "title", "author", "topicId" and a non-empty "link", and if
    its title contains a `[modname]` tag; that tag becomes the entry's "name".

    `entries` is a list of dicts with the keys title/author/name/topicId/link.
    `entries_by_name` maps each name to the list of entries carrying it.
    Both are cached in module globals, so later calls do no network access.
    """
    global krock_list_cache
    global krock_list_cache_by_name

    if krock_list_cache is None:
        import re

        name_pattern = re.compile(r"\[([A-Za-z0-9_]+)\]")
        required_keys = ("title", "author", "topicId", "link")

        def toEntry(raw):
            # Returns the trimmed-down entry, or None when `raw` is unusable.
            if any(key not in raw for key in required_keys) or raw["link"] == "":
                return None

            m = name_pattern.search(raw["title"])
            if m is None:
                return None

            return {
                "title": raw["title"],
                "author": raw["author"],
                "name": m.group(1),
                "topicId": raw["topicId"],
                "link": raw["link"],
            }

        contents = urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php").read().decode("utf-8")
        entries = [e for e in map(toEntry, json.loads(contents)) if e is not None]

        by_name = {}
        for entry in entries:
            by_name.setdefault(entry["name"], []).append(entry)

        # Publish the lookup first: krock_list_cache doubles as the
        # "already loaded" flag checked above.
        krock_list_cache_by_name = by_name
        krock_list_cache = entries

    return krock_list_cache, krock_list_cache_by_name
103
def findModInfo(author, name, link):
    """Look up a mod in the Krock list; return its entry dict or None.

    Matching order:
      1. by `name`: a unique entry with that name wins outright, otherwise
         the first entry with that name whose author equals `author`;
      2. by `link` (only when longer than 15 characters): the first entry
         whose "link" contains `link` as a substring.
    """
    entries, by_name = getKrockList()

    if name is not None and name in by_name:
        candidates = by_name[name]
        if len(candidates) == 1:
            return candidates[0]

        same_author = next((c for c in candidates if c["author"] == author), None)
        if same_author is not None:
            return same_author

    if link is None or len(link) <= 15:
        return None

    return next((e for e in entries if link in e["link"]), None)
121
122
def parseConf(string):
    """Parse `key = value` lines into a dict.

    The text is split on "\n". Each line is cut at its first "="; key and
    value are stripped of surrounding whitespace. Lines without "=" or with
    "=" as their very first character are ignored. Later duplicates of a key
    overwrite earlier ones.
    """
    settings = {}
    for raw_line in string.split("\n"):
        key, sep, value = raw_line.partition("=")
        # No "=" at all, or nothing in front of it: not a setting line.
        if not sep or not key:
            continue

        settings[key.strip()] = value.strip()

    return settings
133
134
class PackageTreeNode:
    """One directory of a package checkout, plus its child mods.

    Constructing a node detects the package type from marker files, reads the
    metadata files in the directory and recurses into child mods (for games
    and modpacks).

    Attributes:
      baseDir   directory this node was built from
      author    author passed by the caller (may be None)
      repo      repo URL passed by the caller (may be None)
      name      name from mod.conf, else the name passed by the caller
      type      a PackageType, or None for an empty directory
      meta      dict of metadata read from disk, or None for an empty directory
      children  list of PackageTreeNode for contained mods
    """

    def __init__(self, baseDir, author=None, repo=None, name=None):
        """Scan `baseDir`.

        Raises TaskError when the directory is non-empty but matches none of
        the known package layouts.
        """
        print("Scanning " + baseDir)
        self.baseDir = baseDir
        self.author = author
        self.name = name
        self.repo = repo
        self.meta = None
        self.children = []
        # Set before detection so that an empty directory (early return
        # below) still leaves the node with a `type` attribute.
        self.type = None

        # Detect type
        is_modpack = False
        if os.path.isfile(baseDir + "/game.conf"):
            detected = PackageType.GAME
        elif os.path.isfile(baseDir + "/init.lua"):
            detected = PackageType.MOD
        elif os.path.isfile(baseDir + "/modpack.txt") or \
                os.path.isfile(baseDir + "/modpack.conf"):
            detected = PackageType.MOD
            is_modpack = True
        elif os.path.isdir(baseDir + "/mods"):
            detected = PackageType.GAME
        elif os.listdir(baseDir) == []:
            # probably a submodule
            return
        else:
            raise TaskError("Unable to detect package type!")

        self.type = detected
        self.readMetaFiles()

        if self.type == PackageType.GAME:
            self.addChildrenFromModDir(baseDir + "/mods")
        elif is_modpack:
            self.addChildrenFromModDir(baseDir)

    def readMetaFiles(self):
        """Populate `self.meta` (and possibly `self.name`) from files on disk.

        Sources, in order: mod.conf, then description.txt (only if mod.conf
        gave no description), then depends.txt (only if mod.conf gave neither
        depends nor optional_depends). Missing files are reported with print()
        and otherwise ignored.
        """
        import re

        result = {}

        # .conf file
        try:
            with open(self.baseDir + "/mod.conf", "r") as myfile:
                conf = parseConf(myfile.read())
            for key in ["name", "description", "title", "depends", "optional_depends"]:
                if key in conf:
                    result[key] = conf[key]
        except IOError:
            print("mod.conf does not exist!")

        # description.txt
        if "description" not in result:
            try:
                with open(self.baseDir + "/description.txt", "r") as myfile:
                    result["description"] = myfile.read()
            except IOError:
                print("description.txt does not exist!")

        if "depends" not in result and "optional_depends" not in result:
            # depends.txt: one mod name per line, "?" suffix marks optional
            pattern = re.compile(r"^([a-z0-9_]+)\??$")
            try:
                with open(self.baseDir + "/depends.txt", "r") as myfile:
                    contents = myfile.read()
            except IOError:
                print("depends.txt does not exist!")
            else:
                soft = []
                hard = []
                for line in contents.split("\n"):
                    line = line.strip()
                    if pattern.match(line):
                        if line.endswith("?"):
                            soft.append(line[:-1])
                        else:
                            hard.append(line)

                result["depends"] = hard
                result["optional_depends"] = soft
        else:
            # mod.conf gives comma-separated lists; blank items (e.g. from
            # an empty "depends =") are dropped rather than kept as "".
            for key in ("depends", "optional_depends"):
                if key in result:
                    result[key] = [x.strip() for x in result[key].split(",") if x.strip()]

        # Calculate Title
        if "name" in result and "title" not in result:
            result["title"] = result["name"].replace("_", " ").title()

        # Calculate short description: up to and including the first ".",
        # unless that "." is missing or within the first 4 characters, in
        # which case the first 200 characters are used.
        if "description" in result:
            desc = result["description"]
            idx = desc.find(".") + 1
            cutIdx = min(len(desc), 200 if idx < 5 else idx)
            result["short_description"] = desc[:cutIdx]

        # Get forum ID
        info = findModInfo(self.author, result.get("name"), self.repo)
        if info is not None:
            result["forumId"] = info.get("topicId")

        # The name lives on the node itself, not in the meta dict.
        if "name" in result:
            self.name = result.pop("name")

        self.meta = result

    def addChildrenFromModDir(self, dir):
        """Add a child node for every non-hidden subdirectory of `dir`.

        Does nothing when `dir` does not exist or cannot be listed.
        """
        # os.walk() yields nothing for a missing directory; the default
        # keeps next() from raising StopIteration in that case.
        subdirs = next(os.walk(dir), (dir, [], []))[1]
        for entry in subdirs:
            path = dir + "/" + entry
            if not entry.startswith('.') and os.path.isdir(path):
                self.children.append(PackageTreeNode(path, name=entry))

    def fold(self, attr, key=None, acc=None):
        """Collect a value from this node and all descendants into a set.

        The value is `getattr(node, attr)`, or `getattr(node, attr).get(key)`
        when `key` is given. Lists contribute each element, None contributes
        nothing. A node whose `meta` is None contributes nothing and is not
        descended into. Returns `acc` (a new set if none was passed).
        """
        if acc is None:
            acc = set()

        if self.meta is None:
            return acc

        at = getattr(self, attr)
        value = at if key is None else at.get(key)

        if isinstance(value, list):
            acc |= set(value)
        elif value is not None:
            acc.add(value)

        for child in self.children:
            child.fold(attr, key, acc)

        return acc

    def get(self, key):
        """Return `meta[key]`, or None if absent or if no metadata was read."""
        if self.meta is None:
            return None
        return self.meta.get(key)
277
def generateGitURL(urlstr):
    """Rewrite `urlstr` into the URL handed to git for cloning.

    The scheme is always replaced with http and empty credentials (":@") are
    inserted before the host; the fragment is dropped. The query string, if
    any, is kept.

    NOTE(review): the empty credentials presumably stop git from prompting
    for a username/password on private or missing repos - confirm.
    """
    parts = urlsplit(urlstr)

    url = "http://:@" + parts.netloc + parts.path
    if parts.query:
        # urlsplit() strips the "?" separator, so it has to be put back.
        url += "?" + parts.query

    return url
282
# Clones a repo from an unvalidated URL.
# Returns a tuple of path and repo on success.
# Throws `TaskError` on failure.
# Caller is responsible for deleting returned directory.
def cloneRepo(urlstr, ref=None, recursive=False):
    """Shallow-clone `urlstr` into a fresh directory under the temp dir.

    urlstr     repository URL; rewritten by generateGitURL() before use
    ref        if given, a head named "myhead" is created at `ref` and
               checked out after cloning
    recursive  passed through to the clone

    Returns `(gitDir, repo)`. The caller owns `gitDir` and must delete it.
    Raises TaskError, carrying git's stderr, when a git command fails. On any
    failure the partially created directory is removed before raising.

    NOTE(review): the clone uses depth=1, so a `ref` that is not reachable
    from the fetched history may fail to resolve - confirm against callers.
    """
    gitDir = tempfile.gettempdir() + "/" + randomString(10)

    err = None
    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)
        repo = git.Repo.clone_from(gitUrl, gitDir, \
                progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)

        if ref is not None:
            repo.create_head("myhead", ref).checkout()
        return gitDir, repo
    except GitCommandError as e:
        # This is needed to stop the backtrace being weird
        err = e.stderr or ""
        if isinstance(err, bytes):
            err = err.decode("utf-8", "replace")
    except Exception:
        # Nothing is returned to the caller, so nobody else can clean up.
        shutil.rmtree(gitDir, ignore_errors=True)
        raise

    # Only reached after a GitCommandError: drop whatever git left behind.
    shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "") \
            .replace("Cloning into '" + gitDir + "'...", "") \
            .strip())
307
@celery.task()
def getMeta(urlstr, author):
    """Clone `urlstr` and return a dict describing the package found there.

    The result has the keys name, provides, type, depends, optional_depends,
    title, repo, issueTracker, forumId, description and short_description.
    Set-valued entries are converted to lists before returning. Names the
    package itself provides are removed from both dependency lists.

    Raises TaskError if cloning fails or no package type can be detected.
    """
    gitDir, _ = cloneRepo(urlstr, recursive=True)
    try:
        tree = PackageTreeNode(gitDir, author=author, repo=urlstr)
    finally:
        # The checkout is only needed while scanning; remove it even when
        # scanning raises.
        shutil.rmtree(gitDir)

    # An empty checkout leaves the root node without a usable type.
    tree_type = getattr(tree, "type", None)
    if tree_type is None:
        raise TaskError("Unable to detect package type!")

    result = {}
    result["name"] = tree.name
    result["provides"] = tree.fold("name")
    result["type"] = tree_type.name

    for key in ["depends", "optional_depends"]:
        result[key] = tree.fold("meta", key)

    for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
        result[key] = tree.get(key)

    # A package does not depend on mods it ships itself.
    for mod in result["provides"]:
        result["depends"].discard(mod)
        result["optional_depends"].discard(mod)

    for key, value in result.items():
        if isinstance(value, set):
            result[key] = list(value)

    return result
334
335
def makeVCSReleaseFromGithub(id, branch, release, url):
    """Point `release` at the zip of the newest commit on `branch` on GitHub.

    id       release id (not used here; kept for the caller's signature)
    branch   branch name or other ref passed as `sha` to the commits API
    release  the release row to update, approve and commit
    url      parsed repo URL, as accepted by GithubURLMaker

    Returns the new `release.url`.
    Raises TaskError for an unrecognised repo URL, when the commits request
    fails with an HTTP error, or when the response contains no commit.
    """
    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        raise TaskError("Invalid github repo URL")

    commitsURL = urlmaker.getCommitsURL(branch)
    try:
        contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
    except HTTPError as e:
        # e.g. unknown repo or branch; report it as a task failure instead
        # of leaking the raw HTTP exception.
        raise TaskError("Unable to fetch commits from GitHub (HTTP {})".format(e.code)) from None
    commits = json.loads(contents)

    # A non-list payload carries no commits; indexing it with [0] would
    # raise instead of reporting the problem.
    if not isinstance(commits, list) or len(commits) == 0 or "sha" not in commits[0]:
        raise TaskError("No commits found")

    sha = commits[0]["sha"]
    release.url = urlmaker.getCommitDownload(sha)
    release.task_id = None
    release.commit_hash = sha
    release.approve(release.package.author)
    print(release.url)
    db.session.commit()

    return release.url
356
357
358
@celery.task()
def makeVCSRelease(id, branch):
    """Fill in release `id` from the package's repository at `branch`.

    GitHub repos are delegated to makeVCSReleaseFromGithub(). Any other repo
    is cloned, archived as a zip under app/public/uploads, and the release is
    updated, approved and committed. Returns the release URL.

    Raises TaskError when the release or its package does not exist, or when
    cloning fails.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    if release.package is None:
        raise TaskError("No package attached to release")

    repo_url = urlparse(release.package.repo)
    if repo_url.netloc == "github.com":
        return makeVCSReleaseFromGithub(id, branch, release, repo_url)

    gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)
    try:
        archive_name = randomString(10) + ".zip"
        archive_path = os.path.join("app/public/uploads", archive_name)
        with open(archive_path, "wb") as fp:
            repo.archive(fp, format="zip")

        release.url = "/uploads/" + archive_name
        release.task_id = None
        release.commit_hash = repo.head.object.hexsha
        release.approve(release.package.author)
        print(release.url)
        db.session.commit()

        return release.url
    finally:
        # The clone is only needed to build the archive.
        shutil.rmtree(gitDir)
390
@celery.task()
def importRepoScreenshot(id):
    """Import screenshot.{png,jpg,jpeg} from the repo of package `id`.

    The first of those files found in the repo root is copied into
    app/public/uploads under a random name and stored as an approved
    PackageScreenshot. Returns its "/uploads/..." URL, or None when the clone
    fails or no screenshot file exists.

    Raises Exception when the package does not exist or is soft-deleted.
    """
    package = Package.query.get(id)
    if package is None or package.soft_deleted:
        raise Exception("Unexpected none package")

    # Fetch the repository; a failed download is not an error for this task.
    try:
        gitDir, _ = cloneRepo(package.repo)
    except TaskError as e:
        print(e)
        return None

    # Find and import screenshot
    try:
        for ext in ["png", "jpg", "jpeg"]:
            sourcePath = gitDir + "/screenshot." + ext
            if not os.path.isfile(sourcePath):
                continue

            filename = randomString(10) + "." + ext
            shutil.copyfile(sourcePath, os.path.join("app/public/uploads", filename))
            uploadUrl = "/uploads/" + filename

            ss = PackageScreenshot()
            ss.approved = True
            ss.package = package
            ss.title = "screenshot.png"
            ss.url = uploadUrl
            db.session.add(ss)
            db.session.commit()

            return uploadUrl
    finally:
        shutil.rmtree(gitDir)

    print("screenshot.png does not exist")
    return None
428
429
430
def getDepends(package):
    """Fetch a package's dependency specs from its GitHub repository.

    Returns a dict that may contain "depends" and "optional_depends", each a
    comma-separated string. mod.conf is tried first; depends.txt is used only
    when mod.conf supplies neither key. Returns {} for repos that are not on
    github.com or whose URL is not recognised by GithubURLMaker.
    """
    url = urlparse(package.repo)
    if url.netloc != "github.com":
        return {}

    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        return {}

    result = {}

    # GithubURLMaker has no getters for these two files, so the raw URLs are
    # built from its base URL, the same way getScreenshotURL() does.
    modConfURL = urlmaker.baseUrl + "/mod.conf"
    dependsURL = urlmaker.baseUrl + "/depends.txt"

    #
    # Try getting depends on mod.conf
    #
    try:
        contents = urllib.request.urlopen(modConfURL).read().decode("utf-8")
        conf = parseConf(contents)
        for key in ["depends", "optional_depends"]:
            if key in conf:
                result[key] = conf[key]

    except HTTPError:
        print("mod.conf does not exist")

    if "depends" in result or "optional_depends" in result:
        return result

    #
    # Try depends.txt
    #
    import re
    pattern = re.compile(r"^([a-z0-9_]+)\??$")
    try:
        contents = urllib.request.urlopen(dependsURL).read().decode("utf-8")
        soft = []
        hard = []
        for line in contents.split("\n"):
            line = line.strip()
            if pattern.match(line):
                # A trailing "?" marks an optional dependency.
                if line.endswith("?"):
                    soft.append(line[:-1])
                else:
                    hard.append(line)

        result["depends"] = ",".join(hard)
        result["optional_depends"] = ",".join(soft)
    except HTTPError:
        print("depends.txt does not exist")

    return result
485
486
def importDependencies(package, mpackage_cache):
    """Add Dependency rows for `package` to the session, unless it has some.

    Specs come from getDepends(). Hard dependencies are added with
    optional=False, then soft ones with optional=True. Nothing is committed
    here. `mpackage_cache` is passed through to Dependency.SpecToList.
    """
    existing = Dependency.query.filter_by(depender=package).count()
    if existing != 0:
        return

    specs = getDepends(package)

    # (key in specs, log line template, value for Dependency.optional)
    passes = (
        ("depends", "{} hard: {}", False),
        ("optional_depends", "{} soft: {}", True),
    )
    for key, template, optional in passes:
        if key not in specs:
            continue

        deps = Dependency.SpecToList(package, specs[key], mpackage_cache)
        print(template.format(len(deps), specs[key]))
        for dep in deps:
            dep.optional = optional
            db.session.add(dep)
506
@celery.task()
def importAllDependencies():
    """Delete every Dependency row, then re-import them for all mod packages.

    A progress banner is printed per package; the session is committed once
    at the end.
    """
    Dependency.query.delete()

    mpackage_cache = {}
    mods = Package.query.filter_by(type=PackageType.MOD).all()
    total = len(mods)
    banner = "============= {} ({}/{}) ============="

    for index, mod in enumerate(mods):
        print(banner.format(mod.name, index, total))
        importDependencies(mod, mpackage_cache)

    db.session.commit()