]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/importtasks.py
Fix git clone breaking when branch is None
[cheatdb.git] / app / tasks / importtasks.py
1 # Content DB
2 # Copyright (C) 2018  rubenwardy
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17
18 import flask, json, os, git, tempfile, shutil
19 from git import GitCommandError
20 from flask_sqlalchemy import SQLAlchemy
21 from urllib.error import HTTPError
22 import urllib.request
23 from urllib.parse import urlparse, quote_plus, urlsplit
24 from app import app
25 from app.models import *
26 from app.tasks import celery, TaskError
27 from app.utils import randomString
28
29
class GithubURLMaker:
    """Builds GitHub web, raw-content and API URLs for a single repository.

    `url` is a parsed URL object (anything with a `path` attribute, such as
    the result of `urllib.parse.urlparse`). Its path must have the form
    `/USER/REPO`, optionally followed by a trailing slash. When it does not,
    the instance is left invalid: `isValid()` returns False and the URL
    getters must not be called.
    """

    def __init__(self, url):
        self.baseUrl = None
        self.user = None
        self.repo = None

        # Rewrite path
        import re
        m = re.search(r"^/([^/]+)/([^/]+)/?$", url.path)
        if m is None:
            return

        user = m.group(1)
        repo = m.group(2)
        # Strip only a trailing ".git"; removing it anywhere in the name
        # would corrupt repositories such as "user.github.io".
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
            .format(user, repo)
        self.user = user
        self.repo = repo

    def isValid(self):
        """Return True when the URL given to the constructor was recognised."""
        return self.baseUrl is not None

    def getRepoURL(self):
        """Return the canonical https://github.com/ URL of the repository."""
        return "https://github.com/{}/{}".format(self.user, self.repo)

    def getScreenshotURL(self):
        """Return the raw URL of screenshot.png on the master branch."""
        return self.baseUrl + "/screenshot.png"

    def getModConfURL(self):
        """Return the raw URL of mod.conf on the master branch."""
        return self.baseUrl + "/mod.conf"

    def getDependsURL(self):
        """Return the raw URL of depends.txt on the master branch."""
        return self.baseUrl + "/depends.txt"

    def getCommitsURL(self, branch):
        """Return the GitHub API URL listing commits for `branch`.

        When `branch` is None the `sha` filter is omitted, leaving the
        choice of branch to the API.
        """
        commits = "https://api.github.com/repos/{}/{}/commits" \
            .format(self.user, self.repo)
        if branch is None:
            return commits
        return commits + "?sha=" + urllib.parse.quote_plus(branch)

    def getCommitDownload(self, commit):
        """Return the URL of the zip archive for the given commit."""
        return "https://github.com/{}/{}/archive/{}.zip" \
            .format(self.user, self.repo, commit)
68
krock_list_cache = None
krock_list_cache_by_name = None


def getKrockList():
    """Return the mod list from krock-works.uk.to, downloading it once.

    Returns a tuple `(mods, mods_by_name)`:

    * `mods` is a list of dicts with the keys "title", "author", "name",
      "topicId" and "link".
    * `mods_by_name` maps each "name" to the list of entries sharing it.

    Entries are kept only if they have all of "title", "author", "topicId"
    and a non-empty "link", and if the title contains a `[name]` tag; that
    tag becomes the entry's "name". The result is cached in module globals,
    so later calls do not touch the network.
    """
    global krock_list_cache
    global krock_list_cache_by_name

    if krock_list_cache is None:
        import re
        name_pattern = re.compile(r"\[([A-Za-z0-9_]+)\]")

        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php") as response:
            entries = json.loads(response.read().decode("utf-8"))

        mods = []
        mods_by_name = {}
        for entry in entries:
            has_fields = "title" in entry and "author" in entry and \
                "topicId" in entry and "link" in entry
            if not has_fields or entry["link"] == "":
                continue

            m = name_pattern.search(entry["title"])
            if m is None:
                continue

            mod = {
                "title": entry["title"],
                "author": entry["author"],
                "name": m.group(1),
                "topicId": entry["topicId"],
                "link": entry["link"],
            }
            mods.append(mod)
            mods_by_name.setdefault(mod["name"], []).append(mod)

        # Publish both caches only once they are fully built.
        krock_list_cache = mods
        krock_list_cache_by_name = mods_by_name

    return krock_list_cache, krock_list_cache_by_name
110
def findModInfo(author, name, link):
    """Look up a mod in the list returned by getKrockList().

    Lookup order:

    1. By `name`: if exactly one entry has that name it is returned;
       otherwise the first entry with that name whose "author" equals
       `author` is returned.
    2. By `link`: if `link` is longer than 15 characters, the first entry
       whose "link" contains it is returned.

    Returns the matching entry dict, or None when nothing matches.
    """
    entries, by_name = getKrockList()

    if name is not None and name in by_name:
        candidates = by_name[name]
        if len(candidates) == 1:
            return candidates[0]

        same_author = next((c for c in candidates if c["author"] == author), None)
        if same_author is not None:
            return same_author

    # Very short links are skipped for the substring search.
    if link is None or len(link) <= 15:
        return None

    return next((entry for entry in entries if link in entry["link"]), None)
128
129
def parseConf(string):
    """Parse `key = value` lines from a .conf file into a dict.

    Each line is split at its first "="; the key and value are stripped of
    surrounding whitespace. Lines without "=", lines that start with "=",
    and comment lines (first non-blank character is "#") are ignored. When
    a key occurs more than once, the last occurrence wins.
    """
    retval = {}
    for line in string.split("\n"):
        # Skip comments so that e.g. "# depends = foo" is not read as a setting.
        if line.lstrip().startswith("#"):
            continue

        key, sep, value = line.partition("=")
        if sep and key:
            retval[key.strip()] = value.strip()

    return retval
140
141
class PackageTreeNode:
    """A package directory (game, mod or modpack) and the mods nested in it.

    Constructing a node scans `baseDir`, detects the package type, reads its
    metadata files and recursively creates child nodes for the mods of a game
    or modpack. An empty directory is accepted and left with `type` and
    `meta` set to None.

    Raises TaskError when a non-empty directory matches no known layout.
    """

    def __init__(self, baseDir, author=None, repo=None, name=None):
        print("Scanning " + baseDir)
        self.baseDir = baseDir
        self.author = author
        self.name = name
        self.repo = repo
        self.meta = None
        self.children = []
        # Always defined, so nodes that return early below can still be
        # inspected without an AttributeError.
        self.type = None

        # Detect type
        is_modpack = False
        if os.path.isfile(baseDir + "/game.conf"):
            package_type = PackageType.GAME
        elif os.path.isfile(baseDir + "/init.lua"):
            package_type = PackageType.MOD
        elif os.path.isfile(baseDir + "/modpack.txt") or \
                os.path.isfile(baseDir + "/modpack.conf"):
            package_type = PackageType.MOD
            is_modpack = True
        elif os.path.isdir(baseDir + "/mods"):
            package_type = PackageType.GAME
        elif not os.listdir(baseDir):
            # probably a submodule
            return
        else:
            raise TaskError("Unable to detect package type!")

        self.type = package_type
        self.readMetaFiles()

        if self.type == PackageType.GAME:
            self.addChildrenFromModDir(baseDir + "/mods")
        elif is_modpack:
            self.addChildrenFromModDir(baseDir)

    @staticmethod
    def _splitDepends(spec):
        """Split a comma-separated dependency string, dropping empty items."""
        names = (part.strip() for part in spec.split(","))
        return [name for name in names if name]

    def readMetaFiles(self):
        """Read mod.conf, description.txt and depends.txt into `self.meta`.

        mod.conf takes precedence: description.txt is consulted only when
        mod.conf gave no description, and depends.txt only when mod.conf gave
        neither "depends" nor "optional_depends". Also derives "title" and
        "short_description", looks up "forumId" through findModInfo(), and
        moves a "name" found in mod.conf to `self.name`.
        """
        result = {}

        # .conf file
        try:
            with open(self.baseDir + "/mod.conf", "r") as myfile:
                conf = parseConf(myfile.read())
                for key in ["name", "description", "title", "depends", "optional_depends"]:
                    if key in conf:
                        result[key] = conf[key]
        except IOError:
            print("mod.conf does not exist!")

        # description.txt
        if "description" not in result:
            try:
                with open(self.baseDir + "/description.txt", "r") as myfile:
                    result["description"] = myfile.read()
            except IOError:
                print("description.txt does not exist!")

        # depends.txt
        import re
        pattern = re.compile(r"^([a-z0-9_]+)\??$")
        if "depends" not in result and "optional_depends" not in result:
            try:
                with open(self.baseDir + "/depends.txt", "r") as myfile:
                    contents = myfile.read()

                soft = []
                hard = []
                for line in contents.split("\n"):
                    line = line.strip()
                    if pattern.match(line):
                        # A trailing "?" marks an optional dependency.
                        if line.endswith("?"):
                            soft.append(line[:-1])
                        else:
                            hard.append(line)

                result["depends"] = hard
                result["optional_depends"] = soft

            except IOError:
                print("depends.txt does not exist!")

        else:
            # Values from mod.conf are comma-separated strings. Empty items
            # are dropped so "depends =" does not produce a "" dependency.
            if "depends" in result:
                result["depends"] = self._splitDepends(result["depends"])
            if "optional_depends" in result:
                result["optional_depends"] = self._splitDepends(result["optional_depends"])

        # Calculate Title
        if "name" in result and "title" not in result:
            result["title"] = result["name"].replace("_", " ").title()

        # Calculate short description: the text up to and including the first
        # period, or the first 200 characters when there is no period or it
        # occurs within the first four characters.
        if "description" in result:
            desc = result["description"]
            idx = desc.find(".") + 1
            cutIdx = min(len(desc), 200 if idx < 5 else idx)
            result["short_description"] = desc[:cutIdx]

        # Get forum ID
        info = findModInfo(self.author, result.get("name"), self.repo)
        if info is not None:
            result["forumId"] = info.get("topicId")

        # The name lives on the node itself rather than in the metadata.
        if "name" in result:
            self.name = result.pop("name")

        self.meta = result

    def addChildrenFromModDir(self, dir):
        """Add a child node for every non-hidden subdirectory of `dir`.

        Does nothing when `dir` does not exist or cannot be listed.
        """
        # os.walk() yields nothing for a missing directory; the default keeps
        # next() from raising StopIteration in that case.
        _, subdirs, _ = next(os.walk(dir), (dir, [], []))
        for entry in subdirs:
            path = dir + "/" + entry
            if not entry.startswith('.') and os.path.isdir(path):
                self.children.append(PackageTreeNode(path, name=entry))

    def fold(self, attr, key=None, acc=None):
        """Collect a value from this node and all descendants into a set.

        Reads attribute `attr` of each node; when `key` is given the
        attribute is treated as a mapping and `key` is looked up in it. List
        values contribute each element, other non-None values are added as
        they are. A node whose `meta` is None contributes nothing and its
        children are not visited.

        Returns `acc` (a new set when none is passed in).
        """
        if acc is None:
            acc = set()

        if self.meta is None:
            return acc

        at = getattr(self, attr)
        value = at if key is None else at.get(key)

        if isinstance(value, list):
            acc.update(value)
        elif value is not None:
            acc.add(value)

        for child in self.children:
            child.fold(attr, key, acc)

        return acc

    def get(self, key):
        """Return `self.meta[key]`, or None if it is absent or no metadata was read."""
        if self.meta is None:
            return None
        return self.meta.get(key)
284
def generateGitURL(urlstr):
    """Rewrite a repository URL into the form handed to git.

    The result always uses the http scheme with an empty username and
    password, keeps the host, path and query string of `urlstr`, and drops
    the fragment.
    """
    # NOTE(review): the blank credentials presumably stop git from prompting
    # for a password on private repositories - confirm.
    scheme, netloc, path, query, frag = urlsplit(urlstr)

    url = "http://:@" + netloc + path
    if query:
        # urlsplit() removes the "?" separator, so it has to be restored.
        url += "?" + query
    return url
289
def cloneRepo(urlstr, ref=None, recursive=False):
    """Clone a repo from an unvalidated URL.

    Makes a depth-1 clone into a fresh directory under the system temp
    directory. `ref` selects the branch passed to git (git's default is used
    when it is None) and `recursive` controls the clone's `recursive` option.

    Returns a tuple of (path, repo) on success; the caller is responsible
    for deleting the returned directory.

    Raises TaskError on failure, after removing whatever was cloned so far.
    """
    gitDir = tempfile.gettempdir() + "/" + randomString(10)

    err = None
    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        options = dict(progress=None, env=None, depth=1, recursive=recursive,
                kill_after_timeout=15)
        # Only pass the branch option when a ref was actually requested.
        if ref is not None:
            options["b"] = ref

        repo = git.Repo.clone_from(gitUrl, gitDir, **options)
        return gitDir, repo

    except GitCommandError as e:
        # This is needed to stop the backtrace being weird
        err = e.stderr or str(e)

    # GitPython re-exports gitdb's BadName from git.exc, so no separate gitdb
    # import is needed. BadName has no stderr attribute; use its message.
    except git.exc.BadName as e:
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    # The caller never learns the path when cloning fails, so clean up here.
    shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "") \
            .replace("Cloning into '" + gitDir + "'...", "") \
            .strip())
321
@celery.task()
def getMeta(urlstr, author):
    """Clone a repository and return the metadata detected in it.

    The returned dict contains "name", "type" (the package type's name),
    "provides" (names of the package and all nested mods), "depends" and
    "optional_depends" (collected over the whole tree, minus anything the
    package provides itself), plus "title", "repo", "issueTracker",
    "forumId", "description" and "short_description" as read from the root
    of the tree. Sets are converted to lists before returning.

    Raises TaskError when cloning fails or no package type can be detected.
    """
    gitDir, _ = cloneRepo(urlstr, recursive=True)
    try:
        tree = PackageTreeNode(gitDir, author=author, repo=urlstr)
    finally:
        # Remove the clone even if scanning the tree fails.
        shutil.rmtree(gitDir)

    # An empty repository yields a node without a detected type.
    package_type = getattr(tree, "type", None)
    if package_type is None:
        raise TaskError("Unable to detect package type!")

    result = {
        "name": tree.name,
        "provides": tree.fold("name"),
        "type": package_type.name,
    }

    for key in ["depends", "optional_depends"]:
        result[key] = tree.fold("meta", key)

    for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
        result[key] = tree.get(key)

    # A package must not list the mods it bundles as dependencies.
    for mod in result["provides"]:
        result["depends"].discard(mod)
        result["optional_depends"].discard(mod)

    return {
        key: list(value) if isinstance(value, set) else value
        for key, value in result.items()
    }
348
349
def makeVCSReleaseFromGithub(id, branch, release, url):
    """Point `release` at GitHub's zip archive of the newest commit on `branch`.

    `url` is the parsed repository URL. The newest commit is taken from the
    first entry of the GitHub commits API response. The release gets its
    download URL and commit hash set, its task id cleared, is approved on
    behalf of the package author, and the session is committed.

    Returns the release's download URL. Raises TaskError when the URL is not
    a GitHub repository URL, the API request fails with an HTTP error, or no
    commit is found.
    """
    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        raise TaskError("Invalid github repo URL")

    commitsURL = urlmaker.getCommitsURL(branch)
    try:
        payload = urllib.request.urlopen(commitsURL).read()
    except HTTPError:
        raise TaskError("Unable to get commits for Github repository. Either the repository or reference doesn't exist.")
    commits = json.loads(payload.decode("utf-8"))

    if len(commits) == 0 or "sha" not in commits[0]:
        raise TaskError("No commits found")

    sha = commits[0]["sha"]
    release.url = urlmaker.getCommitDownload(sha)
    release.task_id = None
    release.commit_hash = sha
    release.approve(release.package.author)
    db.session.commit()

    return release.url
372
373
374
@celery.task()
def makeVCSRelease(id, branch):
    """Create the download for release `id` from its package's repository.

    Repositories on github.com are handled by makeVCSReleaseFromGithub().
    Any other repository is cloned at `branch`, archived as a zip file under
    app/public/uploads, and the release is updated, approved on behalf of
    the package author and committed. The clone is always removed afterwards.

    Returns the release URL. Raises TaskError when the release does not
    exist or has no package.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    if release.package is None:
        raise TaskError("No package attached to release")

    parsed = urlparse(release.package.repo)
    if parsed.netloc == "github.com":
        return makeVCSReleaseFromGithub(id, branch, release, parsed)

    gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)
    try:
        filename = randomString(10) + ".zip"
        with open(os.path.join("app/public/uploads", filename), "wb") as archive:
            repo.archive(archive, format="zip")

        release.url = "/uploads/" + filename
        release.task_id = None
        release.commit_hash = repo.head.object.hexsha
        release.approve(release.package.author)
        print(release.url)
        db.session.commit()

        return release.url
    finally:
        shutil.rmtree(gitDir)
406
@celery.task()
def importRepoScreenshot(id):
    """Import screenshot.png/.jpg/.jpeg from the root of a package's repo.

    The first of the three file names that exists is copied into
    app/public/uploads under a random name and stored as an approved
    PackageScreenshot titled "screenshot.png".

    Returns the screenshot's URL, or None when the repository cannot be
    cloned or holds no screenshot. Raises Exception when the package does
    not exist or is soft-deleted.
    """
    package = Package.query.get(id)
    if package is None or package.soft_deleted:
        raise Exception("Unexpected none package")

    # Clone the repository; failing to download it is not an error here.
    try:
        gitDir, _ = cloneRepo(package.repo)
    except TaskError as e:
        print(e)
        return None

    # Find and import screenshot
    try:
        for ext in ("png", "jpg", "jpeg"):
            sourcePath = gitDir + "/screenshot." + ext
            if not os.path.isfile(sourcePath):
                continue

            filename = randomString(10) + "." + ext
            shutil.copyfile(sourcePath, os.path.join("app/public/uploads", filename))
            url = "/uploads/" + filename

            ss = PackageScreenshot()
            ss.approved = True
            ss.package = package
            ss.title = "screenshot.png"
            ss.url = url
            db.session.add(ss)
            db.session.commit()

            return url
    finally:
        shutil.rmtree(gitDir)

    print("screenshot.png does not exist")
    return None
444
445
446
def getDepends(package):
    """Fetch the dependency specs of a GitHub-hosted package.

    Reads mod.conf from the master branch first; when it defines neither
    "depends" nor "optional_depends", falls back to depends.txt.

    Returns a dict that may contain "depends" and "optional_depends", each a
    comma-separated string. Returns an empty dict when the repository is not
    a valid github.com URL or neither file could be fetched.
    """
    url = urlparse(package.repo)
    if url.netloc != "github.com":
        return {}

    urlmaker = GithubURLMaker(url)
    if not urlmaker.isValid():
        return {}

    result = {}

    #
    # Try getting depends on mod.conf
    #
    try:
        with urllib.request.urlopen(urlmaker.getModConfURL()) as response:
            conf = parseConf(response.read().decode("utf-8"))
        for key in ["depends", "optional_depends"]:
            if key in conf:
                result[key] = conf[key]

    except HTTPError:
        print("mod.conf does not exist")

    if "depends" in result or "optional_depends" in result:
        return result

    #
    # Try depends.txt
    #
    import re
    pattern = re.compile(r"^([a-z0-9_]+)\??$")
    # Built from the raw-content base URL directly, so this lookup does not
    # rely on a dedicated getter being available on the URL maker.
    dependsURL = urlmaker.baseUrl + "/depends.txt"
    try:
        with urllib.request.urlopen(dependsURL) as response:
            contents = response.read().decode("utf-8")

        soft = []
        hard = []
        for line in contents.split("\n"):
            line = line.strip()
            if pattern.match(line):
                # A trailing "?" marks an optional dependency.
                if line.endswith("?"):
                    soft.append(line[:-1])
                else:
                    hard.append(line)

        result["depends"] = ",".join(hard)
        result["optional_depends"] = ",".join(soft)
    except HTTPError:
        print("depends.txt does not exist")

    return result
501
502
def importDependencies(package, mpackage_cache):
    """Add Dependency rows for `package` to the session.

    Does nothing when the package already has dependencies recorded. Specs
    come from getDepends(); "depends" entries are added as required and
    "optional_depends" entries as optional. Nothing is committed here.
    """
    already_imported = Dependency.query.filter_by(depender=package).count() != 0
    if already_imported:
        return

    result = getDepends(package)

    # (spec key, log format, value for Dependency.optional)
    kinds = (
        ("depends", "{} hard: {}", False),
        ("optional_depends", "{} soft: {}", True),
    )
    for key, log_format, optional in kinds:
        if key not in result:
            continue

        spec = result[key]
        deps = Dependency.SpecToList(package, spec, mpackage_cache)
        print(log_format.format(len(deps), spec))
        for dep in deps:
            dep.optional = optional
            db.session.add(dep)
522
@celery.task()
def importAllDependencies():
    """Rebuild the dependencies of every mod package.

    Deletes all existing Dependency rows, re-imports the dependencies of
    each package of type MOD, then commits once at the end.
    """
    Dependency.query.delete()
    mpackage_cache = {}
    packages = Package.query.filter_by(type=PackageType.MOD).all()
    total = len(packages)
    # Count from 1 so the progress line runs from (1/N) to (N/N).
    for position, p in enumerate(packages, start=1):
        print("============= {} ({}/{}) =============".format(p.name, position, total))
        importDependencies(p, mpackage_cache)

    db.session.commit()