diff --git a/app/tasks/importtasks.py b/app/tasks/importtasks.py
index 638e9ca..8c061d0 100644
--- a/app/tasks/importtasks.py
+++ b/app/tasks/importtasks.py
@@ -1,15 +1,38 @@
-import flask, json, os
-from flask.ext.sqlalchemy import SQLAlchemy
+# Content DB
+# Copyright (C) 2018 rubenwardy
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+
+import flask, json, os, git, tempfile, shutil
+from git import GitCommandError
+from flask_sqlalchemy import SQLAlchemy
 from urllib.error import HTTPError
 import urllib.request
-from urllib.parse import urlparse, quote_plus
+from urllib.parse import urlparse, quote_plus, urlsplit
 from app import app
 from app.models import *
 from app.tasks import celery, TaskError
 from app.utils import randomString
 
+
 class GithubURLMaker:
 	def __init__(self, url):
+		self.baseUrl = None
+		self.user = None
+		self.repo = None
+
 		# Rewrite path
 		import re
 		m = re.search("^\/([^\/]+)\/([^\/]+)\/?$", url.path)
@@ -17,9 +40,9 @@ class GithubURLMaker:
 			return
 
 		user = m.group(1)
-		repo = m.group(2)
+		repo = m.group(2).replace(".git", "")
 		self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
-				.format(user, repo.replace(".git", ""))
+				.format(user, repo)
 		self.user = user
 		self.repo = repo
 
@@ -29,15 +52,6 @@ class GithubURLMaker:
 	def getRepoURL(self):
 		return "https://github.com/{}/{}".format(self.user, self.repo)
 
-	def getIssueTrackerURL(self):
-		return "https://github.com/{}/{}/issues/".format(self.user, self.repo)
-
-	def getModConfURL(self):
-		return self.baseUrl + "/mod.conf"
-
-	def getDescURL(self):
-		return self.baseUrl + "/description.txt"
-
 	def getScreenshotURL(self):
 		return self.baseUrl + "/screenshot.png"
 
@@ -49,7 +63,6 @@ class GithubURLMaker:
 		return "https://github.com/{}/{}/archive/{}.zip" \
 			.format(self.user, self.repo, commit)
 
-
 krock_list_cache = None
 krock_list_cache_by_name = None
 def getKrockList():
@@ -57,7 +70,7 @@ def getKrockList():
 	global krock_list_cache_by_name
 
 	if krock_list_cache is None:
-		contents = urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8")
+		contents = urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php").read().decode("utf-8")
 		list = json.loads(contents)
 
 		def h(x):
@@ -77,9 +90,9 @@ def getKrockList():
 			return {
 				"title": x["title"],
 				"author": x["author"],
-				"name": x["name"],
+				"name": x["name"],
 				"topicId": x["topicId"],
-				"link": x["link"],
+				"link": x["link"],
 			}
 
 		krock_list_cache = [g(x) for x in list if h(x)]
@@ -123,124 +136,385 @@ def parseConf(string):
 	return retval
 
-@celery.task()
-def getMeta(urlstr, author):
-	url = urlparse(urlstr)
+class PackageTreeNode:
+	def __init__(self, baseDir, author=None, repo=None, name=None):
+		print("Scanning " + baseDir)
+		self.baseDir = baseDir
+		self.author = author
+		self.name = name
+		self.repo = repo
+		self.meta = None
+		self.children = []
+
+		# Detect type
+		type = None
+		is_modpack = False
+		if os.path.isfile(baseDir + "/game.conf"):
+			type = PackageType.GAME
+		elif os.path.isfile(baseDir + "/init.lua"):
+			type = PackageType.MOD
+		elif os.path.isfile(baseDir + "/modpack.txt") or \
+				os.path.isfile(baseDir + "/modpack.conf"):
+			type = PackageType.MOD
+			is_modpack = True
+		elif os.path.isdir(baseDir + "/mods"):
+			type = PackageType.GAME
+		elif os.listdir(baseDir) == []:
+			# probably a submodule
+			return
+		else:
+			raise TaskError("Unable to detect package type!")
 
-	urlmaker = None
-	if url.netloc == "github.com":
-		urlmaker = GithubURLMaker(url)
-	else:
-		raise TaskError("Unsupported repo")
+		self.type = type
+		self.readMetaFiles()
 
-	if not urlmaker.isValid():
-		raise TaskError("Error! Url maker not valid")
+		if self.type == PackageType.GAME:
+			self.addChildrenFromModDir(baseDir + "/mods")
+		elif is_modpack:
+			self.addChildrenFromModDir(baseDir)
 
-	result = {}
-	result["repo"] = urlmaker.getRepoURL()
-	result["issueTracker"] = urlmaker.getIssueTrackerURL()
+	def readMetaFiles(self):
+		result = {}
 
-	try:
-		contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
-		conf = parseConf(contents)
-		for key in ["name", "description", "title"]:
+		# .conf file
+		try:
+			with open(self.baseDir + "/mod.conf", "r") as myfile:
+				conf = parseConf(myfile.read())
+				for key in ["name", "description", "title", "depends", "optional_depends"]:
+					try:
+						result[key] = conf[key]
+					except KeyError:
+						pass
+		except IOError:
+			print("mod.conf does not exist!")
+
+		# description.txt
+		if not "description" in result:
 			try:
-				result[key] = conf[key]
-			except KeyError:
-				pass
-	except HTTPError:
-		print("mod.conf does not exist")
+				with open(self.baseDir + "/description.txt", "r") as myfile:
+					result["description"] = myfile.read()
+			except IOError:
+				print("description.txt does not exist!")
+
+		# depends.txt
+		import re
+		pattern = re.compile("^([a-z0-9_]+)\??$")
+		if not "depends" in result and not "optional_depends" in result:
+			try:
+				with open(self.baseDir + "/depends.txt", "r") as myfile:
+					contents = myfile.read()
+					soft = []
+					hard = []
+					for line in contents.split("\n"):
+						line = line.strip()
+						if pattern.match(line):
+							if line[len(line) - 1] == "?":
+								soft.append(line[:-1])
+							else:
+								hard.append(line)
 
-	if "name" in result:
-		result["title"] = result["name"].replace("_", " ").title()
+					result["depends"] = hard
+					result["optional_depends"] = soft
 
-	if not "description" in result:
-		try:
-			contents = urllib.request.urlopen(urlmaker.getDescURL()).read().decode("utf-8")
-			result["description"] = contents.strip()
-		except HTTPError:
-			print("description.txt does not exist!")
+			except IOError:
+				print("depends.txt does not exist!")
 
-	if "description" in result:
-		desc = result["description"]
-		idx = desc.find(".") + 1
-		cutIdx = min(len(desc), 200 if idx < 5 else idx)
-		result["short_description"] = desc[:cutIdx]
+		else:
+			if "depends" in result:
+				result["depends"] = [x.strip() for x in result["depends"].split(",")]
+			if "optional_depends" in result:
+				result["optional_depends"] = [x.strip() for x in result["optional_depends"].split(",")]
 
-	info = findModInfo(author, result.get("name"), result["repo"])
-	if info is not None:
-		result["forumId"] = info.get("topicId")
-	return result
+
+		# Calculate Title
+		if "name" in result and not "title" in result:
+			result["title"] = result["name"].replace("_", " ").title()
+
+		# Calculate short description
+		if "description" in result:
+			desc = result["description"]
+			idx = desc.find(".") + 1
+			cutIdx = min(len(desc), 200 if idx < 5 else idx)
+			result["short_description"] = desc[:cutIdx]
+
+		# Get forum ID
+		info = findModInfo(self.author, result.get("name"), self.repo)
+		if info is not None:
+			result["forumId"] = info.get("topicId")
+
+		if "name" in result:
+			self.name = result["name"]
+			del result["name"]
+
+		self.meta = result
+
+	def addChildrenFromModDir(self, dir):
+		for entry in next(os.walk(dir))[1]:
+			path = dir + "/" + entry
+			if not entry.startswith('.') and os.path.isdir(path):
+				self.children.append(PackageTreeNode(path, name=entry))
+
+
+	def fold(self, attr, key=None, acc=None):
+		if acc is None:
+			acc = set()
+
+		if self.meta is None:
+			return acc
+
+		at = getattr(self, attr)
+		value = at if key is None else at.get(key)
+
+		if isinstance(value, list):
+			acc |= set(value)
+		elif value is not None:
+			acc.add(value)
+
+		for child in self.children:
+			child.fold(attr, key, acc)
+
+		return acc
+
+	def get(self, key):
+		return self.meta.get(key)
+
+def generateGitURL(urlstr):
+	scheme, netloc, path, query, frag = urlsplit(urlstr)
+
+	return "http://:@" + netloc + path + query
+
+# Clones a repo from an unvalidated URL.
+# Returns a tuple of path and repo on success.
+# Throws `TaskError` on failure.
+# Caller is responsible for deleting returned directory.
+def cloneRepo(urlstr, ref=None, recursive=False):
+	gitDir = tempfile.gettempdir() + "/" + randomString(10)
+
+	err = None
+	try:
+		gitUrl = generateGitURL(urlstr)
+		print("Cloning from " + gitUrl)
+		repo = git.Repo.clone_from(gitUrl, gitDir, \
+				progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
+
+		if ref is not None:
+			repo.create_head("myhead", ref).checkout()
+
+		return gitDir, repo
+	except GitCommandError as e:
+		# This is needed to stop the backtrace being weird
+		err = e.stderr
+
+	raise TaskError(err.replace("stderr: ", "") \
+			.replace("Cloning into '" + gitDir + "'...", "") \
+			.strip())
 
 @celery.task()
-def makeVCSRelease(id, branch):
-	release = PackageRelease.query.get(id)
+def getMeta(urlstr, author):
+	gitDir, _ = cloneRepo(urlstr, recursive=True)
+	tree = PackageTreeNode(gitDir, author=author, repo=urlstr)
+	shutil.rmtree(gitDir)
 
-	if release is None:
-		raise TaskError("No such release!")
+	result = {}
+	result["name"] = tree.name
+	result["provides"] = tree.fold("name")
+	result["type"] = tree.type.name
 
-	if release.package is None:
-		raise TaskError("No package attached to release")
+	for key in ["depends", "optional_depends"]:
+		result[key] = tree.fold("meta", key)
 
-	url = urlparse(release.package.repo)
+	for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
+		result[key] = tree.get(key)
+
+	for mod in result["provides"]:
+		result["depends"].discard(mod)
+		result["optional_depends"].discard(mod)
+
+	for key, value in result.items():
+		if isinstance(value, set):
+			result[key] = list(value)
+
+	return result
 
-	urlmaker = None
-	if url.netloc == "github.com":
-		urlmaker = GithubURLMaker(url)
-	else:
-		raise TaskError("Unsupported repo")
+def makeVCSReleaseFromGithub(id, branch, release, url):
+	urlmaker = GithubURLMaker(url)
 	if not urlmaker.isValid():
 		raise TaskError("Invalid github repo URL")
 
-	contents = urllib.request.urlopen(urlmaker.getCommitsURL(branch)).read().decode("utf-8")
+	commitsURL = urlmaker.getCommitsURL(branch)
+	contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
 	commits = json.loads(contents)
 
-	if len(commits) == 0:
+	if len(commits) == 0 or not "sha" in commits[0]:
 		raise TaskError("No commits found")
 
-	release.url = urlmaker.getCommitDownload(commits[0]["sha"])
-	release.task_id = None
+	release.url = urlmaker.getCommitDownload(commits[0]["sha"])
+	release.task_id = None
+	release.commit_hash = commits[0]["sha"]
+	release.approve(release.package.author)
+	print(release.url)
 	db.session.commit()
 
 	return release.url
 
+
+@celery.task()
+def makeVCSRelease(id, branch):
+	release = PackageRelease.query.get(id)
+	if release is None:
+		raise TaskError("No such release!")
+	elif release.package is None:
+		raise TaskError("No package attached to release")
+
+	urlmaker = None
+	url = urlparse(release.package.repo)
+	if url.netloc == "github.com":
+		return makeVCSReleaseFromGithub(id, branch, release, url)
+	else:
+		gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)
+
+		try:
+			filename = randomString(10) + ".zip"
+			destPath = os.path.join("app/public/uploads", filename)
+			with open(destPath, "wb") as fp:
+				repo.archive(fp, format="zip")
+
+			release.url = "/uploads/" + filename
+			release.task_id = None
+			release.commit_hash = repo.head.object.hexsha
+			release.approve(release.package.author)
+			print(release.url)
+			db.session.commit()
+
+			return release.url
+		finally:
+			shutil.rmtree(gitDir)
+
+
 @celery.task()
 def importRepoScreenshot(id):
 	package = Package.query.get(id)
-	if package is None:
+	if package is None or package.soft_deleted:
 		raise Exception("Unexpected none package")
 
 	# Get URL Maker
+	try:
+		gitDir, _ = cloneRepo(package.repo)
+	except TaskError as e:
+		# ignore download errors
+		print(e)
+		return None
+
+	# Find and import screenshot
+	try:
+		for ext in ["png", "jpg", "jpeg"]:
+			sourcePath = gitDir + "/screenshot." + ext
+			if os.path.isfile(sourcePath):
+				filename = randomString(10) + "." + ext
+				destPath = os.path.join("app/public/uploads", filename)
+				shutil.copyfile(sourcePath, destPath)
+
+				ss = PackageScreenshot()
+				ss.approved = True
+				ss.package = package
+				ss.title = "screenshot.png"
+				ss.url = "/uploads/" + filename
+				db.session.add(ss)
+				db.session.commit()
+
+				return "/uploads/" + filename
+	finally:
+		shutil.rmtree(gitDir)
+
+	print("screenshot.png does not exist")
+	return None
+
+
+
+def getDepends(package):
 	url = urlparse(package.repo)
 	urlmaker = None
 	if url.netloc == "github.com":
 		urlmaker = GithubURLMaker(url)
 	else:
-		raise TaskError("Unsupported repo")
+		return {}
 
+	result = {}
 	if not urlmaker.isValid():
-		raise TaskError("Error! Url maker not valid")
+		return {}
 
+	#
+	# Try getting depends from mod.conf
+	#
 	try:
-		filename = randomString(10) + ".png"
-		imagePath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
-		print(imagePath)
-		urllib.request.urlretrieve(urlmaker.getScreenshotURL(), imagePath)
-
-		ss = PackageScreenshot()
-		ss.package = package
-		ss.title = "screenshot.png"
-		ss.url = "/uploads/" + filename
-		db.session.add(ss)
-		db.session.commit()
-
-		return "/uploads/" + filename
+		contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
+		conf = parseConf(contents)
+		for key in ["depends", "optional_depends"]:
+			try:
+				result[key] = conf[key]
+			except KeyError:
+				pass
 	except HTTPError:
-		print("screenshot.png does not exist")
+		print("mod.conf does not exist")
 
-	return None
+	if "depends" in result or "optional_depends" in result:
+		return result
+
+
+	#
+	# Try depends.txt
+	#
+	import re
+	pattern = re.compile("^([a-z0-9_]+)\??$")
+	try:
+		contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
+		soft = []
+		hard = []
+		for line in contents.split("\n"):
+			line = line.strip()
+			if pattern.match(line):
+				if line[len(line) - 1] == "?":
+					soft.append(line[:-1])
+				else:
+					hard.append(line)
+
+		result["depends"] = ",".join(hard)
+		result["optional_depends"] = ",".join(soft)
+	except HTTPError:
+		print("depends.txt does not exist")
+
+	return result
+
+
+def importDependencies(package, mpackage_cache):
+	if Dependency.query.filter_by(depender=package).count() != 0:
+		return
+
+	result = getDepends(package)
+
+	if "depends" in result:
+		deps = Dependency.SpecToList(package, result["depends"], mpackage_cache)
+		print("{} hard: {}".format(len(deps), result["depends"]))
+		for dep in deps:
+			dep.optional = False
+			db.session.add(dep)
+
+	if "optional_depends" in result:
+		deps = Dependency.SpecToList(package, result["optional_depends"], mpackage_cache)
+		print("{} soft: {}".format(len(deps), result["optional_depends"]))
+		for dep in deps:
+			dep.optional = True
+			db.session.add(dep)
+
+@celery.task()
+def importAllDependencies():
+	Dependency.query.delete()
+	mpackage_cache = {}
+	packages = Package.query.filter_by(type=PackageType.MOD).all()
+	for i, p in enumerate(packages):
+		print("============= {} ({}/{}) =============".format(p.name, i, len(packages)))
+		importDependencies(p, mpackage_cache)
+
+	db.session.commit()