]> git.lizzy.rs Git - cheatdb.git/blobdiff - app/tasks/importtasks.py
Fix release validation for repos with submodules
[cheatdb.git] / app / tasks / importtasks.py
index 61b60d92f0771cb84e04ac2f90a943496cf3ad83..b66270268316d584f4f2b78519a412ced58472f3 100644 (file)
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 
-import flask, json, os
-from flask.ext.sqlalchemy import SQLAlchemy
+import flask, json, os, git, tempfile, shutil, gitdb
+from git import GitCommandError
+from git_archive_all import GitArchiver
+from flask_sqlalchemy import SQLAlchemy
 from urllib.error import HTTPError
 import urllib.request
-from urllib.parse import urlparse, quote_plus
+from urllib.parse import urlparse, quote_plus, urlsplit
+from zipfile import ZipFile
+
 from app import app
 from app.models import *
 from app.tasks import celery, TaskError
 from app.utils import randomString
+from .minetestcheck import build_tree, MinetestCheckError, ContentType
+from .minetestcheck.config import parse_conf
 
 class GithubURLMaker:
        def __init__(self, url):
+               self.baseUrl = None
+               self.user = None
+               self.repo = None
+
                # Rewrite path
                import re
                m = re.search("^\/([^\/]+)\/([^\/]+)\/?$", url.path)
@@ -46,18 +56,12 @@ class GithubURLMaker:
        def getRepoURL(self):
                return "https://github.com/{}/{}".format(self.user, self.repo)
 
-       def getIssueTrackerURL(self):
-               return "https://github.com/{}/{}/issues/".format(self.user, self.repo)
+       def getScreenshotURL(self):
+               return self.baseUrl + "/screenshot.png"
 
        def getModConfURL(self):
                return self.baseUrl + "/mod.conf"
 
-       def getDescURL(self):
-               return self.baseUrl + "/description.txt"
-
-       def getScreenshotURL(self):
-               return self.baseUrl + "/screenshot.png"
-
        def getCommitsURL(self, branch):
                return "https://api.github.com/repos/{}/{}/commits?sha={}" \
                                .format(self.user, self.repo, urllib.parse.quote_plus(branch))
@@ -66,7 +70,6 @@ class GithubURLMaker:
                return "https://github.com/{}/{}/archive/{}.zip" \
                                .format(self.user, self.repo, commit)
 
-
 krock_list_cache = None
 krock_list_cache_by_name = None
 def getKrockList():
@@ -74,7 +77,7 @@ def getKrockList():
        global krock_list_cache_by_name
 
        if krock_list_cache is None:
-               contents = urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8")
+               contents = urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php").read().decode("utf-8")
                list = json.loads(contents)
 
                def h(x):
@@ -94,9 +97,9 @@ def getKrockList():
                        return {
                                "title":   x["title"],
                                "author":  x["author"],
-                               "name":    x["name"],
+                               "name": x["name"],
                                "topicId": x["topicId"],
-                               "link":    x["link"],
+                               "link": x["link"],
                        }
 
                krock_list_cache = [g(x) for x in list if h(x)]
@@ -127,139 +130,308 @@ def findModInfo(author, name, link):
 
        return None
 
+def generateGitURL(urlstr):
+       """
+       Build the URL that is handed to git for cloning `urlstr`.
+
+       The scheme of the input is discarded: the result always starts with
+       "http://:@", i.e. plain HTTP with an empty user and password
+       (presumably so git never prompts for credentials - confirm).
+       The fragment is dropped; a query string, if any, is kept.
+       """
+       scheme, netloc, path, query, frag = urlsplit(urlstr)
 
-def parseConf(string):
-       retval = {}
-       for line in string.split("\n"):
-               idx = line.find("=")
-               if idx > 0:
-                       key   = line[:idx].strip()
-                       value = line[idx+1:].strip()
-                       retval[key] = value
+       gitUrl = "http://:@" + netloc + path
+       if query:
+               # urlsplit() strips the "?" separator, so it has to be put back
+               gitUrl += "?" + query
+       return gitUrl
 
-       return retval
 
+def getTempDir():
+       """
+       Return a randomly named path inside the system temp directory.
+
+       Only the path is built; the directory itself is not created here.
+       """
+       baseDir = tempfile.gettempdir()
+       dirName = randomString(10)
+       return os.path.join(baseDir, dirName)
 
-@celery.task()
-def getMeta(urlstr, author):
-       url = urlparse(urlstr)
 
-       urlmaker = None
-       if url.netloc == "github.com":
-               urlmaker = GithubURLMaker(url)
-       else:
-               raise TaskError("Unsupported repo")
+# Clones a repo from an unvalidated URL.
+# When `ref` is given, it is pulled into a freshly initialised repository and
+# every submodule is updated; otherwise a depth-1 clone is made and
+# `recursive` is passed through to git.
+# Returns a tuple of path and repo on success.
+# Throws `TaskError` on failure, after removing the temporary directory.
+# Caller is responsible for deleting returned directory.
+def cloneRepo(urlstr, ref=None, recursive=False):
+       gitDir = getTempDir()
 
-       if not urlmaker.isValid():
-               raise TaskError("Error! Url maker not valid")
+       err = None
+       try:
+               gitUrl = generateGitURL(urlstr)
+               print("Cloning from " + gitUrl)
 
-       result = {}
+               if ref is None:
+                       repo = git.Repo.clone_from(gitUrl, gitDir, \
+                                       progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
+               else:
+                       repo = git.Repo.init(gitDir)
+                       origin = repo.create_remote("origin", url=gitUrl)
+                       assert origin.exists()
+                       origin.fetch()
+                       origin.pull(ref)
 
-       result["repo"] = urlmaker.getRepoURL()
-       result["issueTracker"] = urlmaker.getIssueTrackerURL()
+                       for submodule in repo.submodules:
+                               submodule.update(init=True)
 
-       try:
-               contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
-               conf = parseConf(contents)
-               for key in ["name", "description", "title"]:
-                       try:
-                               result[key] = conf[key]
-                       except KeyError:
-                               pass
-       except HTTPError:
-               print("mod.conf does not exist")
+               return gitDir, repo
 
-       if "name" in result:
-               result["title"] = result["name"].replace("_", " ").title()
+       except GitCommandError as e:
+               # This is needed to stop the backtrace being weird
+               err = e.stderr
 
-       if not "description" in result:
-               try:
-                       contents = urllib.request.urlopen(urlmaker.getDescURL()).read().decode("utf-8")
-                       result["description"] = contents.strip()
-               except HTTPError:
-                       print("description.txt does not exist!")
+       except gitdb.exc.BadName as e:
+               # BadName has no `stderr` attribute; use the exception message
+               err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)
 
-       if "description" in result:
-               desc = result["description"]
-               idx = desc.find(".") + 1
-               cutIdx = min(len(desc), 200 if idx < 5 else idx)
-               result["short_description"] = desc[:cutIdx]
+       # Nothing is returned on failure, so the caller cannot clean up: do it
+       # here. The directory may not exist yet, hence ignore_errors.
+       shutil.rmtree(gitDir, ignore_errors=True)
+
+       raise TaskError(err.replace("stderr: ", "") \
+                       .replace("Cloning into '" + gitDir + "'...", "") \
+                       .strip())
 
-       info = findModInfo(author, result.get("name"), result["repo"])
-       if info is not None:
-               result["forumId"] = info.get("topicId")
+@celery.task()
+def getMeta(urlstr, author):
+       """
+       Clone the repository at `urlstr` and extract package metadata from it.
+
+       Returns a dict with the keys name, provides, type, depends,
+       optional_depends, title, repo, issueTracker, forumId, description and
+       short_description. Names the package itself provides are removed from
+       both dependency collections, and any set value is converted to a list.
+
+       Raises `TaskError` if cloning fails or `build_tree` reports a
+       `MinetestCheckError`. The temporary clone is removed in either case.
+       """
+       gitDir, _ = cloneRepo(urlstr, recursive=True)
 
-       return result
+       try:
+               tree = build_tree(gitDir, author=author, repo=urlstr)
+       except MinetestCheckError as err:
+               raise TaskError(str(err))
 
+       finally:
+               # Remove the clone whether or not validation succeeded
+               shutil.rmtree(gitDir)
 
-@celery.task()
-def makeVCSRelease(id, branch):
-       release = PackageRelease.query.get(id)
+       result = {}
+       result["name"] = tree.name
+       result["provides"] = tree.fold("name")
+       result["type"] = tree.type.name
 
-       if release is None:
-               raise TaskError("No such release!")
+       for key in ["depends", "optional_depends"]:
+               result[key] = tree.fold("meta", key)
 
-       if release.package is None:
-               raise TaskError("No package attached to release")
+       for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
+               result[key] = tree.get(key)
 
-       url = urlparse(release.package.repo)
+       # A package never depends on something it provides itself
+       for mod in result["provides"]:
+               result["depends"].discard(mod)
+               result["optional_depends"].discard(mod)
+
+       # Replace sets by lists in the returned dict
+       for key, value in result.items():
+               if isinstance(value, set):
+                       result[key] = list(value)
+
+       return result
 
-       urlmaker = None
-       if url.netloc == "github.com":
-               urlmaker = GithubURLMaker(url)
-       else:
-               raise TaskError("Unsupported repo")
 
+def makeVCSReleaseFromGithub(id, branch, release, url):
+       """
+       Point `release` at the GitHub zip download of a commit on `branch`.
+
+       `url` is the parsed repository URL handed to `GithubURLMaker`; `id` is
+       not used in this function. On success the release gets its url,
+       commit_hash and a cleared task_id, is approved on behalf of the package
+       author and the session is committed. Returns the download URL.
+
+       Raises `TaskError` if the URL is not a valid GitHub repo URL, the
+       commits request fails with an HTTP error, or no commit is found.
+       """
+       urlmaker = GithubURLMaker(url)
        if not urlmaker.isValid():
                raise TaskError("Invalid github repo URL")
 
        commitsURL = urlmaker.getCommitsURL(branch)
-       contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
-       commits = json.loads(contents)
+       try:
+               contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
+               commits = json.loads(contents)
+       except HTTPError:
+               raise TaskError("Unable to get commits for Github repository. Either the repository or reference doesn't exist.")
 
+       # Only the first listed commit is used (presumably the newest one -
+       # confirm against the GitHub commits API ordering)
        if len(commits) == 0 or not "sha" in commits[0]:
                raise TaskError("No commits found")
 
-       release.url = urlmaker.getCommitDownload(commits[0]["sha"])
-       print(release.url)
-       release.task_id = None
+       release.url          = urlmaker.getCommitDownload(commits[0]["sha"])
+       release.task_id     = None
+       release.commit_hash = commits[0]["sha"]
+       release.approve(release.package.author)
        db.session.commit()
 
        return release.url
 
 
+@celery.task(bind=True)
+def checkZipRelease(self, id, path):
+       """
+       Validate the uploaded zip at `path` for the release with the given id.
+
+       The archive is extracted to a temporary directory and checked with
+       `build_tree` against the package's type, author and name. If that
+       passes, the release's task_id is cleared and it is approved on behalf
+       of the package author. If it fails, the release is marked unapproved,
+       " (Fails validation)" is appended to its title (once) and `TaskError`
+       is raised. `TaskError` is also raised when the release or its package
+       is missing.
+       """
+       release = PackageRelease.query.get(id)
+       if release is None:
+               raise TaskError("No such release!")
+       elif release.package is None:
+               raise TaskError("No package attached to release")
+
+       temp = getTempDir()
+       try:
+               with ZipFile(path, 'r') as zip_ref:
+                       zip_ref.extractall(temp)
+
+               try:
+                       tree = build_tree(temp, expected_type=ContentType[release.package.type.name], \
+                               author=release.package.author.username, name=release.package.name)
+               except MinetestCheckError as err:
+                       if "Fails validation" not in release.title:
+                               release.title += " (Fails validation)"
+
+                       release.task_id = self.request.id
+                       release.approved = False
+                       db.session.commit()
+
+                       raise TaskError(str(err))
+
+               release.task_id = None
+               release.approve(release.package.author)
+               db.session.commit()
+
+       finally:
+               # `temp` only exists once extraction has written something, so a
+               # failure while opening the archive must not be masked by a
+               # FileNotFoundError raised from the cleanup itself
+               shutil.rmtree(temp, ignore_errors=True)
+
+
+@celery.task()
+def makeVCSRelease(id, branch):
+       """
+       Build a release zip from the package's git repository at `branch`.
+
+       The repository is cloned (with submodules), validated with
+       `build_tree`, and archived into the upload directory under a random
+       file name. The release then gets its url and commit_hash, has its
+       task_id cleared and is approved on behalf of the package author.
+       Returns the release URL.
+
+       Raises `TaskError` when the release or package is missing, cloning
+       fails, or validation fails. The temporary clone is always removed.
+       """
+       release = PackageRelease.query.get(id)
+       if release is None:
+               raise TaskError("No such release!")
+       elif release.package is None:
+               raise TaskError("No package attached to release")
+
+       # url = urlparse(release.package.repo)
+       # if url.netloc == "github.com":
+       #       return makeVCSReleaseFromGithub(id, branch, release, url)
+
+       gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)
+
+       # Everything after the clone runs under one try/finally so the clone
+       # is removed even when validation fails
+       try:
+               try:
+                       tree = build_tree(gitDir, expected_type=ContentType[release.package.type.name], \
+                               author=release.package.author.username, name=release.package.name)
+               except MinetestCheckError as err:
+                       raise TaskError(str(err))
+
+               filename = randomString(10) + ".zip"
+               destPath = os.path.join(app.config["UPLOAD_DIR"], filename)
+
+               assert(not os.path.isfile(destPath))
+               # force_sub=True: passed so submodule contents end up in the
+               # archive (see git-archive-all documentation)
+               archiver = GitArchiver(force_sub=True, main_repo_abspath=gitDir)
+               archiver.create(destPath)
+               assert(os.path.isfile(destPath))
+
+               release.url         = "/uploads/" + filename
+               release.task_id     = None
+               release.commit_hash = repo.head.object.hexsha
+               release.approve(release.package.author)
+               print(release.url)
+               db.session.commit()
+
+               return release.url
+       finally:
+               shutil.rmtree(gitDir)
+
 @celery.task()
 def importRepoScreenshot(id):
        package = Package.query.get(id)
-       if package is None:
+       if package is None or package.soft_deleted:
                raise Exception("Unexpected none package")
 
        # Get URL Maker
+       try:
+               gitDir, _ = cloneRepo(package.repo)
+       except TaskError as e:
+               # ignore download errors
+               print(e)
+               return None
+
+       # Find and import screenshot
+       try:
+               for ext in ["png", "jpg", "jpeg"]:
+                       sourcePath = gitDir + "/screenshot." + ext
+                       if os.path.isfile(sourcePath):
+                               filename = randomString(10) + "." + ext
+                               destPath = os.path.join(app.config["UPLOAD_DIR"], filename)
+                               shutil.copyfile(sourcePath, destPath)
+
+                               ss = PackageScreenshot()
+                               ss.approved = True
+                               ss.package = package
+                               ss.title   = "screenshot.png"
+                               ss.url   = "/uploads/" + filename
+                               db.session.add(ss)
+                               db.session.commit()
+
+                               return "/uploads/" + filename
+       finally:
+               shutil.rmtree(gitDir)
+
+       print("screenshot.png does not exist")
+       return None
+
+
+
+def getDepends(package):
+       """
+       Fetch the dependency specs of a GitHub-hosted package over HTTP.
+
+       Returns a dict that may contain "depends" and "optional_depends".
+       mod.conf is tried first; if it yields neither key, depends.txt is
+       parsed and both keys are set to comma-joined names. An empty dict is
+       returned for non-GitHub or invalid repository URLs.
+       """
        url = urlparse(package.repo)
        urlmaker = None
        if url.netloc == "github.com":
                urlmaker = GithubURLMaker(url)
        else:
-               raise TaskError("Unsupported repo")
+               return {}
 
+       result = {}
        if not urlmaker.isValid():
-               raise TaskError("Error! Url maker not valid")
+               return {}
 
+       #
+       # Try getting depends from mod.conf
+       #
        try:
-               filename = randomString(10) + ".png"
-               imagePath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
-               print(imagePath)
-               urllib.request.urlretrieve(urlmaker.getScreenshotURL(), imagePath)
-
-               ss = PackageScreenshot()
-               ss.package = package
-               ss.title   = "screenshot.png"
-               ss.url     = "/uploads/" + filename
-               db.session.add(ss)
-               db.session.commit()
+               contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
+               conf = parse_conf(contents)
+               for key in ["depends", "optional_depends"]:
+                       try:
+                               result[key] = conf[key]
+                       except KeyError:
+                               pass
 
-               return "/uploads/" + filename
        except HTTPError:
-               print("screenshot.png does not exist")
+               print("mod.conf does not exist")
 
-       return None
+       if "depends" in result or "optional_depends" in result:
+               return result
+
+
+       #
+       # Try depends.txt
+       #
+       import re
+       # Raw string: "\?" is not a valid escape in a normal string literal.
+       # One mod name per line; a trailing "?" marks an optional dependency.
+       pattern = re.compile(r"^([a-z0-9_]+)\??$")
+       try:
+               # NOTE(review): getDependsURL() is not among the GithubURLMaker
+               # methods visible in this diff - confirm it still exists
+               contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
+               soft = []
+               hard = []
+               for line in contents.split("\n"):
+                       line = line.strip()
+                       if pattern.match(line):
+                               if line.endswith("?"):
+                                       soft.append(line[:-1])
+                               else:
+                                       hard.append(line)
+
+               result["depends"] = ",".join(hard)
+               result["optional_depends"] = ",".join(soft)
+       except HTTPError:
+               print("depends.txt does not exist")
+
+       return result
+
+
+def importDependencies(package, mpackage_cache):
+       """
+       Add Dependency rows for `package` to the session from its fetched
+       dependency specs.
+
+       Does nothing when the package already has Dependency rows. Hard
+       dependencies are added with optional=False, soft ones with
+       optional=True. The session is not committed here.
+       """
+       existing = Dependency.query.filter_by(depender=package).count()
+       if existing != 0:
+               return
+
+       found = getDepends(package)
+
+       # (key in the getDepends() result, log format, value for dep.optional)
+       kinds = (
+               ("depends", "{} hard: {}", False),
+               ("optional_depends", "{} soft: {}", True),
+       )
+       for key, logFormat, isOptional in kinds:
+               if key not in found:
+                       continue
+
+               spec = found[key]
+               deps = Dependency.SpecToList(package, spec, mpackage_cache)
+               print(logFormat.format(len(deps), spec))
+               for dep in deps:
+                       dep.optional = isOptional
+                       db.session.add(dep)
+
+@celery.task()
+def importAllDependencies():
+       """
+       Delete every Dependency row and re-import dependencies for all
+       packages of type MOD, committing once at the end.
+       """
+       Dependency.query.delete()
+       mpackage_cache = {}
+       packages = Package.query.filter_by(type=PackageType.MOD).all()
+       # Count from 1 so the progress banner runs 1/N .. N/N, not 0/N .. N-1/N
+       for i, p in enumerate(packages, start=1):
+               print("============= {} ({}/{}) =============".format(p.name, i, len(packages)))
+               importDependencies(p, mpackage_cache)
+
+       db.session.commit()