# Content DB
# Copyright (C) 2018  rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


import flask, json, os, re
from flask_sqlalchemy import SQLAlchemy
from urllib.error import HTTPError
import urllib.request
from urllib.parse import urlparse, quote_plus
from app import app
from app.models import *
from app.tasks import celery, TaskError
from app.utils import randomString

class GithubURLMaker:
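	"""Build raw.githubusercontent.com, github.com and api.github.com URLs
	for a repository, given a urlparse() result for a
	https://github.com/<user>/<repo> URL. Raw file URLs point at the
	master branch.
	"""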
	def __init__(self, url):
		self.baseUrl = None
		self.user = None
		self.repo = None

		# Extract the user and repository name from the URL path
		m = re.search(r"^\/([^\/]+)\/([^\/]+)\/?$", url.path)
		if m is None:
			return

		user = m.group(1)
		repo = m.group(2).replace(".git", "")
		self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
				.format(user, repo)
		self.user = user
		self.repo = repo

	def isValid(self):
		return self.baseUrl is not None

	def getRepoURL(self):
		return "https://github.com/{}/{}".format(self.user, self.repo)

	def getIssueTrackerURL(self):
		return "https://github.com/{}/{}/issues/".format(self.user, self.repo)

	def getModConfURL(self):
		return self.baseUrl + "/mod.conf"

	def getDescURL(self):
		return self.baseUrl + "/description.txt"

	def getDependsURL(self):
		return self.baseUrl + "/depends.txt"

	def getScreenshotURL(self):
		return self.baseUrl + "/screenshot.png"

	def getCommitsURL(self, branch):
		return "https://api.github.com/repos/{}/{}/commits?sha={}" \
				.format(self.user, self.repo, quote_plus(branch))

	def getCommitDownload(self, commit):
		return "https://github.com/{}/{}/archive/{}.zip" \
				.format(self.user, self.repo, commit)


krock_list_cache = None
krock_list_cache_by_name = None
def getKrockList():
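	"""Fetch Krock's community mod list and cache it for the lifetime of the
	process. Returns (mods, lookup): mods is a list of dicts with title,
	author, name, topicId and link; lookup maps a mod name to all entries
	sharing that name. The name is taken from the [modname] tag in the forum
	topic title, and entries missing the required fields are skipped.
	"""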
	global krock_list_cache
	global krock_list_cache_by_name

	if krock_list_cache is None:
		contents = urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8")
		mods = json.loads(contents)

		# Keep only entries that have the required fields and a [modname]
		# tag in the title, which provides the mod's technical name
		def h(x):
			if not ("title"   in x and "author" in x and \
					"topicId" in x and "link"   in x and x["link"] != ""):
				return False

			m = re.search(r"\[([A-Za-z0-9_]+)\]", x["title"])
			if m is None:
				return False

			x["name"] = m.group(1)
			return True

		def g(x):
			return {
				"title":   x["title"],
				"author":  x["author"],
				"name":    x["name"],
				"topicId": x["topicId"],
				"link":    x["link"],
			}

		krock_list_cache = [g(x) for x in mods if h(x)]
		krock_list_cache_by_name = {}
		for x in krock_list_cache:
			if x["name"] not in krock_list_cache_by_name:
				krock_list_cache_by_name[x["name"]] = []

			krock_list_cache_by_name[x["name"]].append(x)

	return krock_list_cache, krock_list_cache_by_name

def findModInfo(author, name, link):
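	"""Find the forum entry for a mod: first by name (preferring an exact
	author match when several mods share the name), then by repository link.
	Returns None if nothing matches.
	"""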
	mods, lookup = getKrockList()

	if name is not None and name in lookup:
		if len(lookup[name]) == 1:
			return lookup[name][0]

		for x in lookup[name]:
			if x["author"] == author:
				return x

	if link is not None and len(link) > 15:
		for x in mods:
			if link in x["link"]:
				return x

	return None


def parseConf(string):
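	"""Parse a minimal "key = value" config format, one pair per line.
	Lines without "=" are ignored and values are kept as strings, so the
	two lines "name = mymod" and "depends = default" parse to
	{"name": "mymod", "depends": "default"}.
	"""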
	retval = {}
	for line in string.split("\n"):
		idx = line.find("=")
		if idx > 0:
			key   = line[:idx].strip()
			value = line[idx+1:].strip()
			retval[key] = value

	return retval


@celery.task()
def getMeta(urlstr, author):
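	"""Celery task: scrape metadata for a package from its repository.
	Reads mod.conf, falls back to description.txt and depends.txt, derives
	a title and short_description, and looks up the forum topic via
	findModInfo(). Returns a dict of the fields found.
	"""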
	url = urlparse(urlstr)

	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	result = {}

	result["repo"] = urlmaker.getRepoURL()
	result["issueTracker"] = urlmaker.getIssueTrackerURL()

	try:
		contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
		conf = parseConf(contents)
		for key in ["name", "description", "title", "depends", "optional_depends"]:
			try:
				result[key] = conf[key]
			except KeyError:
				pass
	except HTTPError:
		print("mod.conf does not exist")

	if "name" in result:
		result["title"] = result["name"].replace("_", " ").title()

	if "description" not in result:
		try:
			contents = urllib.request.urlopen(urlmaker.getDescURL()).read().decode("utf-8")
			result["description"] = contents.strip()
		except HTTPError:
			print("description.txt does not exist!")

	pattern = re.compile(r"^([a-z0-9_]+)\??$")
	if "depends" not in result and "optional_depends" not in result:
		try:
			contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
			soft = []
			hard = []
			for line in contents.split("\n"):
				line = line.strip()
				if pattern.match(line):
					if line.endswith("?"):
						soft.append(line[:-1])
					else:
						hard.append(line)

			result["depends"] = ",".join(hard)
			result["optional_depends"] = ",".join(soft)
		except HTTPError:
			print("depends.txt does not exist!")

	if "description" in result:
		desc = result["description"]
		idx = desc.find(".") + 1
		cutIdx = min(len(desc), 200 if idx < 5 else idx)
		result["short_description"] = desc[:cutIdx]

	info = findModInfo(author, result.get("name"), result["repo"])
	if info is not None:
		result["forumId"] = info.get("topicId")

	return result


@celery.task()
def makeVCSRelease(id, branch):
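	"""Celery task: point a PackageRelease at the newest commit on the given
	branch, using the GitHub commits API, and return the resulting archive
	download URL.
	"""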
	release = PackageRelease.query.get(id)

	if release is None:
		raise TaskError("No such release!")

	if release.package is None:
		raise TaskError("No package attached to release")

	url = urlparse(release.package.repo)

	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	commitsURL = urlmaker.getCommitsURL(branch)
	contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
	commits = json.loads(contents)

	if len(commits) == 0 or "sha" not in commits[0]:
		raise TaskError("No commits found")

	release.url = urlmaker.getCommitDownload(commits[0]["sha"])
	print(release.url)
	release.task_id = None
	db.session.commit()

	return release.url


@celery.task()
def importRepoScreenshot(id):
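	"""Celery task: download screenshot.png from the package repository into
	app/public/uploads and attach it to the package as an approved
	PackageScreenshot. Returns the /uploads/ URL, or None if the repository
	has no screenshot.
	"""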
	package = Package.query.get(id)
	if package is None or package.soft_deleted:
		raise Exception("Unexpected None or soft-deleted package")

	# Get URL maker
	url = urlparse(package.repo)
	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	try:
		filename = randomString(10) + ".png"
		imagePath = os.path.join("app/public/uploads", filename)
		print(imagePath)
		urllib.request.urlretrieve(urlmaker.getScreenshotURL(), imagePath)

		ss = PackageScreenshot()
		ss.approved = True
		ss.package = package
		ss.title   = "screenshot.png"
		ss.url     = "/uploads/" + filename
		db.session.add(ss)
		db.session.commit()

		return "/uploads/" + filename
	except HTTPError:
		print("screenshot.png does not exist")

	return None


def getDepends(package):
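	"""Read a package's dependencies from its repository, preferring mod.conf
	and falling back to depends.txt. Returns a dict that may contain
	comma-separated "depends" and "optional_depends" strings; only GitHub
	repositories are supported.
	"""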
	url = urlparse(package.repo)
	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	result = {}
	if urlmaker.isValid():
		#
		# Try getting depends from mod.conf
		#
		try:
			contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
			conf = parseConf(contents)
			for key in ["depends", "optional_depends"]:
				try:
					result[key] = conf[key]
				except KeyError:
					pass

		except HTTPError:
			print("mod.conf does not exist")

		if "depends" in result or "optional_depends" in result:
			return result

		#
		# Fall back to depends.txt
		#
		pattern = re.compile(r"^([a-z0-9_]+)\??$")
		try:
			contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
			soft = []
			hard = []
			for line in contents.split("\n"):
				line = line.strip()
				if pattern.match(line):
					if line.endswith("?"):
						soft.append(line[:-1])
					else:
						hard.append(line)

			result["depends"] = ",".join(hard)
			result["optional_depends"] = ",".join(soft)
		except HTTPError:
			print("depends.txt does not exist")

		return result

	else:
		print(TaskError("Invalid GitHub repository URL, can't detect dependencies"))
		return {}


def importDependencies(package, mpackage_cache):
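	"""Detect and store dependencies for a single package, skipping packages
	that already have Dependency rows. mpackage_cache is shared between calls
	and passed through to Dependency.SpecToList (presumably so repeated
	dependency names resolve to the same meta-package objects).
	"""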
	if Dependency.query.filter_by(depender=package).count() != 0:
		return

	result = getDepends(package)

	if "depends" in result:
		deps = Dependency.SpecToList(package, result["depends"], mpackage_cache)
		print("{} hard: {}".format(len(deps), result["depends"]))
		for dep in deps:
			dep.optional = False
			db.session.add(dep)

	if "optional_depends" in result:
		deps = Dependency.SpecToList(package, result["optional_depends"], mpackage_cache)
		print("{} soft: {}".format(len(deps), result["optional_depends"]))
		for dep in deps:
			dep.optional = True
			db.session.add(dep)

@celery.task()
def importAllDependencies():
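	"""Celery task: delete all Dependency rows and re-import dependencies for
	every mod-type package.
	"""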
	Dependency.query.delete()
	mpackage_cache = {}
	packages = Package.query.filter_by(type=PackageType.MOD).all()
	for i, p in enumerate(packages):
		print("============= {} ({}/{}) =============".format(p.name, i, len(packages)))
		importDependencies(p, mpackage_cache)

	db.session.commit()