# Content DB
# Copyright (C) 2018  rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


import flask, json, os, re
from flask_sqlalchemy import SQLAlchemy
from urllib.error import HTTPError
import urllib.request
from urllib.parse import urlparse, quote_plus
from app import app
from app.models import *
from app.tasks import celery, TaskError
from app.utils import randomString

class GithubURLMaker:
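	"""Build raw.githubusercontent.com, github.com and api.github.com URLs
	for a repository, given a urlparse() result for a
	https://github.com/<user>/<repo> URL. Raw file URLs point at the
	master branch.
	"""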
	def __init__(self, url):
		self.baseUrl = None
		self.user = None
		self.repo = None

		# Extract the user and repository name from the URL path
		m = re.search(r"^\/([^\/]+)\/([^\/]+)\/?$", url.path)
		if m is None:
			return

		user = m.group(1)
		repo = m.group(2).replace(".git", "")
		self.baseUrl = "https://raw.githubusercontent.com/{}/{}/master" \
				.format(user, repo)
		self.user = user
		self.repo = repo

	def isValid(self):
		return self.baseUrl is not None

	def getRepoURL(self):
		return "https://github.com/{}/{}".format(self.user, self.repo)

	def getIssueTrackerURL(self):
		return "https://github.com/{}/{}/issues/".format(self.user, self.repo)

	def getModConfURL(self):
		return self.baseUrl + "/mod.conf"

	def getDescURL(self):
		return self.baseUrl + "/description.txt"

	def getDependsURL(self):
		return self.baseUrl + "/depends.txt"

	def getScreenshotURL(self):
		return self.baseUrl + "/screenshot.png"

	def getCommitsURL(self, branch):
		return "https://api.github.com/repos/{}/{}/commits?sha={}" \
				.format(self.user, self.repo, quote_plus(branch))

	def getCommitDownload(self, commit):
		return "https://github.com/{}/{}/archive/{}.zip" \
				.format(self.user, self.repo, commit)


krock_list_cache = None
krock_list_cache_by_name = None
def getKrockList():
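	"""Fetch Krock's community mod list and cache it for the lifetime of the
	process. Returns (mods, lookup): mods is a list of dicts with title,
	author, name, topicId and link; lookup maps a mod name to all entries
	sharing that name. The name is taken from the [modname] tag in the forum
	topic title, and entries missing the required fields are skipped.
	"""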
	global krock_list_cache
	global krock_list_cache_by_name

	if krock_list_cache is None:
		contents = urllib.request.urlopen("http://krock-works.16mb.com/MTstuff/modList.php").read().decode("utf-8")
		mods = json.loads(contents)

		# Keep only entries that have the required fields and a [modname]
		# tag in the title, which provides the mod's technical name
		def h(x):
			if not ("title"   in x and "author" in x and \
					"topicId" in x and "link"   in x and x["link"] != ""):
				return False

			m = re.search(r"\[([A-Za-z0-9_]+)\]", x["title"])
			if m is None:
				return False

			x["name"] = m.group(1)
			return True

		def g(x):
			return {
				"title":   x["title"],
				"author":  x["author"],
				"name":    x["name"],
				"topicId": x["topicId"],
				"link":    x["link"],
			}

		krock_list_cache = [g(x) for x in mods if h(x)]
		krock_list_cache_by_name = {}
		for x in krock_list_cache:
			if x["name"] not in krock_list_cache_by_name:
				krock_list_cache_by_name[x["name"]] = []

			krock_list_cache_by_name[x["name"]].append(x)

	return krock_list_cache, krock_list_cache_by_name

def findModInfo(author, name, link):
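	"""Find the forum entry for a mod: first by name (preferring an exact
	author match when several mods share the name), then by repository link.
	Returns None if nothing matches.
	"""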
	mods, lookup = getKrockList()

	if name is not None and name in lookup:
		if len(lookup[name]) == 1:
			return lookup[name][0]

		for x in lookup[name]:
			if x["author"] == author:
				return x

	if link is not None and len(link) > 15:
		for x in mods:
			if link in x["link"]:
				return x

	return None


def parseConf(string):
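	"""Parse a minimal "key = value" config format, one pair per line.
	Lines without "=" are ignored and values are kept as strings, so the
	two lines "name = mymod" and "depends = default" parse to
	{"name": "mymod", "depends": "default"}.
	"""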
	retval = {}
	for line in string.split("\n"):
		idx = line.find("=")
		if idx > 0:
			key   = line[:idx].strip()
			value = line[idx+1:].strip()
			retval[key] = value

	return retval


@celery.task()
def getMeta(urlstr, author):
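	"""Celery task: scrape metadata for a package from its repository.
	Reads mod.conf, falls back to description.txt and depends.txt, derives
	a title and short_description, and looks up the forum topic via
	findModInfo(). Returns a dict of the fields found.
	"""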
	url = urlparse(urlstr)

	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	result = {}

	result["repo"] = urlmaker.getRepoURL()
	result["issueTracker"] = urlmaker.getIssueTrackerURL()

	try:
		contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
		conf = parseConf(contents)
		for key in ["name", "description", "title", "depends", "optional_depends"]:
			try:
				result[key] = conf[key]
			except KeyError:
				pass
	except HTTPError:
		print("mod.conf does not exist")

	if "name" in result:
		result["title"] = result["name"].replace("_", " ").title()

	if "description" not in result:
		try:
			contents = urllib.request.urlopen(urlmaker.getDescURL()).read().decode("utf-8")
			result["description"] = contents.strip()
		except HTTPError:
			print("description.txt does not exist!")

	pattern = re.compile(r"^([a-z0-9_]+)\??$")
	if "depends" not in result and "optional_depends" not in result:
		try:
			contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
			soft = []
			hard = []
			for line in contents.split("\n"):
				line = line.strip()
				if pattern.match(line):
					if line.endswith("?"):
						soft.append(line[:-1])
					else:
						hard.append(line)

			result["depends"] = ",".join(hard)
			result["optional_depends"] = ",".join(soft)
		except HTTPError:
			print("depends.txt does not exist!")

	if "description" in result:
		desc = result["description"]
		idx = desc.find(".") + 1
		cutIdx = min(len(desc), 200 if idx < 5 else idx)
		result["short_description"] = desc[:cutIdx]

	info = findModInfo(author, result.get("name"), result["repo"])
	if info is not None:
		result["forumId"] = info.get("topicId")

	return result


@celery.task()
def makeVCSRelease(id, branch):
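	"""Celery task: point a PackageRelease at the newest commit on the given
	branch, using the GitHub commits API, and return the resulting archive
	download URL.
	"""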
	release = PackageRelease.query.get(id)

	if release is None:
		raise TaskError("No such release!")

	if release.package is None:
		raise TaskError("No package attached to release")

	url = urlparse(release.package.repo)

	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	commitsURL = urlmaker.getCommitsURL(branch)
	contents = urllib.request.urlopen(commitsURL).read().decode("utf-8")
	commits = json.loads(contents)

	if len(commits) == 0 or "sha" not in commits[0]:
		raise TaskError("No commits found")

	release.url = urlmaker.getCommitDownload(commits[0]["sha"])
	print(release.url)
	release.task_id = None
	db.session.commit()

	return release.url


@celery.task()
def importRepoScreenshot(id):
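	"""Celery task: download screenshot.png from the package repository into
	app/public/uploads and attach it to the package as an approved
	PackageScreenshot. Returns the /uploads/ URL, or None if the repository
	has no screenshot.
	"""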
	package = Package.query.get(id)
	if package is None or package.soft_deleted:
		raise Exception("Unexpected None or soft-deleted package")

	# Get URL maker
	url = urlparse(package.repo)
	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	if not urlmaker.isValid():
		raise TaskError("Invalid GitHub repository URL")

	try:
		filename = randomString(10) + ".png"
		imagePath = os.path.join("app/public/uploads", filename)
		print(imagePath)
		urllib.request.urlretrieve(urlmaker.getScreenshotURL(), imagePath)

		ss = PackageScreenshot()
		ss.approved = True
		ss.package = package
		ss.title   = "screenshot.png"
		ss.url     = "/uploads/" + filename
		db.session.add(ss)
		db.session.commit()

		return "/uploads/" + filename
	except HTTPError:
		print("screenshot.png does not exist")

	return None


def getDepends(package):
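	"""Read a package's dependencies from its repository, preferring mod.conf
	and falling back to depends.txt. Returns a dict that may contain
	comma-separated "depends" and "optional_depends" strings; only GitHub
	repositories are supported.
	"""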
	url = urlparse(package.repo)
	urlmaker = None
	if url.netloc == "github.com":
		urlmaker = GithubURLMaker(url)
	else:
		raise TaskError("Unsupported repo")

	result = {}
	if urlmaker.isValid():
		#
		# Try getting depends from mod.conf
		#
		try:
			contents = urllib.request.urlopen(urlmaker.getModConfURL()).read().decode("utf-8")
			conf = parseConf(contents)
			for key in ["depends", "optional_depends"]:
				try:
					result[key] = conf[key]
				except KeyError:
					pass

		except HTTPError:
			print("mod.conf does not exist")

		if "depends" in result or "optional_depends" in result:
			return result

		#
		# Fall back to depends.txt
		#
		pattern = re.compile(r"^([a-z0-9_]+)\??$")
		try:
			contents = urllib.request.urlopen(urlmaker.getDependsURL()).read().decode("utf-8")
			soft = []
			hard = []
			for line in contents.split("\n"):
				line = line.strip()
				if pattern.match(line):
					if line.endswith("?"):
						soft.append(line[:-1])
					else:
						hard.append(line)

			result["depends"] = ",".join(hard)
			result["optional_depends"] = ",".join(soft)
		except HTTPError:
			print("depends.txt does not exist")

		return result

	else:
		print(TaskError("Invalid GitHub repository URL, can't detect dependencies"))
		return {}


def importDependencies(package, mpackage_cache):
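	"""Detect and store dependencies for a single package, skipping packages
	that already have Dependency rows. mpackage_cache is shared between calls
	and passed through to Dependency.SpecToList (presumably so repeated
	dependency names resolve to the same meta-package objects).
	"""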
	if Dependency.query.filter_by(depender=package).count() != 0:
		return

	result = getDepends(package)

	if "depends" in result:
		deps = Dependency.SpecToList(package, result["depends"], mpackage_cache)
		print("{} hard: {}".format(len(deps), result["depends"]))
		for dep in deps:
			dep.optional = False
			db.session.add(dep)

	if "optional_depends" in result:
		deps = Dependency.SpecToList(package, result["optional_depends"], mpackage_cache)
		print("{} soft: {}".format(len(deps), result["optional_depends"]))
		for dep in deps:
			dep.optional = True
			db.session.add(dep)

@celery.task()
def importAllDependencies():
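	"""Celery task: delete all Dependency rows and re-import dependencies for
	every mod-type package.
	"""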
	Dependency.query.delete()
	mpackage_cache = {}
	packages = Package.query.filter_by(type=PackageType.MOD).all()
	for i, p in enumerate(packages):
		print("============= {} ({}/{}) =============".format(p.name, i, len(packages)))
		importDependencies(p, mpackage_cache)

	db.session.commit()