]> git.lizzy.rs Git - cheatdb.git/blob - app/tasks/phpbbparser.py
Add git support for importing meta
[cheatdb.git] / app / tasks / phpbbparser.py
1 # Copyright (c) 2016  Andrew "rubenwardy" Ward
2 # License: MIT
3 # Source: https://github.com/rubenwardy/python_phpbb_parser
4
5 import urllib, socket
6 from bs4 import *
7 from urllib.parse import urljoin
8 import urllib.request
9 import os.path
10 import time, re
11
def urlEncodeNonAscii(b):
    """Percent-encode every non-ASCII character of a string.

    ASCII characters (including spaces and reserved characters such as
    ``&``) are returned untouched. Each run of non-ASCII characters is
    replaced by the lowercase ``%xx`` escapes of its UTF-8 bytes.

    Args:
        b: The text to encode (a ``str``).

    Returns:
        The text with all non-ASCII characters percent-encoded.
    """
    def _escape(match):
        # Escape the UTF-8 bytes, not the code point: a code point above
        # U+00FF cannot be represented by a single %xx escape.
        encoded = match.group(0).encode('utf-8')
        return ''.join('%%%02x' % byte for byte in encoded)

    return re.sub(r'[^\x00-\x7F]+', _escape, b)
14
class Profile:
    """A user's profile data: username, signature and key/value properties."""

    def __init__(self, username):
        """Create a profile for ``username`` with no signature or properties."""
        self.properties = {}
        self.signature = ""
        self.username = username

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self.properties[key] = value

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        return self.properties.get(key)

    def __str__(self):
        """Return username, signature and properties, one per line."""
        parts = (self.username, str(self.signature), str(self.properties))
        return "\n".join(parts)
29
def __extract_properties(profile, soup):
    """Copy the detail fields of a parsed profile page into ``profile``.

    Finds the element with id ``viewprofile`` and, inside it, the single
    ``<dl class="left-box details">`` list. Each ``<dt>`` label is paired
    with the ``<dd>`` that follows it and stored through
    ``profile.set(label, value)``. The label is lower-cased, trimmed and
    stripped of its trailing colon; the value is the ``<dd>`` text as-is.
    The "groups" entry is skipped. Unexpected markup is reported with
    ``print`` and otherwise ignored.

    Args:
        profile: Object exposing ``set(key, value)``.
        soup: Parsed page exposing ``find`` (presumably a BeautifulSoup
            document -- see the caller).

    Returns:
        Always ``None``. When the expected container or list is missing
        (or the list is not unique), ``profile`` is left unmodified.
    """
    container = soup.find(id="viewprofile")
    if container is None:
        return None

    detail_lists = container.find_all("dl", class_="left-box details")
    if len(detail_lists) != 1:
        return None

    pending_key = None

    # Walk the <dt>/<dd> pairs in document order.
    for element in detail_lists[0].children:
        if element.name == "dt":
            if pending_key is None:
                # Trim whitespace first so that the character removed is
                # really the label's trailing colon, and only when present.
                label = element.text.strip()
                if label.endswith(":"):
                    label = label[:-1]
                pending_key = label.strip().lower()
            else:
                print("Unexpected dt!")

        elif element.name == "dd":
            if pending_key is None:
                print("Unexpected dd!")
            else:
                if pending_key != "groups":
                    profile.set(pending_key, element.text)
                pending_key = None

        elif element and element.name is not None:
            print("Unexpected other")
59
def __extract_signature(soup):
    """Return the page's only ``<div class="signature">`` element.

    Returns ``None`` when the page holds no such element or more than one.
    """
    matches = soup.find_all("div", class_="signature")
    return matches[0] if len(matches) == 1 else None
66
def getProfile(url, username):
    """Download and parse a user's profile page.

    Requests ``<url>/memberlist.php?mode=viewprofile&un=<username>``, parses
    the response with BeautifulSoup (``lxml`` parser) and fills a
    ``Profile`` with the signature element and the detail properties found
    on the page.

    Args:
        url: Base URL of the forum, without a trailing slash.
        username: Raw (not yet URL-encoded) name of the user to look up.

    Returns:
        A ``Profile`` for ``username``. Its signature is ``None`` and its
        properties are empty when the page lacks the expected markup.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the request fails,
        and ``UnicodeDecodeError`` if the response is not valid UTF-8.
    """
    # Percent-encode the whole value (UTF-8): spaces, "&", "#" and the like
    # would otherwise break the request or corrupt the query string.
    encoded_username = urllib.parse.quote(username, safe="")
    url = url + "/memberlist.php?mode=viewprofile&un=" + encoded_username

    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as response:
        contents = response.read().decode("utf-8")

    # The BeautifulSoup constructor never returns None, so no check is
    # needed here.
    soup = BeautifulSoup(contents, "lxml")

    profile = Profile(username)
    profile.signature = __extract_signature(soup)
    __extract_properties(profile, soup)

    return profile
77                 __extract_properties(profile, soup)
78
79                 return profile