1 # Copyright (c) 2016 Andrew "rubenwardy" Ward
3 # Source: https://github.com/rubenwardy/python_phpbb_parser
7 from urllib.parse import urljoin
def urlEncodeNonAscii(b):
    """Percent-encode every non-ASCII character of ``b`` as its UTF-8 bytes.

    ASCII characters (including URL-reserved ones such as ``&`` or a space)
    are returned untouched; only characters outside the 7-bit range are
    rewritten, each as one ``%xx`` escape (lowercase hex) per UTF-8 byte.

    Args:
        b: Text to make ASCII-safe, e.g. a username for a query string.

    Returns:
        A ``str`` containing only the ASCII characters of ``b`` plus
        ``%xx`` escapes.

    Raises:
        UnicodeEncodeError: If ``b`` contains a lone surrogate, which has
            no UTF-8 encoding.
    """
    # Escaping the UTF-8 *bytes* (rather than the code point) keeps characters
    # above U+00FF from leaking into the URL unescaped and matches the UTF-8
    # decoding a server applies to percent-escapes.
    return re.sub(
        r'[^\x00-\x7F]+',
        lambda match: ''.join(
            '%%%02x' % byte for byte in match.group(0).encode('utf-8')
        ),
        b,
    )
# Constructor: remembers the username this object was created for.
# NOTE(review): the listing skips the lines that follow; `signature` and
# `properties` are read by other code in this file, so they are presumably
# initialised in the part of this method that is not shown - confirm.
16 def __init__(self, username):
17 self.username = username
# Store `value` under `key` in this object's `properties` container
# (item assignment; `properties` itself is created outside the lines shown).
21 def set(self, key, value):
22 self.properties[key] = value
25 return self.properties[key] if key in self.properties else None
28 return self.username + "\n" + str(self.signature) + "\n" + str(self.properties)
# Read the labelled fields out of a parsed profile page and store each one on
# `profile` through `profile.set(key, text)`.
#
# profile: object exposing `set(key, value)`.
# soup:    parsed page exposing `find` / `find_all` (getProfile passes a
#          BeautifulSoup tree).
#
# NOTE(review): this listing omits several lines of the function (the
# embedded numbering jumps), including where `catch_next_key` is first
# assigned and where it is cleared again. The comments below describe only
# the statements that are shown.
30 def __extract_properties(profile, soup):
# Look up the page element whose id is "viewprofile".
31 el = soup.find(id="viewprofile")
# Collect the <dl> lists inside it matched by class "left-box details".
35 res = el.find_all("dl", class_ = "left-box details")
# Only the first matching list is walked, child by child.
42 for element in res[0].children:
# <dt> carries a field label.
43 if element.name == "dt":
44 if catch_next_key is None:
# Key = label text, lowercased, last character dropped, then stripped
# (presumably the dropped character is a trailing colon - confirm).
45 catch_next_key = element.text.lower()[:-1].strip()
# Diagnostic for a <dt> that is not accepted as a new label (the branch line
# introducing this print is not shown).
47 print("Unexpected dt!")
# <dd> carries the value for the label remembered from the preceding <dt>.
49 elif element.name == "dd":
50 if catch_next_key is None:
51 print("Unexpected dd!")
# The "groups" field is deliberately not stored; every other field is saved
# as the element's text.
53 if catch_next_key != "groups":
54 profile.set(catch_next_key, element.text)
# Any other child that has a tag name is reported; children whose `name` is
# None (presumably bare text nodes) are ignored silently.
57 elif element and element.name is not None:
58 print("Unexpected other")
# Locate the signature markup in a parsed profile page; getProfile assigns
# this function's result to `profile.signature`.
# NOTE(review): only the lookup below is visible in this listing - how the
# matches are turned into the returned value is in lines not shown.
60 def __extract_signature(soup):
# Every <div> matched by class "signature".
61 res = soup.find_all("div", class_="signature")
# Download a member's profile page from a phpBB board and build a Profile
# object from it.
#
# url:      board base address; "/memberlist.php?mode=viewprofile&un=<name>"
#           is appended to it by plain string concatenation.
# username: member name to look up; also handed to the Profile constructor.
#
# NOTE(review): this listing omits some lines of the function (the embedded
# numbering jumps after the URL is built, after the page is parsed, and after
# the last line shown), so anything done there - including what is returned -
# is not described here.
67 def getProfile(url, username):
# The username is passed through urlEncodeNonAscii before being appended;
# no other escaping is applied to it on this line.
68 url = url + "/memberlist.php?mode=viewprofile&un=" + urlEncodeNonAscii(username)
# Fetch the page body and decode it as UTF-8.
# NOTE(review): the response object returned by urlopen is not closed
# explicitly here.
70 contents = urllib.request.urlopen(url).read().decode("utf-8")
# Parse the HTML with BeautifulSoup using the "lxml" parser.
71 soup = BeautifulSoup(contents, "lxml")
# Build the result object and fill it from the parsed page: the signature is
# assigned directly, the remaining fields are stored by __extract_properties
# via profile.set.
75 profile = Profile(username)
76 profile.signature = __extract_signature(soup)
77 __extract_properties(profile, soup)