]> git.lizzy.rs Git - google_images.git/blob - init.js
Fix unresolved promise
[google_images.git] / init.js
1 const fetch = require("node-fetch")
2 const cheerio = require("cheerio")
3 const jsonic = require("jsonic")
4
/**
 * Convert one raw entry of the init data into a readable result object.
 *
 * The index paths below were reverse engineered from Google's response and
 * only cover the parts of the entry we know/care about.
 *
 * @param {Array} elem - raw entry taken from the init data
 * @returns {{image: object, preview: object, color: *, link: *, title: *}}
 */
const toResult = elem => ({
	image: {
		url: elem[1][3][0],
		size: {
			width: elem[1][3][2],
			height: elem[1][3][1],
		},
	},
	preview: {
		url: elem[1][2][0],
		size: {
			width: elem[1][2][2],
			height: elem[1][2][1],
		},
	},
	color: elem[1][6], // average color of the image, probably (used as placeholder while loading the image)
	link: elem[1][9][2003][2],
	title: elem[1][9][2003][3], // there is some more data in elem[1][9] that could potentially be useful
})

/**
 * Search Google Images and resolve to the list of results.
 *
 * @param {string} query - search terms; URI-encoded before being sent
 * @param {string} [userAgent] - value of the User-Agent request header
 * @returns {Promise<Array<object>>} one object per result with the keys
 *   `image`, `preview`, `color`, `link` and `title`
 *
 * The returned promise rejects with an Error when the HTTP status is not 2xx
 * or when the response contains no AF_initDataCallback script, and with a
 * TypeError when the init data does not have the expected layout.
 */
module.exports.search = async (query, userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") => {
	const res = await fetch(`https://www.google.com/search?tbm=isch&q=${encodeURIComponent(query)}`, {headers: {"User-Agent": userAgent}})

	// fail early with a readable message instead of trying to parse an error page
	if (!res.ok)
		throw new Error(`Google image search failed: HTTP ${res.status} ${res.statusText}`)

	const $ = cheerio.load(await res.text(), null, false) // parse HTML

	const initScript = $("script")                                 // find script tags
		.toArray()                                             // convert cheerio list to array
		.map(script => script.children[0]?.data)               // map script tags to their inline code
		.find(code => code?.startsWith("AF_initDataCallback")) // find script that contains init data

	// without this check a missing script surfaces as an opaque TypeError on .slice()
	if (initScript === undefined)
		throw new Error("Google image search failed: response contains no AF_initDataCallback script")

	// jsonic is used because JSON.parse() requires strict JSON and eval() allows for remote code execution
	const initData = jsonic(
		initScript.slice("AF_initDataCallback(".length, -");".length) // remove call to init function
	)

	return initData.data[31][0][12][2].map(elem => toResult(elem))
}
34
35 /*
36
In case Google changes the response format, here are some snippets that were used to reverse engineer it:
38
39 1. Find which script contains the init data (use the query astolfo+images for this)
40 -----------------------------------------------------------------------------------
41
42         scripts.find(script => script.search("https://steamcdn-a.akamaihd.net/steamcommunity/public/images/items/622220/f4d2d4074167411a7e15b9a845cf18b434c02af3.jpg") >= 0)
43
44 2. Reverse engineer format of init data passed to AF_initDataCallback
45 ---------------------------------------------------------------------
46
// Recursively collect every string stored anywhere inside obj.
// Returns an array of [value, path] pairs, where path is the chain of keys
// leading to the string, each key prefixed with "." (e.g. ".data.31.0").
const findStrings = (obj, path = "") => {
	let found = []

	// for...in is kept on purpose: it walks both arrays and plain objects and
	// simply does nothing for null (typeof null is "object" too).
	// The loop variable must be declared - an undeclared `k` is an implicit
	// global and a ReferenceError in strict mode / ES modules.
	for (const k in obj) {
		const v = obj[k]
		const t = typeof v
		const p = `${path}.${k}`

		if (t === "object")
			found = found.concat(findStrings(v, p))
		else if (t === "string")
			found.push([v, p])
	}

	return found
}
63
64         console.log(findStrings(initData))
65
66 */