]> git.lizzy.rs Git - google_images.git/blob - init.js
Bump release
[google_images.git] / init.js
1 const fetch = require("node-fetch")
2 const cheerio = require("cheerio")
3 const jsonic = require("jsonic")
4
/**
 * Search Google Images and return the results scraped from the inline init data of the result page.
 *
 * @param {string} query - Search terms; URI-encoded before being placed in the request URL.
 * @param {boolean} [safeSearch=false] - When truthy, "&safe=active" is appended to the request URL.
 * @param {string} [userAgent] - Value sent as the User-Agent request header.
 * @returns {Promise<Array<Object>>} Resolves to an array of objects shaped like
 *   {image: {url, size: {width, height}}, preview: {url, size: {width, height}}, color, link, title}.
 *   Rejects with an Error if the response contains no "ds:1" init data or the
 *   result list is not at the expected position inside it.
 */
module.exports.search = (query, safeSearch = false, userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") =>
        fetch("https://www.google.com/search?tbm=isch&q=" + encodeURIComponent(query) + (safeSearch ? "&safe=active" : ""), {headers: {"User-Agent": userAgent}})
                .then(res => res.text())
                .then(html => {
                        const initData = cheerio.load(html, null, false)("script")                                // parse HTML and find script tags
                                .toArray()                                                                    // convert cheerio list to array
                                .map(script => script.children[0]?.data)                                      // map script tags to their inline code
                                .filter(code => code?.startsWith("AF_initDataCallback"))                      // keep scripts that contain init data
                                .map(code => code.slice("AF_initDataCallback(".length, -");".length))         // remove call to init function
                                // jsonic is used because JSON.parse() requires strict JSON and eval() allows remote code execution.
                                // It is called with the source only, so map()'s index/array arguments are not forwarded to the parser.
                                .map(code => jsonic(code))
                                .find(init => init.key === "ds:1")                                            // there are several init datas; the results live in "ds:1"

                        // The position of the result list was found by reverse engineering and is not a
                        // stable API, so fail with a readable message instead of a TypeError deep in the chain.
                        const entries = initData?.data?.[31]?.[0]?.[12]?.[2]

                        if (!Array.isArray(entries))
                                throw new Error("google_images: could not find image results in the response (missing \"ds:1\" init data or unexpected layout)")

                        return entries
                                .filter(elem => elem[1])                                                      // skip entries that carry no image payload
                                .map(elem => ({                                                               // map the parts of the init data we know/care about to something readable
                                        image: {
                                                url: elem[1][3][0],
                                                size: {
                                                        width: elem[1][3][2],
                                                        height: elem[1][3][1],
                                                },
                                        },
                                        preview: {
                                                url: elem[1][2][0],
                                                size: {
                                                        width: elem[1][2][2],
                                                        height: elem[1][2][1],
                                                },
                                        },
                                        color: elem[1][6],          // probably average color of the image (used as placeholder while loading the image)
                                        link: elem[1][9][2003][2],
                                        title: elem[1][9][2003][3], // there is some more data in elem[1][9] that could potentially be useful
                                }))
                })
38
/**
 * Run module.exports.search() with the given arguments and resolve to a single
 * result chosen with Math.random().
 *
 * @param {string} query - Forwarded unchanged to search().
 * @param {boolean} [safeSearch] - Forwarded unchanged to search().
 * @param {string} [userAgent] - Forwarded unchanged to search().
 * @returns {Promise<Object|undefined>} One element of the search results;
 *   undefined when the result array is empty.
 */
module.exports.searchRandom = (query, safeSearch, userAgent) => {
        // Index is uniformly drawn from [0, list.length)
        const pickRandom = list => list[Math.floor(Math.random() * list.length)]

        return module.exports.search(query, safeSearch, userAgent).then(pickRandom)
}
41
42 /*
43
44 In case Google changes the page format, here are some snippets that were used to reverse engineer it:
45
46 1. Find which script contains the init data (use the query astolfo+images for this)
47 -----------------------------------------------------------------------------------
48
49         scripts.find(script => script.search("https://steamcdn-a.akamaihd.net/steamcommunity/public/images/items/622220/f4d2d4074167411a7e15b9a845cf18b434c02af3.jpg") >= 0)
50
51 2. Reverse engineer format of init data passed to AF_initDataCallback
52 ---------------------------------------------------------------------
53
54 const findStrings = (obj, path = "") => {
55         let found = []
56
57         for (k in obj) {
58                 let v = obj[k]
59                 let t = typeof v
60                 let p = path + "." + k
61
62                 if (t == "object")
63                         found = found.concat(findStrings(v, p))
64                 else if (t == "string")
65                         found.push([v, p])
66         }
67
68         return found
69 }
70
71         console.log(findStrings(initData))
72
73 */