From: Elias Fleckenstein Date: Sat, 14 May 2022 19:21:59 +0000 (+0200) Subject: Add scraping by tag,character,artist,group,parody X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=8a0a187b31a3e0e9b9faed1369441576a2b1d103;p=local-nhentai.git Add scraping by tag,character,artist,group,parody --- diff --git a/README.md b/README.md index f195b54..0ae9f18 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,9 @@ Remember to install NPM deps: `npm install` `node stats-tags.js`: Displays tags sorted by how many doujins are available for each tag. May produce long output, you might want to pipe it into `head`, `grep` or `less`. -`node wholesome.js`: Scrapes [wholesome hentais](https://wholesomelist.com/list). This takes a long time since it's about 2700 hentais in total (may consume 50GB of disk space), but you can abort it any time (and resume it later). +`node scrape-wholesome.js`: Scrapes [wholesome hentais](https://wholesomelist.com/list). This takes a long time since it's about 2700 hentais in total (consumes about 32GB of disk space), but you can abort it any time (and resume it later). + +`node scrape.js <criterium> <value>`: Scrapes doujins by criterium, where criterium can be tag, artist, character, parody or group and value is the value. E.g. `node scrape.js tag full-color` scrapes fully colored doujins. Note: you might want to create a subdirectory and put the doujins into there, they will all be put into the current working directory. (Run the scripts from a different directory to prevent spamming this directory) Of course, you can also run these scripts in a directory where you already downloaded doujins using the `nhentai` tool, but make sure to run the symlinks script to "register" them all in the system. 
diff --git a/package-lock.json b/package-lock.json index 8b7989e..6a55391 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,8 +10,7 @@ "license": "GPL-3.0-or-later", "dependencies": { "node-fetch": "^2.0.0" - }, - "devDependencies": {} + } }, "node_modules/node-fetch": { "version": "2.0.0", diff --git a/package.json b/package.json index 2eed851..b17c9eb 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,6 @@ "name": "nhentai-mgr", "version": "1.0.0", "main": "select.js", - "devDependencies": {}, "scripts": {}, "repository": { "type": "git", diff --git a/scrape-links.js b/scrape-links.js new file mode 100644 index 0000000..0b3f2b9 --- /dev/null +++ b/scrape-links.js @@ -0,0 +1,33 @@ +const fetch = require("node-fetch") +const child = require("child_process") + +module.exports = async (page, link) => { + let data + + try { + data = await (await fetch(page)).text() + } catch {} + + if (!data) + return false + + const ids = [] + + while (true) { + const pos = data.search(link) + + if (pos == -1) + break; + + data = data.slice(pos + link.length) + const id = parseInt(data) + if (id) + ids.push(id) + } + + if (ids.length < 1) + return false + + child.spawnSync("nhentai", ["--id", ids.join(",")], { stdio: "inherit" }) + return true +} diff --git a/scrape-wholesome.js b/scrape-wholesome.js new file mode 100644 index 0000000..d47e40d --- /dev/null +++ b/scrape-wholesome.js @@ -0,0 +1 @@ +require("./scrape-links")("https://wholesomelist.com/list", "https://nhentai.net/g/") diff --git a/scrape.js b/scrape.js new file mode 100644 index 0000000..69c476d --- /dev/null +++ b/scrape.js @@ -0,0 +1,8 @@ +const scrapeLinks = require("./scrape-links") +const page = `https://nhentai.net/${process.argv[2]}/${process.argv[3]}/popular?page=` + +;(async _ => { + for (let i = 1; await scrapeLinks(page + i, "/g/"); i++) + ; +})(); + diff --git a/wholesome.js b/wholesome.js deleted file mode 100644 index 551fb82..0000000 --- a/wholesome.js +++ /dev/null @@ -1,24 +0,0 @@ 
-const fetch = require("node-fetch") -const child = require("child_process") - -const link = "https://nhentai.net/g/" -const ids = [] - -fetch("https://wholesomelist.com/list") - .then(data => data.text()) - .then(data => { - while (true) { - const pos = data.search(link) - - if (pos == -1) - break; - - data = data.slice(pos + link.length) - const id = parseInt(data) - if (id) - ids.push(id) - } - - child.spawn("nhentai", ["--id", ids.join(",")], { stdio: "inherit" }) - }) -