]> git.lizzy.rs Git - local-nhentai.git/commitdiff
Add scraping by tag,character,artist,group,parody
authorElias Fleckenstein <eliasfleckenstein@web.de>
Sat, 14 May 2022 19:21:59 +0000 (21:21 +0200)
committerElias Fleckenstein <eliasfleckenstein@web.de>
Sat, 14 May 2022 19:21:59 +0000 (21:21 +0200)
README.md
package-lock.json
package.json
scrape-links.js [new file with mode: 0644]
scrape-wholesome.js [new file with mode: 0644]
scrape.js [new file with mode: 0644]
wholesome.js [deleted file]

index f195b54bcbc17afea1f4267f007d44bb4d36d3b3..0ae9f18cb29d1bb38a09aacd150442e7120d5642 100644 (file)
--- a/README.md
+++ b/README.md
@@ -26,7 +26,9 @@ Remember to install NPM deps: `npm install`
 
 `node stats-tags.js`: Displays tags sorted by how many doujins are available for each tag. May produce long output, you might want to pipe it into `head`, `grep` or `less`.
 
-`node wholesome.js`: Scrapes [wholesome hentais](https://wholesomelist.com/list). This takes a long time since it's about 2700 hentais in total (may consume 50GB of disk space), but you can abort it any time (and resume it later).
+`node scrape-wholesome.js`: Scrapes [wholesome hentais](https://wholesomelist.com/list). This takes a long time since it's about 2700 hentais in total (consumes about 32GB of disk space), but you can abort it any time (and resume it later).
+
+`node scrape.js <criterion> <value>`: Scrapes doujins matching a criterion, where `<criterion>` is one of tag, artist, character, parody or group, and `<value>` is the value to match. E.g. `node scrape.js tag full-color` scrapes fully colored doujins.
 
 Note: you might want to create a subdirectory and put the doujins into there, they will all be put into the current working directory. (Run the scripts from a different directory to prevent spamming this directory)
 Of course, you can also run these scripts in a directory where you already downloaded doujins using the `nhentai` tool, but make sure to run the symlinks script to "register" them all in the system.
index 8b7989e57b583afaadc8d095d99738eeb5d7f4e6..6a55391171d0e099d4f7659d42f8417f83fd0a1c 100644 (file)
@@ -10,8 +10,7 @@
       "license": "GPL-3.0-or-later",
       "dependencies": {
         "node-fetch": "^2.0.0"
-      },
-      "devDependencies": {}
+      }
     },
     "node_modules/node-fetch": {
       "version": "2.0.0",
index 2eed851e7a6d18c0041754217544da6dfc446bef..b17c9ebb7cea1f0b032545b183fecd848febcebd 100644 (file)
@@ -5,7 +5,6 @@
   "name": "nhentai-mgr",
   "version": "1.0.0",
   "main": "select.js",
-  "devDependencies": {},
   "scripts": {},
   "repository": {
     "type": "git",
diff --git a/scrape-links.js b/scrape-links.js
new file mode 100644 (file)
index 0000000..0b3f2b9
--- /dev/null
@@ -0,0 +1,33 @@
+const fetch = require("node-fetch")
+const child = require("child_process")
+
+module.exports = async (page, link) => {
+       let data
+
+       try {
+               data = await (await fetch(page)).text()
+       } catch {}
+
+       if (!data)
+               return false
+
+       const ids = []
+
+       while (true) {
+               const pos = data.search(link)
+
+               if (pos == -1)
+                       break;
+
+               data = data.slice(pos + link.length)
+               const id = parseInt(data)
+               if (id)
+                       ids.push(id)
+       }
+
+       if (ids.length < 1)
+               return false
+
+       child.spawnSync("nhentai", ["--id", ids.join(",")], { stdio: "inherit" })
+       return true
+}
diff --git a/scrape-wholesome.js b/scrape-wholesome.js
new file mode 100644 (file)
index 0000000..d47e40d
--- /dev/null
@@ -0,0 +1 @@
+require("./scrape-links")("https://wholesomelist.com/list", "https://nhentai.net/g/")
diff --git a/scrape.js b/scrape.js
new file mode 100644 (file)
index 0000000..69c476d
--- /dev/null
+++ b/scrape.js
@@ -0,0 +1,8 @@
+const scrapeLinks = require("./scrape-links")
+const page = `https://nhentai.net/${process.argv[2]}/${process.argv[3]}/popular?page=`
+
+;(async _ => {
+       for (let i = 1; await scrapeLinks(page + i, "/g/"); i++)
+               ;
+})();
+
diff --git a/wholesome.js b/wholesome.js
deleted file mode 100644 (file)
index 551fb82..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-const fetch = require("node-fetch")
-const child = require("child_process")
-
-const link = "https://nhentai.net/g/" // text that precedes each numeric ID in the page
-const ids = [] // IDs collected from the page, in order of appearance
-
-fetch("https://wholesomelist.com/list") // download the list page
-       .then(data => data.text())
-       .then(data => {
-               while (true) {
-                       const pos = data.search(link) // note: search() treats `link` as a regular expression
-
-                       if (pos == -1)
-                               break; // no further occurrence of `link`
-
-                       data = data.slice(pos + link.length) // drop everything up to and including the match
-                       const id = parseInt(data) // leading digits of the remainder; NaN if there are none
-                       if (id)
-                               ids.push(id) // NaN and 0 are skipped
-               }
-
-               child.spawn("nhentai", ["--id", ids.join(",")], { stdio: "inherit" }) // pass all IDs, comma-separated, to the nhentai command
-       })
-