Last active
February 4, 2020 13:32
-
-
Save zjn0505/c10db825e72070becb2f3b90036db6df to your computer and use it in GitHub Desktop.
explainxkcd extraction, deployed on https://xkcd-trans.zjn0505.now.sh/api
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 'use strict' | |
| const rp = require("request-promise"), | |
| url = require("url"), | |
| cheerio = require("cheerio") | |
// Top-level statement: logged once, when this module is first loaded.
console.log("Init");
| module.exports = async (req, res) => { | |
| getMaxFromXkcd() | |
| .then(max => extractQueryRange(req, max)) | |
| .then(range => range.map(convertToExplainRequest)) | |
| .then(x => Promise.all(x)) | |
| .then(r => { | |
| res.status(200).send(`${r.join("<br/>")}`) | |
| }) | |
| } | |
/**
 * Requests https://xkcd.com/info.0.json and resolves with the `num` field of
 * the parsed JSON body. Callers use that value as the highest comic number.
 *
 * @returns {Promise<number>} Rejects if the request fails or the body is not
 *   valid JSON.
 */
const getMaxFromXkcd = () => {
  const latestComicInfoUrl = "https://xkcd.com/info.0.json";
  return rp(latestComicInfoUrl).then(body => JSON.parse(body).num);
};
/**
 * Builds the list of comic numbers to fetch from the request's query string.
 *
 * `from` defaults to 1 and `to` defaults to 19 past the requested `from`
 * (i.e. 20 comics). Both are then clamped so that 1 <= from <= to <= max.
 *
 * Query-string values arrive as strings, so they are parsed as base-10
 * integers before any arithmetic; otherwise `"5" + 19` would concatenate to
 * `"519"`. Missing or non-numeric values fall back to the defaults instead of
 * producing an invalid array length.
 *
 * @param {{query?: object}} req - Request whose `query` may carry `from`/`to`.
 * @param {number} max - Highest comic number that may be requested.
 * @returns {number[]} Consecutive comic numbers from `from` to `to`, inclusive.
 */
const extractQueryRange = (req, max) => {
  console.log(max);

  // Parses a raw query value as an integer, or returns the fallback.
  const toInt = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  const query = req.query || {};
  const requestedFrom = toInt(query.from, 1);
  const requestedTo = toInt(query.to, requestedFrom + 19);

  const from = Math.max(1, Math.min(max, requestedFrom));
  const to = Math.max(from, Math.min(max, requestedTo));

  console.log(`Query ${JSON.stringify(req.query)}`);
  console.log(`From ${from}`);
  console.log(`To ${to}`);

  return Array.from({ length: to - from + 1 }, (_, offset) => from + offset);
};
/**
 * Fetches the explainxkcd page for one comic and reduces it to an HTML
 * fragment: the page heading, a link back to the page, the comic image, the
 * title text and the transcript section.
 *
 * @param {number} index - Comic number; appended to the explainxkcd base URL.
 * @returns {Promise<string>} HTML fragment for the comic. Rejects if the page
 *   request fails.
 */
const convertToExplainRequest = index => {
  const explainXkcdUrl = "https://explainxkcd.com/" + index;
  return rp(explainXkcdUrl)
    .then(cheerio.load)
    .then($ => {
      console.log("Parsing " + index);
      const title = $(".firstHeading").toString();

      // Rewrite the image `src` to an absolute URL so it still loads when the
      // fragment is served from another origin. Skip the rewrite when the page
      // has no image: url.resolve() throws on an undefined target.
      const comicImages = $("table .image img");
      const rawSrc = $(".image img").attr("src");
      if (rawSrc) {
        comicImages.attr("src", url.resolve("https://explainxkcd.com/", rawSrc));
      }
      const img = comicImages.toString();

      // A missing `title` attribute would otherwise render as "undefined".
      const titleText = $(".image").attr("title") || "";

      // Everything after the Transcript heading, up to the next section.
      const transcript = $("#Transcript")
        .parent()
        .nextUntil("h1, h2, #Discussion")
        .toString();

      return [
        title,
        `<a href='${explainXkcdUrl}'>${explainXkcdUrl}</a><br/><br/>`,
        img,
        "<h2>title text</h2>",
        titleText,
        "<h2>transcript</h2>",
        transcript,
      ].join("\n");
    });
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment