chmod +x mwdump-to-pandoc

# Get the newest release of pandoc from https://github.com/jgm/pandoc/releases/latest, e.g.:
wget https://github.com/jgm/pandoc/releases/download/1.16.0.2/pandoc-1.16.0.2-1-amd64.deb

# Grab a Wikipedia dump, e.g.:
wget https://dumps.wikimedia.org/dawiki/20160111/dawiki-20160111-pages-articles.xml.bz2

# Run the conversion:
bzcat dawiki-20160111-pages-articles.xml.bz2 | ./mwdump-to-pandoc | xz - > dawiki-20160111-pandoc.txt.xz