"""Migrate comments from the WordPress wpDiscuz plugin (https://wpdiscuz.com/)
to Giscus (https://giscus.app/) powered by GitHub Discussions.

The script fetches each WordPress page, parses it to extract comments, and
creates new discussions in the GitHub repository using the GitHub GraphQL API.

This is a one-time script that should be run only once for each post.
Otherwise, it will create duplicate discussions and comments. The script is
quick and dirty, so it may not handle all edge cases. Review the parsed
comments in dry mode first.

The script assumes the Giscus discussion mapping mode "pathname". It works
with (or without) the "strict title matching" option.

To run:
1. Install the required packages:
   `pip install requests beautifulsoup4 markdownify gql[all]`
2. Set the configuration parameters below.
3. Run the script: `python3 migrate-comments.py`

Note that the GitHub API has rate limits. The script will sleep for 1 second
after each comment creation to avoid hitting the rate limits, but it may
still happen if there are too many comments to migrate. In that case, you
can delete the half-migrated discussion and run the script again for the
given post.

See more on the rate limits here:
https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api
"""

import hashlib
import re
from time import sleep
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
from markdownify import markdownify as md

########## Configuration ##########

# In dry run mode, the script will not create any discussions or comments but
# will output the parsed comments to the console instead.
dryrun = False

# GitHub Personal Access Token with "Discussions" read and write permissions
# on the comments repository; create one at https://github.com/settings/tokens
token = ""

# GitHub repository ID and discussions category ID, matching the parameters
# in the Giscus script.
repository_id = ""  # "data-repo-id" attribute in the Giscus script
category_id = ""  # "data-category-id" attribute in the Giscus script

# List of WordPress post URLs to migrate.
posts = [
    # "https://example.com/wordpress-to-github-discussions-migration/"
]

########## Configuration End ##########

posts.reverse()  # start from the oldest post

transport = AIOHTTPTransport(
    url="https://api.github.com/graphql",
    headers={"Authorization": f"Bearer {token}"},
)
client = Client(transport=transport, fetch_schema_from_transport=True)

# The mutations pass user-generated text as GraphQL variables rather than
# interpolating it into the query string: comment bodies can contain quotes,
# backslashes and newlines that would otherwise break (or inject into) the
# query syntax.
CREATE_DISCUSSION_MUTATION = gql("""
    mutation CreateDiscussion($repositoryId: ID!, $categoryId: ID!, $title: String!, $body: String!) {
        createDiscussion(input: {repositoryId: $repositoryId, categoryId: $categoryId, title: $title, body: $body}) {
            discussion {
                id
            }
        }
    }
""")

ADD_COMMENT_MUTATION = gql("""
    mutation AddComment($discussionId: ID!, $body: String!, $replyToId: ID) {
        addDiscussionComment(input: {discussionId: $discussionId, body: $body, replyToId: $replyToId}) {
            comment {
                id
            }
        }
    }
""")


def main():
    for post in posts:
        migrate_post(post)


def migrate_post(url):
    """Fetch one WordPress post and replicate its wpDiscuz comment thread as a
    GitHub discussion with top-level comments and nested replies."""
    # A timeout prevents the script from hanging forever on a stuck connection.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # Abort the whole run: continuing would silently skip this post.
        raise SystemExit(f"Failed to fetch the page: {url}")
    soup = BeautifulSoup(response.content, 'html.parser')
    title = soup.find("h1").get_text().strip()
    description = soup.find("meta", property="og:description")["content"]
    print(f"# Post: {title}")
    thread = soup.find("div", class_="wpd-thread-list")
    comments = get_comments(thread, deep=False)
    if not comments:
        return
    discussion_id = create_discussion(url, description)
    for element, text in comments:
        if dryrun:
            print("\n" + text)
        comment_id = create_comment(discussion_id, text, None)
        # Replies are nested inside the top-level comment element.
        for _reply_element, reply_text in get_comments(element, deep=True):
            if dryrun:
                print("----------\n")
                print(reply_text)
            create_comment(discussion_id, reply_text, comment_id)
        if dryrun:
            print("--------------------\n--------------------")


def get_comments(parent, deep: bool):
    """Extract wpDiscuz comments below *parent*.

    With deep=False only direct children are returned (top-level comments);
    with deep=True the search recurses into nested replies as well.

    Returns a list of (element, text) tuples, where *element* is the comment's
    BeautifulSoup node and *text* is the Markdown body to post to GitHub.
    """
    comments = parent.find_all("div", class_="comment", recursive=deep)
    results = []
    for comment in comments:
        author = comment.find("div", class_="wpd-comment-author").get_text().strip()
        author = f"**{author}**"
        date = comment.find("div", class_="wpd-comment-date")["title"]
        # Keep only the first three tokens of the date tooltip — presumably
        # day, month and year without the time part (verify against the theme).
        date = " ".join(date.split(" ")[0:3])
        content = comment.find("div", class_="wpd-comment-text").prettify().strip()
        content = convert_to_markdown(content)
        upvotes = int(comment.find("div", class_="wpd-vote-result").get_text().strip())
        if upvotes != 0:
            # Record wpDiscuz votes as a textual footer; Giscus reactions
            # cannot be created on behalf of other users.
            reactions = f"_Reactions: {abs(upvotes)} x "
            reactions += "👍" if upvotes > 0 else "👎"
            reactions += "_\n"
        else:
            reactions = ""
        text = f"_From {author} on {date} (migrated from WordPress):_" + "\n\n" + content + "\n\n" + reactions
        results.append((comment, text))
    return results


def convert_to_markdown(content):
    """Convert a prettified HTML fragment to Markdown.

    Collapses the pretty-printed line structure first so that markdownify does
    not treat layout newlines as content, then converts without escaping
    Markdown punctuation (the text is rendered, not round-tripped).
    """
    content = re.sub(r'\n\s+', '\n', content)
    content = content.replace("\n", " ")
    content = content.replace("> ", ">")
    markdown = md(content, escape_asterisks=False, escape_underscores=False, escape_misc=False).strip()
    return markdown


def create_discussion(url, description):
    """Create the GitHub discussion for a post.

    The discussion title is the post's pathname, matching the Giscus
    "pathname" mapping mode. Returns the new discussion ID, or None in
    dry run mode.
    """
    if dryrun:
        return None
    pathname = urlparse(url).path[1:]  # strip the leading slash
    body = f"# {pathname}\n\n{description}\n\n{url}\n\n"
    result = client.execute(CREATE_DISCUSSION_MUTATION, variable_values={
        "repositoryId": repository_id,
        "categoryId": category_id,
        "title": pathname,
        "body": body,
    })
    discussion_id = result["createDiscussion"]["discussion"]["id"]
    print(f'Created discussion "{pathname}" with ID: {discussion_id}')
    return discussion_id


def create_comment(discussion_id, body, reply_to_id):
    """Add a comment to a discussion.

    When *reply_to_id* is given the comment is created as a reply to that
    comment; None creates a top-level comment. Returns the new comment ID,
    or None in dry run mode.
    """
    if dryrun:
        return None
    result = client.execute(ADD_COMMENT_MUTATION, variable_values={
        "discussionId": discussion_id,
        "body": body,
        "replyToId": reply_to_id,  # Python None maps to GraphQL null
    })
    comment_id = result["addDiscussionComment"]["comment"]["id"]
    print(f"Created comment with ID: {comment_id}")
    sleep(1)  # sleep for 1 second to avoid hitting rate limits
    return comment_id


def escape_body(body):
    """Escape a string for embedding in a GraphQL string literal.

    Retained for backward compatibility; the mutations above now pass bodies
    as GraphQL variables instead. Backslashes are escaped first so that the
    escapes added for quotes and newlines are not themselves doubled.
    """
    return body.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")


if __name__ == '__main__':
    main()