Skip to content

Instantly share code, notes, and snippets.

@mkmark
Created February 10, 2021 17:27
Show Gist options
  • Select an option

  • Save mkmark/d537af5a59236af8a316c5b37e1bc8f7 to your computer and use it in GitHub Desktop.

Select an option

Save mkmark/d537af5a59236af8a316c5b37e1bc8f7 to your computer and use it in GitHub Desktop.

Revisions

  1. mkmark created this gist Feb 10, 2021.
    136 changes: 136 additions & 0 deletions export_to_md.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,136 @@
    #!/usr/bin/env python3

    # User-configurable settings -- adjust these before running the script.
    # Prefix of the WordPress database tables; defaults to 'wp_'.
    table_prefix = 'wp_'
    # Absolute path to wp-config.php; the database credentials are read from it.
    wp_config_path = r'/home/www/default/wp-config.php'
    # Directory the Markdown files are exported to. It must already exist;
    # keep the trailing '/'.
    export_path = r'/home/www/'

    import html
    import os
    import re

    import mysql.connector
    from wpconfigr import WpConfigFile

    # Open the database connection with the credentials defined in wp-config.php.
    wp_config = WpConfigFile(wp_config_path)
    con = mysql.connector.connect(**{
        option: wp_config.get(constant)
        for option, constant in (
            ('host', 'DB_HOST'),
            ('user', 'DB_USER'),
            ('password', 'DB_PASSWORD'),
            ('database', 'DB_NAME'),
        )
    })

    # Prefixed names of the WordPress tables the queries below read from.
    wp_posts, wp_terms, wp_term_taxonomy, wp_term_relationships, wp_users = (
        table_prefix + suffix
        for suffix in (
            'posts',
            'terms',
            'term_taxonomy',
            'term_relationships',
            'users',
        )
    )

    # Posts info: load every post except auto-drafts into ``postsd``, keyed by
    # post ID.
    #
    # With the default table prefix the statement reads:
    #   SELECT ID, post_author, post_date_gmt, post_content, post_title,
    #          post_content_filtered, post_type, post_password, post_status,
    #          comment_status
    #   FROM wp_posts
    #   WHERE post_type = 'post'
    #   AND post_title <> 'Auto Draft';
    cur = con.cursor()
    # The table name cannot be sent as a bound parameter, so it is spliced into
    # the statement text; adjacent literals keep source indentation out of the SQL.
    cur.execute(
        "SELECT ID, post_author, post_date_gmt, post_content, post_title,"
        " post_content_filtered, post_type, post_password, post_status,"
        " comment_status"
        " FROM " + wp_posts +
        " WHERE post_type = 'post'"
        " AND post_title <> 'Auto Draft';"
    )

    # Column names in SELECT order; they become the keys of each post dict.
    columns = tuple(d[0] for d in cur.description)
    # row[0] is the ID column, so postsd maps post ID -> {column name: value}.
    postsd = {row[0]: dict(zip(columns, row)) for row in cur}

    # Terms and author info: attach 'categories', 'tags' and 'author' to every
    # post in ``postsd``.
    #
    # With the default table prefix the two statements read:
    #   SELECT wp_terms.name, wp_term_taxonomy.taxonomy
    #   FROM wp_posts
    #   LEFT OUTER JOIN wp_term_relationships
    #     ON wp_posts.ID = wp_term_relationships.object_id
    #   LEFT OUTER JOIN wp_term_taxonomy
    #     ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id
    #   LEFT OUTER JOIN wp_terms
    #     ON wp_term_taxonomy.term_id = wp_terms.term_id
    #   WHERE wp_posts.ID = <post id>;
    #
    #   SELECT display_name FROM wp_users WHERE ID = <post_author>;
    #
    # Table names cannot be bound as parameters, so they are formatted into the
    # SQL text once, outside the loop. The per-post values are passed as bound
    # parameters (%s) instead of being concatenated into the statement.
    terms_sql = (
        "SELECT {terms}.name, {taxonomy}.taxonomy"
        " FROM {posts}"
        " LEFT OUTER JOIN {relationships}"
        " ON {posts}.ID = {relationships}.object_id"
        " LEFT OUTER JOIN {taxonomy}"
        " ON {relationships}.term_taxonomy_id = {taxonomy}.term_taxonomy_id"
        " LEFT OUTER JOIN {terms}"
        " ON {taxonomy}.term_id = {terms}.term_id"
        " WHERE {posts}.ID = %s"
    ).format(
        posts=wp_posts,
        relationships=wp_term_relationships,
        taxonomy=wp_term_taxonomy,
        terms=wp_terms,
    )
    author_sql = "SELECT display_name FROM {users} WHERE ID = %s".format(
        users=wp_users
    )

    for ID in postsd:
        post = postsd[ID]

        # Sort the post's terms into categories and tags. Rows with any other
        # taxonomy (including the all-NULL row the outer joins produce for a
        # post without terms) match neither branch and are skipped.
        cur.execute(terms_sql, (ID,))
        post['categories'] = []
        post['tags'] = []
        for row in cur:
            if row[1] == 'category':
                post['categories'].append(row[0])
            if row[1] == 'post_tag':
                post['tags'].append(row[0])

        # Default to an empty author so a post whose user row no longer exists
        # still has the key the export step reads.
        post['author'] = ''
        cur.execute(author_sql, (post['post_author'],))
        for row in cur:
            post['author'] = row[0]

    def make_title_path_valid(_str):
        """Return a lower-cased, file-name-safe version of a post title.

        Each of the characters / \\ " : * ? < > | and every whitespace
        character is replaced by a single '-'; the result is then lower-cased.
        """
        # One character class covers everything that must be replaced.
        return re.sub(r'[/\\":*?<>|\s]', '-', _str).lower()

    def make_title_md_valid(_str):
        """Return the title as a double-quoted YAML scalar for the front matter.

        Backslashes and double quotes inside the title are backslash-escaped so
        that a title containing either cannot terminate the quoted value early
        or be misread as an escape sequence. Titles without those characters
        are simply wrapped in double quotes.
        """
        # Escape backslashes first so the ones added for quotes stay single.
        escaped = _str.replace('\\', '\\\\').replace('"', '\\"')
        return '"' + escaped + '"'

    # Export: write one Markdown file per post, consisting of a YAML front-matter
    # header followed by the post body. The database connection is closed
    # afterwards, even if writing a post fails.
    try:
        for ID in postsd:
            post = postsd[ID]

            # File name pattern: <YYYY-MM-DD>-<sanitised title>.md
            file_name = (
                post['post_date_gmt'].strftime("%Y-%m-%d-")
                + make_title_path_valid(post['post_title'])
                + '.md'
            )
            # os.path.join gives the right path whether or not export_path
            # ends with a path separator.
            file_path = os.path.join(export_path, file_name)

            comments = 'true' if post['comment_status'] == 'open' else 'false'
            # Only published posts without a password are marked as published.
            is_public = (
                post['post_status'] == 'publish'
                and post['post_password'] == ''
            )
            published = 'true' if is_public else 'false'
            # Prefer post_content_filtered when it is non-empty.
            if post['post_content_filtered'] != '':
                body = post['post_content_filtered']
            else:
                body = post['post_content']

            lines = [
                '---',
                'layout: post',
                'title: ' + make_title_md_valid(post['post_title']),
                'date: ' + post['post_date_gmt'].strftime("%Y-%m-%d %H:%M"),
                # Fall back to an empty author when none was attached.
                'author: ' + post.get('author', ''),
                'comments: ' + comments,
                # NOTE(review): lists are written using Python's list repr;
                # confirm the front-matter consumer accepts that quoting.
                'categories: ' + str(post['categories']),
                'tags: ' + str(post['tags']),
                'published: ' + published,
                '---',
                html.unescape(body),
            ]

            # errors='ignore' drops characters that cannot be encoded as UTF-8
            # instead of aborting the export.
            with open(file_path, 'w', encoding='utf-8', errors='ignore') as md_file:
                md_file.write('\n'.join(lines) + '\n')
    finally:
        con.close()