Skip to content

Instantly share code, notes, and snippets.

@agungkes
Last active June 30, 2025 00:51
Show Gist options
  • Select an option

  • Save agungkes/effc6bc5aa0e796d1eaaffd72e3727ba to your computer and use it in GitHub Desktop.

Select an option

Save agungkes/effc6bc5aa0e796d1eaaffd72e3727ba to your computer and use it in GitHub Desktop.
"""Back up Lark Wiki pages by exporting them as .docx files.

This script automates the process of backing up content from Lark Wiki pages
and exporting them as .docx files. It is useful for teams or individuals who
want offline access to their documentation or need to archive wiki content
outside of Lark.
"""
import os
import json
import time
import requests
# ====== Configuration ======
# Credentials and wiki location; replace the placeholders before running.
ACCESS_TOKEN = "YOUR-ACCESS-TOKEN"
SPACE_ID = "YOUR-SPACE-ID"
ROOT_NODE_TOKEN = "YOUR-NODE-TOKEN"  # token of the topmost node

# Base URL of the API and the headers sent with every request.
BASE_API = "https://open.larksuite.com/open-apis"
HEADERS = {
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "Content-Type": "application/json",
}

# Output directory and the JSON cache files written by main().
EXPORT_DIR = "exported_wiki"
ALL_NODES_FILE = "all_nodes.json"
TREE_FILE = "wiki_tree.json"

# ====== Fetch All Nodes Recursive ======
# Module-level list; the functions visible in this file bind their own local
# lists and do not appear to read this one.
all_nodes = []
def fetch_nodes(parent_token=None):
    """Collect every wiki node below ``parent_token`` in the configured space.

    Pages through the space's node-list endpoint and descends into every node
    flagged ``has_child``. Nodes are returned in depth-first pre-order: a node
    is appended first, then its whole subtree, then its next sibling.

    Args:
        parent_token: Node token whose children are listed first. ``None``
            (or any falsy value) sends no ``parent_node_token`` filter.

    Returns:
        A list of node dicts exactly as returned by the API. If the API
        reports an error (``code != 0``), the error message is printed and
        the listing of that parent stops; nodes gathered so far are kept.
    """
    endpoint = f"{BASE_API}/wiki/v2/spaces/{SPACE_ID}/nodes"
    collected = []

    def recurse(parent_node_token=None):
        # Let requests build and escape the query string instead of
        # concatenating it by hand for the first and for later pages.
        params = {"page_size": 50}
        if parent_node_token:
            params["parent_node_token"] = parent_node_token

        while True:
            # A timeout keeps one stalled connection from hanging the backup.
            res = requests.get(endpoint, headers=HEADERS, params=params, timeout=30)
            data = res.json()
            if data.get("code") != 0:
                print("❌ Gagal ambil data:", data.get("msg"))
                return

            page = data.get("data") or {}
            # "items" may be absent or null on an empty page; treat as empty.
            for node in page.get("items") or []:
                collected.append(node)
                if node.get("has_child"):
                    # NOTE(review): children are listed via the node's
                    # "origin_node_token"; confirm this is the intended token
                    # for shortcut nodes.
                    recurse(node["origin_node_token"])

            next_page = page.get("page_token")
            if not (page.get("has_more") and next_page):
                break
            params["page_token"] = next_page

    recurse(parent_token)
    return collected
def build_tree(nodes, parent_token=None):
    """Nest a flat list of wiki nodes into a parent/child tree.

    Nodes are indexed by ``parent_node_token`` in a single pass, so building
    the tree is linear in the number of nodes rather than rescanning the
    whole list at every level.

    Args:
        nodes: Iterable of node dicts. Each node is looked up by its
            ``parent_node_token`` (missing key counts as ``None``) and must
            carry ``origin_node_token`` once it is placed in the tree.
        parent_token: Token of the node whose direct children form the top
            level of the returned tree.

    Returns:
        A list of the node dicts whose parent is ``parent_token``, in their
        original order. A node that has children gets a ``"children"`` key
        holding its subtree; the node dicts are modified in place.

    Raises:
        KeyError: If a node placed in the tree has no ``origin_node_token``.
    """
    # Group nodes by parent once; insertion order keeps sibling order stable.
    by_parent = {}
    for node in nodes:
        by_parent.setdefault(node.get("parent_node_token"), []).append(node)

    def attach(token):
        branch = []
        for node in by_parent.get(token, ()):
            children = attach(node["origin_node_token"])
            # Only non-empty subtrees are recorded, so leaves carry no
            # "children" key.
            if children:
                node["children"] = children
            branch.append(node)
        return branch

    return attach(parent_token)
def sanitize_filename(name, fallback="untitled"):
    """Reduce ``name`` to characters that are safe in a file or folder name.

    Keeps alphanumeric characters plus space, dot, underscore and hyphen,
    then strips trailing whitespace.

    Args:
        name: Raw title. ``None`` or an empty string is treated as having no
            usable characters.
        fallback: Name returned when nothing usable is left. This covers an
            empty result and results made only of dots and spaces (such as
            "." or ".."), which would otherwise point at the current or
            parent directory when joined into a path.

    Returns:
        The cleaned name, or ``fallback`` if the cleaned name is unusable.
    """
    cleaned = "".join(c for c in (name or "") if c.isalnum() or c in " ._-").rstrip()
    if not cleaned.strip(" ."):
        return fallback
    return cleaned
def poll_export_status(ticket, doc_token, timeout=60, interval=5):
    """Poll an export task until it succeeds, fails, or the timeout elapses.

    The task state is read from ``data.result.job_status`` and the file token
    from ``data.result.file_token``, the same fields ``export_docx_file``
    reads from this endpoint. A ``job_status`` of 0 means the export is done;
    1 and 2 are treated as still in progress (per the export task API
    reference — confirm there); any other value counts as a failure.

    Args:
        ticket: Ticket returned when the export task was created.
        doc_token: Token of the document being exported.
        timeout: Maximum total time, in seconds, spent sleeping between polls.
        interval: Seconds to sleep between two polls.

    Returns:
        The exported file's token on success, otherwise ``None`` (API error,
        failed export, or timeout). A message is printed in each failure case.
    """
    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}?token={doc_token}"
    elapsed = 0
    while elapsed < timeout:
        # A per-request timeout keeps a stalled connection from blocking
        # the polling loop forever.
        res = requests.get(status_url, headers=HEADERS, timeout=30)
        data = res.json()
        if data.get("code") != 0:
            print("❌ Gagal cek status:", data.get("msg"))
            return None

        result = (data.get("data") or {}).get("result") or {}
        status = result.get("job_status")
        print(f"⌛ Status export: {status}")
        if status == 0:
            return result.get("file_token")
        # A missing status is treated as "not ready yet" rather than failure.
        if status not in (None, 1, 2):
            print("❌ Export gagal.")
            return None

        time.sleep(interval)
        elapsed += interval
    print("⚠️ Timeout menunggu export selesai.")
    return None
def export_docx_file(doc_token, output_path):
    """Export one document as .docx and save it to ``output_path``.

    Creates an export task, polls it up to 10 times at 5-second intervals,
    then downloads the exported file. Every failure is reported with a
    printed message and ends the function without writing a file.

    Args:
        doc_token: Token of the document to export.
        output_path: Destination file path. Its parent directory is created
            if it does not exist.

    Returns:
        None.
    """
    payload = {
        "file_extension": "docx",
        "token": doc_token,
        "type": "docx",
    }
    res = requests.post(
        f"{BASE_API}/drive/v1/export_tasks",
        headers=HEADERS,
        json=payload,
        timeout=30,
    )
    result = res.json()
    if result.get("code") != 0:
        print(f"❌ Gagal buat export task: {result.get('msg')}")
        return
    ticket = result["data"]["ticket"]

    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}?token={doc_token}"
    for _ in range(10):
        status_res = requests.get(status_url, headers=HEADERS, timeout=30)
        status_data = status_res.json()
        # An API-level error carries no usable result; report it instead of
        # failing on a missing key.
        if status_data.get("code") != 0:
            print("❌ Gagal cek status:", status_data.get("msg"))
            return

        job = (status_data.get("data") or {}).get("result") or {}
        job_status = job.get("job_status")
        if job_status == 0:
            file_token = job["file_token"]
            break
        # 1 and 2 are treated as "still running" (see the export task API
        # reference); any other reported status is a failure, so stop
        # instead of polling until the timeout.
        if status_data.get("msg") == "failed" or job_status not in (None, 1, 2):
            print("❌ Export gagal.")
            return
        print("⌛ Menunggu export selesai...")
        time.sleep(5)
    else:
        print("⚠️ Timeout.")
        return

    print(f"📦 File token: {file_token}")
    download_url = f"{BASE_API}/drive/v1/export_tasks/file/{file_token}/download"
    print(f"📥 Mengunduh file: {download_url}")
    download_res = requests.get(download_url, headers=HEADERS, timeout=120)
    # Do not save an error response body as if it were the document.
    if not download_res.ok:
        print(f"❌ Gagal unduh file: HTTP {download_res.status_code}")
        return

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, "wb") as f:
        f.write(download_res.content)
    print(f"✅ File disimpan: {output_path}\n")
def process_tree(tree, current_path):
    """Export every node of ``tree`` below ``current_path``.

    Each node that has an ``obj_token`` is exported as
    ``<current_path>/<title>.docx``. A node with a ``"children"`` key also
    gets a folder ``<current_path>/<title>/`` into which its children are
    exported recursively. Exporting the node's own document in that case
    ensures that pages with sub-pages are included in the backup, not just
    leaf pages.

    Args:
        tree: List of node dicts, each with a ``"title"`` and optionally
            ``"obj_token"`` and ``"children"``.
        current_path: Directory that receives this level's files and folders.

    Returns:
        None.
    """
    for node in tree:
        title = sanitize_filename(node["title"])

        # Nodes without an obj_token have nothing to export and are skipped.
        token = node.get("obj_token")
        if token:
            filepath = os.path.join(current_path, f"{title}.docx")
            export_docx_file(token, filepath)

        if "children" in node:
            folder_path = os.path.join(current_path, title)
            os.makedirs(folder_path, exist_ok=True)
            process_tree(node["children"], folder_path)
# ====== Main Execution ======
def main():
    """Load or fetch the node list, load or build the tree, then export it.

    Both the flat node list and the nested tree are cached as JSON files; an
    existing cache file is loaded instead of recomputing its contents.
    """

    def load_or_create(path, hit_message, miss_message, produce):
        # Return the JSON content of `path` if it exists; otherwise call
        # `produce`, write its result to `path`, and return that result.
        if os.path.exists(path):
            print(hit_message)
            with open(path, "r") as handle:
                return json.load(handle)
        print(miss_message)
        value = produce()
        with open(path, "w") as handle:
            json.dump(value, handle, indent=2)
        return value

    # Load the node list from cache, or fetch it from the API.
    all_nodes = load_or_create(
        ALL_NODES_FILE,
        "📦 Memuat all_nodes dari cache...",
        "🌐 Mengambil data dari API...",
        lambda: fetch_nodes(ROOT_NODE_TOKEN),
    )

    # Load the tree from cache, or rebuild it from the node list.
    tree = load_or_create(
        TREE_FILE,
        "📂 Memuat tree dari cache...",
        "🌲 Membangun struktur pohon...",
        lambda: build_tree(all_nodes, parent_token=ROOT_NODE_TOKEN),
    )

    # Export the files.
    process_tree(tree, EXPORT_DIR)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment