Last active
June 30, 2025 00:51
-
-
Save agungkes/effc6bc5aa0e796d1eaaffd72e3727ba to your computer and use it in GitHub Desktop.
This script automates the process of backing up content from Lark Wiki pages and exporting them as .docx files. It's useful for teams or individuals who want offline access to their documentation or need to archive wiki content outside of Lark.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import time | |
| import requests | |
# ====== Configuration ======
# The three values below look like placeholders; replace them before running.
ACCESS_TOKEN = "YOUR-ACCESS-TOKEN"
SPACE_ID = "YOUR-SPACE-ID"
ROOT_NODE_TOKEN = "YOUR-NODE-TOKEN"  # token of the topmost node

# Base URL that every API path in this script is appended to.
BASE_API = "https://open.larksuite.com/open-apis"

# Headers passed to every requests call in this script.
HEADERS = {
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "Content-Type": "application/json",
}

# Output directory for exported documents, and the two JSON cache files
# (flat node list and nested tree) that main() reads and writes.
EXPORT_DIR = "exported_wiki"
ALL_NODES_FILE = "all_nodes.json"
TREE_FILE = "wiki_tree.json"
# ====== Fetch all nodes recursively ======
# Module-level list kept for backward compatibility; fetch_nodes() builds and
# returns its own list and does not touch this one.
all_nodes = []


def fetch_nodes(parent_token=None):
    """Return a flat list of every wiki node found below ``parent_token``.

    Pages through the space's node listing and, for each node whose
    ``has_child`` flag is set, recurses into that node's children. Nodes are
    appended in discovery order: a node first, then its descendants, then its
    next sibling.

    Args:
        parent_token: Node token whose children are listed first. When falsy,
            no ``parent_node_token`` filter is sent.

    Returns:
        A list of the node dicts exactly as returned by the API. If a request
        reports a non-zero ``code``, the error is printed and that branch is
        abandoned, so the list may be partial.
    """
    collected = []
    endpoint = f"{BASE_API}/wiki/v2/spaces/{SPACE_ID}/nodes"

    def recurse(parent_node_token=None):
        # Let requests build and URL-encode the query string instead of
        # concatenating it by hand for every page.
        params = {"page_size": 50}
        if parent_node_token:
            params["parent_node_token"] = parent_node_token

        while True:
            res = requests.get(endpoint, headers=HEADERS, params=params)
            data = res.json()
            if data.get("code") != 0:
                print("❌ Gagal ambil data:", data.get("msg"))
                return

            page = data.get("data") or {}
            # A page without an "items" key is treated as empty rather than
            # raising KeyError.
            for node in page.get("items") or []:
                collected.append(node)
                if node.get("has_child"):
                    # NOTE(review): children are listed under the node's
                    # origin_node_token (not node_token) -- confirm this is
                    # the intended key for shortcut nodes.
                    recurse(node["origin_node_token"])

            next_token = page.get("page_token")
            if not (page.get("has_more") and next_token):
                break
            params["page_token"] = next_token

    recurse(parent_token)
    return collected
def build_tree(nodes, parent_token=None):
    """Nest a flat node list into a tree rooted at ``parent_token``.

    A node is a child of ``parent_token`` when its ``parent_node_token``
    equals it; a node's own children are looked up by its
    ``origin_node_token``. Sibling order follows the order of ``nodes``.

    Args:
        nodes: Iterable of node dicts. Nodes that have children are mutated in
            place: a ``"children"`` list is attached to them.
        parent_token: Token of the parent whose direct children form the top
            level of the returned tree.

    Returns:
        A list of the (same) node dict objects that are direct children of
        ``parent_token``, each carrying a ``"children"`` list when it has any.
    """
    # Group the nodes by parent once, so each level is a dict lookup instead
    # of a full rescan of ``nodes`` for every node in the tree.
    by_parent = {}
    for node in nodes:
        by_parent.setdefault(node.get("parent_node_token"), []).append(node)

    def attach(token):
        branch = []
        for node in by_parent.get(token, ()):
            children = attach(node["origin_node_token"])
            if children:
                node["children"] = children
            branch.append(node)
        return branch

    return attach(parent_token)
def sanitize_filename(name):
    """Return ``name`` reduced to characters that are safe in a file name.

    Keeps alphanumeric characters plus space, dot, underscore and hyphen, and
    strips trailing whitespace.

    Args:
        name: The raw title string.

    Returns:
        The cleaned name, or ``"untitled"`` when nothing usable is left
        (empty result, or a result made only of dots and spaces such as
        ``"."`` or ``".."``, which would otherwise resolve to the current or
        parent directory when joined into a path).
    """
    cleaned = "".join(c for c in name if c.isalnum() or c in " ._-").rstrip()
    if not cleaned.strip(" ."):
        return "untitled"
    return cleaned
def poll_export_status(ticket, doc_token, timeout=60, interval=5):
    """Poll an export task until it succeeds, fails, or the timeout elapses.

    Reads the task state from ``data.result.job_status`` and the file token
    from ``data.result.file_token`` -- the same response fields that
    export_docx_file() in this file reads.

    Args:
        ticket: Export task ticket returned when the task was created.
        doc_token: Token of the document being exported; sent as the
            ``token`` query parameter.
        timeout: Total seconds of sleeping allowed before giving up. Time
            spent inside the HTTP requests is not counted.
        interval: Seconds to sleep between polls.

    Returns:
        The exported file's token on success, otherwise ``None`` (API error,
        failed job, or timeout).
    """
    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}"
    elapsed = 0
    while elapsed < timeout:
        res = requests.get(status_url, headers=HEADERS, params={"token": doc_token})
        data = res.json()
        if data.get("code") != 0:
            print("❌ Gagal cek status:", data.get("msg"))
            return None

        result = (data.get("data") or {}).get("result") or {}
        status = result.get("job_status")
        print(f"⌛ Status export: {status}")
        if status == 0:
            return result.get("file_token")
        # 1 and 2 are the in-progress states; a missing status is treated as
        # "not ready yet". Any other value is a terminal error.
        if status is not None and status not in (1, 2):
            print("❌ Export gagal.")
            return None

        time.sleep(interval)
        elapsed += interval

    print("⚠️ Timeout menunggu export selesai.")
    return None
def export_docx_file(doc_token, output_path):
    """Export one document as .docx and save it to ``output_path``.

    Creates an export task, polls it up to 10 times with 5 seconds between
    attempts, then downloads the resulting file. Every failure is reported
    with a printed message and the function returns without writing a file.

    Args:
        doc_token: Token of the document to export.
        output_path: Destination file path; its parent directory is created
            when missing.

    Returns:
        None.
    """
    payload = {
        "file_extension": "docx",
        "token": doc_token,
        "type": "docx",
    }
    res = requests.post(f"{BASE_API}/drive/v1/export_tasks", headers=HEADERS, json=payload)
    result = res.json()
    if result.get("code") != 0:
        print(f"❌ Gagal buat export task: {result.get('msg')}")
        return

    ticket = result["data"]["ticket"]
    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}?token={doc_token}"

    file_token = None
    for _ in range(10):
        status_data = requests.get(status_url, headers=HEADERS).json()
        # An API-level error has no usable "data"; report it instead of
        # failing with KeyError.
        if status_data.get("code") != 0:
            print("❌ Gagal cek status:", status_data.get("msg"))
            return

        job = (status_data.get("data") or {}).get("result") or {}
        job_status = job.get("job_status")
        if job_status == 0:
            file_token = job.get("file_token")
            break
        # 1 and 2 are the in-progress states and a missing status is treated
        # as "not ready yet"; any other value is a terminal error, so stop
        # now rather than waiting for the retry budget to run out.
        if job_status is not None and job_status not in (1, 2):
            print("❌ Export gagal.", job.get("job_error_msg") or "")
            return

        print("⌛ Menunggu export selesai...")
        time.sleep(5)
    else:
        print("⚠️ Timeout.")
        return

    if not file_token:
        print("❌ Export gagal.")
        return

    print(f"📦 File token: {file_token}")
    download_url = f"{BASE_API}/drive/v1/export_tasks/file/{file_token}/download"
    print(f"📥 Mengunduh file: {download_url}")
    download_res = requests.get(download_url, headers=HEADERS)
    # Do not save an error response body under a .docx name.
    if not download_res.ok:
        print(f"❌ Gagal mengunduh file: HTTP {download_res.status_code}")
        return

    # os.makedirs("") raises, so only create a directory when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, "wb") as f:
        f.write(download_res.content)
    print(f"✅ File disimpan: {output_path}\n")
def process_tree(tree, current_path):
    """Walk the node tree, mirroring it on disk under ``current_path``.

    A node that has a ``"children"`` key becomes a directory named after its
    sanitized title, and its children are processed inside it; the node's own
    document is not exported. A node without children is exported to
    ``<title>.docx`` when it carries an ``obj_token``, and skipped otherwise.

    Args:
        tree: List of node dicts, optionally nested via ``"children"``.
        current_path: Directory that this level of the tree maps to.
    """
    for entry in tree:
        safe_title = sanitize_filename(entry["title"])

        if "children" not in entry:
            # Leaf node: export it if it references a document.
            doc_token = entry.get("obj_token")
            if doc_token:
                target = os.path.join(current_path, f"{safe_title}.docx")
                export_docx_file(doc_token, target)
            continue

        # Node with children: create its folder, then descend.
        subdir = os.path.join(current_path, safe_title)
        os.makedirs(subdir, exist_ok=True)
        process_tree(entry["children"], subdir)
# ====== Main execution ======
def main():
    """Load or fetch the node list, load or build the tree, then export it.

    Both the flat node list and the nested tree are cached as JSON files
    (ALL_NODES_FILE and TREE_FILE). Each cache is used whenever its file
    exists; otherwise the data is produced and written to that file.
    """

    def cached_json(cache_path, hit_message, miss_message, produce):
        # Return the JSON stored at cache_path, or produce it and store it.
        if os.path.exists(cache_path):
            print(hit_message)
            with open(cache_path, "r") as fh:
                return json.load(fh)
        print(miss_message)
        fresh = produce()
        with open(cache_path, "w") as fh:
            json.dump(fresh, fh, indent=2)
        return fresh

    # Load the node list from cache, or fetch it again from the API.
    all_nodes = cached_json(
        ALL_NODES_FILE,
        "📦 Memuat all_nodes dari cache...",
        "🌐 Mengambil data dari API...",
        lambda: fetch_nodes(ROOT_NODE_TOKEN),
    )

    # Load the tree from cache, or rebuild it from the node list.
    tree = cached_json(
        TREE_FILE,
        "📂 Memuat tree dari cache...",
        "🌲 Membangun struktur pohon...",
        lambda: build_tree(all_nodes, parent_token=ROOT_NODE_TOKEN),
    )

    # Export every document in the tree.
    process_tree(tree, EXPORT_DIR)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment