agungkes · June 30, 2025 00:51
diff --git a/lark-to-docx.py b/lark-to-docx.py
 import os
 import json
 import time
 import requests

 # ====== Configuration ======
 # Placeholder credentials/identifiers: replace them before running the script.
 # ACCESS_TOKEN is sent as the Bearer token in HEADERS below.
 ACCESS_TOKEN = "YOUR-ACCESS-TOKEN"
 # SPACE_ID is interpolated into the wiki "nodes" URL by fetch_nodes.
 SPACE_ID = "YOUR-SPACE-ID"
 ROOT_NODE_TOKEN = "YOUR-NODE-TOKEN"  # token of the top-most node
 # Headers attached to every HTTP request made by this script.
 HEADERS = {
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "Content-Type": "application/json"
 }
 # Base URL that every API path in this script is appended to.
 BASE_API = "https://open.larksuite.com/open-apis"
 # Root directory that process_tree writes folders and .docx files into.
 EXPORT_DIR = "exported_wiki"

 # JSON cache files read/written by main(); delete them to force a refresh.
 ALL_NODES_FILE = "all_nodes.json"
 TREE_FILE = "wiki_tree.json"
 # ====== Fetch all nodes recursively ======
 # NOTE(review): fetch_nodes() and main() each bind their own local
 # `all_nodes`, so this module-level list is not used by the code visible here.
 all_nodes = []

 def fetch_nodes(parent_token=None, *, timeout=30):
    """Collect every wiki node below ``parent_token`` with a depth-first walk.

    Each page of children is requested from the space's ``nodes`` endpoint;
    whenever a node reports ``has_child`` its own children are fetched
    (keyed by its ``origin_node_token``) before the walk continues.

    Args:
        parent_token: Token of the node whose descendants are listed. A falsy
            value sends no ``parent_node_token`` filter at all.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error instead of blocking forever.

    Returns:
        A list of node dicts exactly as returned by the API, each node
        followed by its descendants. If a request reports a non-zero
        ``code``, the message is printed, that branch is abandoned and the
        nodes collected so far are still returned.
    """
    url = f"{BASE_API}/wiki/v2/spaces/{SPACE_ID}/nodes"
    all_nodes = []

    def recurse(parent_node_token=None):
        # Pass the query as ``params`` so requests URL-encodes the tokens;
        # page tokens are opaque and must not be spliced into the URL raw.
        params = {"page_size": 50}
        if parent_node_token:
            params["parent_node_token"] = parent_node_token

        while True:
            res = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
            data = res.json()
            if data.get("code") != 0:
                print("❌ Gagal ambil data:", data.get("msg"))
                return

            page = data.get("data") or {}
            # An empty page may come back without an "items" key.
            for node in page.get("items") or []:
                all_nodes.append(node)
                if node.get("has_child"):
                    recurse(node["origin_node_token"])

            next_token = page.get("page_token")
            if not (page.get("has_more") and next_token):
                break
            params["page_token"] = next_token

    recurse(parent_token)
    return all_nodes

 def build_tree(nodes, parent_token=None):
    """Nest a flat node list into a tree rooted at ``parent_token``.

    A node is a child of ``parent_token`` when its ``parent_node_token``
    equals it; the node's own children are found through its
    ``origin_node_token``. Sibling order follows the order in ``nodes``.

    Args:
        nodes: Iterable of node dicts. The dicts are modified in place: a
            ``"children"`` list is added to every node that has children.
        parent_token: Parent token of the top level of the returned tree.

    Returns:
        A list of the top-level node dicts (the same objects as in ``nodes``).
    """
    # Group once by parent so each lookup is O(1) instead of rescanning the
    # whole list for every node.
    by_parent = {}
    for node in nodes:
        by_parent.setdefault(node.get("parent_node_token"), []).append(node)

    def attach(token):
        branch = []
        for node in by_parent.get(token, ()):
            children = attach(node["origin_node_token"])
            if children:
                node["children"] = children
            branch.append(node)
        return branch

    return attach(parent_token)

 def sanitize_filename(name, fallback="untitled"):
    """Reduce ``name`` to characters that are safe in a file or folder name.

    Keeps alphanumeric characters plus space, dot, underscore and hyphen,
    then strips trailing whitespace.

    Args:
        name: Raw title to clean.
        fallback: Name returned when nothing usable is left.

    Returns:
        The cleaned name, or ``fallback`` when the result would be empty or
        made only of dots/spaces.
    """
    cleaned = "".join(c for c in name if c.isalnum() or c in " ._-").rstrip()
    # "" would yield a bare ".docx", and "." / ".." would make os.path.join
    # point at the current or parent directory instead of a new entry.
    if not cleaned.strip(" ."):
        return fallback
    return cleaned

 def poll_export_status(ticket, doc_token, timeout=60, interval=5):
    """Poll an export task until it succeeds, fails, or ``timeout`` elapses.

    The task state is read from ``data.result.job_status``, the same field
    export_docx_file checks: 0 means finished; 1 and 2 are treated as
    "still running"; any other value is treated as a failure.

    Args:
        ticket: Export task ticket returned when the task was created.
        doc_token: Token of the document being exported.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to sleep between polls.

    Returns:
        The exported file's token on success, otherwise ``None`` (API error,
        failed job, or timeout; a message is printed in each case).
    """
    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}"
    # Wall-clock deadline, so slow requests count against the timeout too.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        res = requests.get(
            status_url, headers=HEADERS, params={"token": doc_token}, timeout=30
        )
        data = res.json()

        if data.get("code") != 0:
            print("❌ Gagal cek status:", data.get("msg"))
            return None

        result = (data.get("data") or {}).get("result") or {}
        status = result.get("job_status")
        print(f"⌛ Status export: {status}")

        if status == 0:
            return result.get("file_token")
        if status not in (1, 2):
            print("❌ Export gagal.")
            return None

        time.sleep(interval)

    print("⚠️ Timeout menunggu export selesai.")
    return None

 def export_docx_file(doc_token, output_path):
    """Export one document as DOCX and save it to ``output_path``.

    Creates an export task, polls it up to 10 times (5 seconds apart), then
    downloads the resulting file. Every failure path prints a message and
    returns without writing anything.

    Args:
        doc_token: Token of the document to export.
        output_path: Destination file path; its parent directory is created
            when it does not exist yet.

    Returns:
        None.
    """
    payload = {
        "file_extension": "docx",
        "token": doc_token,
        "type": "docx"
    }
    res = requests.post(
        f"{BASE_API}/drive/v1/export_tasks", headers=HEADERS, json=payload, timeout=30
    )
    result = res.json()
    if result.get("code") != 0:
        print(f"❌ Gagal buat export task: {result.get('msg')}")
        return

    ticket = result["data"]["ticket"]
    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}"

    for _ in range(10):
        status_res = requests.get(
            status_url, headers=HEADERS, params={"token": doc_token}, timeout=30
        )
        status_data = status_res.json()
        if status_data.get("code") != 0:
            print("❌ Gagal cek status:", status_data.get("msg"))
            return

        job = (status_data.get("data") or {}).get("result") or {}
        job_status = job.get("job_status")
        if job_status == 0:
            file_token = job["file_token"]
            break
        # 1 and 2 are the in-progress states; anything else is a failed job,
        # so stop instead of burning the remaining polls.
        if job_status not in (1, 2):
            print("❌ Export gagal.")
            return
        print("⌛ Menunggu export selesai...")
        time.sleep(5)
    else:
        print("⚠️ Timeout.")
        return

    print(f"📦 File token: {file_token}")
    download_url = f"{BASE_API}/drive/v1/export_tasks/file/{file_token}/download"
    print(f"📥 Mengunduh file: {download_url}")
    download_res = requests.get(download_url, headers=HEADERS, timeout=120)
    # Never save an error response as if it were the document: reject HTTP
    # errors and JSON bodies (a JSON payload cannot be a .docx file).
    content_type = download_res.headers.get("Content-Type", "")
    if not download_res.ok or content_type.startswith("application/json"):
        print(f"❌ Gagal mengunduh file: HTTP {download_res.status_code}")
        return

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(output_path, "wb") as f:
        f.write(download_res.content)
    print(f"✅ File disimpan: {output_path}\n")

 def process_tree(tree, current_path):
    """Mirror the node tree on disk beneath ``current_path``.

    A node that has a ``"children"`` key becomes a folder (named after its
    sanitized title) and its children are processed inside it. Any other
    node is exported to ``<title>.docx`` when it carries an ``obj_token``.
    Only nodes without children are exported.
    """
    for entry in tree:
        safe_title = sanitize_filename(entry["title"])

        if "children" not in entry:
            doc_token = entry.get("obj_token")
            if doc_token:
                target = os.path.join(current_path, f"{safe_title}.docx")
                export_docx_file(doc_token, target)
            continue

        subdir = os.path.join(current_path, safe_title)
        os.makedirs(subdir, exist_ok=True)
        process_tree(entry["children"], subdir)

 # ====== Main entry point ======
 def main():
    """Load (or fetch and cache) the node list and tree, then export it.

    Both the flat node list and the nested tree are cached as JSON files;
    an existing cache file is always preferred over recomputing.
    """

    def cached_json(path, hit_message, miss_message, producer):
        # Return the JSON stored at `path`; when the file is missing, call
        # `producer`, write its result to `path`, and return it.
        if os.path.exists(path):
            print(hit_message)
            with open(path, "r") as fh:
                return json.load(fh)
        print(miss_message)
        value = producer()
        with open(path, "w") as fh:
            json.dump(value, fh, indent=2)
        return value

    # Flat node list: from cache, or fetched from the API.
    all_nodes = cached_json(
        ALL_NODES_FILE,
        "📦 Memuat all_nodes dari cache...",
        "🌐 Mengambil data dari API...",
        lambda: fetch_nodes(ROOT_NODE_TOKEN),
    )

    # Nested tree: from cache, or rebuilt from the node list.
    tree = cached_json(
        TREE_FILE,
        "📂 Memuat tree dari cache...",
        "🌲 Membangun struktur pohon...",
        lambda: build_tree(all_nodes, parent_token=ROOT_NODE_TOKEN),
    )

    # Export every document in the tree.
    process_tree(tree, EXPORT_DIR)

 if __name__ == "__main__":
    main()
	import os
	import json
	import time
	import requests

	# ====== Configuration ======
	# Placeholder credentials/identifiers: replace them before running the script.
	# ACCESS_TOKEN is sent as the Bearer token in HEADERS below.
	ACCESS_TOKEN = "YOUR-ACCESS-TOKEN"
	# SPACE_ID is interpolated into the wiki "nodes" URL by fetch_nodes.
	SPACE_ID = "YOUR-SPACE-ID"
	ROOT_NODE_TOKEN = "YOUR-NODE-TOKEN" # token of the top-most node
	# Headers attached to every HTTP request made by this script.
	HEADERS = {
	"Authorization": f"Bearer {ACCESS_TOKEN}",
	"Content-Type": "application/json"
	}
	# Base URL that every API path in this script is appended to.
	BASE_API = "https://open.larksuite.com/open-apis"
	# Root directory that process_tree writes folders and .docx files into.
	EXPORT_DIR = "exported_wiki"

	# JSON cache files read/written by main(); delete them to force a refresh.
	ALL_NODES_FILE = "all_nodes.json"
	TREE_FILE = "wiki_tree.json"
	# ====== Fetch all nodes recursively ======
	# NOTE(review): fetch_nodes() and main() each bind their own `all_nodes`,
	# so this module-level list is not used by the code visible here.
	all_nodes = []

	def fetch_nodes(parent_token=None, *, timeout=30):
	    """Collect every wiki node below ``parent_token`` with a depth-first walk.

	    Each page of children is requested from the space's ``nodes`` endpoint;
	    whenever a node reports ``has_child`` its own children are fetched
	    (keyed by its ``origin_node_token``) before the walk continues.

	    Args:
	        parent_token: Token of the node whose descendants are listed. A
	            falsy value sends no ``parent_node_token`` filter at all.
	        timeout: Seconds to wait for each HTTP request before ``requests``
	            raises a timeout error instead of blocking forever.

	    Returns:
	        A list of node dicts exactly as returned by the API, each node
	        followed by its descendants. If a request reports a non-zero
	        ``code``, the message is printed, that branch is abandoned and the
	        nodes collected so far are still returned.
	    """
	    url = f"{BASE_API}/wiki/v2/spaces/{SPACE_ID}/nodes"
	    all_nodes = []

	    def recurse(parent_node_token=None):
	        # Pass the query as ``params`` so requests URL-encodes the tokens;
	        # page tokens are opaque and must not be spliced into the URL raw.
	        params = {"page_size": 50}
	        if parent_node_token:
	            params["parent_node_token"] = parent_node_token

	        while True:
	            res = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
	            data = res.json()
	            if data.get("code") != 0:
	                print("❌ Gagal ambil data:", data.get("msg"))
	                return

	            page = data.get("data") or {}
	            # An empty page may come back without an "items" key.
	            for node in page.get("items") or []:
	                all_nodes.append(node)
	                if node.get("has_child"):
	                    recurse(node["origin_node_token"])

	            next_token = page.get("page_token")
	            if not (page.get("has_more") and next_token):
	                break
	            params["page_token"] = next_token

	    recurse(parent_token)
	    return all_nodes

	def build_tree(nodes, parent_token=None):
	    """Nest a flat node list into a tree rooted at ``parent_token``.

	    A node is a child of ``parent_token`` when its ``parent_node_token``
	    equals it; the node's own children are found through its
	    ``origin_node_token``. Sibling order follows the order in ``nodes``.

	    Args:
	        nodes: Iterable of node dicts. The dicts are modified in place: a
	            ``"children"`` list is added to every node that has children.
	        parent_token: Parent token of the top level of the returned tree.

	    Returns:
	        A list of the top-level node dicts (the same objects as in ``nodes``).
	    """
	    # Group once by parent so each lookup is O(1) instead of rescanning the
	    # whole list for every node.
	    by_parent = {}
	    for node in nodes:
	        by_parent.setdefault(node.get("parent_node_token"), []).append(node)

	    def attach(token):
	        branch = []
	        for node in by_parent.get(token, ()):
	            children = attach(node["origin_node_token"])
	            if children:
	                node["children"] = children
	            branch.append(node)
	        return branch

	    return attach(parent_token)

	def sanitize_filename(name, fallback="untitled"):
	    """Reduce ``name`` to characters that are safe in a file or folder name.

	    Keeps alphanumeric characters plus space, dot, underscore and hyphen,
	    then strips trailing whitespace.

	    Args:
	        name: Raw title to clean.
	        fallback: Name returned when nothing usable is left.

	    Returns:
	        The cleaned name, or ``fallback`` when the result would be empty or
	        made only of dots/spaces.
	    """
	    cleaned = "".join(c for c in name if c.isalnum() or c in " ._-").rstrip()
	    # "" would yield a bare ".docx", and "." / ".." would make os.path.join
	    # point at the current or parent directory instead of a new entry.
	    if not cleaned.strip(" ."):
	        return fallback
	    return cleaned

	def poll_export_status(ticket, doc_token, timeout=60, interval=5):
	    """Poll an export task until it succeeds, fails, or ``timeout`` elapses.

	    The task state is read from ``data.result.job_status``, the same field
	    export_docx_file checks: 0 means finished; 1 and 2 are treated as
	    "still running"; any other value is treated as a failure.

	    Args:
	        ticket: Export task ticket returned when the task was created.
	        doc_token: Token of the document being exported.
	        timeout: Maximum number of seconds to keep polling.
	        interval: Seconds to sleep between polls.

	    Returns:
	        The exported file's token on success, otherwise ``None`` (API
	        error, failed job, or timeout; a message is printed in each case).
	    """
	    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}"
	    # Wall-clock deadline, so slow requests count against the timeout too.
	    deadline = time.monotonic() + timeout

	    while time.monotonic() < deadline:
	        res = requests.get(
	            status_url, headers=HEADERS, params={"token": doc_token}, timeout=30
	        )
	        data = res.json()

	        if data.get("code") != 0:
	            print("❌ Gagal cek status:", data.get("msg"))
	            return None

	        result = (data.get("data") or {}).get("result") or {}
	        status = result.get("job_status")
	        print(f"⌛ Status export: {status}")

	        if status == 0:
	            return result.get("file_token")
	        if status not in (1, 2):
	            print("❌ Export gagal.")
	            return None

	        time.sleep(interval)

	    print("⚠️ Timeout menunggu export selesai.")
	    return None

	def export_docx_file(doc_token, output_path):
	    """Export one document as DOCX and save it to ``output_path``.

	    Creates an export task, polls it up to 10 times (5 seconds apart), then
	    downloads the resulting file. Every failure path prints a message and
	    returns without writing anything.

	    Args:
	        doc_token: Token of the document to export.
	        output_path: Destination file path; its parent directory is created
	            when it does not exist yet.

	    Returns:
	        None.
	    """
	    payload = {
	        "file_extension": "docx",
	        "token": doc_token,
	        "type": "docx"
	    }
	    res = requests.post(
	        f"{BASE_API}/drive/v1/export_tasks", headers=HEADERS, json=payload, timeout=30
	    )
	    result = res.json()
	    if result.get("code") != 0:
	        print(f"❌ Gagal buat export task: {result.get('msg')}")
	        return

	    ticket = result["data"]["ticket"]
	    status_url = f"{BASE_API}/drive/v1/export_tasks/{ticket}"

	    for _ in range(10):
	        status_res = requests.get(
	            status_url, headers=HEADERS, params={"token": doc_token}, timeout=30
	        )
	        status_data = status_res.json()
	        if status_data.get("code") != 0:
	            print("❌ Gagal cek status:", status_data.get("msg"))
	            return

	        job = (status_data.get("data") or {}).get("result") or {}
	        job_status = job.get("job_status")
	        if job_status == 0:
	            file_token = job["file_token"]
	            break
	        # 1 and 2 are the in-progress states; anything else is a failed
	        # job, so stop instead of burning the remaining polls.
	        if job_status not in (1, 2):
	            print("❌ Export gagal.")
	            return
	        print("⌛ Menunggu export selesai...")
	        time.sleep(5)
	    else:
	        print("⚠️ Timeout.")
	        return

	    print(f"📦 File token: {file_token}")
	    download_url = f"{BASE_API}/drive/v1/export_tasks/file/{file_token}/download"
	    print(f"📥 Mengunduh file: {download_url}")
	    download_res = requests.get(download_url, headers=HEADERS, timeout=120)
	    # Never save an error response as if it were the document: reject HTTP
	    # errors and JSON bodies (a JSON payload cannot be a .docx file).
	    content_type = download_res.headers.get("Content-Type", "")
	    if not download_res.ok or content_type.startswith("application/json"):
	        print(f"❌ Gagal mengunduh file: HTTP {download_res.status_code}")
	        return

	    # dirname is "" for a bare filename, and os.makedirs("") raises.
	    target_dir = os.path.dirname(output_path)
	    if target_dir:
	        os.makedirs(target_dir, exist_ok=True)
	    with open(output_path, "wb") as f:
	        f.write(download_res.content)
	    print(f"✅ File disimpan: {output_path}\n")

	def process_tree(tree, current_path):
	    """Mirror the node tree on disk beneath ``current_path``.

	    A node that has a ``"children"`` key becomes a folder (named after its
	    sanitized title) and its children are processed inside it. Any other
	    node is exported to ``<title>.docx`` when it carries an ``obj_token``.
	    Only nodes without children are exported.
	    """
	    for node in tree:
	        title = sanitize_filename(node["title"])
	        if "children" in node:
	            folder_path = os.path.join(current_path, title)
	            os.makedirs(folder_path, exist_ok=True)
	            process_tree(node["children"], folder_path)
	        else:
	            token = node.get("obj_token")
	            if token:
	                filename = f"{title}.docx"
	                filepath = os.path.join(current_path, filename)
	                export_docx_file(token, filepath)

	# ====== Main entry point ======
	def main():
	    """Load (or fetch and cache) the node list and tree, then export it.

	    Both the flat node list and the nested tree are cached as JSON files;
	    an existing cache file is always preferred over recomputing.
	    """
	    # Load the node list from cache, or fetch it again.
	    if os.path.exists(ALL_NODES_FILE):
	        print("📦 Memuat all_nodes dari cache...")
	        with open(ALL_NODES_FILE, "r") as f:
	            all_nodes = json.load(f)
	    else:
	        print("🌐 Mengambil data dari API...")
	        all_nodes = fetch_nodes(ROOT_NODE_TOKEN)
	        with open(ALL_NODES_FILE, "w") as f:
	            json.dump(all_nodes, f, indent=2)

	    # Load the tree from cache, or rebuild it from the node list.
	    if os.path.exists(TREE_FILE):
	        print("📂 Memuat tree dari cache...")
	        with open(TREE_FILE, "r") as f:
	            tree = json.load(f)
	    else:
	        print("🌲 Membangun struktur pohon...")
	        tree = build_tree(all_nodes, parent_token=ROOT_NODE_TOKEN)
	        with open(TREE_FILE, "w") as f:
	            json.dump(tree, f, indent=2)

	    # Export every document in the tree.
	    process_tree(tree, EXPORT_DIR)

	if __name__ == "__main__":
	    main()
No results found