Last active
April 13, 2026 13:45
-
-
Save minanagehsalalma/9bd62eda8a59d09653c3b767e4e3cee3 to your computer and use it in GitHub Desktop.
Revisions
-
minanagehsalalma revised this gist
Apr 13, 2026 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,9 +1,9 @@ // ==UserScript== // @name Reddit Post JSON Exporter // @namespace https://github.com/minanagehsalalma // @version 1.2.0 // @description Select Reddit posts and export them as clean JSON for LLM ingestion. Now captures rich text, bullets, flairs, and mod notices. // @author Mina Nageh Salama // @match https://www.reddit.com/* // @match https://old.reddit.com/* // @grant GM_setClipboard -
minanagehsalalma created this gist
Mar 10, 2026 .There are no files selected for viewing
// ==UserScript==
// @name         Reddit Post JSON Exporter
// @namespace    http://tampermonkey.net/
// @version      1.2.0
// @description  Select Reddit posts and export them as clean JSON for LLM ingestion. Now captures rich text, bullets, flairs, and mod notices.
// @author       You
// @match        https://www.reddit.com/*
// @match        https://old.reddit.com/*
// @grant        GM_setClipboard
// @run-at       document-idle
// ==/UserScript==

(function () {
  'use strict';

  // ─── State ────────────────────────────────────────────────────────────────
  // Each entry is `{ _el, ...postData }`; `_el` is stripped again on export.
  let selectedPosts = [];
  let selectMode = false;
  // Post elements that already carry our click listener.
  const attachedPosts = new WeakSet();

  const REDDIT_ORIGIN = 'https://www.reddit.com';

  // ─── Inject Styles ────────────────────────────────────────────────────────
  const style = document.createElement('style');
  style.textContent = `
    #rpe-panel { position: fixed; bottom: 28px; right: 28px; z-index: 999999; font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace; display: flex; flex-direction: column; align-items: flex-end; gap: 10px; }
    #rpe-badge { background: #ff4500; color: #fff; font-size: 11px; font-weight: 700; letter-spacing: 0.08em; padding: 3px 9px; border-radius: 99px; display: none; box-shadow: 0 2px 8px rgba(255,69,0,0.4); }
    #rpe-controls { display: flex; gap: 8px; align-items: center; }
    .rpe-btn { border: none; border-radius: 10px; cursor: pointer; font-family: inherit; font-weight: 700; font-size: 12px; letter-spacing: 0.05em; padding: 10px 16px; transition: transform 0.1s, box-shadow 0.1s, opacity 0.15s; box-shadow: 0 4px 14px rgba(0,0,0,0.25); }
    .rpe-btn:hover { transform: translateY(-1px); box-shadow: 0 6px 18px rgba(0,0,0,0.3); }
    .rpe-btn:active { transform: translateY(0px); }
    #rpe-toggle { background: #1a1a2e; color: #e8e8f0; border: 1.5px solid #333355; }
    #rpe-toggle.active { background: #ff4500; color: #fff; border-color: #ff4500; box-shadow: 0 4px 14px rgba(255,69,0,0.45); }
    #rpe-export { background: #0a7c5c; color: #fff; display: none; }
    #rpe-clear { background: #2a2a2a; color: #aaa; display: none; padding: 10px 12px; }
    body.rpe-mode .rpe-selectable { outline: 2px dashed #ff450044; outline-offset: 4px; border-radius: 6px; cursor: crosshair !important; transition: outline-color 0.15s, background 0.15s; }
    body.rpe-mode .rpe-selectable:hover { outline-color: #ff4500aa !important; background: rgba(255, 69, 0, 0.03); }
    .rpe-selected { outline: 2px solid #ff4500 !important; outline-offset: 4px; background: rgba(255, 69, 0, 0.06) !important; border-radius: 6px; }
    #rpe-modal { display: none; position: fixed; inset: 0; z-index: 9999999; background: rgba(0,0,0,0.72); backdrop-filter: blur(4px); align-items: center; justify-content: center; }
    #rpe-modal.open { display: flex; }
    #rpe-modal-box { background: #0f0f17; border: 1.5px solid #2a2a40; border-radius: 16px; width: min(820px, 92vw); max-height: 80vh; display: flex; flex-direction: column; overflow: hidden; box-shadow: 0 24px 64px rgba(0,0,0,0.6); }
    #rpe-modal-header { display: flex; align-items: center; justify-content: space-between; padding: 18px 22px 14px; border-bottom: 1px solid #1e1e30; }
    #rpe-modal-header h2 { margin: 0; font-size: 13px; font-weight: 700; letter-spacing: 0.12em; color: #ff4500; text-transform: uppercase; }
    #rpe-modal-actions { display: flex; gap: 8px; }
    .rpe-modal-btn { border: none; border-radius: 8px; cursor: pointer; font-family: inherit; font-weight: 700; font-size: 11px; letter-spacing: 0.07em; padding: 7px 14px; transition: opacity 0.15s; }
    .rpe-modal-btn:hover { opacity: 0.85; }
    #rpe-copy-btn { background: #ff4500; color: #fff; }
    #rpe-close-btn { background: #1e1e2e; color: #888; border: 1px solid #2a2a3e; }
    #rpe-output { flex: 1; overflow-y: auto; padding: 20px 22px; margin: 0; font-size: 12.5px; line-height: 1.7; color: #c8d0e0; white-space: pre-wrap; word-break: break-word; background: transparent; scrollbar-width: thin; scrollbar-color: #333 transparent; }
    #rpe-toast { position: fixed; bottom: 90px; right: 28px; z-index: 9999998; background: #1a1a2e; color: #e0e0f0; font-family: 'SF Mono', monospace; font-size: 12px; font-weight: 600; padding: 10px 18px; border-radius: 10px; border: 1px solid #2a2a40; box-shadow: 0 4px 16px rgba(0,0,0,0.4); opacity: 0; transform: translateY(6px); transition: opacity 0.2s, transform 0.2s; pointer-events: none; }
    #rpe-toast.show { opacity: 1; transform: translateY(0); }
  `;
  document.head.appendChild(style);

  // ─── Build UI ─────────────────────────────────────────────────────────────
  const panel = document.createElement('div');
  panel.id = 'rpe-panel';
  panel.innerHTML = `
    <div id="rpe-badge">0 selected</div>
    <div id="rpe-controls">
      <button class="rpe-btn" id="rpe-clear">✕ Clear</button>
      <button class="rpe-btn" id="rpe-export">⬇ Export JSON</button>
      <button class="rpe-btn" id="rpe-toggle">⊞ Select Posts</button>
    </div>
  `;
  document.body.appendChild(panel);

  const modal = document.createElement('div');
  modal.id = 'rpe-modal';
  modal.innerHTML = `
    <div id="rpe-modal-box">
      <div id="rpe-modal-header">
        <h2>Reddit Post Export</h2>
        <div id="rpe-modal-actions">
          <button class="rpe-modal-btn" id="rpe-copy-btn">Copy to Clipboard</button>
          <button class="rpe-modal-btn" id="rpe-close-btn">Close</button>
        </div>
      </div>
      <pre id="rpe-output"></pre>
    </div>
  `;
  document.body.appendChild(modal);

  const toast = document.createElement('div');
  toast.id = 'rpe-toast';
  document.body.appendChild(toast);

  const toggleBtn = document.getElementById('rpe-toggle');
  const exportBtn = document.getElementById('rpe-export');
  const clearBtn = document.getElementById('rpe-clear');
  const badge = document.getElementById('rpe-badge');
  const output = document.getElementById('rpe-output');
  const copyBtn = document.getElementById('rpe-copy-btn');
  const closeBtn = document.getElementById('rpe-close-btn');

  // ─── Toast ────────────────────────────────────────────────────────────────
  let toastTimer;

  /**
   * Show a short status message in the toast; it hides again after 2.2 s.
   * A pending hide timer is cancelled so back-to-back messages do not flicker.
   * @param {string} msg - Text to display.
   */
  function showToast(msg) {
    clearTimeout(toastTimer);
    toast.textContent = msg;
    toast.classList.add('show');
    toastTimer = setTimeout(() => toast.classList.remove('show'), 2200);
  }

  // ─── Parse "117K", "1.2M", plain numbers ─────────────────────────────────
  /**
   * Convert a count as rendered by Reddit into an integer.
   * Accepts plain integers (commas and whitespace are ignored) and numbers
   * with a K / M / B suffix in either case.
   * @param {*} raw - Attribute value or text; coerced with String().
   * @returns {number|null} The count, or null when nothing numeric is found
   *   (null/undefined, empty, '-', 'Vote', or any other unrecognised text).
   */
  function parseCount(raw) {
    if (raw === null || raw === undefined) return null;
    const str = String(raw).trim().replace(/,/g, '').replace(/\s+/g, '');
    if (!str || str === '-' || str === 'Vote') return null;
    if (/^\d+$/.test(str)) return Number.parseInt(str, 10);

    const m = str.match(/^([\d.]+)([KkMmBb])$/);
    if (!m) return null;
    const n = Number.parseFloat(m[1]);
    // "[\d.]+" also matches strings such as "." that do not parse as a number.
    if (!Number.isFinite(n)) return null;
    const multipliers = { K: 1e3, M: 1e6, B: 1e9 };
    return Math.round(n * multipliers[m[2].toUpperCase()]);
  }

  /**
   * Pull the first count out of a label such as "123 comments" or "1.2k Comments".
   * The label's own letters overlap the K/M suffixes ("comments" contains "m"),
   * so the number is located with a pattern instead of stripping characters.
   * @param {string|null|undefined} text - Label text.
   * @returns {number|null} Parsed count, or null when the label has no digits.
   */
  function parseLabelledCount(text) {
    const m = (text || '').match(/(\d+(?:[.,]\d+)*)\s*([KkMmBb](?![A-Za-z]))?/);
    return m ? parseCount(m[1] + (m[2] || '')) : null;
  }

  /**
   * Convert a timestamp string to ISO-8601.
   * @param {string|null|undefined} value - Timestamp as found in the DOM.
   * @returns {string|null} ISO string; the raw value when Date cannot parse it
   *   (toISOString() would throw a RangeError); null when empty.
   */
  function toIsoTimestamp(value) {
    if (!value) return null;
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? value : date.toISOString();
  }

  /**
   * Turn a permalink into an absolute URL on www.reddit.com.
   * @param {string|null|undefined} permalink - Path or URL from the post element.
   * @returns {string|null} Absolute URL; values that already start with
   *   http(s):// are returned unchanged; null when empty.
   */
  function toRedditUrl(permalink) {
    if (!permalink) return null;
    return /^https?:\/\//i.test(permalink) ? permalink : REDDIT_ORIGIN + permalink;
  }

  // ─── Views extraction ─────────────────────────────────────────────────────
  /**
   * Find a view count for a post element.
   * Checks a list of candidate attributes first, then falls back to the first
   * "<number> view(s)" occurrence in the element's text.
   * @param {Element} el - Post element.
   * @returns {number|null} View count, or null when none is found.
   */
  function extractViews(el) {
    for (const attr of ['view-count', 'viewcount', 'views', 'post-view-count', 'post-views']) {
      const v = el.getAttribute(attr);
      if (v !== null && v !== '') return parseCount(v);
    }
    const fullText = (el.innerText || el.textContent || '').replace(/\s+/g, ' ');
    const viewMatch = fullText.match(/([\d.,]+[KkMmBb]?)\s+views?/i);
    return viewMatch ? parseCount(viewMatch[1]) : null;
  }

  // ─── Formatted Text Extraction (Preserves Line Breaks and Lists) ──────────
  /**
   * Flatten a DOM subtree to plain text while keeping list bullets, <br> line
   * breaks and separation between block elements. Works on a clone, so the
   * page itself is not modified.
   * @param {Node|null|undefined} node - Root of the content to flatten.
   * @returns {string} Trimmed text; '' when node is missing.
   */
  function getFormattedText(node) {
    if (!node) return '';
    const clone = node.cloneNode(true);

    // Convert <li> items to bullet points if they aren't already formatted
    clone.querySelectorAll('li').forEach(li => {
      const text = li.textContent.trim();
      if (text && !/^[-•*]\s/.test(text)) {
        li.prepend('- ');
      }
    });

    // Replace line breaks
    clone.querySelectorAll('br').forEach(br => br.replaceWith('\n'));

    // Add trailing newlines to block elements to maintain separation
    clone.querySelectorAll('p, div, li, h1, h2, h3, h4, h5, h6').forEach(el => {
      el.appendChild(document.createTextNode('\n'));
    });

    // Condense multiple newlines and trim whitespace
    return clone.textContent.replace(/\n[ \t]*\n+/g, '\n\n').trim();
  }

  // ─── Body + notices extraction ────────────────────────────────────────────
  /**
   * Build the exported `body` string for a post: flair line, text body, and
   * any moderator/removal notices, separated by blank lines.
   * @param {Element} el - Post element.
   * @returns {string|null} Combined text, or null when nothing was found.
   */
  function extractBody(el) {
    const parts = [];

    // Flair (using shreddit specific elements/slots)
    const flairEls = el.querySelectorAll(
      '[data-testid="post-flair"], .flair, [class*="flair"], [slot="post-flair"], shreddit-post-flair'
    );
    const flairs = [...flairEls]
      .map(f => f.textContent.trim())
      .filter(f => f && f.length < 80);

    // De-dupe and append flairs
    if (flairs.length) parts.push('[Flair: ' + [...new Set(flairs)].join(' | ') + ']');

    // Text post body (using formatted extraction); first selector with text wins
    const bodySelectors = [
      '[slot="text-body"]',
      'div[data-post-click-location="text-body"]',
      '[data-click-id="text"] .md',
      '.usertext-body .md'
    ];
    for (const sel of bodySelectors) {
      const node = el.querySelector(sel);
      if (node) {
        const txt = getFormattedText(node);
        if (txt) {
          parts.push(txt);
          break;
        }
      }
    }

    // Mod/removal/admin banners (using shreddit notice slots)
    const noticeSelectors = [
      '[slot="post-notice"]',
      'shreddit-post-notice',
      '[data-testid="mod-removed-snackbar"]',
      '[class*="removal-reason"]',
      '[class*="removed-banner"]',
      '[class*="ModeratorBanner"]',
      'shreddit-mod-removed-banner',
      'faceplate-alert'
    ];
    for (const sel of noticeSelectors) {
      el.querySelectorAll(sel).forEach(node => {
        const txt = getFormattedText(node).replace(/\n/g, ' ').trim();
        // Skip very short fragments and text already captured by an earlier part.
        if (txt && txt.length > 10 && !parts.some(p => p.includes(txt))) {
          parts.push('[Notice: ' + txt + ']');
        }
      });
    }

    // Fallback broad text scan for known Reddit removal strings
    const fullPostText = (el.innerText || el.textContent || '').replace(/\s+/g, ' ');
    const removalRegexps = [
      /(Sorry, this post has been removed by the moderators of r\/[^\s.]+)/i,
      /(Sorry, this post was removed by Reddit's filters)/i,
      /(This post was removed by Reddit's spam filters)/i,
      /(This post was deleted by the person who originally posted it)/i
    ];
    for (const re of removalRegexps) {
      const match = fullPostText.match(re);
      if (match && !parts.some(p => p.includes(match[1]))) {
        parts.push('[Notice: ' + match[1] + ']');
      }
    }

    return parts.length ? parts.join('\n\n') : null;
  }

  // ─── Main extractor ───────────────────────────────────────────────────────
  /**
   * Derive the subreddit from the current page URL.
   * @returns {string|null} "r/<name>" when the path contains /r/<name>, else null.
   */
  function extractSubFromURL() {
    const m = location.pathname.match(/\/r\/([^/]+)/);
    return m ? 'r/' + m[1] : null;
  }

  /**
   * Read the exportable fields of one post element.
   * <shreddit-post> elements are read mostly from their attributes; any other
   * element goes through a selector-based fallback.
   * @param {Element} el - Post element as returned by getPostElements().
   * @returns {{subreddit: (string|null), author: (string|null),
   *   posted_at: (string|null), title: (string|null), body: (string|null),
   *   link: (string|null), permalink: (string|null), upvotes: (number|null),
   *   comments: (number|null), views: (number|null)}} Extracted post data.
   */
  function extractPostData(el) {
    const tag = el.tagName?.toLowerCase();

    // New Reddit web component (shreddit)
    if (tag === 'shreddit-post') {
      const sub = el.getAttribute('subreddit-prefixed-name')
        || el.getAttribute('subreddit-name')
        || extractSubFromURL();
      const author = el.getAttribute('author')
        || el.querySelector('[data-testid="post_author_link"]')?.textContent?.trim()
        || null;
      const created = el.getAttribute('created-timestamp')
        || el.getAttribute('post-created-at')
        || null;
      const title = el.getAttribute('post-title')
        || el.querySelector('[slot="title"]')?.textContent?.trim()
        || null;
      const permalink = toRedditUrl(el.getAttribute('permalink'));
      const score = el.getAttribute('score') || el.getAttribute('upvotes') || null;
      const cmtCount = el.getAttribute('comment-count') || null;
      const views = extractViews(el);
      const body = extractBody(el);

      const linkEl = el.querySelector(
        'a[data-testid="outbound-link"], a[slot="outbound-link"], a[data-post-click-location="outbound-link"]'
      );

      return {
        subreddit: sub,
        author,
        posted_at: toIsoTimestamp(created),
        title,
        body,
        // Prefer the outbound link; otherwise point at the post itself.
        link: linkEl?.href || permalink,
        permalink,
        upvotes: parseCount(score),
        comments: parseCount(cmtCount),
        views,
      };
    }

    // Article / old Reddit fallback
    const sub = el.querySelector('.subreddit')?.textContent?.trim()
      || el.querySelector('[data-subreddit]')?.dataset?.subreddit
      || document.querySelector('meta[name="reddit:subreddit"]')?.content
      || extractSubFromURL();

    const authorEl = el.querySelector('.author, [data-testid="post_author_link"]');
    const author = authorEl?.textContent?.trim() || null;

    const timeEl = el.querySelector('time');
    const posted = timeEl?.getAttribute('datetime') || timeEl?.title || null;

    const titleEl = el.querySelector('a[data-click-id="body"], h3, .title a, h1');
    const title = titleEl?.textContent?.trim() || null;

    const scoreEl = el.querySelector('[data-score], .score, .upvoteCount');
    const upvotes = parseCount(scoreEl?.getAttribute('data-score') || scoreEl?.textContent);

    const cmtEl = el.querySelector('[data-testid="post-comment-count"], .comments, a[data-click-id="comments"]');
    const comments = parseLabelledCount(cmtEl?.textContent);

    const views = extractViews(el);
    const body = extractBody(el);

    const extLink = el.querySelector('a[data-testid="outbound-link"], a.outbound-link');
    const permalink = el.querySelector('a[data-click-id="body"]')?.href
      || el.querySelector('a.title')?.href
      || null;

    return {
      subreddit: sub,
      author,
      posted_at: toIsoTimestamp(posted),
      title,
      body,
      link: extLink?.href || permalink || null,
      permalink: permalink || null,
      upvotes,
      comments,
      views,
    };
  }

  // ─── Select mode ──────────────────────────────────────────────────────────
  /**
   * Collect every element on the page that is treated as a post.
   * @returns {Element[]} Matches for the three supported post selectors.
   */
  function getPostElements() {
    return [
      ...document.querySelectorAll('shreddit-post'),
      ...document.querySelectorAll('article[data-testid="post-container"]'),
      ...document.querySelectorAll('.thing.link'),
    ];
  }

  /** Sync the badge text and the visibility of the export/clear buttons with the selection size. */
  function updateBadge() {
    const hasSelection = selectedPosts.length > 0;
    badge.textContent = selectedPosts.length + ' selected';
    badge.style.display = hasSelection ? 'block' : 'none';
    exportBtn.style.display = hasSelection ? 'inline-block' : 'none';
    clearBtn.style.display = hasSelection ? 'inline-block' : 'none';
  }

  /**
   * Add or remove one post from the selection and refresh the UI.
   * Data is extracted before the element is marked, so a failed extraction
   * cannot leave a highlighted post that is missing from the selection.
   * @param {Element} el - Post element that was clicked.
   */
  function togglePostSelection(el) {
    const idx = selectedPosts.findIndex(p => p._el === el);
    if (idx !== -1) {
      el.classList.remove('rpe-selected');
      selectedPosts.splice(idx, 1);
      showToast('Deselected post');
      updateBadge();
      return;
    }

    let data;
    try {
      data = extractPostData(el);
    } catch (err) {
      console.error('[rpe] failed to extract post data', err);
      showToast('Could not read post');
      return;
    }

    el.classList.add('rpe-selected');
    selectedPosts.push({ _el: el, ...data });
    const label = data.title || 'post';
    // Only add the ellipsis when the title was actually cut.
    showToast('Added: ' + (label.length > 50 ? label.slice(0, 50) + '...' : label));
    updateBadge();
  }

  /**
   * Make every post on the page selectable. Safe to call repeatedly: posts
   * that already have a listener are skipped.
   */
  function attachListeners() {
    getPostElements().forEach(el => {
      if (attachedPosts.has(el)) return;
      attachedPosts.add(el);
      el.classList.add('rpe-selectable');

      // Capture phase, so the click is intercepted before handlers on descendants.
      el.addEventListener('click', e => {
        if (!selectMode) return;
        e.preventDefault();
        e.stopPropagation();
        togglePostSelection(el);
      }, true);
    });
  }

  // Pick up posts added to the page after the initial attach while select mode is on.
  const observer = new MutationObserver(() => {
    if (selectMode) attachListeners();
  });
  observer.observe(document.body, { childList: true, subtree: true });

  // ─── Toggle ───────────────────────────────────────────────────────────────
  toggleBtn.addEventListener('click', () => {
    selectMode = !selectMode;
    toggleBtn.classList.toggle('active', selectMode);
    toggleBtn.textContent = selectMode ? 'Stop Selecting' : 'Select Posts';
    document.body.classList.toggle('rpe-mode', selectMode);
    if (selectMode) {
      attachListeners();
      showToast('Click any post to select it');
    } else {
      showToast('Selection mode off');
    }
  });

  // ─── Export ───────────────────────────────────────────────────────────────
  exportBtn.addEventListener('click', () => {
    // Drop the element reference; only plain data goes into the JSON.
    const clean = selectedPosts.map(({ _el, ...rest }) => rest);
    output.textContent = JSON.stringify(
      { exported_at: new Date().toISOString(), count: clean.length, posts: clean },
      null,
      2
    );
    modal.classList.add('open');
  });

  clearBtn.addEventListener('click', () => {
    selectedPosts.forEach(p => p._el?.classList.remove('rpe-selected'));
    selectedPosts = [];
    updateBadge();
    showToast('Cleared all selections');
  });

  copyBtn.addEventListener('click', async () => {
    const text = output.textContent;
    try {
      if (typeof GM_setClipboard !== 'undefined') {
        GM_setClipboard(text);
      } else {
        await navigator.clipboard.writeText(text);
      }
      showToast('Copied to clipboard!');
    } catch (err) {
      // Covers a rejected write as well as a missing navigator.clipboard.
      console.error('[rpe] clipboard write failed', err);
      showToast('Copy failed');
    }
  });

  closeBtn.addEventListener('click', () => modal.classList.remove('open'));
  modal.addEventListener('click', e => {
    // Only a click on the backdrop itself closes the modal.
    if (e.target === modal) modal.classList.remove('open');
  });
  document.addEventListener('keydown', e => {
    if (e.key === 'Escape') modal.classList.remove('open');
  });
})();