将X.com(Twitter)的推文(Tweet)和长文章(Article)转换为Markdown格式并下载
// ==UserScript==
// @name X.com(Twitter)2Markdown
// @namespace http://tampermonkey.net/
// @version 1.1.4
// @description 将X.com(Twitter)的推文(Tweet)和长文章(Article)转换为Markdown格式并下载
// @author Zhangyiw
// @match https://x.com/*/status/*
// @match https://x.com/*/article/*
// @match https://twitter.com/*/status/*
// @match https://twitter.com/*/article/*
// @grant none
// @license MIT
// @run-at document-start
// ==/UserScript==
(function() {
'use strict';
// ===== 配置 =====
/**
 * Script-wide settings shared by the functions below.
 */
const CONFIG = {
    // DOM id given to the floating download button; also used to find an existing one.
    BUTTON_ID: 'x-markdown-download-btn',
    // When true, log() writes diagnostic messages to the console.
    DEBUG: true,
    // Maximum number of button-creation attempts made by tryInit().
    MAX_RETRIES: 10,
    // Delay in milliseconds between button-creation attempts.
    RETRY_DELAY: 1000
};
// ===== 日志工具 =====
/**
 * Write a debug message to the console, prefixed with the script tag.
 * Does nothing unless CONFIG.DEBUG is truthy.
 *
 * @param {...*} args - Values forwarded to console.log after the prefix.
 */
function log(...args) {
    if (!CONFIG.DEBUG) {
        return;
    }
    const parts = ['[X转MD]', ...args];
    console.log(...parts);
}
/**
 * Write an error message to the console, prefixed with the script tag.
 * Unlike log(), this is not gated by CONFIG.DEBUG.
 *
 * @param {...*} args - Values forwarded to console.error after the prefix.
 */
function error(...args) {
    const parts = ['[X转MD]', ...args];
    console.error(...parts);
}
// ===== 创建下载按钮 =====
/**
 * Insert the floating "download as Markdown" button into the page.
 *
 * The call is idempotent: if an element with CONFIG.BUTTON_ID already exists
 * nothing is created.
 *
 * @returns {boolean} true when the button exists after the call (already
 *   present or newly created); false when document.body is not available yet
 *   or creation threw, so the caller may retry.
 */
function createDownloadButton() {
    log('开始创建下载按钮');
    // Already injected: nothing to do.
    if (document.getElementById(CONFIG.BUTTON_ID)) {
        log('按钮已存在,跳过创建');
        return true;
    }
    // The body may not exist yet; report failure so the caller can retry.
    if (!document.body) {
        log('body 不存在,稍后重试');
        return false;
    }
    // Inline styles for the resting and hovered states of the button.
    const restingStyle = {
        background: '#000000',
        transform: 'translateY(0)',
        boxShadow: '0 2px 10px rgba(0,0,0,0.3)'
    };
    const hoverStyle = {
        background: '#1d1d1d',
        transform: 'translateY(-2px)',
        boxShadow: '0 4px 15px rgba(0,0,0,0.4)'
    };
    try {
        const button = document.createElement('button');
        button.id = CONFIG.BUTTON_ID;
        // The label is plain text, so textContent is used instead of innerHTML:
        // no HTML parsing is needed and it cannot trip a Trusted Types policy.
        button.textContent = '📄 下载为Markdown';
        button.style.cssText = [
            '',
            'position: fixed;',
            'top: 80px;',
            'right: 20px;',
            'z-index: 99999;',
            'background: #000000;',
            'color: white;',
            'border: 1px solid #333;',
            'padding: 12px 20px;',
            'border-radius: 9999px;',
            'font-size: 14px;',
            'font-weight: 600;',
            'cursor: pointer;',
            'box-shadow: 0 2px 10px rgba(0,0,0,0.3);',
            'transition: all 0.3s ease;',
            'font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;',
            ''
        ].join('\n');
        // Hover effect.
        button.addEventListener('mouseenter', () => {
            Object.assign(button.style, hoverStyle);
        });
        button.addEventListener('mouseleave', () => {
            Object.assign(button.style, restingStyle);
        });
        // Clicking starts the conversion and download.
        button.addEventListener('click', convertToMarkdown);
        document.body.appendChild(button);
        log('下载按钮创建成功');
        return true;
    } catch (e) {
        error('创建按钮失败:', e);
        return false;
    }
}
// ===== 判断页面类型 =====
/**
 * Classify the current page from its URL.
 *
 * @returns {'article'|'tweet'|null} 'article' when the URL contains
 *   "/article/", otherwise 'tweet' when it contains "/status/", otherwise null.
 */
function getPageType() {
    const { href } = window.location;
    // Order matters: "/article/" is checked before "/status/".
    const markers = [
        ['/article/', 'article'],
        ['/status/', 'tweet']
    ];
    const hit = markers.find(([fragment]) => href.includes(fragment));
    return hit ? hit[1] : null;
}
// ===== 获取作者信息 =====
/**
 * Read the author's display name and @handle from the page.
 *
 * Each field is taken from the first selector in its list whose element
 * passes the field's check (non-empty text for the name, text starting with
 * "@" for the handle). If no handle is found, the first path segment of the
 * URL is used; if no name is found, the handle without its "@" is used.
 *
 * @returns {{name: string, username: string}} Either field may be '' when
 *   nothing could be determined.
 */
function getAuthorInfo() {
    const nameSelectors = [
        '[data-testid="User-Name"] a[role="link"]',
        'a[href^="/"] h2[dir="ltr"]',
        '[data-testid="UserName"]',
        '[data-testid="tweet"] a[role="link"]',
        '[data-testid="article-author-name"]'
    ];
    const usernameSelectors = [
        '[data-testid="User-Name"] span[dir="ltr"]',
        'a[href^="/"] span[dir="ltr"]',
        '[data-testid="UserName"] span',
        '[data-testid="article-author-username"]'
    ];
    // Trimmed text of the first selector whose element passes `accept`, else ''.
    const firstAcceptedText = (selectorList, accept) => {
        for (const selector of selectorList) {
            const node = document.querySelector(selector);
            if (!node) {
                continue;
            }
            const trimmed = node.textContent.trim();
            if (accept(trimmed)) {
                return trimmed;
            }
        }
        return '';
    };

    let name = firstAcceptedText(nameSelectors, (value) => value.length > 0);
    let username = firstAcceptedText(usernameSelectors, (value) => value.startsWith('@'));

    // Fall back to the first path segment of the URL for the handle.
    if (!username) {
        const fromPath = window.location.pathname.match(/^\/([^\/]+)/);
        if (fromPath) {
            username = '@' + fromPath[1];
        }
    }
    // Fall back to the handle (without "@") for the display name.
    if (!name && username) {
        name = username.substring(1);
    }
    return { name: name, username: username };
}
// ===== 获取发布时间 =====
/**
 * Read the publication time shown on the page.
 *
 * The first selector that matches a <time> element wins. Its `datetime`
 * attribute is formatted with the zh-CN locale when it parses to a valid
 * date; otherwise the element's visible text is used. An unparseable
 * `datetime` no longer produces the literal string "Invalid Date".
 *
 * @returns {string} A formatted timestamp, the element's text, or '' when
 *   nothing usable is found.
 */
function getPublishTime() {
    const timeSelectors = [
        'time[datetime]',
        '[data-testid="tweet"] time',
        '[data-testid="article-header"] time',
        'a[href*="/status/"] time'
    ];
    for (const selector of timeSelectors) {
        const timeEl = document.querySelector(selector);
        if (!timeEl) {
            continue;
        }
        const datetime = timeEl.getAttribute('datetime');
        if (datetime) {
            const parsed = new Date(datetime);
            // An invalid Date has a NaN timestamp; skip it and try the text below.
            if (!Number.isNaN(parsed.getTime())) {
                return parsed.toLocaleString('zh-CN');
            }
        }
        const timeText = timeEl.textContent.trim();
        if (timeText) {
            return timeText;
        }
    }
    return '';
}
// ===== 处理图片 =====
/**
 * Convert an <img> element into a Markdown image.
 *
 * The URL is taken from the first of `src`, `data-src`, `data-image` that is
 * non-blank and not a `data:` URI. Profile avatars are skipped. A trailing
 * size suffix (":small", ":large", ...) is removed and a `name=...` query
 * value is replaced by `name=orig` to request the original-size image.
 *
 * The previous implementation returned an empty string on every path, so no
 * image ever reached the output; it now returns `![alt](url)`.
 *
 * @param {Element} img - Image element to convert.
 * @returns {string} Markdown image syntax, or '' when the image is skipped.
 */
function processImage(img) {
    const urlAttributes = ['src', 'data-src', 'data-image'];
    let imageUrl = '';
    for (const attr of urlAttributes) {
        const url = img.getAttribute(attr);
        if (url && url.trim() && !url.startsWith('data:')) {
            imageUrl = url.trim();
            break;
        }
    }
    if (!imageUrl) {
        return '';
    }
    // Skip user avatars.
    if (imageUrl.includes('profile_images')) {
        return '';
    }
    // Drop a trailing size suffix (:small, :medium, :large, ...).
    imageUrl = imageUrl.replace(/:\w+$/, '');
    // Replace name=small / name=medium / name=900x900 ... with name=orig.
    imageUrl = imageUrl.replace(/name=\w+/, 'name=orig');
    // Alt text is author-supplied: flatten line breaks and escape brackets so
    // it cannot break out of the Markdown image label.
    const alt = (img.getAttribute('alt') || '图片')
        .replace(/\s+/g, ' ')
        .trim()
        .replace(/[\[\]]/g, '\\$&');
    return `![${alt}](${imageUrl})`;
}
// ===== 处理视频 =====
/**
 * Convert a video player wrapper, or a <video> element itself, to Markdown.
 *
 * Resolution order:
 *   1. a link to the video source, when it is a real URL;
 *   2. the poster image;
 *   3. a textual placeholder.
 *
 * Fixes over the previous version:
 *   - a <video> element passed directly is now recognised (querySelector only
 *     searches descendants, so it was always reported as a placeholder);
 *   - the poster branch returned an empty string; it now returns an image;
 *   - `blob:` sources are skipped because such URLs only resolve inside the
 *     document that created them and would be dead links in a saved file.
 *
 * @param {Element} video - A player container or a <video> element.
 * @returns {string} Markdown for the video.
 */
function processVideo(video) {
    const placeholder = '🎬 *[视频内容]*';
    const isVideoTag = typeof video.tagName === 'string' && video.tagName.toLowerCase() === 'video';
    const videoEl = isVideoTag ? video : video.querySelector('video');
    if (!videoEl) {
        return placeholder;
    }
    const src = videoEl.src;
    if (src && !src.startsWith('blob:')) {
        return `[🎬 视频](${src})`;
    }
    const poster = videoEl.getAttribute('poster');
    if (poster) {
        return `![视频封面](${poster})`;
    }
    return placeholder;
}
// ===== 处理链接 =====
/**
 * Convert an anchor element to Markdown.
 *
 * Hashtag links and site-internal links (other than links to a status) are
 * reduced to their text; everything else becomes `[text](absolute-url)`.
 *
 * The internal-link check uses the authored `href` attribute. The `href`
 * property is always resolved to an absolute URL, so the previous
 * `href.startsWith('/')` test could never match and mentions were emitted as
 * full links.
 *
 * @param {HTMLAnchorElement} link - Anchor to convert.
 * @returns {string} Plain text or a Markdown link.
 */
function processLink(link) {
    const href = link.href;
    // Attribute value as written in the markup (may be relative); falls back
    // to the resolved URL for objects that do not expose getAttribute.
    const authoredHref = link.getAttribute?.('href') ?? href;
    const text = link.textContent.trim();
    if (href.includes('/hashtag/') || href.includes('/search?q=%23')) {
        return text;
    }
    if (authoredHref.startsWith('/') && !authoredHref.includes('/status/')) {
        return text;
    }
    return `[${text}](${href})`;
}
// ===== 处理表情 =====
/**
 * Return the textual form of an emoji image.
 *
 * @param {Element} emoji - Emoji element.
 * @returns {string} The `alt` attribute when it is non-empty, otherwise the
 *   element's text content.
 */
function processEmoji(emoji) {
    return emoji.getAttribute('alt') || emoji.textContent;
}
// ===== 处理卡片链接 =====
/**
 * Convert a link-preview card to a Markdown block quote.
 *
 * @param {Element} card - Card container.
 * @returns {string} A quoted bold link, with the description on a second
 *   quoted line when one is present; '' when the card contains no link.
 */
function processCard(card) {
    const anchor = card.querySelector('a[href]');
    if (!anchor) {
        return '';
    }
    const href = anchor.href;
    // Trimmed text of the first match for `selector`, or `fallback` when absent.
    const textOf = (selector, fallback) => {
        const node = card.querySelector(selector);
        return node ? node.textContent.trim() : fallback;
    };
    const title = textOf('[dir="ltr"]', '链接');
    const description = textOf('span[dir="auto"]', '');

    const quoteLines = [`\n> 📎 **[${title}](${href})**`];
    if (description) {
        quoteLines.push(`> ${description}`);
    }
    return quoteLines.join('\n') + '\n\n';
}
// ===== 处理投票 =====
/**
 * Convert a poll to Markdown: a bold question followed by an unchecked
 * task-list item for every option that has text.
 *
 * @param {Element} poll - Poll container.
 * @returns {string} Markdown for the poll.
 */
function processPoll(poll) {
    const heading = poll.querySelector('[role="heading"]');
    const question = heading ? heading.textContent.trim() : '投票';
    const optionLines = Array.from(poll.querySelectorAll('[role="button"]'))
        .map((option) => option.textContent.trim())
        .filter((label) => label)
        .map((label) => `- [ ] ${label}\n`);
    return `\n📊 **${question}**\n\n` + optionLines.join('') + '\n';
}
// ===== 提取文本内容 =====
/**
 * Recursively flatten an element's children into Markdown text.
 *
 * Text nodes are copied verbatim; <a> goes through processLink(); <br>
 * becomes a newline; an <img> is passed to processEmoji() when it has
 * draggable="false", a src containing "emoji", or a non-empty alt, and is
 * dropped otherwise; every other element is processed recursively. Nodes
 * that are neither text nor elements are ignored.
 *
 * @param {Node} element - Root whose childNodes are walked.
 * @returns {string} The concatenated Markdown.
 */
function extractTextContent(element) {
    let out = '';
    for (const child of element.childNodes) {
        if (child.nodeType === Node.TEXT_NODE) {
            out += child.textContent;
            continue;
        }
        if (child.nodeType !== Node.ELEMENT_NODE) {
            continue;
        }
        const tag = child.tagName.toLowerCase();
        if (tag === 'img') {
            const treatAsEmoji = child.getAttribute('draggable') === 'false' ||
                child.src.includes('emoji') ||
                child.getAttribute('alt');
            if (treatAsEmoji) {
                out += processEmoji(child);
            }
        } else if (tag === 'a') {
            out += processLink(child);
        } else if (tag === 'br') {
            out += '\n';
        } else {
            // span, div and any other container: descend into it.
            out += extractTextContent(child);
        }
    }
    return out;
}
// ===== 获取推文内容 =====
/**
 * Collect the text and attached media of the tweet shown on the page.
 *
 * The content element is the first selector match that has non-empty text.
 * Media (images, videos, link cards, poll) are looked up inside the
 * enclosing <article>, when there is one.
 *
 * Fixes over the previous version:
 *   - the selector loop kept the last queried element even when it was
 *     rejected for being empty, so "not found" was not reported reliably;
 *     only an accepted candidate is kept now;
 *   - a <video> nested inside a videoPlayer wrapper (or a card nested inside
 *     another card) matched the combined selector twice and was emitted
 *     twice; only the outermost match is processed.
 *
 * @returns {{content: string, images: string[], videos: string[],
 *   cards: string[], poll: (string|null)}|null} Extracted data, or null when
 *   no tweet text element with content was found.
 */
function getTweetContent() {
    log('开始获取推文内容');
    const contentSelectors = [
        '[data-testid="tweetText"]',
        '[data-testid="tweet"] div[lang]',
        '[data-testid="tweet"] [dir="auto"]',
        'article [data-testid="tweetText"]'
    ];
    let contentEl = null;
    for (const selector of contentSelectors) {
        const candidate = document.querySelector(selector);
        if (candidate && candidate.textContent.trim().length > 0) {
            log('找到推文内容:', selector);
            contentEl = candidate;
            break;
        }
    }
    if (!contentEl) {
        log('未找到推文内容');
        return null;
    }
    const content = extractTextContent(contentEl);
    const images = [];
    const videos = [];
    const cards = [];
    let poll = null;

    // The tweet's <article> container holds its media.
    const articleContainer = contentEl.closest('article');
    if (articleContainer) {
        // Matches for `selector` that are not nested inside another match.
        const outermostMatches = (selector) =>
            Array.from(articleContainer.querySelectorAll(selector)).filter(
                (el) => !(el.parentElement && el.parentElement.closest(selector))
            );

        // Tweet images; emoji images (abs-0.twimg.com or SVG) are skipped.
        const imageEls = articleContainer.querySelectorAll('img[src*="pbs.twimg.com"], img[src*="video.twimg.com"]');
        imageEls.forEach((img) => {
            const src = img.getAttribute('src') || '';
            if (src.includes('abs-0.twimg.com/emoji') || src.includes('.svg')) {
                return;
            }
            const md = processImage(img);
            if (md) {
                images.push(md);
            }
        });

        outermostMatches('[data-testid="videoPlayer"], video').forEach((video) => {
            videos.push(processVideo(video));
        });

        outermostMatches('[data-testid="card.wrapper"], [data-testid="card.layoutLarge"]').forEach((card) => {
            const md = processCard(card);
            if (md) {
                cards.push(md);
            }
        });

        const pollEl = articleContainer.querySelector('[data-testid="cardPoll"]');
        if (pollEl) {
            poll = processPoll(pollEl);
        }
    }
    return {
        content: content,
        images: images,
        videos: videos,
        cards: cards,
        poll: poll
    };
}
// ===== 判断元素是否包含代码 =====
/**
 * Heuristically decide whether a <blockquote> holds source code rather than
 * a prose quotation.
 *
 * Only BLOCKQUOTE elements qualify. Text shorter than 200 characters with no
 * "{", "}", ";" and none of a small set of declaration keywords is treated as
 * a quotation. Anything else counts as code when it matches at least one of
 * the code patterns below.
 *
 * @param {Element} element - Candidate element.
 * @returns {boolean} true when the element looks like a code block.
 */
function isCodeBlock(element) {
    if (element.tagName !== 'BLOCKQUOTE') {
        return false;
    }
    const text = element.textContent;

    // Quotation check: short and free of code punctuation and declaration keywords.
    const hasCodePunctuation = ['{', '}', ';'].some((mark) => text.includes(mark));
    const hasDeclarationKeyword = /\b(fn|func|function|def|class|const|let|var|return)\b/.test(text);
    if (text.length < 200 && !hasCodePunctuation && !hasDeclarationKeyword) {
        return false;
    }

    const codePatterns = [
        // common programming keywords
        /\b(fn|func|function|def|class|import|from|const|let|var|if|else|for|while|return|async|await|try|catch|throw|new|this)\b/,
        // brace or semicolon followed by a line break
        /[{};]\s*\n/,
        // type annotation such as (name: Type)
        /\(\s*\w+\s*:\s*\w+\s*\)/,
        // return-type arrow
        /->\s*\w+/,
        // object / struct assignment
        /=\s*\{/,
        // test declaration
        /test\s+"/,
    ];
    for (const pattern of codePatterns) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
// ===== 提取代码块内容 =====
/**
 * Extract the source text of a code block and guess its language.
 *
 * `innerText` is preferred over `textContent` because it keeps the line
 * breaks and indentation produced by CSS layout. Runs of three or more
 * newlines are collapsed to two and the result is trimmed.
 *
 * @param {Element} element - Element holding the code.
 * @returns {{code: string, language: string}} The code and a language tag
 *   for the fence; language is '' when no pattern matches.
 */
function extractCodeBlock(element) {
    const raw = element.innerText || element.textContent;
    const code = raw.replace(/\n{3,}/g, '\n\n').trim();

    // Ordered detectors: the first matching pattern decides the language.
    const detectors = [
        [/\bfn\s+\w+\s*\(/, 'rust'],
        [/\bfunc\s+\w+/, 'go'],
        [/\bdef\s+\w+\s*\(/, 'python'],
        [/\bfunction\s+\w+/, 'javascript'],
        [/\bconst\s+\w+\s*[:=]/, 'typescript'],
        [/\bclass\s+\w+/, 'java'],
        [/#include|#define/, 'c']
    ];
    const matched = detectors.find(([pattern]) => pattern.test(code));
    const language = matched ? matched[1] : '';
    return { code, language };
}
// ===== 获取文章内容 =====
/**
 * Extract the title and Markdown body of a long-form article.
 *
 * The title comes from the first div[dir="auto"] (or, failing that, the
 * first span) inside the article container, then from a list of fallback
 * selectors, and finally defaults to 'X Article'.
 *
 * The body is built from the article's block-level elements: headings become
 * "#"-style headings, code-like block quotes become fenced code, other block
 * quotes become "> " quotes, everything else becomes a paragraph. Texts that
 * repeat, or that look like page chrome (author handle, follow button,
 * counters), are skipped. When that pass yields nothing, every text node
 * under the container is walked with the same filters. Images found in the
 * container are appended at the end.
 *
 * @returns {{title: string, content: string}} Title and Markdown body;
 *   content is '' when the article container is not on the page.
 */
function getArticleContent() {
    log('开始获取长文章内容');
    const articleContainer = document.querySelector('[data-testid="twitterArticleReadView"]');

    // --- title ---
    let title = '';
    if (articleContainer) {
        const titleNode = articleContainer.querySelector('div[dir="auto"]') ||
            articleContainer.querySelector('span');
        if (titleNode) {
            title = titleNode.textContent.trim();
        }
    }
    if (!title) {
        // Fallback title selectors, searched document-wide.
        const titleSelectors = [
            '[data-testid="articleTitle"]',
            'h1[dir="ltr"]',
            'article h1'
        ];
        for (const selector of titleSelectors) {
            const candidate = document.querySelector(selector);
            if (candidate && candidate.textContent.trim()) {
                title = candidate.textContent.trim();
                break;
            }
        }
    }
    if (!articleContainer) {
        log('未找到文章容器');
        return { title: title || 'X Article', content: '' };
    }

    // --- helpers shared by both extraction passes ---
    const seenTexts = new Set();
    const authorUsername = window.location.pathname.match(/^\/([^\/]+)/)?.[1] || '';
    const headingMarkers = new Map([
        ['h1', '# '],
        ['h2', '## '],
        ['h3', '### ']
    ]);
    const nonHeadingPrefixes = ['作者:', '原文:', '来源:', '译者:', '注:'];
    const startsLikeMetadata = (value) => nonHeadingPrefixes.some((prefix) => value.startsWith(prefix));
    // True for author info, follow buttons, bare numbers and engagement counters.
    const isPageChrome = (value) =>
        value === authorUsername ||
        value === '·' ||
        value.startsWith('点击 关注') ||
        value === '关注' ||
        /^\d+$/.test(value) ||
        /^\d+,\d+$/.test(value) ||
        /^\d+\s*(回复|转帖|喜欢|查看|书签)/.test(value);

    const pieces = [];

    // --- pass 1: block-level elements ---
    // Article blocks use the longform-* classes; heading elements are included too.
    const blockElements = articleContainer.querySelectorAll('div.longform-unstyled, blockquote.longform-blockquote, h1.longform-header-one, h2.longform-header-two, h3.longform-header-three, [data-testid="articleBody"] > div');
    for (const [index, el] of Array.from(blockElements).entries()) {
        const text = el.textContent.trim();
        // The first block usually repeats the title.
        if (index === 0 && text === title) {
            continue;
        }
        if (!text) {
            continue;
        }
        if (seenTexts.has(text)) {
            continue;
        }
        seenTexts.add(text);
        if (isPageChrome(text)) {
            continue;
        }
        if (isCodeBlock(el)) {
            const { code, language } = extractCodeBlock(el);
            if (code) {
                pieces.push('```' + language + '\n' + code + '\n```\n\n');
            }
            continue;
        }
        const tagName = el.tagName.toLowerCase();
        const marker = headingMarkers.get(tagName);
        if (marker) {
            pieces.push(marker + text + '\n\n');
            continue;
        }
        if (tagName === 'blockquote') {
            // A block quote that is not code: prefix every line with "> ".
            const quoted = text.split('\n').map((line) => '> ' + line).join('\n');
            pieces.push(quoted + '\n\n');
            continue;
        }
        // A non-heading element carrying the header class is still a section heading.
        const isHeading = !startsLikeMetadata(text) &&
            el.classList.contains('longform-header-two') &&
            text.length < 100;
        pieces.push((isHeading ? '## ' : '') + text + '\n\n');
    }

    // --- pass 2: fall back to walking text nodes when pass 1 found nothing ---
    if (!pieces.join('').trim()) {
        const walker = document.createTreeWalker(
            articleContainer,
            NodeFilter.SHOW_TEXT,
            null,
            false
        );
        let titlePending = true;
        for (let node = walker.nextNode(); node; node = walker.nextNode()) {
            const text = node.textContent.trim();
            if (!text) {
                continue;
            }
            if (seenTexts.has(text)) {
                continue;
            }
            seenTexts.add(text);
            if (titlePending && text === title) {
                titlePending = false;
                continue;
            }
            if (isPageChrome(text)) {
                continue;
            }
            const parentEl = node.parentElement;
            const parentTag = parentEl ? parentEl.tagName.toLowerCase() : '';
            const marker = headingMarkers.get(parentTag);
            if (marker) {
                pieces.push(marker + text + '\n\n');
                continue;
            }
            // Stricter heuristic here: short, contains a parenthesis, and the
            // parent carries the header class.
            const isHeading = !startsLikeMetadata(text) &&
                text.length < 80 &&
                (text.includes('(') || text.includes(')')) &&
                parentEl && parentEl.classList.contains('longform-header-two');
            pieces.push((isHeading ? '## ' : '') + text + '\n\n');
        }
    }

    // --- images are appended after the text ---
    const images = Array.from(articleContainer.querySelectorAll('img[src*="pbs.twimg.com"]'))
        .map((img) => processImage(img))
        .filter((md) => md);
    if (images.length > 0) {
        pieces.push('\n' + images.join('\n\n') + '\n\n');
    }

    return {
        title: title || 'X Article',
        content: pieces.join('')
    };
}
// ===== 清理Markdown =====
/**
 * Normalise whitespace in generated Markdown.
 *
 * Runs of three or more newlines are collapsed to two, leading/trailing
 * whitespace is removed from every line, runs of spaces are collapsed to one,
 * and the result is stripped of leading/trailing blank lines and terminated
 * with two newlines.
 *
 * Fenced code blocks (a line starting with ``` up to the next line that is
 * only ```) are exempt from the per-line trimming and space collapsing.
 * Previously those steps ran over the whole document and destroyed the
 * indentation of extracted code.
 *
 * @param {string} markdown - Raw Markdown.
 * @returns {string} Cleaned Markdown ending in "\n\n".
 */
function cleanMarkdown(markdown) {
    // The capturing group makes split() keep the fenced blocks, at odd indices.
    const fencedBlock = /(^```[^\n]*\n[\s\S]*?^```[ \t]*$)/m;
    const tidyProse = (segment) => segment
        .replace(/^[ \t]+|[ \t]+$/gm, '')
        .replace(/ {2,}/g, ' ');

    const body = markdown
        .replace(/\n{3,}/g, '\n\n')
        .split(fencedBlock)
        .map((segment, index) => (index % 2 === 1 ? segment : tidyProse(segment)))
        .join('');

    return body
        .replace(/^\n+/, '')
        .replace(/\n+$/, '') + '\n\n';
}
// ===== 转换Markdown =====
/**
 * Click handler: convert the current tweet or article to Markdown and start
 * the download.
 *
 * The document starts with YAML front matter (title for articles, author,
 * date when known, source, url), followed by the body and a footer linking
 * back to the original. The user is alerted when the page type is unknown,
 * when no content is found, or when conversion throws.
 *
 * Fixes over the previous version:
 *   - title and author are written as properly escaped double-quoted YAML
 *     scalars (JSON string syntax), so a `"` or `\` in them no longer breaks
 *     the front matter;
 *   - an article with an empty body now triggers the "content not found"
 *     alert; the old `!articleData` check could never fire because
 *     getArticleContent() always returns an object.
 *
 * @returns {Promise<void>} Resolves when the conversion attempt has finished.
 */
async function convertToMarkdown() {
    log('开始转换');
    try {
        const pageType = getPageType();
        const author = getAuthorInfo();
        const publishTime = getPublishTime();
        log('页面类型:', pageType);
        log('作者信息:', author);
        if (!pageType) {
            alert('未能识别页面类型');
            return;
        }

        const pageUrl = window.location.href;
        const pagePath = window.location.pathname;
        const handle = author.username?.replace('@', '') || 'unknown';
        const authorLabel = `${author.name}${author.username ? ' (' + author.username + ')' : ''}`;
        // A JSON string literal is also a valid YAML double-quoted scalar.
        const yamlQuoted = (value) => JSON.stringify(String(value));
        // Build the front matter; `title` is omitted when null.
        const buildFrontMatter = (source, title) => {
            const lines = ['---'];
            if (title !== null) {
                lines.push(`title: ${yamlQuoted(title)}`);
            }
            lines.push(`author: ${yamlQuoted(authorLabel)}`);
            if (publishTime) {
                lines.push(`date: ${publishTime}`);
            }
            lines.push(`source: "${source}"`);
            lines.push(`url: ${pageUrl}`);
            lines.push('---');
            return lines.join('\n') + '\n\n';
        };

        let markdown = '';
        let filename = '';
        if (pageType === 'tweet') {
            const tweetData = getTweetContent();
            if (!tweetData) {
                alert('未找到推文内容,请确保页面已完全加载。');
                return;
            }
            markdown += buildFrontMatter('X (Twitter)', null);
            markdown += tweetData.content + '\n\n';
            if (tweetData.images.length > 0) {
                markdown += tweetData.images.join('\n\n') + '\n\n';
            }
            if (tweetData.videos.length > 0) {
                markdown += tweetData.videos.join('\n\n') + '\n\n';
            }
            if (tweetData.poll) {
                markdown += tweetData.poll + '\n';
            }
            if (tweetData.cards.length > 0) {
                markdown += tweetData.cards.join('') + '\n';
            }
            markdown += '---\n';
            markdown += `*原文发布于 X (Twitter):${pageUrl}*\n`;
            const tweetId = pagePath.match(/status\/(\d+)/)?.[1] || '';
            filename = `tweet-${handle}-${tweetId}`;
        } else {
            const articleData = getArticleContent();
            // Nothing was extracted (e.g. the page has not finished rendering).
            if (!articleData?.content?.trim()) {
                alert('未找到文章内容,请确保页面已完全加载。');
                return;
            }
            const title = articleData.title || 'X Article';
            markdown += buildFrontMatter('X (Twitter) Articles', title);
            markdown += `# ${title}\n\n`;
            markdown += articleData.content;
            markdown += '\n---\n';
            markdown += `*原文发布于 X (Twitter) Articles:${pageUrl}*\n`;
            const articleId = pagePath.match(/article\/(\d+)/)?.[1] || '';
            filename = `article-${handle}-${articleId}`;
        }
        markdown = cleanMarkdown(markdown);
        log('转换完成');
        downloadMarkdown(markdown, filename);
    } catch (err) {
        error('转换失败:', err);
        alert('转换失败: ' + err.message);
    }
}
// ===== 下载Markdown文件 =====
/**
 * Offer `content` to the user as a downloadable ".md" file.
 *
 * Characters that are invalid in file names are replaced with "_" and the
 * base name is capped at 100 characters. The download is started by clicking
 * a temporary anchor that points at a Blob object URL.
 *
 * The object URL is released after a delay rather than immediately after
 * click(): revoking it synchronously can cancel the download in browsers
 * that begin fetching the blob asynchronously. The temporary anchor is
 * removed even when click() throws.
 *
 * @param {string} content - Markdown text to save.
 * @param {string} filename - Base file name, without extension.
 */
function downloadMarkdown(content, filename) {
    const REVOKE_DELAY_MS = 10000;
    const cleanFilename = filename.replace(/[<>:"/\\|?*]/g, '_').substring(0, 100);
    const blob = new Blob([content], { type: 'text/markdown;charset=utf-8' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = `${cleanFilename}.md`;
    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Give the browser time to start reading the blob before releasing it.
        setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
    }
    showSuccessMessage(`已成功下载为:${cleanFilename}.md`);
}
// ===== 显示成功消息 =====
/**
 * Show a transient notification below the download button.
 * The notification is removed after three seconds if it is still attached.
 *
 * @param {string} message - Text to display.
 */
function showSuccessMessage(message) {
    const toast = document.createElement('div');
    toast.textContent = message;
    toast.style.cssText = [
        '',
        'position: fixed;',
        'top: 140px;',
        'right: 20px;',
        'z-index: 100000;',
        'background: #1d9bf0;',
        'color: white;',
        'padding: 15px 20px;',
        'border-radius: 9999px;',
        'box-shadow: 0 2px 10px rgba(0,0,0,0.2);',
        'font-size: 14px;',
        'font-weight: 600;',
        'max-width: 300px;',
        'word-wrap: break-word;',
        'font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;',
        ''
    ].join('\n');
    document.body.appendChild(toast);

    const dismiss = () => {
        const host = toast.parentNode;
        if (host) {
            host.removeChild(toast);
        }
    };
    setTimeout(dismiss, 3000);
}
// ===== 初始化 =====
// Number of button-creation attempts made since the last reset.
let initAttempts = 0;

/**
 * Try to create the download button, scheduling another attempt after
 * CONFIG.RETRY_DELAY milliseconds when creation fails and fewer than
 * CONFIG.MAX_RETRIES attempts have been made.
 */
function tryInit() {
    initAttempts += 1;
    log(`初始化尝试 ${initAttempts}/${CONFIG.MAX_RETRIES}`);
    if (createDownloadButton()) {
        return;
    }
    if (initAttempts >= CONFIG.MAX_RETRIES) {
        return;
    }
    setTimeout(tryInit, CONFIG.RETRY_DELAY);
}
/**
 * Start the script: schedule the first button creation and watch for
 * in-page (SPA) navigation.
 *
 * The site changes URL without reloading the page, so DOM mutations are
 * observed and the URL is compared on every change. When it differs, the old
 * button is removed and, only if the new URL is a supported page type, the
 * retry counter is reset and creation is scheduled again.
 *
 * Fixes over the previous version:
 *   - the button was re-created after every URL change, including on pages
 *     the script cannot convert; it is now removed there and left out;
 *   - the observer falls back to document.documentElement when document.body
 *     is not available, instead of throwing.
 */
function init() {
    log('脚本初始化,版本 1.1.4');
    log('当前URL:', window.location.href);
    log('页面类型:', getPageType());

    // First attempt, delayed so the page has time to render.
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            setTimeout(tryInit, 1000);
        });
    } else {
        setTimeout(tryInit, 1000);
    }

    // Watch for URL changes caused by client-side navigation.
    let lastUrl = window.location.href;
    const observer = new MutationObserver(() => {
        const currentUrl = window.location.href;
        if (currentUrl === lastUrl) {
            return;
        }
        lastUrl = currentUrl;
        const oldBtn = document.getElementById(CONFIG.BUTTON_ID);
        if (oldBtn) {
            oldBtn.remove();
        }
        if (!getPageType()) {
            log('当前页面不受支持,已移除按钮');
            return;
        }
        log('检测到页面变化,重新创建按钮');
        initAttempts = 0;
        setTimeout(tryInit, 1000);
    });
    observer.observe(document.body || document.documentElement, { childList: true, subtree: true });
}
// 启动
// Run init() right away when the document has already been parsed;
// otherwise wait for DOMContentLoaded.
if (document.readyState !== 'loading') {
    init();
} else {
    document.addEventListener('DOMContentLoaded', init);
}
})();