// Greasy Fork is available in English.

// 贴子脱水工具

// 百度贴吧,天涯论坛,豆瓣小组等的贴子脱水工具(浏览的文章版权归原作者所有)

// --------------------------------------------------------------------
//
// ==UserScript==
// @name          贴子脱水工具
// @namespace     https://github.com/kingems/PaperGather
// @version       0.1.8
// @author        kingem(kingem@126.com)
// @description   百度贴吧,天涯论坛,豆瓣小组等的贴子脱水工具(浏览的文章版权归原作者所有)
// @grant         GM_addStyle
// @require       https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js
// @include       *://tieba.baidu.com/p/*
// @include       *://*/thread-*-*-*.html
// @include       *://bbs.tianya.cn/post-*-*-*.shtml
// @include       *://www.douban.com/group/*
// @run-at        document-end
// @homepageURL    https://greasyfork.org/scripts/32466/
// ==/UserScript==
//
// --------------------------------------------------------------------
(function(){

var Rule = {

};

Rule.specialSite = [
    {siteName : 'tieba',
        url : '.*?://tieba\\.baidu\\.com/p/\\d+',
        //获取banner位置
        banner_path: 'div.p_thread',
        //获取楼层位置
        floor_path:'div.l_post',
        //获取页面编码格式
        page_charset:function(){
            var h = $('head').html();
            var h_m = h.match(/charset=\"(.*?)\">/);
            if(!h_m) return 'gb2312';
            return h_m[1];
        },
        //获取总页数
        get_page_num:function(data){
        	var pg = $('li.l_reply_num');
        	if (data) pg=$(data).find('li.l_reply_num');
            if(!pg) return;
            var num = pg.eq(0).html().replace(/<[^>]*>/g,'');
            var num_m = num.match(/共(\d+)页/);
            if(!num_m) return 1;
            return num_m[1];
        },
        //获取标题
        get_topic_name:function(){
            return $('title').text();
        },
        //获得第1页网址
        format_thread_url_1st:function(url,islz){
            url = url.replace(/[\?\#].*$/, '');
            if (islz) url += '?see_lz=1';
            return url;
        },
        // 格式化第i页网址
        format_thread_url_ith:function(url,i){
            var j = i.toString();
            if(url.match(/\?see_lz=1/)){
                n_url = url.concat('&pn='+j);
            }else {
                n_url = url.replace(/$/, '?pn='+j);
            }
            return n_url;
        },
        //提取楼层主要信息
        extract_floor_info:function(bot,index) {
            var info = bot;
            var re = new Object;
            var ptail = info.attr('data-field');            
            if (!ptail) {
                return null;
            };
            re["poster"] = info.find('li.d_name').text(); //作者
            re["id"] = ptail.match(/"post_no":(.*?),/)[1]; //楼层
            if (ptail.match(/"date":"(.*?)"/)){
                re["time"] = ptail.match(/"date":"(.*?)"/)[1];//时间                
            }else{
                re["time"] =info.find('span.tail-info').text().split('楼')[1];//时间
            }
            re["content"] = info.find('div.d_post_content').html().
                replace(/<\/?font[^>]*>/g, '');  //内容
            re["word_num"] = re["content"].replace('<[^>]+>','').length; // 字数
            return re;
        },
    },
    {siteName : 'tianya',
        url : '.*?://bbs\\.tianya\\.cn/post-.*?-.*?-\\d+\\.shtml',
        //获取banner位置
        banner_path: 'div.atl-menu',
        //获取楼层位置
        floor_path:'div.atl-content',
        //获取页面编码格式
        page_charset:function(){
            var h = $('head').html();
            var h_m = h.match(/charset=\"(.*?)\"/);
            if(!h_m) return 'utf-8';
            return h_m[1];
        },
        //获取总页数
        get_page_num:function(data){
            var pg = $('div.atl-pages');
            if (data) pg = $(data).find('div.atl-pages');
            if(!pg) return;
            var num = pg.eq(0).html().replace(/<[^>]*>/g,' ').replace(/\s+/g,' ');
            var num_m = num.match(/(\d+) 下页/);
            if(!num_m) return 1;
            return num_m[1];
        },
        //获取标题
        get_topic_name:function(){
            return $('h1').text();
        },
        //获得第1页网址
        format_thread_url_1st:function(url,islz){
            return url.replace(/\d+.shtml/, '1.shtml');
        },
        // 格式化第i页网址
        format_thread_url_ith:function(url,i){
            var j = i.toString();
            var n_url = url.replace(/\d+.shtml/, j+'.shtml');
            return n_url;
        },
        //提取楼层主要信息
        extract_floor_info:function(bot,index) {
            var info = bot.parent();
            var re = new Object;
            re["content"] = info.find('div.atl-content').html().
                replace(/<\/?font[^>]*>/g, '').replace(/<div class="host-data">(\s*.*)*<\/div>/,'').replace(/<div[^>]+class="atl-reply(\s*.*)*<\/div>/,'');
            re["word_num"] = re["content"].replace('<[^>]+>','').length;

            var atinfo = info.find('.atl-info').text();
            if (!atinfo) {
                atinfo = info.parent().prev().find('.atl-info').text();
                re["id"] = 1;
            }else{
                re["id"] = parseInt(info.attr('id'))+1;
            }
            re["poster"] = atinfo.match(/[楼主作者]{2}:(.*?)\s*时间/)[1];
            re["time"] = atinfo.match(/时间:([\d\s-:]+)/)[1];
            return re;
        },
    },
    {siteName : 'douban',
        url : '.*?://www\\.douban\\.com/group/.*?',
        //获取banner位置
        banner_path: 'div.article',
        //获取楼层位置
        floor_path:'div.user-face',
        //获取页面编码格式
        page_charset:function(){
            var h = $('head').html();
            var h_m = h.match(/charset=\"(.*?)\"/);
            if(!h_m) return 'utf-8';
            return h_m[1];
        },
        //获取总页数
        get_page_num:function(data){
            var pg = $('.thispage');
            if (data) pg=$(data).find('.thispage');
            if(!pg) return 1;
            return pg.eq(0).attr('data-total-page');
        },
        //获取标题
        get_topic_name:function(){
            return $('h1').text();
        },
        //获得第1页网址
        format_thread_url_1st:function(url,islz){
            url = url.replace(/\?start.*$/,'').replace(/[\#\&].*$/, '');
            if(islz) url += '?author=1';
            return url;
        },
        // 格式化第i页网址
        format_thread_url_ith:function(url,i){
            var j = (i-1).toString();
            var n_url = url;
            if(url.match(/\?author=1/)){
                n_url = url.concat('&start='+j+'00');
            }else {
                n_url = url.replace(/$/, '?start='+j+'00');
            }
            return n_url;
        },
        //提取楼层主要信息
        extract_floor_info:function(bot,index) {
            var info = bot;
            var re = new Object;
            re["id"] = index;
            re["poster"] = info.find('img.pil').attr('alt');
            re["time"] = info.next().text().match(/(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})/)[1];
            re["content"] = info.next().html().replace(/<h[34]>\s*.*\s*.*\s*<\/h[34]>/,'').replace(/<\/?font[^>]*>/g, '').replace(/<a[^>]+class=".*lnk.*<\/a>/g,'');
            re["word_num"] = re["content"].replace('<[^>]+>','').length;
            return re;
        },
    },
    {siteName : 'discuz',
        url : '.*?://.*?/thread-.*?-.*?-1\.html',
        //获取banner位置
        banner_path: 'div#pt',
        //获取楼层位置
        floor_path:'div#postlist table.plhin',
        //获取页面编码格式
        page_charset:function(){
            var h = $('head').html();
            var h_m = h.match(/charset=\"(.*?)\"/);
            if(!h_m) return 'gb2312';
            return h_m[1];
        },
        //获取总页数
        get_page_num:function(data){
            var pg = $('div#pgt');
            if (data) pg=$(data).find('div#pgt');
            if(!pg) return;
            var num = pg.eq(0).html().replace(/<[^>]*>/g,' ');
            var num_m = num.match(/[\D\s]*(\d+)\s*(页|下一页)/);
            if(!num_m) return;
            return num_m[1];
        },
        //获取标题
        get_topic_name:function(){
            return $('#thread_subject').text();
        },
        //获得第1页网址
        format_thread_url_1st:function(url,islz){
            url = url.replace(/\/thread-(\d+)-\d+-1.html$/, '/forum.php?mod=viewthread&tid=$1&page=1');
            url = url.replace(/#.*$/, '').replace(/&extra=[^&]*/,'').replace(/&page=\d+/, '&page=1');

            if(! url.match(/&page=\d+/)){
                url = url.concat('&page=1');
            }

            return url;
        },
        // 格式化第i页网址
        format_thread_url_ith:function(url,i){
            var j = i.toString();
            var n_url = url.replace(/&page=\d+/, '&page='+ j);
            return n_url;
        },
        //提取楼层主要信息
        extract_floor_info:function(bot,index) {
            var info = bot.parent();
            var re = new Object;
            re["poster"] = info.find('div.authi').eq(0).text();
            re["time"] = info.find('div.authi em').text().replace('发表于','');
            re["id"] = index;
            re["content"] = info.find('td.t_f').html().
                replace(/<[^>]+style="display:none"[^>]*>[^<]+<[^>]+>/g, '').
                replace(/<[^>]+class="jammer"[^>]*>[^<]+<[^>]+>/g, '').
                replace(/<\/?font[^>]*>/g, '');
            re["word_num"] = re["content"].replace('<[^>]+>','').length;
            return re;
        },
    },
];

// 通用函数
// 获取特殊规则
function getCurSiteInfo() {
    var rules = Rule.specialSite;
    var locationHref = location.href;
    var info = {};
    for (var i in rules) {
        var x = rules[i];
        var url = new RegExp(x.url, 'ig');
        if (url.test(locationHref)){
            info = x;
            break;
        }
    }
    return info;
};
//添加选项框
function add_dewater_banner(xp) {
    $dewater_div = $('\
                     <div id="dewater_div_form" style="align:center;background: #cad6e1;">\
                     <center>\
                     显示<input id="min_page_num" name="min_page_num" size="5" value=""/>页至\
                     <input id="max_page_num" name="max_page_num" size="5" value=""/>页的内容,\
                     且只显示<input id="set_min_floor" name="set_min_floor" size="5" value=""/>楼至\
                     <input id="set_max_floor" name="set_max_floor" size="5" value=""/>楼的内容,\
                     每楼最少<input id="min_word_num" name="min_word_num" size="5" value=""/>字,\
                     <input type="checkbox" id="only_poster" name="only_poster" checked />只看楼主,\
                     <input type="submit" id="dewater_btn" value="脱水"/>\
                     </center></div>');
    $(xp).eq(0).before($dewater_div);
    $('#dewater_btn').click(function(){
        var option = get_dewater_option();
        if (option.set_min_floor && option.set_max_floor && option.set_min_floor> option.set_max_floor) {
            alert("显示楼层数填写错误");
            return;
        }
        var main_floors = get_thread_floors(option);
        option.poster = get_topic_poster(main_floors);

        var topic=set_topic('#dewater_title');
        set_dewater_head(topic);

        $('#dewater_floors').html('');
        add_floor_onpage(main_floors,option);
        $('body').html($('#dewater_div').html());
        set_btn_click();
        set_top_btn();

    });

    $main_floors = $('\
                      <div id="dewater_div">\
                      <div id="dewater_title"></div>\
                      <div id="dewater_floors"></div></div>');
    $(xp).before($main_floors);

};
//设置标题
function set_topic(dst) {
    var tp = site.get_topic_name() ;
    var c = '<a href="' + get_topic_url() + '" id="backurl"><center><font color=red>' +tp+ '</font></center></a>'+
            '<div id="dewater_div_form1"><center>\
            前往<input id="go_floor_num" name="go_floor_num" size="5" />楼, \
            <input type="submit" id="go_floor_btn" value="go" /><br>\
            <input type="submit" id="goback_btn" value="返回原网页" />\
            <input type="submit" id="save_txt_btn" value="存为txt" />\
            <input type="submit" id="save_html_btn" value="存为html" />\
            </center></div>'+
            '<hr width=80% color=blue>';

    $(dst).html(c);
    return tp;
};
//添加按钮点击事件
function set_btn_click() {
    $('#go_floor_btn').click(function(){
        var gofloornum = $("#go_floor_num")[0].value;
        if (gofloornum) {
            var url=site.format_thread_url_1st(location.href,false);
            location.href = url+"#floor"+gofloornum;
        }
    });
    $('#goback_btn').click(function(){
        location.href = $('#backurl').attr('href');
    });
    $('#flip_btn').click(function(){
        $('.floorhide').each(function(index){
            if (index<30) {
                $(this).show();
                $(this).attr('class','floor');
            }
        });
        if ($('.floorhide').length < 1)  $('#flip_btn').remove();
    });
    $('#save_txt_btn').click(function(){
        var chapters = [];
        var fileobj = $('#backurl');
        var fileName = fileobj.text();
        var fileUrl = fileobj.attr('href');
        chapters.push(fileName);
        chapters.push(fileUrl);
        $('.floor').each(function(){
            var s=$(this).html();
            var values = s.replace(/&nbsp;/g,'').replace(/\s+/g,' ').replace(/<br>/g,'\r\n').replace(/<img.*?>/g,'[我是一张图片]<').replace(/<[^>]*>/g, '');
            chapters.push(values);
        });    
        $('.floorhide').each(function(){
            var s=$(this).html(); 
            var values = s.replace(/&nbsp;/g,'').replace(/\s+/g,' ').replace(/<br>/g,'\r\n').replace(/<img.*?>/g,'[我是一张图片]<').replace(/<[^>]*>/g, '');
            chapters.push(values);
        });    
        saveAs(chapters.join('\r\n-----------------\r\n'),fileName+'.txt');
    });
    $('#save_html_btn').click(function(){
        var fileName = $('#backurl').text();
        var chapters = ['<html>'];

        var htmltext = $('*').html().replace(/class="floorhide"/g,'class=\"floor\"').replace(/<div[^>]class="flip".*<\/div>/,'').replace(/<div[^>]class="topbtn".*<\/div>/,'').replace(/<div id="dewater_div_form1">.*<\/div>/,''); 
        chapters.push(htmltext);
        chapters.push('<\/html>');
        saveAs(chapters.join('\r\n'),fileName+'.html');

    });
}
//保存数据
function saveAs(data, filename) {
    if(!filename) filename = 'console.json';

    if (typeof data == 'object') {
        data = JSON.stringify(data, undefined, 4);
    }

    var blob = new Blob([data], { type: 'application/octet-stream' });
    var url = window.URL.createObjectURL(blob);
    var saveas = document.createElement('a');
    saveas.href = url;
    saveas.style.display = 'none';
    document.body.appendChild(saveas);
    saveas.download = filename;
    saveas.click();
    setTimeout(function() {
        saveas.parentNode.removeChild(saveas);
    }, 1000);
    document.addEventListener('unload', function() {
        window.URL.revokeObjectURL(url);
    });
};
//设置新网页头文件
function set_dewater_head(tp) {
    $('head').html(
        '<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />'
        +'<title>'+tp+'</title>'
        +'<style type=text/css> \n'
        +'body {\n text-align:left;\n background-color:e0e0f0;\n font-family: Arial,Verdana, Helvetica, sans-serif;\n line-height: 22px;\n margin:5; \n} \n'
        +'.floor{\n font-size:16px;\n text-align:left;\n width:95%;\n padding:10 20px 10 10px;\n background:#f0f0f0;\n line-height:24px;\n margin:10;\n }\n' 
        +'.floorhide{\n display:none;\n font-size:16px;\n text-align:left;\n width:95%;\n padding:10 20px 10 10px;\n background:#f0f0f0;\n line-height:24px;\n margin:10;\n }\n'       
        +'.zhuan{\n background:#d8e2c8;\n font-size:16px;\n color: black;\n line-height:24px;\n text-decoration:none; \n border:0 !important;\n text-align:left;\n width:100% !important;\n}\n '
        +'.pinglun{\n font-size:14px;\n text-align:left;\n width:100%;\n background:#f0fff0;\n line-height:24px;\n}\n '
        +'.flip{\n width:94%; \n margin:0px;\n padding:5px;\n text-align:center;\n background:#f0e5e5;\n border:solid 1px #c3c3c3;\n}\n '
        +'img{\n max-width:450px;\n width:expression(document.body.clientWidth > 450? \"450px\": \"auto\" );\n overflow:hidden;\n}\n'
        +'a:link{ color: #53A650; \n}\n'
        +'</style>\n'
        );    
};

// 获取自定义设置
function get_dewater_option() {
    var opt = {
        min_page_num: parseInt($("#min_page_num")[0].value),
        max_page_num: parseInt($("#max_page_num")[0].value),        
        set_min_floor: parseInt($("#set_min_floor")[0].value),
        set_max_floor: parseInt($("#set_max_floor")[0].value),
        min_word_num: parseInt($("#min_word_num")[0].value),
        only_poster: $("#only_poster")[0].checked,        
    };
    return opt;
};
//获取楼主信息
function get_topic_poster(main_floors){
    return main_floors[0].poster;
};

//获取当前网页网址
function get_topic_url() {
    return window.location.href;
};
//判断是否跳过此楼层
function is_skip_floor(f, opt) {
    if (opt.only_poster && (f.poster != opt.poster)) return 1;
    if (opt.min_word_num && (f.word_num < opt.min_word_num)) return 1;
    if (opt.set_min_floor && (f.id<opt.set_min_floor)) return 1;
    if (opt.set_max_floor && (f.id>opt.set_max_floor)) return 1;

    return;
};
//在页面中增加楼层
function add_floor_onpage(fs,opt){
    var hide = false;
    var j = 0;    
    for (var i = 0;i<fs.length;i++) {              
        var f = fs[i];     
        if (is_skip_floor(f, opt)) continue;
        if (opt.set_max_floor && (f.id>opt.set_max_floor))  break;      
        if (j>=29) hide = true; 
        add_floor_content('#dewater_floors', f, hide);
        ++j;
    }
    if (hide) {
        var $panels = $('<div class="flip" id="flip_btn" >点击加载更多楼层</div>');
        $('#dewater_floors').append($panels);         
    }
};
//增加楼层内容
function add_floor_content(dst, f, hide) {
    var html='<div class="floor" id="floor' + f.id + '"><font color=#e06080><b>【'+f.id+'】</font></b>作者:'+f.poster+'&emsp;&emsp;&emsp;时间:'+f.time +'<br><div class="zhuan">'+f.content+'</div></div>\n';
    if (hide)  html = '<div class="floorhide" id="floor' + f.id + '"><font color=#e06080><b>【'+f.id+'】</font></b>作者:'+f.poster+'&emsp;&emsp;&emsp;时间:'+f.time +'<br><div class="zhuan">'+f.content+'</div></div>\n';  
    $floor = $(html);
    $(dst).append($floor);
};

//获得要提取楼层的主要信息
function get_thread_floors(option) {
    var main_floors = new Array();
    var select_urls = select_page_urls(option);
    var now_id = 1;
    for (var i in select_urls) {
        var u = select_urls[i];
        var f = get_page_floors(u,i);
        var flen = f.length;
        for (var j = 0; j < flen; j++) {
            if(! f[j].id) f[j].id = now_id;
            var id = f[j].id;
            if (is_push_floor(main_floors, id)==false) continue;
            main_floors.push(f[j]);
            now_id++;
        }
    }
    return main_floors;
};
//获取页面楼层数
function get_page_floors(u,num) {
    var a = parseInt(num)+1;
    var pnum = a.toString();
    $('#dewater_title').html("正在取第" + pnum +"页:" + u);
    var floors_info = new Array();
    var fp = site.floor_path;
    var snum = pnum + ".";
    $.ajax({
        type: "get",
        url: u,
        cache: false,
        async: false,
        beforeSend: function(jqXHR) {
            jqXHR.overrideMimeType('text/html; charset='+ site.page_charset());
        },
        success: function(data) {
            var $resp = $(data);
            var s = $resp.find(fp);
            for (var i = 0; i<s.length;i++){
                var bot = $(s[i]);
                var f_i = site.extract_floor_info(bot,snum+(i+1).toString());
                if (!f_i) continue;
                floors_info.push(f_i);
            }

        }
    });
    return floors_info;
};
//判断是否已经加载此楼层,已经加载返回false
function is_push_floor(floors_info, id){
    var len = floors_info.length;
    if(len<=0) return true;
    var last_id = parseInt(floors_info[len-1].id);
    if(id > last_id) return true;
    return false;
};

//选择需要脱水的网页地址
function select_page_urls(option) {
    var page_urls = get_page_urls(option);

    if (!option.min_page_num && !option.max_page_num) return page_urls;
    var minnum = 1;
    var maxnum = page_urls.length;
    if (option.min_page_num)  minnum = option.min_page_num;
    if (option.max_page_num)  maxnum = option.max_page_num;
    var urls = new Array();
    for (var i= minnum; i<= maxnum; i++) {
        var u = page_urls[i-1];
        urls.push(u);        
    }
    return urls;
};
// 获取所有网页的url
function get_page_urls(option) {
	var num = site.get_page_num('');
    var url = get_topic_url();
    if (option.only_poster) {        
        urls = site.format_thread_url_1st(url,option.only_poster);
        if (urls!=url) {
        	$.ajax({
		        type: "get",
		        url: urls,
		        cache: false,
		        async: false,
		        beforeSend: function(jqXHR) {
		            jqXHR.overrideMimeType('text/html; charset='+ site.page_charset());
		        },
		        success: function(data) {
		            num = site.get_page_num(data);
        		}
    		});
    		url=urls;
        }
    }
    $('#dewater_title').html("共 " + num + " 页");
    if (!num) return [url];    

    var url_list = new Array();
    for (var i = 1; i <= num; i++) {
        var n_url = site.format_thread_url_ith(url, i);
        url_list.push(n_url);
    }
    return url_list;
};
//添加回到顶部按钮
function set_top_btn(){
    GM_addStyle('\
            .topbtn {\
                position: fixed;\
                right: 10px;\
                bottom: 10px;\
                z-index: 2247483648;\
                padding: 20px 5px;\
                width: 50px;\
                height: 20px;\
                line-height: 20px;\
                text-align: center;\
                border: 1px solid;\
                border-color: #888;\
                border-radius: 50%;\
                background: rgba(0,0,0,.5);\
                color: #FFF;\
                font: 12px/1.5 "微软雅黑","宋体",Arial;\
                cursor: pointer;\
            }\
        ');
    $("<div>")
            .addClass("topbtn")
            .html("回到顶部")
            .mousedown(function(){
                location.href += '#top';
            })
            .appendTo('body');
};

// Entry point: pick the rule set matching the current address, then insert the
// option form before that site's banner element. `site` is shared state read
// by the helper functions above (set_topic, get_page_floors, get_page_urls, ...),
// so it must be assigned before any button handler runs.
var site = getCurSiteInfo();
add_dewater_banner(site.banner_path);

})();