知乎用户排行榜生成器0.2版源码

这是我在知乎专栏发表的《知乎用户排行榜生成器0.2版》一文的源码。为了保证专栏文章的可读性,那边只放了压缩版;这里是完整版,有兴趣者可以自行调试修改。

这段脚本需要登录知乎网站后在浏览器控制台运行,更详细的用法请点上面链接查看。

// Comma-separated list of Zhihu user ids whose followee pages will be scanned.
var userarray = "guxizhao,zou-dao-kou,xiaodaoren,cai-tong,xu-xiang-nan,unogzx,shenbin,PeterDeng,namiheike,wu-si-yang-32,yskin,jixin";
// Answer-count threshold a listed user must reach.
var answerlimit = 10;
// Upvote-count threshold a listed user must reach.
var agreelimit = 1000;
// Upvotes-per-answer ratio threshold a listed user must reach.
var ratiolimit = 10;
// Follower-count threshold for a listed user.
var followerlimit = 10;

// `var` is kept on purpose: the script is pasted into the browser console and
// these names are shared as globals by every function below.
// Entries are trimmed and blanks dropped so that "a, b," still yields valid ids.
var users = userarray.split(',')
    .map(function (id) { return id.trim(); })
    .filter(function (id) { return id.length > 0; });
// Index into `users` of the user currently being scanned.
var usercursor = 0;
// Accumulated records of users that passed the thresholds.
var result = [];
// true: render results as an HTML table; false: comma-separated lines.
var showtable = true;
// Number of profile cards counted so far for the current user.
var cardcount = 0;
17 
/**
 * Replaces the content of the #msg status bar.
 *
 * @param {string} msg HTML fragment to show; callers in this script pass
 *     progress text that may include an <img> spinner tag.
 */
function showmsg(msg) {
    var statusbar = $("#msg");
    statusbar.html(msg);
}
/**
 * Renders the global `result` list into the #result panel.
 *
 * When the global `showtable` is true the list is drawn as an HTML table,
 * otherwise as comma-separated lines joined with <br/>. It also makes the
 * view-toggle button and the sort selector visible.
 *
 * NOTE(review): `name` and `id` are inserted as raw HTML; they are scraped
 * from the profile page, so this presumably relies on that markup being safe.
 */
function showresult() {
    $("#switchshowtable").show(0);
    $("#sorttype").show(0);
    var rsdiv = $("#result");
    var parts = [];
    // Declared locally: the old `for (i in result)` leaked a global `i`.
    var i, r, link;

    if (showtable) {
        parts.push("<table border='1' cellpadding='2' style='border-collapse: collapse;'><tr><td>编号</td><td>用户名</td><td>关注者</td><td>提问</td><td>回答</td><td>赞同</td><td>赞同/回答比</td></tr>");
    }
    else {
        parts.push("编号,用户名,关注者,提问,回答,赞同,赞同/回答比");
    }

    for (i = 0; i < result.length; i++) {
        r = result[i];
        link = "<a href='/people/" + r.id + "/' target='_blank'>" + r.name + "</a>";
        if (showtable) {
            parts.push("<tr><td>" + (i + 1) + "</td><td>" + link + "</td><td>" + r.follower + "</td><td>" + r.ask + "</td><td>" + r.answer + "</td><td>" + r.agree + "</td><td>" + r.ratio + "</td></tr>");
        }
        else {
            parts.push("<br/>" + (i + 1) + "," + link + "," + r.follower + "," + r.ask + "," + r.answer + "," + r.agree + "," + r.ratio);
        }
    }

    if (showtable) {
        parts.push("</table>");
    }
    // One DOM write instead of one append per row.
    rsdiv.html(parts.join(""));
}
38 
/**
 * Expands the followee list loaded in the hidden #tempframe iframe.
 *
 * While a "more" button is present it is clicked, progress is shown, and the
 * function re-schedules itself 2 seconds later. Once the button is gone, the
 * completion message is shown and showratio() is called.
 *
 * The global `cardcount` is refreshed on every pass. Previously it was only
 * updated in the "more" branch, so the completion message showed a stale
 * count (or the previous user's count when no button ever appeared).
 */
function loadmore() {
    var content = $("#tempframe").contents();
    var name = content.find(".title-section.ellipsis a").html();
    var morebutton = content.find('.zu-button-more[aria-role]');
    cardcount = content.find('.zh-general-list .zm-profile-card .zm-list-content-medium').length;

    if (morebutton.length < 1) {
        showmsg(name + "的" + cardcount + "个关注者加载完成");
        showratio();
        return;
    }

    morebutton.get(0).click();
    var total = content.find(".zm-profile-side-following strong").html();
    showmsg("正在加载" + name + "的关注者:" + cardcount + "/" + total + "... <img style='vertical-align: text-bottom;' src='http://static.zhihu.com/static/img/spinner/grey-loading.gif'/>");
    // Poll again after giving the page time to fetch the next batch.
    setTimeout(loadmore, 2000);
}
54 
/**
 * Reads every profile card in the hidden #tempframe iframe, keeps the users
 * that pass the global thresholds, then re-sorts and re-renders the results
 * and moves on to the next user via loaduser().
 *
 * Each card is expected to contain an `a.zg-link` profile link and a
 * `.details` element whose first four children start with a number followed
 * by a space (followers, questions, answers, upvotes, in that order).
 */
function showratio() {
    var cards = $("#tempframe").contents().find('.zh-general-list .zm-profile-card .zm-list-content-medium');
    cards.each(function () {
        var link = $(this).find('a.zg-link');
        var href = link.attr("href");
        // A card without a profile link cannot be identified; skip it rather
        // than throwing and aborting the scan of the remaining cards.
        if (!href) {
            return;
        }
        var name = link.html();
        // Accept absolute http/https links as well as site-relative ones, and
        // drop any trailing slash so the id can be re-used in "/people/<id>/".
        var id = href
            .replace(/^(?:https?:\/\/www\.zhihu\.com)?\/people\//, "")
            .replace(/\/+$/, "");
        var stats = $(this).find('.details').eq(0).children();
        var readstat = function (index) {
            return Number(stats.eq(index).html().split(' ')[0]);
        };
        var follower = readstat(0);
        var ask = readstat(1);
        var answer = readstat(2);
        var agree = readstat(3);
        // NOTE(review): followers use a strict ">" while the other thresholds
        // use ">="; kept as-is because it may be intentional.
        if (answer >= answerlimit && agree >= agreelimit && agree / answer >= ratiolimit && follower > followerlimit) {
            addresult({
                name: name,
                id: id,
                follower: follower,
                ask: ask,
                agree: agree,
                answer: answer,
                // Stored as a two-decimal string for display.
                ratio: (agree / answer).toFixed(2)
            });
        }
    });
    sortresult();
    showresult();
    usercursor++;
    loaduser();
}
82 
/**
 * Starts scanning the user at `users[usercursor]` by pointing the hidden
 * #tempframe iframe at that user's followees page. When every user has been
 * processed, it shows the final summary message instead.
 */
function loaduser() {
    if (usercursor >= users.length) {
        showmsg("所有" + users.length + "名用户的关注者已经全部扫描完成,共找到" + result.length + "个符合条件的用户");
        return;
    }
    showmsg("共" + users.length + "个用户,准备扫描第" + (usercursor + 1) + "个... <img style='vertical-align: text-bottom;' src='http://static.zhihu.com/static/img/spinner/grey-loading.gif'/>");
    // Trim and encode the id so that stray spaces in the hand-edited user
    // list (e.g. "a, b") cannot produce a broken profile URL.
    var userid = encodeURIComponent(String(users[usercursor]).trim());
    $("#tempframe").attr("src", "/people/" + userid + "/followees");
}
92 
/**
 * Appends a user record to the global `result` list unless a record with the
 * same `id` is already present.
 *
 * @param {Object} r User record; only its `id` is inspected here.
 */
function addresult(r) {
    // Array#some replaces the old `for (i in result)` loop, which leaked a
    // global `i` and iterated the array with for...in.
    var exists = result.some(function (item) {
        return item.id === r.id;
    });
    if (!exists) {
        result.push(r);
    }
}
98 
/**
 * Sorts the global `result` list in descending order by the field currently
 * selected in the #sorttype dropdown. Unknown selections and an empty list
 * leave `result` untouched.
 */
function sortresult() {
    if (!(result.length > 0)) {
        return;
    }
    var sortablefields = ["ratio", "agree", "answer", "ask", "follower"];
    var field = $("#sorttype").val();
    // indexOf compares strictly, like the switch it replaces.
    if (sortablefields.indexOf(field) === -1) {
        return;
    }
    result = result.sort(function (a, b) {
        return b[field] - a[field];
    });
}
123 
// Build the overlay UI, wire up its controls and start scanning the first user.

// Remove an overlay left by a previous run of this script; otherwise a second
// paste into the console would stack another #mask with duplicate element ids.
$("#mask").remove();
$("body").prepend(
    '<div id="mask" style="width:100%;height:100%;top:0px;left:0px;position:fixed;z-index: 998;background-color: rgba(0, 0, 0, 0.4);text-align:center;">' +
    '<div id="container" style="width:600px;height:400px;margin:80px auto 0px auto;position: relative;z-index: 999; padding: 5px;">' +
    '<iframe id="tempframe" style="width:1px;height:1px;top:-999px;left:-999px;position:absolute;"></iframe>' +
    '<div id="msg" style="height: 30px;background-color: #C4D299;line-height: 30px;text-align: left;padding-left: 5px;"></div>' +
    '<div id="result" style="height: 350px;background-color: #F0F0F0;text-align: left;padding: 5px;margin-top: 5px;overflow-y: auto;"></div>' +
    '<input id="switchshowtable" style="display:none;position: absolute;width: 100px;top: 10px;right: 25px;" type="button" value="改为逗号分隔"/>' +
    '<select id="sorttype" name="sorttype" style="display:none;position: absolute;width: 100px;top: 45px;right: 25px;">' +
    '<option value="ratio" selected>赞同/回答比</option>' +
    '<option value="agree">赞同</option>' +
    '<option value="answer">回答</option>' +
    '<option value="ask">提问</option>' +
    '<option value="follower">关注</option>' +
    '</select></div></div>'
);
// Toggle between table view and comma-separated view, then redraw.
$("#switchshowtable").on("click", function () { showtable = !showtable; $(this).val(showtable ? "改为逗号分隔" : "改为表格显示"); showresult(); });
// Re-sort and redraw whenever a different sort field is chosen.
$("#sorttype").on("change", function () { sortresult(); showresult(); });
// .on("load", ...) is used because the .load(handler) event shorthand was
// removed in jQuery 3. Each page load in the iframe starts the expand loop.
$("#tempframe").on("load", function () { loadmore(); });
loaduser();
+1

6 comments

  1. zihaolucky says:

    请问楼主这个抓取的结果如何保存为文本文件呢? 我试过用Scrapy抓取,不过还没解决cookie和“更多”的问题 > <

    0
    • 苏莉安 says:

      这个是网页js脚本,当然不存在cookie的问题了,而“更多”是找到按钮的dom对象然后直接click实现的,也没有保存为文本,而是写到页面div里。

      我不懂Python,你先解决cookie,然后抓包看“更多”访问的是哪个url、带哪些参数就行了。

      0
      • zihaolucky says:

        嗯、谢谢提供思路。我们已在你的代码基础上做了一点改动,可以抓到不少信息了。不过为了性能以及灵活性,还是得把Python的问题解决才行。

        +1
  2. 匿名 says:

    楼主您好,我最近在学习js。您的loadmore函数中用到的 .title-section.ellipsis 和 .zu-button-more[aria-role],我没有在知乎网页源代码中找到,请问这是如何获得的呢?谢谢。

    0
    • 苏莉安 says:

      这段脚本是两个月前写的,最近知乎的网页有改版,一些元素的位置和名称变了。
      title-section.ellipsis是为了找用户名,.zu-button-more是为了找“更多”按钮。你可以自己来找一下。

      0

回复 zihaolucky 取消回复

您的邮箱地址不会被公开。 必填项已用 * 标注