通过F12抓包获取XHR数据,发现评论在 get?csrf_token= 请求中,同时发现 params 和 encSecKey 这两个参数是被加密过的

因此,需要找到未加密前的数据,以及如何加密,最后模拟实现加密,请求到网易拿到评论数据

从调用堆栈中,发现相应位置

image-20210713095650997.png

找到程序还未加密的最后一个堆栈位置

image-20210713100049113.png

因此确定,u0x.be1x 为加密步骤,开始分析

u0x.be1x = function(X0x, e0x) { // Request wrapper: X0x is the URL, e0x the options object; rewrites matching /api calls into encrypted /weapi POSTs.
    var i0x = {}, // collects the plaintext request parameters
        e0x = NEJ.X({}, e0x), // presumably a copy of the options so later writes do not touch the caller's object (NEJ.X is not shown here)
        mf4j = X0x.indexOf("?"); // index of the query string, -1 when the URL has none
    if (window.GEnc && /(^|\.com)\/api/.test(X0x) && !(e0x.headers && e0x.headers[et2x.Ct2x] == et2x.Hx4B) && !e0x.noEnc) { // encrypt only when GEnc is set, the URL is an /api path, a specific header value is absent, and noEnc is not set
        if (mf4j != -1) {
            i0x = j0x.hg3x(X0x.substring(mf4j + 1)); // presumably parses the query string into an object
            X0x = X0x.substring(0, mf4j) // keep only the path part of the URL
        }
        if (e0x.query) {
            i0x = NEJ.X(i0x, j0x.fU2x(e0x.query) ? j0x.hg3x(e0x.query) : e0x.query) // fold e0x.query into the parameters, converting it with hg3x when fU2x reports true (presumably "is a string")
        }
        if (e0x.data) {
            i0x = NEJ.X(i0x, j0x.fU2x(e0x.data) ? j0x.hg3x(e0x.data) : e0x.data) // same treatment for e0x.data
        }
        i0x["csrf_token"] = u0x.gW3x("__csrf"); // presumably read from the "__csrf" cookie
        X0x = X0x.replace("api", "weapi"); // only the first "api" occurrence is replaced
        e0x.method = "post"; // encrypted requests are always sent as POST
        delete e0x.query;
        var bWf9W = window.asrsea(JSON.stringify(i0x), bsG2x(["流泪", "强"]), bsG2x(XH8z.md), bsG2x(["爱心", "女孩", "惊恐", "大笑"])); // encryption step: the JSON of i0x is the last plaintext form of the payload
        e0x.data = j0x.cr1x({ // presumably serialises the object into a form body
            params: bWf9W.encText,
            encSecKey: bWf9W.encSecKey
        })
    }
    var cdnHost = "y.music.163.com";
    var apiHost = "interface.music.163.com";
    if (location.host === cdnHost) { // when the page is served from cdnHost, point requests at apiHost instead
        X0x = X0x.replace(cdnHost, apiHost);
        if (X0x.match(/^\/(we)?api/)) {
            X0x = "//" + apiHost + X0x // turn a root-relative /api or /weapi path into a protocol-relative URL on apiHost
        }
        e0x.cookie = true
    }
    czh5m(X0x, e0x) // hands the (possibly rewritten) URL and options to czh5m, presumably the actual sender
};

经断点调试,发现在第20行(window.asrsea 调用处)进行了加密:将 i0x 计算后生成临时变量 bWf9W,然后将 encText 和 encSecKey 分别赋给 e0x.data 的 params 和 encSecKey,因此在这一步拿出 i0x 的数据

image-20210713103009436.png

找到加密函数

"""
/**
 * Build a random alphanumeric string.
 *
 * @param {number} a - Number of characters to generate.
 * @returns {string} A string of `a` characters drawn from [a-zA-Z0-9] using Math.random.
 */
function a(a) {
    var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
    for (d = 0; a > d; d += 1)
        // The three comma-chained expressions below form the single loop body:
        // pick a random index into the alphabet and append that character.
        e = Math.random() * b.length,
            e = Math.floor(e),
            c += b.charAt(e);
    return c
}

/**
 * AES-encrypt a string in CBC mode with a fixed IV.
 *
 * @param {string} a - Plaintext to encrypt.
 * @param {string} b - Key, parsed as UTF-8.
 * @returns {string} String form of the CryptoJS result (presumably Base64,
 *     CryptoJS's default output - confirm against the library docs).
 */
function b(a, b) {
    var c = CryptoJS.enc.Utf8.parse(b), // key
        d = CryptoJS.enc.Utf8.parse("0102030405060708"), // constant initialisation vector
        e = CryptoJS.enc.Utf8.parse(a), // plaintext
        f = CryptoJS.AES.encrypt(e, c, {
                iv: d,
                mode: CryptoJS.mode.CBC
            }
        );
    return f.toString() 
}

/**
 * Encrypt a string with an RSA key pair built from the given components.
 *
 * @param {string} a - Value to encrypt.
 * @param {string} b - First RSAKeyPair argument (presumably the public exponent).
 * @param {string} c - Third RSAKeyPair argument (presumably the modulus).
 * @returns {*} Result of encryptedString(d, a).
 */
function c(a, b, c) {
    var d, e;
    // Comma expression: the value returned is that of the last operand,
    // i.e. the encrypted string assigned to e.
    return setMaxDigits(131),
        d = new RSAKeyPair(b, "", c),
        e = encryptedString(d, a)
}

/**
 * Produce the encrypted request fields.
 *
 * @param {string} d - Plaintext payload.
 * @param {string} e - Passed to c() as its second argument.
 * @param {string} f - Passed to c() as its third argument.
 * @param {string} g - Key for the first AES pass.
 * @returns {{encText: string, encSecKey: *}} encText is d encrypted with g and
 *     then again with a fresh 16-character random key i; encSecKey is c(i, e, f).
 */
function d(d, e, f, g) {
    var h = {},
        i = a(16); // random key for the second AES pass
    // Comma expression: fills h step by step and returns h itself.
    return h.encText = b(d, g),
        h.encText = b(h.encText, i),
        h.encSecKey = c(i, e, f), 
        h
}

对于此页面的 function d(d, e, f, g)

  • d 为数据
  • e 为 bsG2x(["流泪", "强"]) = 010001
  • f 为 bsG2x(XH8z.md) = 一串字符串
  • g 为 bsG2x(["爱心", "女孩", "惊恐", "大笑"]) = 0CoJUm6Qyw8W8jud
// Annotated copy of d(): builds the two encrypted request fields.
// On this page d is the payload, e is "010001", f is a long fixed string and g is "0CoJUm6Qyw8W8jud".
function d(d, e, f, g) {
    var h = {}, // result object, initially empty
        i = a(16); // 16-character random value; the crawler replaces it with a fixed value
    return h.encText = b(d, g), // first AES pass, g is the key
        h.encText = b(h.encText, i), // second AES pass, i is the key; this value is sent as params
        h.encSecKey = c(i, e, f), // sent as encSecKey; e and f are fixed, so a fixed i should give a fixed result
        h
}

先分析函数 function c,发现传入参数 e 和 f 是定死的,如果第一项参数 i 固定,那么该函数的返回值也就固定了

// Annotated copy of c(): b and c are fixed, so a is the only varying input.
// With a fixed a the result is expected to be fixed too, as no random value is introduced here.
function c(a, b, c) {
    var d, e;
    // Comma expression: the returned value is the encrypted string assigned to e.
    return setMaxDigits(131),
        d = new RSAKeyPair(b, "", c),
        e = encryptedString(d, a)
}

最后来看函数 a 和 b
a 生成了一个16位的随机字符串,这里可以通过F12获取到一个定值,那么剩下的变量只有 b 中的AES加密了

// Annotated copy of b(): a is the data to encrypt, b is the key.
function b(a, b) {
    var c = CryptoJS.enc.Utf8.parse(b), // key
        d = CryptoJS.enc.Utf8.parse("0102030405060708"), // initialisation vector
        e = CryptoJS.enc.Utf8.parse(a), // data to encrypt
        f = CryptoJS.AES.encrypt(e, c, { // AES encryption with c as the key
                iv: d, // IV ("offset" in the original notes)
                mode: CryptoJS.mode.CBC // cipher mode: CBC
            }
        );
    return f.toString()
}

通过以上分析可以得到爬虫代码:

import requests
# pip install pycryptodome
from Crypto.Cipher import AES
from base64 import b64encode
import json

# Comment endpoint; "weapi" is the encrypted variant of the "api" path.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="

headers = {
    "Referer": "https://music.163.com/#/song?id=1857602696",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.67"
}

# Request method: POST
# Plaintext payload; it is JSON-serialised and encrypted before being sent.
data = {
    "rid": "R_SO_4_1857602696",
    "threadId": "R_SO_4_1857602696",
    "pageNo": "1",
    "pageSize": "20",
    "cursor": "-1",
    "offset": "0",
    "orderType": "1",
    "csrf_token": ""
}

# Encryption inputs, mirroring the arguments of the site's d(d, e, f, g).
# e and f are the fixed values passed to the RSA step; the code visible here
# does not use them because encSecKey is hard-coded in get_encSecKey().
e = "010001"
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
# g is the key of the first AES pass.
g = "0CoJUm6Qyw8W8jud"
# i replaces the site's 16-character random key with a fixed value (key of the second AES pass).
i = "7MdgDcaOz6fkX1ZB"

def get_encSecKey():
    """Return the hard-coded ``encSecKey`` form field.

    The site derives this value from the random key alone (the other RSA
    inputs are fixed), so with a fixed key it is a constant.
    NOTE(review): presumably captured together with the fixed key ``i`` -
    the two must correspond, confirm before changing either.
    """
    enc_sec_key = "1df401f4e77fc902f766446af730404c7b8137ed65798effb5ac4fca109ff24ff86696a4fa124505ad324655ddef3d1edf7e6effc7eb03ab906e59d93f46047d747a54e291d79e5d9650e378cad5a4e511e32172cbc63a4ad99fcaf233be481e144dc255febeb45ec529a08107a8ae5ed58a2a483694b7e31b04a34030116ecf"
    return enc_sec_key


def get_params(data):
    """Encrypt the JSON payload string into the ``params`` form field.

    Mirrors the site's two AES passes: first with the fixed key ``g``,
    then with the key ``i``.

    :param data: payload as a string.
    :return: the doubly encrypted string to send as ``params``.
    """
    return enc_params(enc_params(data, g), i)


def to_16(data):
    """Pad ``data`` so that its UTF-8 encoding is a multiple of 16 bytes.

    Uses PKCS#7-style padding: ``pad`` copies of ``chr(pad)`` are appended,
    with ``pad`` between 1 and 16 (a full block is added when the input is
    already aligned).

    The pad length is derived from the encoded byte length rather than the
    character count, because the caller encrypts ``data.encode("utf-8")``;
    counting characters produces a misaligned buffer as soon as the text
    contains non-ASCII characters. For ASCII input the result is unchanged.

    :param data: text to pad.
    :return: the padded text.
    """
    byte_len = len(data.encode("utf-8"))
    pad = 16 - byte_len % 16
    # chr(pad) is ASCII for 1..16, so each padding character is one byte.
    return data + chr(pad) * pad


def enc_params(data, key):
    """AES-CBC encrypt ``data`` with ``key`` and return it Base64-encoded.

    :param data: plaintext string; it is padded with ``to_16`` first because
        the cipher requires input whose length is a multiple of 16.
    :param key: key as a string, encoded as UTF-8.
    :return: Base64 text of the ciphertext.
    """
    plaintext = to_16(data)
    iv_bytes = "0102030405060708".encode("utf-8")  # same constant IV as the site's script
    cipher = AES.new(key=key.encode("utf-8"), IV=iv_bytes, mode=AES.MODE_CBC)
    ciphertext = cipher.encrypt(plaintext.encode("utf-8"))
    return b64encode(ciphertext).decode("utf-8")


# Send the encrypted form fields to the comment endpoint.
# A timeout is set because requests waits indefinitely by default, which
# would leave the script hanging if the server never answers.
resp = requests.post(
    url,
    headers=headers,
    data={
        "params": get_params(json.dumps(data)),
        "encSecKey": get_encSecKey(),
    },
    timeout=10,
)

print(resp.text)

结果如下,成功爬取评论:

image-20210713134507102.png