本人用 C# 写了一个爬虫,爬到的一个页面是用 JS 做跳转的,获取到的 HTML 页面代码如下:
<!DOCTYPE html>
<html>
<head>
<!-- Interstitial page: the title reads "redirecting to the purchase page"; the actual
     navigation is performed by the inline script below. -->
<title>正在跳转至购买页面</title>
<meta charset="utf-8" />
<meta http-equiv="pragma" content="no-cache" />
<!-- Fallback when scripting is disabled: refresh immediately (0 s) to the site root "/". -->
<noscript><meta http-equiv="refresh" content="0; url=/"></noscript>
<script>
// Packed script: the outer function (p,a,c,k,e,d) takes a payload string p, a radix a (62),
// a word count c (104) and a "|"-separated dictionary k. It replaces every encoded token in p
// with the matching dictionary word and returns the result, which eval() then executes.
// NOTE(review): as captured here the "\w+" / "\b" regex escapes and some quote escapes inside
// the payload look like they lost a level of backslashes -- confirm against the raw response
// before trying to execute this text verbatim.
eval(function(p,a,c,k,e,d){e=function(c){return(c<a?"":e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!"".replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return"\w+"};c=1};while(c--){if(k[c]){p=p.replace(new RegExp("\b"+e(c)+"\b","g"),k[c])}}return p}("6 k(z){4 7,B=J 1m("(^| )"+z+"=([^;]*)(;|$)");8(7=D.1v.1w(B)){d S(7[2])}I{d\"\"}}6 9(c,y,G,9){d c.w(0,y-1)+9+c.w(G,c.1x)}4 u=n.e.h;4 3=k("3");8(3!=\"\"){3=16("("+3+")");f=3.f;b=3.b;8(u.15(f+"/"+b)<0){E=9(u,13,18,f+"/"+b);n.e.1d=E}}(6(){(6(i,s,o,g,r,a,m){i[\"1a\"]=r;i[r]=i[r]||6(){(i[r].q=i[r].q||[]).1f(Y)},i[r].l=1*J W();a=s.X(o),m=s.10(o)[0];a.R=1;a.1b=g;m.1y.1g(a,m)})(n,D,\"1z\",\"//P.1A-F.p/F.1E\",\"5\");4 K=k(\"1B\");4 7=K.1u(\"|\");8(7[1]){5(\"H\",\"C-A-1\",\"x\",{\"1t\":7[1]})}I{5(\"H\",\"C-A-1\",\"x\")}4 j=1s;6 v(){8(j)d;j=1p;e.h=Q}5(\"Z\",\"1o\",e.h);5(\"O\",\"1q\");Q=\"1r://P.N.p/1n/1i/1h/?t=M-L&1k=1l&1C=M-L\";5(\"O\",\"V\",\"直达链接\",\"1c\",\"1e\",{\"19\":\"12\",\"14\":\"17\",\"11\":\"U\",\"T\":\"N.p\",\"1D\":v});1j(v,1F)})()",62,104,"|||zdm_track_info|var|ga|function|arr|if|changeStr||channel|allstr|return|location|source||href||redirected|getCookie|||window||com|||||this_url|redirect|substring|auto|start|name|27058866|reg|UA|document|go_url|analytics|end|create|else|new|cookie_user|20|joyo01y|amazon|send|www|smzdmhref|async|unescape|dimension30|790|event|Date|createElement|arguments|set|getElementsByTagName|dimension1|aa|26|dimension6|indexOf|eval|1515|30|dimension29|GoogleAnalyticsObject|src|ca|replace|ht|push|insertBefore|B003Y3AZVY|product|setTimeout|smid|ATVPDKIKX0DER|RegExp|gp|page|true|pageview|https|false|userId|split|cookie|match|length|parentNode|script|google|user|tag|hitCallback|js|1000".split("|"),0,{}))
</script>
</head>
</html>
现在想通过这段代码得到跳转页面的地址,求高手指点!!
已有思路:使用 MSScriptControl.ScriptControl 库直接执行代码中的 JS,但是遇到了问题:script 代码中使用了 window 对象,本人不知道怎么去构造这个 window 对象。
求高手
解决方案
40
他这段跳转代码是加密(混淆)过的,你找一下有没有对应的解密 JS 库,有的话就好办。
60
把这段JS代码解压贴给你:
/**
 * Look up a cookie by name in `document.cookie`.
 *
 * The name is matched literally: regex metacharacters in `name` are escaped
 * before the pattern is built, so "a.b" no longer matches a cookie called "axb".
 *
 * @param {string} name Cookie name to search for.
 * @returns {string} The value passed through `unescape`, or "" when no cookie
 *                   with that name is present.
 */
function getCookie(name) {
    // Escape regex metacharacters so the name cannot alter the pattern.
    var escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // A cookie starts at the beginning of the string or after a space, and its
    // value runs up to the next ";" or the end of the string.
    var reg = new RegExp("(^| )" + escapedName + "=([^;]*)(;|$)");
    var arr = document.cookie.match(reg);
    // `unescape` is kept on purpose: it also decodes %uXXXX sequences that
    // decodeURIComponent would reject.
    return arr ? unescape(arr[2]) : "";
}
/**
 * Splice a replacement string into `allstr`.
 *
 * Keeps the characters before position `start` (1-based), inserts `changeStr`,
 * then appends everything from index `end` (0-based) to the end of `allstr`.
 *
 * @param {string} allstr    Source string.
 * @param {number} start     1-based position of the first character to drop.
 * @param {number} end       0-based index where the kept tail begins.
 * @param {string} changeStr Text inserted between the kept head and tail.
 * @returns {string} The recombined string.
 */
function changeStr(allstr, start, end, changeStr) {
    var head = allstr.substring(0, start - 1);
    var tail = allstr.substring(end);
    return head + changeStr + tail;
}
// Page bootstrap, part 1: if the "zdm_track_info" cookie names a source/channel
// pair that is not already part of the current URL, rewrite the URL and
// navigate to the rewritten address.
var this_url = window.location.href;
var zdm_track_info = getCookie("zdm_track_info");
// Declared explicitly; the original created these as implicit globals.
var source, channel, go_url;
if (zdm_track_info != "") {
    // SECURITY NOTE(review): this evaluates cookie content as JavaScript.
    // Cookies are client-controlled; JSON.parse would be safer if the cookie
    // is always valid JSON -- confirm the cookie format before switching.
    zdm_track_info = eval("(" + zdm_track_info + ")");
    source = zdm_track_info.source;
    channel = zdm_track_info.channel;
    if (this_url.indexOf(source + "/" + channel) < 0) {
        // Replace the URL segment between positions 26 and 30 with "source/channel".
        go_url = changeStr(this_url, 26, 30, source + "/" + channel);
        // Fix: the original wrote `window.location.replace = go_url`, which
        // assigns to the method instead of calling it and never navigates.
        window.location.replace(go_url);
    }
}

// Page bootstrap, part 2: load Google Analytics, record the visit, then
// redirect to the target URL held in `smzdmhref`.
(function () {
    // Google Analytics loader: registers a queueing stub under window.ga and
    // injects the analytics.js script tag before the first <script> element.
    (function (i, s, o, g, r, a, m) {
        i["GoogleAnalyticsObject"] = r;
        i[r] = i[r] || function () {
            (i[r].q = i[r].q || []).push(arguments)
        }, i[r].l = 1 * new Date();
        a = s.createElement(o), m = s.getElementsByTagName(o)[0];
        a.async = 1;
        a.src = g;
        m.parentNode.insertBefore(a, m)
    })(window, document, "script", "//www.google-analytics.com/analytics.js", "ga");

    // The "user" cookie is split on "|"; the second field, when present, is
    // passed to the tracker as userId.
    var cookie_user = getCookie("user");
    var arr = cookie_user.split("|");
    if (arr[1]) {
        ga("create", "UA-27058866-1", "auto", {
            "userId": arr[1]
        });
    } else {
        ga("create", "UA-27058866-1", "auto");
    }

    // Guard so the redirect runs once, whichever of hitCallback or the timer
    // fires first.
    var redirected = false;
    function redirect() {
        if (redirected) return;
        redirected = true;
        location.href = smzdmhref;
    }

    ga("set", "page", location.href);
    ga("send", "pageview");

    // Final destination of this interstitial page. Left as an implicit global
    // (as in the original) in case other scripts on the page read it.
    smzdmhref = "https://www.amazon.com/gp/product/B003Y3AZVY/?t=joyo01y-20&smid=ATVPDKIKX0DER&tag=joyo01y-20";

    // Send the click-through event and pass redirect as its hitCallback.
    ga("send", "event", "直达链接", "ca", "ht", {
        "dimension29": "aa",
        "dimension6": "1515",
        "dimension1": "790",
        "dimension30": "amazon.com",
        "hitCallback": redirect
    });

    // Fallback: redirect after 1 second even if the callback never runs.
    setTimeout(redirect, 1000);
})();
假如是针对性地抓取同类页面,分析这段代码后基本可以得出跳转地址是固定的链接地址(即代码中赋给 smzdmhref 的 URL);
假如是要获取不同的js代码的跳转地址,可利用C#调用浏览器来执行页面js,例如 WebBrowser控件。