在页面上实现Google PR值的显示

原文 http://www.zeali.net/entry/78
Google本身提供了查询指定的url的PageRank值的接口,知道了这个接口,就可以很容易编写脚本在页面上实现这一功能,而无需再依赖google toolbar才能进行查询。本文提供了一个用PHP实现的pr查询接口。同时修正了大部分版本中存在的 Linux 操作系统及64位操作系统下无法正常生成checksum的问题。

使用很简单,只要在需要的地方

<?php
// Load the PageRank helper, then print the rank of the URL held in $urlToQuery.
include_once('./pr.inc.php');
echo getPR($urlToQuery);
?>

即可显示出指定url的PageRank的数值。知道了这个数值再在其基础上模拟出Google Toolbar上面的图形化的pr显示也就不是难事了。实际上实现原理说白了也很简单,就是传递特定的查询参数到Google的搜索引擎,然后抓取返回的页面内容。

演示页面请参见 : Google PageRank Query

本文代码素材来源: http://forums.seochat.com/archive/t-17286/Php-Pagerank-checker ;NewGCH方法实现参考于Firefox的工具栏扩展插件SearchStatus的相关代码实现。

网上还有一个开源的pr状态查询的项目: http://pagerankstatus.mozdev.org/source.html , 可以从cvs上直接抓取完整的源代码(cvs用户密码 guest):

cvs -d :pserver:guest@mozdev.org:/cvs login
cvs -d :pserver:guest@mozdev.org:/cvs co pagerankstatus

一个专门提供pr显示接口的网站: http://www.prchecker.info/

——————————————————————

pr.inc.php源文件如下(Updated 2008-05-04 14:29 — Google修改了checksum的计算算法,需要在原有的GCH方法之后再套一层NewGCH方法来得到正确的checksum,同时引发的php int overflow及64位机器兼容性问题请参照以下源代码的变化部分):

http://toolbarqueries.google.com/search?client=navclient-auto&ch=6“
        .$ch.”&ie=UTF-8&oe=UTF-8&features=Rank&q=”.$url;
    $pr_str = retrieveURLContent($googlePRUrl,$gettype);
    return substr($pr_str,strrpos($pr_str, “:”)+1);
}

/**
 * Unsigned (zero-filling) shift right.
 *
 * PHP's >> operator is arithmetic: it copies the sign bit into the vacated
 * positions. This helper shifts a native-width integer right while filling
 * the vacated high bits with zeros instead.
 *
 * @param int|float $a Value to shift (converted to a native int by the bit operators).
 * @param int       $b Number of bit positions to shift by.
 * @return int The shifted value.
 */
function zeroFill($a, $b){
    // Only the top bit of a native int set. Built from integer constants so
    // no out-of-range float (as produced by hexdec('8000...')) is involved.
    $signBit = ~PHP_INT_MAX;

    // Non-negative values (and a shift of zero) need no special handling.
    if ($b == 0 || !($signBit & $a)) {
        return $a >> $b;
    }

    // Negative value: shift by one, clear the copied sign bit, then the
    // remaining shift operates on a non-negative number and zero-fills.
    $a = ($a >> 1) & PHP_INT_MAX;
    return $a >> ($b - 1);
}

/**
 * Normalises a number to an unsigned 32-bit value.
 *
 * On builds whose native int is 4 bytes or less the value is converted to a
 * float and negative values are shifted up by 2^32. On wider builds the bits
 * above the low 32 are masked off.
 *
 * @param int|float $n Value to normalise.
 * @return int|float Float on 32-bit builds, int on wider builds.
 */
function trunkbitForce32bit($n){
    if (PHP_INT_SIZE <= 4) {
        $n = (float) $n;
        if ($n < 0) {
            $n += 4294967296;
        }
        return $n;
    }

    // Keep only the low 32 bits.
    return $n & 0xFFFFFFFF;
}

/**
 * Bitwise XOR of two values using PHP's native ^ operator.
 *
 * @param mixed $m Left operand.
 * @param mixed $n Right operand.
 * @return mixed Result of $m ^ $n.
 */
function bigxor($m,$n){
    $xored = $m ^ $n;
    return $xored;
}

/**
 * Scrambles three words together and returns them as array($a, $b, $c).
 *
 * Each step subtracts the other two words from one word and XORs in a
 * shifted copy of a neighbour (unsigned right shift via zeroFill(), or a
 * left shift), calling trunkbitForce32bit() after almost every operation to
 * keep values within 32 bits. Three rounds are applied with shift amounts
 * (13, 8, 13), (12, 16, 5) and (3, 10, 15).
 * NOTE(review): the shape resembles Bob Jenkins' lookup2 mix — confirm.
 */
function mix($a,$b,$c){

    // Normalise the inputs before mixing.
    $a = trunkbitForce32bit($a);
    $b = trunkbitForce32bit($b);
    $c = trunkbitForce32bit($c);
    // Round 1: shifts 13 (right), 8 (left), 13 (right).
    $a -= $b; $a = trunkbitForce32bit($a);
    $a -= $c; $a = trunkbitForce32bit($a);
    $a = bigxor($a,(zeroFill($c,13))); $a = trunkbitForce32bit($a);
    $b -= $c; $b = trunkbitForce32bit($b);
    $b -= $a; $b = trunkbitForce32bit($b);
    $b = bigxor($b,trunkbitForce32bit($a<<8)); $b = trunkbitForce32bit($b);
    $c -= $a; $c = trunkbitForce32bit($c);
    $c -= $b; $c = trunkbitForce32bit($c);
    $c = bigxor($c,(zeroFill($b,13))); $c = trunkbitForce32bit($c);
    // Round 2: shifts 12 (right), 16 (left), 5 (right).
    $a -= $b;$a = trunkbitForce32bit($a);
    $a -= $c;$a = trunkbitForce32bit($a);
    $a = bigxor($a,(zeroFill($c,12)));$a = trunkbitForce32bit($a);
    $b -= $c;$b = trunkbitForce32bit($b);
    $b -= $a;$b = trunkbitForce32bit($b);
    // NOTE: unlike the other XOR steps, $b is not re-truncated after this one.
    $b = bigxor($b,trunkbitForce32bit($a<<16));

    $c -= $a; $c = trunkbitForce32bit($c);
    $c -= $b; $c = trunkbitForce32bit($c);
    $c = bigxor($c,(zeroFill($b,5))); $c = trunkbitForce32bit($c);

    // Round 3: shifts 3 (right), 10 (left), 15 (right).
    $a -= $b;$a = trunkbitForce32bit($a);
    $a -= $c;$a = trunkbitForce32bit($a);
    $a = bigxor($a,(zeroFill($c,3)));$a = trunkbitForce32bit($a);
    $b -= $c;$b = trunkbitForce32bit($b);
    $b -= $a;$b = trunkbitForce32bit($b);
    // NOTE: as in round 2, no truncation follows this XOR.
    $b = bigxor($b,trunkbitForce32bit($a<<10));

    $c -= $a; $c = trunkbitForce32bit($c);
    $c -= $b; $c = trunkbitForce32bit($c);
    $c = bigxor($c,(zeroFill($b,15))); $c = trunkbitForce32bit($c);

    return array($a,$b,$c);
}

/**
 * Wraps a GCH() checksum in a second hashing pass.
 *
 * The incoming checksum is rescaled (($ch / 7) << 2, OR-ed with the low
 * three bits of $ch mod 13), expanded into 20 words where each word is 9
 * less than the previous one, serialised to bytes with c32to8bit(), and
 * hashed again with GCH().
 *
 * @param int|float $ch Checksum produced by GCH().
 * @return int|float The re-hashed checksum.
 */
function NewGCH($ch){
    $ch = ( trunkbitForce32bit( ( $ch / 7 ) << 2 ) |
            ( ( myfmod( $ch,13 ) ) & 7 ) );

    $prbuf = array();
    $prbuf[0] = $ch;
    for( $i = 1; $i < 20; $i++ )
    {
      // Plain ASCII minus: the published listing had a typographic dash here.
      $prbuf[$i] = $prbuf[$i-1] - 9;
    }

    $ch = GCH( c32to8bit( $prbuf ) );

    return $ch;
}
/**
 * Floored modulo: $x - floor($x / $y) * $y.
 *
 * For a positive $y the result is never negative, even when $x is.
 *
 * @param int|float $x Dividend.
 * @param int|float $y Divisor.
 * @return float Remainder (float because floor() returns a float).
 */
function myfmod($x,$y){
    $i = floor( $x / $y );
    return ( $x - $i * $y );
}
/**
 * Splits each word of $arr32 into four byte values, lowest byte first.
 *
 * Word $w contributes output entries 4*$w .. 4*$w+3; each entry is the
 * current low 8 bits of the word, after which the word is shifted right by
 * 8 with zeroFill().
 *
 * @param array $arr32 Zero-indexed list of words.
 * @return array Flat list of byte values.
 */
function c32to8bit($arr32){
    $bytes = array();

    for ($word = 0; $word < count($arr32); $word++) {
        $base = $word * 4;
        for ($offset = 0; $offset < 4; $offset++) {
            $bytes[$base + $offset] = $arr32[$word] & 255;
            $arr32[$word] = zeroFill($arr32[$word], 8);
        }
    }

    return $bytes;
}

/**
 * Computes the checksum of a list of byte values.
 *
 * The input is consumed in 12-entry groups; each group is packed into three
 * words (four entries per word, lowest entry in the low bits), added to the
 * running words $a, $b, $c and scrambled with mix(). The total length and
 * any remaining entries are then folded in before a final mix() call.
 *
 * @param array    $url    Zero-indexed list of integer byte values, e.g. from strord().
 * @param int|null $length Number of entries to hash; defaults to count($url).
 * @return int|float Third word of the final mix, normalised by trunkbitForce32bit().
 */
function GCH($url, $length=null){
    if(is_null($length)) {
        $length = count($url);
    }

    // Initial values of the three running words.
    $a = 0x9E3779B9;
    $b = 0x9E3779B9;
    $c = 0xE6359A60;
    $k = 0;
    $len = $length;

    while( $len >= 12 ){
        $a += ($url[$k+0] +trunkbitForce32bit($url[$k+1]<<8)
              +trunkbitForce32bit($url[$k+2]<<16)
              +trunkbitForce32bit($url[$k+3]<<24));
        $b += ($url[$k+4] +trunkbitForce32bit($url[$k+5]<<8)
              +trunkbitForce32bit($url[$k+6]<<16)
              +trunkbitForce32bit($url[$k+7]<<24));
        $c += ($url[$k+8] +trunkbitForce32bit($url[$k+9]<<8)
              +trunkbitForce32bit($url[$k+10]<<16)
              +trunkbitForce32bit($url[$k+11]<<24));
        $mixo = mix($a,$b,$c);
        $a = $mixo[0]; $b = $mixo[1]; $c = $mixo[2];
        $k += 12;
        $len -= 12;
    }

    $c += $length;

    // Fold in the 0-11 leftover entries. Every case falls through on
    // purpose, so case N handles entries N-1 down to 0. The low byte of $c
    // is left for the length added above.
    switch( $len ) {
        case 11:
        $c += trunkbitForce32bit($url[$k+10]<<24);

        case 10:
        $c+=trunkbitForce32bit($url[$k+9]<<16);

        case 9 :
        $c+=trunkbitForce32bit($url[$k+8]<<8);

        case 8 :
        $b+=trunkbitForce32bit($url[$k+7]<<24);

        case 7 :
        $b+=trunkbitForce32bit($url[$k+6]<<16);

        case 6 :
        $b+=trunkbitForce32bit($url[$k+5]<<8);

        case 5 :
        $b+=trunkbitForce32bit($url[$k+4]);

        case 4 :
        $a+=trunkbitForce32bit($url[$k+3]<<24);

        case 3 :
        $a+=trunkbitForce32bit($url[$k+2]<<16);

        case 2 :
        $a+=trunkbitForce32bit($url[$k+1]<<8);

        case 1 :
        $a+=trunkbitForce32bit($url[$k+0]);
    }

    $mixo = mix( $a, $b, $c );

    $mixo[2] = trunkbitForce32bit($mixo[2]);

    // Kept from the original as a safeguard; after the normalisation above a
    // negative value is not expected here.
    if( $mixo[2] < 0 ){
        return (
            hexdec('1'.
                implode('',
                    array_fill(0,PHP_INT_SIZE*2,'0')))
            + $mixo[2] );
    }

    return $mixo[2];
}

/**
 * Converts a string into a list of integers holding the ord() value of
 * each byte.
 *
 * @param string $string Input string.
 * @return array Zero-indexed list of byte values; an empty array for an empty string.
 */
function strord($string){
    // Initialise so an empty string yields array() rather than an undefined variable.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets: the {$i} form was removed in PHP 8.
        $result[$i] = ord($string[$i]);
    }
    return $result;
}

/**
 * Fetches the content of a URL with the transport selected by $gettype.
 *
 * @param string $url     URL to fetch.
 * @param mixed  $gettype G_PR_GET_TYPE_FILE selects the fopen-based reader;
 *                        any other value selects the socket-based reader.
 * @return string|false Page content, or false on failure.
 */
function retrieveURLContent($url,$gettype){
    // Loose comparison, matching the semantics of a switch statement.
    if ($gettype == G_PR_GET_TYPE_FILE) {
        return retrieveURLContentByFile($url);
    }

    return retrieveURLContentBySocket($url);
}

/**
 * Reads the full content of a URL (or path) through PHP's fopen wrappers.
 *
 * @param string $url URL or path accepted by fopen().
 * @return string|false The content, or false when the stream cannot be opened.
 */
function retrieveURLContentByFile($url){
    // Warnings are suppressed on purpose: failure is reported via the false return.
    $fd = @fopen($url,"r");
    if(!$fd){
        return false;
    }
    $result = "";
    // Compare against false explicitly so a final line of "0" is not
    // mistaken for end of input.
    while(($buffer = fgets($fd, 4096)) !== false) {
      $result .= $buffer;
    }
    fclose($fd);
    return $result;
}

/**
 * Fetches a URL with a hand-written HTTP/1.0 GET request over a raw socket.
 *
 * When $host is empty, $url must be absolute ("scheme://host/path"); the
 * host and request URI are extracted from it. When $host is given, $url is
 * sent as the request URI unchanged.
 *
 * @param string $url     Absolute URL, or the request URI when $host is given.
 * @param string $host    Host to connect to; derived from $url when empty.
 * @param int    $port    TCP port.
 * @param int    $timeout Connection timeout in seconds.
 * @return string|false Response body (the whole response when no header
 *                      terminator is found), or false on failure.
 */
function retrieveURLContentBySocket($url,
                                    $host="",
                                    $port=80,
                                    $timeout=30){
    if($host == ""){
        $pos = strpos($url,'://');
        if(!$pos){
            return false;
        }
        $hostStart = $pos + 3;
        $pathStart = strpos($url,'/',$hostStart);
        if($pathStart === false){
            // No path component (e.g. "http://example.com"): request the root.
            $host = substr($url,$hostStart);
            $uri = '/';
        }
        else{
            $host = substr($url,$hostStart,$pathStart - $hostStart);
            $uri = substr($url,$pathStart);
        }
    }
    else{
        $uri = $url;
    }

    $request =  "GET ".$uri." HTTP/1.0\r\n"
               ."Host: ".$host."\r\n"
               ."Accept: */*\r\n"
               ."User-Agent: ZealGet\r\n"
               ."\r\n";
    // Warnings are suppressed on purpose: failure is reported via the false return.
    $sHnd = @fsockopen ($host, $port, $errno, $errstr, $timeout);
    if(!$sHnd){
        return false;
    }
    @fputs ($sHnd, $request);

    // Read the whole response.
    $result = "";
    while (!feof($sHnd)){
        $chunk = fgets($sHnd,4096);
        if($chunk === false){
            // Read failure or end of stream: stop instead of looping again.
            break;
        }
        $result .= $chunk;
    }
    fclose($sHnd);

    // Strip the response headers when the blank-line separator is present.
    $headerend = strpos($result,"\r\n\r\n");
    if ($headerend === false)
    {
        return $result;
    }

    return substr($result,$headerend+4);
}
?>



发表评论

您的电子邮箱地址不会被公开。

53 + = 63