海运的博客

自用完美PHP异步并发multi curl

发布时间:December 7, 2014 // 分类:PHP // No Comments

修改自https://code.google.com/p/rolling-curl/

<?php
   /*
   Authored by Josh Fraser (www.joshfraser.com)
   Released under Apache License 2.0

   Maintained by Alexander Makarov, http://rmcreative.ru/

   $Id$
   */

   /**
   * Plain data holder describing one HTTP request for RollingCurl.
   *
   * Every field is public and may be assigned after construction.
   */
   class RollingCurlRequest {
      /** @var string|bool Target URL (false until one is supplied) */
      public $url = false;
      /** @var string HTTP method name */
      public $method = 'GET';
      /** @var mixed Request body; null means "no body" */
      public $post_data = null;
      /** @var array|null Extra header lines for this request */
      public $headers = null;
      /** @var array|null Extra CURLOPT_* options for this request */
      public $options = null;
      /** @var mixed Free-form value stored with the request */
      public $info = null;
      /** @var callable|null Invoked as callback($response, $info, $request) */
      public $callback;
      /** @var bool Flag stored with the request; defaults to false */
      public $recursion = false;

      /**
      * Note the argument order: options and info come BEFORE method.
      *
      * @param string $url
      * @param array|null $options
      * @param mixed $info
      * @param string $method
      * @param mixed $post_data
      * @param array|null $headers
      * @return void
      */
      public function __construct($url, $options = null, $info = null, $method = "GET", $post_data = null, $headers = null) {
         $this->url       = $url;
         $this->options   = $options;
         $this->info      = $info;
         $this->method    = $method;
         $this->post_data = $post_data;
         $this->headers   = $headers;
      }

      /**
      * Drops the request fields when the object is destroyed.
      *
      * @return void
      */
      public function __destruct() {
         unset($this->url);
         unset($this->method);
         unset($this->post_data);
         unset($this->headers);
         unset($this->options);
      }
   }

   /**
   * Exception type raised by RollingCurl (for example when the effective
   * window size is below 2). It adds no members of its own to Exception,
   * it only lets callers catch RollingCurl failures specifically.
   */
   class RollingCurlException extends Exception {
   }

   /**
   * Holds a queue of requests and runs them through curl_multi with a
   * bounded number of simultaneous transfers ("rolling window").
   *
   * @throws RollingCurlException
   */
   class RollingCurl {
      /**
      * @var int
      *
      * Window size is the max number of simultaneous connections allowed.
      *
      * REMEMBER TO RESPECT THE SERVERS:
      * Sending too many requests at one time can easily be perceived
      * as a DOS attack. Increase this window_size if you are making requests
      * to multiple servers or have permission from the receiving server admins.
      */
      private $window_size = 5;

      /**
      * @var int
      *
      * Number of transfers currently attached to the multi handle.
      */
      public $current_size = 0;

      /**
      * @var float
      *
      * Timeout (seconds) passed to curl_multi_select.
      */
      private $timeout = 10;

      /**
      * @var array
      *
      * Base options applied to EVERY request.
      */
      protected $options = array(
         CURLOPT_SSL_VERIFYPEER => 0,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_VERBOSE => 0,
         CURLOPT_TIMEOUT => 20,
         CURLOPT_DNS_CACHE_TIMEOUT => 3600,
         CURLOPT_CONNECTTIMEOUT => 10,
         CURLOPT_ENCODING => 'gzip,deflate',
         CURLOPT_FOLLOWLOCATION => 1,
         CURLOPT_MAXREDIRS => 2,
         CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0',
         //CURLOPT_HEADER => 1
      );

      /**
      * @var array
      *
      * Base header lines sent with EVERY request.
      */
      private $headers = array(

         'Connection: Keep-Alive',
         'Keep-Alive: 300',
         'Expect:'
      );

      /**
      * @var array
      *
      * The request queue
      */
      private $requests = array();

      /**
      * @var array
      *
      * Maps a handle key (see handle_key()) to an index in $requests
      */
      private $requestMap = array();

      /**
      * @var callable|null
      *
      * Fallback callback, used for requests that carry no callable of
      * their own.
      */
      public $callback = null;

      /**
      * @var mixed
      *
      * The curl multi handle while rolling_curl() is running, else null.
      */
      private $master = null;

      /**
      * @param  $callback
      * Callback function to be applied to each result that has no
      * request-level callback.
      *
      * Can be specified as 'my_callback_function'
      * or array($object, 'my_callback_method').
      *
      * Function should take three parameters: $response, $info, $request.
      * $response is response body, $info is additional curl info.
      * $request is the original request
      *
      * @return void
      */
      function __construct($callback = null) {
         $this->callback = $callback;
      }

      /**
      * Read access to non-public properties; null when unset.
      *
      * @param string $name
      * @return mixed
      */
      public function __get($name) {
         return (isset($this->{$name})) ? $this->{$name} : null;
      }

      /**
      * Write access to non-public properties. "options" and "headers" are
      * combined with the existing defaults instead of replacing them.
      *
      * @param string $name
      * @param mixed $value
      * @return bool
      */
      public function __set($name, $value) {
         if ($name == "options") {
            // CURLOPT_* keys are unique ints, so array union gives "new value wins"
            $this->options = $value + $this->options;
         } elseif ($name == "headers") {
            // header lines are a plain list: union (+) would overwrite the
            // defaults by numeric index, so concatenate and de-duplicate
            $this->headers = array_values(array_unique(array_merge((array) $value, (array) $this->headers)));
         } else {
            $this->{$name} = $value;
         }
         return true;
      }

      /**
      * Add a request to the request queue
      *
      * @param RollingCurlRequest $request
      * @return bool
      */
      public function add($request) {
         $this->requests[] = $request;
         return true;
      }

      /**
      * Create new Request and add it to the request queue
      *
      * @param string $url
      * @param string $method
      * @param  $post_data
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
         // RollingCurlRequest takes ($url, $options, $info, $method, $post_data, $headers)
         $this->requests[] = new RollingCurlRequest($url, $options, null, $method, $post_data, $headers);
         return true;
      }

      /**
      * Queue a GET request
      *
      * @param string $url
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function get($url, $headers = null, $options = null) {
         return $this->request($url, "GET", null, $headers, $options);
      }

      /**
      * Queue a POST request
      *
      * @param string $url
      * @param  $post_data
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function post($url, $post_data = null, $headers = null, $options = null) {
         return $this->request($url, "POST", $post_data, $headers, $options);
      }

      /**
      * Execute processing
      *
      * @param int $window_size Max number of simultaneous connections
      * @return string|bool
      */
      public function execute($window_size = null) {
         // rolling curl window must always be greater than 1
         if (sizeof($this->requests) == 1) {
            return $this->single_curl();
         } else {
            // start the rolling curl. window_size is the max number of simultaneous connections
            return $this->rolling_curl($window_size);
         }
      }

      /**
      * Performs a single curl request
      *
      * @access private
      * @return string|bool Response body when no callback is set, else true
      */
      private function single_curl() {
         $ch = curl_init();
         $request = array_shift($this->requests);
         curl_setopt_array($ch, $this->get_options($request));
         $output = curl_exec($ch);
         $info = curl_getinfo($ch);
         if (curl_error($ch))
            $info['error'] = curl_error($ch);
         $this->close_handle($ch);

         $callback = $this->resolve_callback($request);
         if ($callback !== null) {
            call_user_func($callback, $output, $info, $request);
            return true;
         }
         // a request-level callback that is set but not callable yields true
         // without a call; with no callback at all the body is returned
         if ($request->callback)
            return true;
         return $output;
      }

      /**
      * Performs multiple curl requests
      *
      * @access private
      * @throws RollingCurlException
      * @param int $window_size Max number of simultaneous connections
      * @return bool
      */
      private function rolling_curl($window_size = null) {
         if ($window_size)
            $this->window_size = $window_size;

         // an earlier execute() may have left sparse keys; the loop below
         // addresses the queue by position 0..n-1
         $this->requests = array_values($this->requests);

         // the window for this run never exceeds the queue length (kept
         // local so a short queue does not shrink the configured size)
         $window = min($this->window_size, sizeof($this->requests));

         if ($window < 2) {
            throw new RollingCurlException("Window size must be greater than 1");
         }

         $master = curl_multi_init();
         $this->master = $master;
         $this->requestMap = array();
         $this->current_size = 0;
         $chs = array();

         // initial fill of the window
         for ($i = 0; $i < $window; $i++) {
            $ch = curl_init();
            curl_setopt_array($ch, $this->get_options($this->requests[$i]));
            curl_multi_add_handle($master, $ch);

            $key = $this->handle_key($ch);
            // handles kept for reuse
            $chs[$key] = $ch;
            // lets a finished handle be traced back to its request
            $this->requestMap[$key] = $i;
            $this->current_size++;
         }

         do {
            // drive all transfers, including ones added during this loop
            do {
               $execrun = curl_multi_exec($master, $running);
            } while ($execrun == CURLM_CALL_MULTI_PERFORM); // no longer returned since libcurl 7.20.0

            if ($execrun != CURLM_OK)
               echo "ERROR!\n " . curl_multi_strerror($execrun);

            // block until there is activity instead of spinning
            if ($running) {
               if (curl_multi_select($master, $this->timeout) === -1) {
                  // select can fail immediately; yield briefly to avoid a busy loop
                  usleep(1000);
               }
            }

            // collect finished transfers and top the window up again
            while ($done = curl_multi_info_read($master)) {
               $ch = $done['handle'];
               $info = curl_getinfo($ch);
               $output = curl_multi_getcontent($ch);
               if ($done['result'] != CURLE_OK)
                  $info['error'] = curl_error($ch);

               $key = $this->handle_key($ch);
               $index = $this->requestMap[$key];
               $request = $this->requests[$index];

               // bookkeeping happens whether or not a callback exists,
               // otherwise current_size never reaches 0 and the loop hangs
               unset($this->requests[$index], $this->requestMap[$key]);
               $this->current_size--;

               $callback = $this->resolve_callback($request);
               if ($callback !== null) {
                  call_user_func($callback, $output, $info, $request);
               }

               curl_multi_remove_handle($master, $ch);

               if (isset($this->requests[$i])) {
                  // reuse the finished handle, but drop the previous
                  // request's options first (e.g. a stale POST body)
                  if (function_exists('curl_reset')) {
                     curl_reset($ch);
                  }
                  curl_setopt_array($ch, $this->get_options($this->requests[$i]));
                  curl_multi_add_handle($master, $ch);

                  $this->requestMap[$key] = $i;
                  $this->current_size++;
                  $i++;
               } else {
                  // queue exhausted: this handle is no longer needed
                  unset($chs[$key]);
                  $this->close_handle($ch);
               }
            }

         } while ($this->current_size);

         curl_multi_close($master);
         $this->master = null;
         return true;
      }

      /**
      * Waits on the active multi handle for activity.
      *
      * @return int Result of curl_multi_select, or 0 when no rolling run
      *             is in progress
      */
      public function state() {
         if ($this->master === null) {
            return 0;
         }
         return curl_multi_select($this->master, $this->timeout);
      }

      /**
      * Builds the CURLOPT_* array for one request from the class defaults
      * plus the request's own options, headers, URL and body.
      *
      * @access private
      * @param RollingCurlRequest $request
      * @return array
      */
      private function get_options($request) {
         $options = $this->__get('options');
         if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
            $options[CURLOPT_FOLLOWLOCATION] = 1;
            $options[CURLOPT_MAXREDIRS] = 5;
         }

         // request-level options win over the defaults
         if ($request->options) {
            $options = $request->options + $options;
         }

         $headers = $this->__get('headers');

         // request headers are ADDED to the defaults; array union (+) would
         // replace defaults that share the same numeric index
         if ($request->headers) {
            $headers = array_values(array_unique(array_merge((array) $request->headers, (array) $headers)));
         }

         // set the request URL
         $options[CURLOPT_URL] = $request->url;

         // posting data w/ this request?
         if ($request->post_data) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
         }
         if ($headers) {
            $options[CURLOPT_HEADER] = 0;
            $options[CURLOPT_HTTPHEADER] = $headers;
         }

         return $options;
      }

      /**
      * Stable array key for a cURL handle. Handles are resources before
      * PHP 8 (int cast works) and objects from PHP 8 on (int cast does not).
      *
      * @param mixed $ch
      * @return int
      */
      private function handle_key($ch) {
         return is_object($ch) ? spl_object_id($ch) : (int) $ch;
      }

      /**
      * Closes a resource-type easy handle; object handles are released by
      * PHP once unreferenced.
      *
      * @param mixed $ch
      * @return void
      */
      private function close_handle($ch) {
         if (is_resource($ch)) {
            curl_close($ch);
         }
      }

      /**
      * Picks the callback for a request: its own if callable, otherwise the
      * class-level one if callable, otherwise null.
      *
      * @param RollingCurlRequest $request
      * @return callable|null
      */
      private function resolve_callback($request) {
         if (is_callable($request->callback)) {
            return $request->callback;
         }
         if (is_callable($this->callback)) {
            return $this->callback;
         }
         return null;
      }

      /**
      * @return void
      */
      public function __destruct() {
         unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
      }

      /**
      * Debug helper: dumps the current request queue.
      *
      * @return void
      */
      function test() {

         var_dump($this->requests);
      }
   }

使用方法:

<?php
   require("class/RollingCurl.php");

   /**
   * Completion callback: prints the body, the curl info array and the
   * originating request, in that order.
   */
   function callback($response, $info, $request) {
      foreach (array($response, $info, $request) as $part) {
         print_r($part);
      }
   }

   // two transfers in flight at a time
   $rc = new RollingCurl();
   $rc->window_size = 2;

   // queue 999 identical requests
   $i = 1;
   while ($i < 1000) {
      $url = "http://www.baidu.com/";
      $request = new RollingCurlRequest($url);
      $request->options = array(CURLOPT_COOKIEJAR => '/tmp/ck.cookie', CURLOPT_COOKIEFILE => '/tmp/ck.cookie');
      $request->headers = array('Referer: https://www.haiyun.me');
      $request->callback = 'callback';
      $rc->add($request);
      $i++;
   }

   $res = $rc->execute();

PHP Libevent HTTP客户端

发布时间:December 4, 2014 // 分类:PHP // No Comments

<?php
/**
 * Completion callback for EventHttpRequest.
 *
 * Decrements the global pending-request counter and stops the event loop
 * once it reaches zero.
 *
 * @param object|null $req  The finished request, or null (handled as a timeout).
 * @param object      $base The user argument given to EventHttpRequest. This
 *                          script passes the EventHttpConnection, so the event
 *                          base is obtained through getBase(); an object without
 *                          getBase() is used as the event base directly.
 */
function _request_handler($req, $base) {
  global $pend_req;

  if (is_null($req)) {
    // timed out
  } else {
    $response_code = $req->getResponseCode();

    if ($response_code == 0) {
      // connection refused
    } elseif ($response_code != 200) {
      // unexpected response code
    } else {
      // success; the body is available through $req->getInputBuffer()
    }
  }

  $pend_req--;

  // last outstanding request finished: leave the event loop
  if (!$pend_req) {
    $event_base = method_exists($base, 'getBase') ? $base->getBase() : $base;
    $event_base->exit(NULL);
  }
}

//$address = "www.google.com";
// number of requests still in flight; decremented by _request_handler
$pend_req = 0;
$port = 80;
// create the event base
$base = new EventBase();
echo "Event method used: ", $base->getMethod(), PHP_EOL;
// asynchronous DNS resolver
$dns_base = new EventDnsBase($base, TRUE);
$f = fopen("./50000.txt", "r");
if ($f === false) {
  fwrite(STDERR, "Cannot open ./50000.txt\n");
  exit(1);
}
// read with fgets() as the loop condition: testing feof() first yields one
// extra iteration with an empty host at end of file
while (($line = fgets($f)) !== false) {
  $address = trim($line);
  if ($address === '') {
    // skip blank lines
    continue;
  }
  // one HTTP connection per host, attached to the base
  $conn = new EventHttpConnection($base, $dns_base, $address, $port);
  $conn->setTimeout(1);
  // the connection is handed to the callback as its second argument
  $req = new EventHttpRequest("_request_handler", $conn);

  $req->addHeader("Host", $address, EventHttpRequest::OUTPUT_HEADER);
  $req->addHeader("Content-Length", "0", EventHttpRequest::OUTPUT_HEADER);
  $conn->makeRequest($req, EventHttpRequest::CMD_GET, "/");
  $pend_req++;
}
fclose($f);
// main event loop
$base->loop();
?>

c语言版,参考:https://github.com/libevent/libevent/issues/115

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <evhttp.h>
#include <event2/event.h>
#include <event2/http.h>
#include <event2/bufferevent.h>
/* Per-request bookkeeping passed to the libevent callbacks. */
typedef struct my_struct_s my_struct_t;

struct my_struct_s {
  struct evhttp_connection *conn; /* connection created in http_req() */
  struct evhttp_request *req;     /* request created in http_req() */
  struct evhttp_uri *uri;         /* result of evhttp_uri_parse() on the input string */
  struct event *cleanup;          /* timer event set in http_request_done() that runs connection_free() */
};

/* Event base shared by every connection and timer in this program. */
struct event_base *Base_Primary;

/*
 * Trim leading and trailing whitespace in place.
 *
 * Returns a pointer into `str` at the first non-space character and writes a
 * terminator after the last non-space character. Returns NULL for NULL input.
 * The result aliases the input buffer, so it must not be freed separately.
 */
char *trimwhitespace(char *str)
{
  char *end;

  if (str == NULL)
    return NULL;

  /* isspace() is undefined for negative values, so go through unsigned char */
  while (isspace((unsigned char)*str)) str++;

  if (*str == '\0')  /* all spaces */
    return str;

  end = str + strlen(str) - 1;
  while (end > str && isspace((unsigned char)*end)) end--;

  /* new terminator right after the last kept character */
  end[1] = '\0';

  return str;
}

/*
 * Timer callback that releases everything owned by one my_struct_t.
 * `arg` is the my_struct_t; `sock` and `which` are unused.
 */
void connection_free(int sock, short which, void *arg) {
  my_struct_t *myStruct = arg;

  (void)sock;
  (void)which;

  if (myStruct == NULL)
    return;

  if (myStruct->cleanup != NULL)
    event_free(myStruct->cleanup);
  if (myStruct->conn != NULL)
    evhttp_connection_free(myStruct->conn);

  /*
   * myStruct->req is deliberately NOT freed here. libevent releases a request
   * itself once its completion callback returns (unless evhttp_request_own()
   * was called), so by the time this timer fires the pointer is stale and
   * evhttp_request_free() on it would be a double free.
   */

  if (myStruct->uri != NULL)
    evhttp_uri_free(myStruct->uri);

  free(myStruct);
}

void http_request_done(struct evhttp_request *req, void *arg){

  // Get our custom struct
  my_struct_t *myStruct = arg;

  // Setup our timeout information (we delay 5 seconds)
  struct timeval Timeout;
  Timeout.tv_sec = 0;
  Timeout.tv_usec = 0;

  // Add this structure to our cleanup base to be cleaned up synchronously
  // TODO: Probably not the best way to cleanup and event, but it'l work for the purposes of illustration.
  // This way would ensure no race conditions exist, but it's probably not the most efficient depending on how many requests, etc we're dealing with.
  myStruct->cleanup = evtimer_new(Base_Primary, connection_free, (void *)myStruct);
  evtimer_add(myStruct->cleanup, &Timeout);

  //printf("http_request_done, we put our custom strucutre into a cleanup event to be freed!\n");
}

/*
 * Start an asynchronous "GET /" to port 80 of the host named by `uri`.
 *
 * Allocates a my_struct_t that is released later by connection_free(), which
 * http_request_done() schedules when the request completes.
 *
 * Returns 1 when the request was handed to libevent, 0 otherwise.
 */
int http_req(char *uri) {
  my_struct_t *myStruct;
  struct evkeyvalq *headers;

  if (uri == NULL || *uri == '\0')
    return 0;

  /* zero-initialised so every pointer is NULL until assigned */
  myStruct = calloc(1, sizeof(*myStruct));
  if (myStruct == NULL)
    return 0;

  myStruct->uri = evhttp_uri_parse(uri);
  myStruct->conn = evhttp_connection_base_new(Base_Primary, NULL, uri, 80);
  myStruct->req = evhttp_request_new(http_request_done, myStruct);
  if (myStruct->conn == NULL || myStruct->req == NULL) {
    /* nothing has been submitted yet, so everything is still ours to free */
    if (myStruct->req != NULL)
      evhttp_request_free(myStruct->req);
    if (myStruct->conn != NULL)
      evhttp_connection_free(myStruct->conn);
    if (myStruct->uri != NULL)
      evhttp_uri_free(myStruct->uri);
    free(myStruct);
    return 0;
  }

  /* set the timeout on our own connection pointer, before the request starts */
  evhttp_connection_set_timeout(myStruct->conn, 2);

  headers = evhttp_request_get_output_headers(myStruct->req);
  /* Host must name the server actually contacted */
  evhttp_add_header(headers, "Host", uri);
  evhttp_add_header(headers, "Connection", "close");

  /* the request target is a path; the host name belongs in the Host header */
  if (evhttp_make_request(myStruct->conn, myStruct->req, EVHTTP_REQ_GET, "/") != 0) {
    /*
     * NOTE(review): ownership of req/myStruct after a failed submit is not
     * evident from this file (the completion callback may already have been
     * invoked), so nothing is freed here - confirm against libevent.
     */
    return 0;
  }
  return 1;
}


/*
 * Entry point: reads one host name per line from /tmp/50000.txt, starts an
 * HTTP request for each, then runs the event loop until it has no more work.
 *
 * Returns 0 on success and -1 when the event base or the input file cannot
 * be opened.
 */
int main(int argc, char *argv[]) {

  char filename[] = "/tmp/50000.txt"; /* input file */
  FILE *fp;
  char StrLine[1024];                 /* line buffer */
  char *host;

  (void)argc;
  (void)argv;

  Base_Primary = event_base_new();
  if (Base_Primary == NULL) {
    printf("error!");
    return -1;
  }

  if ((fp = fopen(filename, "r")) == NULL) {
    printf("error!");
    event_base_free(Base_Primary);
    return -1;
  }

  /*
   * Loop on fgets() itself: testing feof() before reading runs the body once
   * more after the last line, re-submitting stale buffer contents.
   */
  while (fgets(StrLine, sizeof(StrLine), fp) != NULL) {
    host = trimwhitespace(StrLine);
    if (*host == '\0')
      continue; /* blank line */
    http_req(host);
  }
  fclose(fp);

  event_base_dispatch(Base_Primary);

  event_base_free(Base_Primary);
  /* exit status 0 for success (a non-zero status reads as failure to the shell) */
  return 0;
}

PHP JSON解码GBK

发布时间:November 19, 2014 // 分类:PHP // No Comments

在用PHP JSON处理符合标准的GBK编码内容时会返回null,将内容从GBK转换为UTF8再处理正常:

// json_decode() needs UTF-8, so convert the GBK input first.
// mb_convert_encoding() takes (string, TO encoding, FROM encoding).
$jsonp = mb_convert_encoding($jsonp, "utf-8", "gbk");
// Alternative with iconv (FROM, TO, string) - use one of the two, not both:
// $jsonp = iconv('gbk', 'utf-8', $jsonp);

PHP Curl使用Cookie

发布时间:November 16, 2014 // 分类:PHP // No Comments

发送或保存Cookie:

$url = 'https://www.haiyun.me/';
$cookiefile = '/tmp/cookie.txt';

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    CURLOPT_HEADER         => 1,
    CURLOPT_RETURNTRANSFER => 1,
    // cookies received are written to this file
    CURLOPT_COOKIEJAR      => $cookiefile,
    // cookies to send are read from this file
    CURLOPT_COOKIEFILE     => $cookiefile,
    // cookie line set explicitly for this request
    CURLOPT_COOKIE         => 'user=user; pass=pass',
));
$content = curl_exec($ch);
curl_close($ch);

解析服务器设置的cookie并保存为变量:

$ch = curl_init('https://www.haiyun.me/');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// include the response headers in the returned string
curl_setopt($ch, CURLOPT_HEADER, 1);
$res = curl_exec($ch);

$cookies = array();
if ($res !== false) {
  // split by the header size reported by curl; splitting on the first blank
  // line breaks when more than one header block is present
  $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
  $header = substr($res, 0, $header_size);
  $body = substr($res, $header_size);

  // collect every Set-Cookie line; the name=value pair ends at ';' or end of line
  preg_match_all('/^Set-Cookie:\s*([^;\r\n]*)/mi', $header, $matches);
  foreach ($matches[1] as $pair) {
    parse_str($pair, $parsed);
    // a later cookie with the same name replaces the earlier one
    $cookies = array_replace($cookies, $parsed);
  }
}
print_r($cookies);

从curl保存的cookie文件中解析cookie,文件格式见:http://www.cookiecentral.com/faq/#3.5
domain - The domain that created AND that can read the variable.
flag - A TRUE/FALSE value indicating if all machines within a given domain can access the variable. This value is set automatically by the browser, depending on the value you set for domain.
path - The path within the domain that the variable is valid for.
secure - A TRUE/FALSE value indicating if a secure connection with the domain is needed to access the variable.
expiration - The UNIX time that the variable will expire on. UNIX time is defined as the number of seconds since Jan 1, 1970 00:00:00 GMT.
name - The name of the variable.
value - The value of the variable.

<?php
   // Collect name => value pairs from a cookie file written by curl.
   $cookies = array();
   $lines = file('/tmp/404344922.cookie');
   if ($lines === false) {
      // unreadable file: report an empty set instead of iterating over false
      $lines = array();
   }
   foreach ($lines as $line) {
      // curl marks HttpOnly cookies by prefixing the line with "#HttpOnly_";
      // those are real cookie lines, not comments
      if (strncmp($line, '#HttpOnly_', 10) === 0) {
         $line = substr($line, 10);
      }
      if ($line === '' || $line[0] == '#') {
         continue;
      }
      // a cookie line has exactly seven tab-separated fields
      if (substr_count($line, "\t") != 6) {
         continue;
      }
      $tokens = array_map('trim', explode("\t", $line));
      // field 5 is the cookie name, field 6 its value
      $cookies[$tokens[5]] = $tokens[6];
   }
   print_r($cookies);

设置保存cookie至文件:

<?php
   // the seven fields of one cookie-file line, in file order
   $domain = '.haiyun.me';
   $flag = 'TRUE';
   $path = '/';
   $secure = 'FALSE';
   $expiration = 0;
   $key = 'key';
   $value = 'value';

   // tab-separated, newline-terminated
   $cookie = implode("\t", array($domain, $flag, $path, $secure, $expiration, $key, $value)) . "\n";

   // append to the cookie file
   $file = fopen("/tmp/reg.cookie", "a");
   fwrite($file, $cookie);
   fclose($file);

Typecho代码高亮Codebox转SyntaxHighlighter

发布时间:November 9, 2014 // 分类:PHP,Typecho // 1 Comment

由于SQL正则替换不支持反向引用使用PHP读取替换并重新写入数据库:

<?php
// One-off migration: rewrites <code> tags in typecho_contents.text into
// fenced code blocks and marks each row as markdown.
$db = 'typecho';
$user = 'root';
$pass = 'password';
try {
  $dbo = new PDO('mysql:host=localhost;dbname='.$db, $user, $pass, array(
    PDO::MYSQL_ATTR_INIT_COMMAND => "SET NAMES 'utf8';",
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
  ));
} catch (PDOException $e) {
  $error = $e->getMessage();
  die("PDO Execute Error : ".$error."\n");
}

// read every row up front so the SELECT is finished before the UPDATEs start
$sql = "SELECT cid,text FROM typecho_contents";
$rows = $dbo->query($sql)->fetchAll(PDO::FETCH_NUM);

// prepared once and reused for every row
$st = $dbo->prepare("UPDATE typecho_contents set `text` = ? where cid = ?");
$pattern = "/<code\s+?lang=[\'\"](\w+?)[\'\"]>/i";
$marker = '<!--markdown-->';

foreach ($rows as $row) {
  list($cid, $text) = $row;
  echo $cid."\n";
  // a NULL column becomes an empty string
  $text = (string) $text;
  // <code lang="x"> opens a fence tagged with the language
  $text = preg_replace($pattern, "```\$1", $text);
  // bare <code> and </code> both become a plain fence
  $text = str_replace(array('<code>', '</code>'), "```", $text);
  // add the marker only once so re-running the script stays harmless
  if (strncmp($text, $marker, strlen($marker)) !== 0) {
    $text = $marker . $text;
  }
  $st->execute(array($text, $cid));
}
分类
最新文章
最近回复
  • opnfense: 谢谢博主!!!解决问题了!!!我之前一直以为内置的odhcp6就是唯一管理ipv6的方式
  • liyk: 这个方法获取的IPv6大概20分钟之后就会失效,默认路由先消失,然后Global IPV6再消失
  • 海运: 不好意思,没有。
  • zongboa: 您好,請問一下有immortalwrt設定guest Wi-Fi的GUI教學嗎?感謝您。
  • 海运: 恩山有很多。
  • swsend: 大佬可以分享一下固件吗,谢谢。
  • Jimmy: 方法一 nghtp3步骤需要改成如下才能编译成功: git clone https://git...
  • 海运: 地址格式和udpxy一样,udpxy和msd_lite能用这个就能用。
  • 1: 怎么用 编译后的程序在家里路由器内任意一台设备上运行就可以吗?比如笔记本电脑 m参数是笔记本的...
  • 孤狼: ups_status_set: seems that UPS [BK650M2-CH] is ...