海运的博客

自用完美PHP异步并发multi curl

发布时间:December 7, 2014 // 分类:PHP // No Comments

修改自https://code.google.com/p/rolling-curl/

<?php
   /*
   Authored by Josh Fraser (www.joshfraser.com)
   Released under Apache License 2.0

   Maintained by Alexander Makarov, http://rmcreative.ru/

   $Id$
   */

   /**
   * Class that represent a single curl request
   */
   class RollingCurlRequest {
      public $url = false;
      public $method = 'GET';
      public $post_data = null;
      public $headers = null;
      public $options = null;
      public $info = null;
      public $callback;
      public $recursion = false;

      /**
      * @param string $url
      * @param string $method
      * @param  $post_data
      * @param  $headers
      * @param  $options
      * @return void
      */
      function __construct($url, $options = null, $info = null, $method = "GET", $post_data = null, $headers = null  ) {
         $this->url = $url;
         $this->method = $method;
         $this->post_data = $post_data;
         $this->headers = $headers;
         $this->options = $options;
         $this->info = $info;
      }

      /**
      * @return void
      */
      public function __destruct() {
         unset($this->url, $this->method, $this->post_data, $this->headers, $this->options);
      }
   }

   /**
   * RollingCurl custom exception
   */
   class RollingCurlException extends Exception {
   }

   /**
   * Class that holds a rolling queue of curl requests.
   *
   * @throws RollingCurlException
   */
   class RollingCurl {
      /**
      * @var int
      *
      * Window size is the max number of simultaneous connections allowed.
      *
      * REMEMBER TO RESPECT THE SERVERS:
      * Sending too many requests at one time can easily be perceived
      * as a DOS attack. Increase this window_size if you are making requests
      * to multiple servers or have permission from the receving server admins.
      */
      private $window_size = 5;

      //private $master = 'NULL';
      //保存连接数量
      public $current_size =0;
      /**
      * @var float
      *
      * Timeout is the timeout used for curl_multi_select.
      */
      private $timeout = 10;

      /**
      * @var array
      *
      * Set your base options that you want to be used with EVERY request.
      */
      protected $options = array(
         CURLOPT_SSL_VERIFYPEER => 0,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_VERBOSE => 0,
         CURLOPT_TIMEOUT => 20,
         CURLOPT_DNS_CACHE_TIMEOUT => 3600,
         CURLOPT_CONNECTTIMEOUT => 10,
         CURLOPT_ENCODING => 'gzip,deflate',
         CURLOPT_FOLLOWLOCATION => 1,
         CURLOPT_MAXREDIRS => 2,
         CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0',
         //CURLOPT_HEADER => 1
      );

      /**
      * @var array
      */
      private $headers = array(

         'Connection: Keep-Alive',
         'Keep-Alive: 300',
         'Expect:'
      );

      /**
      * @var Request[]
      *
      * The request queue
      */
      private $requests = array();

      /**
      * @var RequestMap[]
      *
      * Maps handles to request indexes
      */
      private $requestMap = array();

      /**
      * @param  $callback
      * Callback function to be applied to each result.
      *
      * Can be specified as 'my_callback_function'
      * or array($object, 'my_callback_method').
      *
      * Function should take three parameters: $response, $info, $request.
      * $response is response body, $info is additional curl info.
      * $request is the original request
      *
      * @return void
      */
      function __construct($callback = null) {
         $this->callback = $callback;
      }

      /**
      * @param string $name
      * @return mixed
      */
      public function __get($name) {
         return (isset($this->{$name})) ? $this->{$name} : null;
      }

      /**
      * @param string $name
      * @param mixed $value
      * @return bool
      */
      public function __set($name, $value) {
         // append the base options & headers
         if ($name == "options" || $name == "headers") {
            $this->{$name} = $value + $this->{$name};
         } else {
            $this->{$name} = $value;
         }
         return true;
      }

      /**
      * Add a request to the request queue
      *
      * @param Request $request
      * @return bool
      */
      public function add($request) {
         $this->requests[] = $request;
         return true;
      }

      /**
      * Create new Request and add it to the request queue
      *
      * @param string $url
      * @param string $method
      * @param  $post_data
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
         $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options);
         return true;
      }

      /**
      * Perform GET request
      *
      * @param string $url
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function get($url, $headers = null, $options = null) {
         return $this->request($url, "GET", null, $headers, $options);
      }

      /**
      * Perform POST request
      *
      * @param string $url
      * @param  $post_data
      * @param  $headers
      * @param  $options
      * @return bool
      */
      public function post($url, $post_data = null, $headers = null, $options = null) {
         return $this->request($url, "POST", $post_data, $headers, $options);
      }

      /**
      * Execute processing
      *
      * @param int $window_size Max number of simultaneous connections
      * @return string|bool
      */
      public function execute($window_size = null) {
         // rolling curl window must always be greater than 1
         if (sizeof($this->requests) == 1) {
            return $this->single_curl();
         } else {
            // start the rolling curl. window_size is the max number of simultaneous connections
            return $this->rolling_curl($window_size);
         }
      }

      /**
      * Performs a single curl request
      *
      * @access private
      * @return string
      */
      private function single_curl() {
         $ch = curl_init();
         $request = array_shift($this->requests);
         //获取选项及header
         $options = $this->get_options($request);
         curl_setopt_array($ch, $options);
         $output = curl_exec($ch);
         $info = curl_getinfo($ch);
         //处理错误
         if (curl_error($ch))
         $info['error'] = curl_error($ch);

         // it's not neccesary to set a callback for one-off requests
         if ($request->callback) {
            $callback = $request->callback;
            if (is_callable($callback)) {
               call_user_func($callback, $output, $info, $request);
            }
         }
         else
         return $output;
         return true;
      }

      /**
      * Performs multiple curl requests
      *
      * @access private
      * @throws RollingCurlException
      * @param int $window_size Max number of simultaneous connections
      * @return bool
      */
      private function rolling_curl($window_size = null) {
         if ($window_size)
         $this->window_size = $window_size;

         // make sure the rolling window isn't greater than the # of urls
         if (sizeof($this->requests) < $this->window_size)
         $this->window_size = sizeof($this->requests);

         if ($this->window_size < 2) {
            throw new RollingCurlException("Window size must be greater than 1");
         }

         $master = curl_multi_init();

         //首次执行填满请求
         for ($i = 0; $i < $this->window_size; $i++) {
            $ch = curl_init();

            $options = $this->get_options($this->requests[$i]);

            curl_setopt_array($ch, $options);
            curl_multi_add_handle($master, $ch);

            $key = (int) $ch;
            //ch重用队列
            $chs[$key] = $ch;
            //请求map,后续根据返回信息的ch获取原始请求信息
            $this->requestMap[$key] = $i;
            $this->current_size++;
         }

         do {
            //执行句柄内所有连接,包括后来新加入的连接
            do {
               //running变量返回正在处理的curl数量,0表示当前没有正在执行的curl
               $execrun = curl_multi_exec($master, $running);
            } while ($execrun == CURLM_CALL_MULTI_PERFORM); // 7.20.0后弃用

            if ($execrun != CURLM_OK)
            echo "ERROR!\n " . curl_multi_strerror($execrun);

            //阻塞一会等待有数据可读,返回可读数量,失败为-1,避免一直循环占用CPU
            if ($running)
            curl_multi_select($master, $this->timeout);

            //读取返回的连接,并加入新的连接
            while ($done = curl_multi_info_read($master)) {

               //获取完成的句柄
               $ch = $done['handle'];
               //获取返回的请求信息
               $info = curl_getinfo($ch);
               //获取返回内容
               $output = curl_multi_getcontent($ch);
               //处理错误信息
               //if (curl_error($ch))  
               if ($done['result'] != CURLE_OK) 
               $info['error'] = curl_error($ch);

               //根据请求映射是哪个请求返回的信息,即请求数组中第i个请求
               $key = (int) $ch;
               $request = $this->requests[$this->requestMap[$key]];
               //发送返回信息到回调函数
               $callback = $request->callback;
               if (is_callable($callback)) {
                  //移除请求信息和请求映射
                  unset($this->requests[$this->requestMap[$key]]);
                  unset($this->requestMap[$key]);
                  $this->current_size--;
                  //回调函数
                  call_user_func($callback, $output, $info, $request);
               }
               //删除完成的句柄
               curl_multi_remove_handle($master, $done['handle']);

               //判断队列内的连接是否用完
               if (isset($this->requests[$i])) {
                  //重用之前完成的ch
                  $ch = $chs[$key];
                  //var_dump($ch);
                  $options = $this->get_options($this->requests[$i]);
                  curl_setopt_array($ch, $options);
                  //增加新的连接
                  curl_multi_add_handle($master, $ch);

                  //添加到request Maps,用于返回信息时根据handle找到相应连接
                  $key = (int) $ch;
                  $this->requestMap[$key] = $i;
                  $this->current_size++;
                  $i++;
               } 
            }

         } while ($this->current_size) ;
         curl_multi_close($master);
         return true;
      }

      //返回是否还有活动连接
      public function state() {
         return curl_multi_select($this->master, $this->timeout);
      }

      /**
      * Helper function to set up a new request by setting the appropriate options
      *
      * @access private
      * @param Request $request
      * @return array
      */
      private function get_options($request) {
         //获取类内选项设置
         $options = $this->__get('options');
         if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
            $options[CURLOPT_FOLLOWLOCATION] = 1;
            $options[CURLOPT_MAXREDIRS] = 5;
         }

         //附加类内设置到请求选项中
         if ($request->options) {
            $options = $request->options + $options;
         }

         //获取类内head设置
         $headers = $this->__get('headers');

         //附加header
         if ($request->headers) {
            $headers = $request->headers + $headers;
         }

         // set the request URL
         $options[CURLOPT_URL] = $request->url;

         // posting data w/ this request?
         if ($request->post_data) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
         }
         if ($headers) {
            $options[CURLOPT_HEADER] = 0;
            $options[CURLOPT_HTTPHEADER] = $headers;
         }

         return $options;
      }

      /**
      * @return void
      */
      public function __destruct() {
         unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
      }

      Function test() {

         var_dump($this->requests);
      }
   }

使用方法:

<?php
   require("class/RollingCurl.php");
   function callback($response, $info, $request) {
      print_r($response);
      print_r($info);
      print_r($request);
   }
   $rc = new RollingCurl();
   $rc->window_size = 2;
   for ($i = 1; $i < 1000; $i++) {
      $url = "http://www.baidu.com/";
      $request = new RollingCurlRequest($url);
      $request->options = array(CURLOPT_COOKIEJAR => '/tmp/ck.cookie', CURLOPT_COOKIEFILE => '/tmp/ck.cookie');
      $request->headers = array('Referer: http://www.haiyun.me');
      $request->callback = 'callback';
      $rc->add($request);
   }
   $res = $rc->execute();

标签:php, curl, libcurl, 并发, 异步

发表评论

分类
最新文章
最近回复
  • 404NTF: 交叉编译的作用是?
  • 404NTF: 请教一下 CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go ...
  • 小智: 安利个PHP开发以太坊区块链的教程: http://xc.hubwiz.com/course/...
  • 陈浩南: 您好,您教程中的 dev.openwrt.org.cn 现在似乎不可用了。请问现在应该在哪里下...
  • 斑马斑马: 怎么查看虚拟机CPU 内存使用情况
  • zzg: http://downloads.openwrt.org.cn/PandoraBox/rali...
  • xiongrui: 看图
  • 傻狍子: filebrowser好像和这功能一样? https://github.com/filebro...
  • 陶国文: 想看看能不能安装
  • 傻狍子: 服务端设置了用户名密码认证的话,在openwrt的openvpn客户端的配置文件当中该如何更改...
页面执行耗时:0.0362秒。-->