|  | 1 anno fa | |
|---|---|---|
| .. | ||
| demo | 1 anno fa | |
| src | 1 anno fa | |
| .gitignore | 1 anno fa | |
| LICENSE | 1 anno fa | |
| README.md | 1 anno fa | |
| README_CN.md | 1 anno fa | |
| composer.json | 1 anno fa | |
Implemented using the curl multi interface's internal IO events. It is a high-performance, highly universal, highly extensible library which is especially suitable for massive-scale tasks and cases with complex logic.
PHP: >=5.3
composer require ares333/php-curl
Curl::add() adds tasks to the task pool. Curl::start() starts the event loop and blocks. Events (onSuccess, onFail, onInfo, onTask, ...) are triggered and the corresponding callbacks are called on the fly. The loop finishes when all tasks in the task pool have finished.
basic
$curl = new Curl();
$curl->add(
    array(
        'opt' => array(
            CURLOPT_URL => 'http://baidu.com'
        ),
        'args' => 'This is user arg for ' . $v
    ),
    function ($r, $args) {
        echo "Request success for " . $r['info']['url'] . "\n";
        echo "\nHeader info:\n";
        print_r($r['info']);
        echo "\nRaw header:\n";
        print_r($r['header']);
        echo "\nArgs:\n";
        print_r($args);
        echo "\n\nBody size:\n";
        echo strlen($r['body']) . ' bytes';
        echo "\n";
    });
$curl->start();
file download
$curl = new Curl();
$url = 'http://www.baidu.com/img/bd_logo1.png';
$file = __DIR__ . '/download.png';
// $fp is closed automatically on download finished.
$fp = fopen($file, 'w');
$curl->add(
    array(
        'opt' => array(
            CURLOPT_URL => $url,
            CURLOPT_FILE => $fp,
            CURLOPT_HEADER => false
        ),
        'args' => array(
            'file' => $file
        )
    ),
    function ($r, $args) {
        echo "download finished successfully, file=$args[file]\n";
    })->start();
task callback
Task can be added in task callback. See more details in Curl::$onTask.
$curl = (new Toolkit())->getCurl();
$curl->maxThread = 1;
$curl->onTask = function ($curl) {
    static $i = 0;
    if ($i >= 50) {
        return;
    }
    $url = 'http://www.baidu.com';
    $curl->add(
        array(
            'opt' => array(
                CURLOPT_URL => $url . '?wd=' . $i ++
            )
        ));
};
$curl->start();
running info
$curl = new Curl();
$toolkit = new Toolkit();
$curl->onInfo = array(
    $toolkit,
    'onInfo'
);
$curl->maxThread = 2;
$url = 'http://www.baidu.com';
for ($i = 0; $i < 100; $i ++) {
    $curl->add(
        array(
            'opt' => array(
                CURLOPT_URL => $url . '?wd=' . $i
            )
        ));
}
$curl->start();
Run it in the CLI and it will output info in the following format:
SPD    DWN  FNH  CACHE  RUN  ACTIVE  POOL  QUEUE  TASK  FAIL  
457KB  3MB  24   0      3    3       73    0      100   0
The info callback receives all of the information. The default callback only shows part of it.
SPD:Download speed
DWN:Bytes downloaded
FNH:Task count which has finished
CACHE:Cache count which were used 
RUN:Task running count
ACTIVE:Task count which has IO activities
POOL:Task count in task pool
QUEUE:Task count which has finished and waiting for onSuccess callback to process
TASK:Task count has been added to the task pool
FAIL:Task count which has failed after retry.
transparent cache
$curl = new Curl();
$toolkit = new Toolkit();
$curl->onInfo = array(
    $toolkit,
    'onInfo'
);
$curl->maxThread = 2;
$curl->cache['enable'] = true;
$curl->cache['dir'] = __DIR__ . '/output/cache';
if (! is_dir($curl->cache['dir'])) {
    mkdir($curl->cache['dir'], 0755, true);
}
$url = 'http://www.baidu.com';
for ($i = 0; $i < 20; $i ++) {
    $curl->add(
        array(
            'opt' => array(
                CURLOPT_URL => $url . '?wd=' . $i
            )
        ));
}
$curl->start();
Run the script a second time and it will output:
SPD  DWN  FNH  CACHE  RUN  ACTIVE  POOL  QUEUE  TASK  FAIL  
0KB  0MB  20   20     0    0       0     0      20    0
The result indicates that all tasks are served from the cache and there is no network activity.
dynamic tasks
$curl = new Curl();
$url = 'http://baidu.com';
$curl->add(array(
    'opt' => array(
        CURLOPT_URL => $url
    )
), 'cb1');
echo "add $url\n";
$curl->start();
function cb1($r, $args)
{
    echo "finish " . $r['info']['url'] . "\n";
    $url = 'http://bing.com';
    $r['curl']->add(
        array(
            'opt' => array(
                CURLOPT_URL => $url
            )
        ), 'cb2');
    echo "add $url\n";
}
function cb2($r, $args)
{
    echo "finish " . $r['info']['url'] . "\n";
}
Output is as below:
add http://baidu.com
finish https://www.baidu.com/
add http://bing.com
finish http://cn.bing.com/
The finished URL has a trailing / because curl followed the 3xx redirect (Curl::$opt[CURLOPT_FOLLOWLOCATION]=true). Curl::$onTask should be used when dealing with massive-scale tasks.
public $maxThread = 10
Maximum number of parallel workers, which can be changed on the fly.
public $maxTry = 3
Max retry before onFail event is triggered.
public $opt = array ()
Global CURLOPT_* options, which can be overridden by the task config.
public $cache = array(
    'enable' => false,
    'compress' => 0, //0-9,6 is a good choice if you want use compress.
    'dir' => null, //Cache dir which must exists.
    'expire' => 86400,
    'verifyPost' => false //If http post will be part of cache id.
);
Global cache config. The cache id is derived from the url. This config can be overridden by the task config and by the onSuccess callback's return value, both using the same config format.
public $taskPoolType = 'queue'
stack or queue.
public $onTask = null
Triggered when the parallel count is less than Curl::$maxThread and the task pool is empty. The only argument for the callback is the current Curl instance.
public $onInfo = null
Running-state callback, triggered when an IO event happens but at most once per second. Callback arguments are as follows:
bool, is last call or not.
public $onEvent = null
Triggered on each IO event. The only argument for the callback is the current Curl instance.
public $onFail = null
Global callback for failures. Can be overridden by the task's onFail callback. The callback receives two arguments.
$item['args'] value from Curl::add().
public function add(array $item, $onSuccess = null, $onFail = null, $ahead = null)
Add one task to the pool.
Return: current Curl instance.
public function start()
Start the event loop and block.
public function stop($onStop = null)
Stop the event loop; $onStop will be called once the loop has stopped. The only argument for the callback is the current Curl instance.
function __construct(Curl $curl = null)
A default Curl instance is used if $curl is not set.
The default instance will initialize Curl::$opt, Curl::$onInfo and Curl::$onFail. The initial values of Curl::$opt are as follows:
array(
    CURLINFO_HEADER_OUT => true,
    CURLOPT_HEADER => true,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_AUTOREFERER => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_MAXREDIRS => 5
)
function onFail($error, $args)
Default onFail.See Curl::$onFail for details.
function onInfo($info)
Default onInfo.See Curl::onInfo for details.
The method can also be called manually with a string parameter, which will be added to the output buffer. The purpose is to avoid the effects of shell control characters.
function htmlEncode($html, $in = null, $out = 'UTF-8', $mode = 'auto')
Powerful html encoding transformer which can detect the current encoding automatically and replace the encoding declared in <head></head>. Arguments:
Return: New encoded html.
function isUrl($url)
Checks whether $url is a full url. Returns bool.
function urlFormater($url)
Replace space,process scheme and hosts and remove anchor etc.
function buildUrl(array $parse)
Inverse function for parse_url().
function uri2url($uri, $urlCurrent)
Transform uri to full url for currentPage.$urlCurrent should be redirected after 3xx.
function url2uri($url, $urlCurrent)
Transform full url to uri for currentPage.$urlCurrent should be redirected after 3xx.
function url2dir($url)
Transform a full url to a dir. $url should be the final url after any 3xx redirect.
function getCurl()
return current Curl instance.
public $expire = null
Local file expire time.
public $download = array(
	'pic' => true,
	'video' => false
);
false will use remote file.
public $blacklist = array();
Used for urls that no longer work.
public $downloadExtension = array();
Download by extension(in html tag a),for example zip,rar.
public $httpCode = array(
    200
);
Valid http code.Invalid http code will be reported and ignored.
function __construct($dir)
Local top directory.
function add($url, $depth = null)
Add a start url. All sub urls with the prefix $url will be downloaded if $depth is null. Return: Self instance.
function start()
Start clone and block.
Based on Curl and Toolkit; it inherits the power of Curl and has its own features, as below:
Notice: Cloning is very complex work and has been tested with only a limited number of websites. Below are demos from some of the tests:
demo2: Source has been closed Clone