我正在尝试集成一个 Web 服务,它的 JSON 响应中会返回用于获取接下来 1,000 条结果的 URL。我想做的是循环执行 cURL 请求,直到响应中的 `next` 变量为空。
我在实现时遇到了问题:拿到返回的 `next` 值之后,我不知道如何回到循环开头,用它再次发起请求。
// Fetch the first page (offset 0, up to 1,000 rows) of transactions and pull
// the URL of the following page out of the JSON response.
$url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?offset=0&limit=1000";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$url);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
// Hand the body back from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status >= 400 as a failure, so curl_exec() returns false.
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; confirm it is
// really needed before using this against a production endpoint.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
$result = curl_exec($ch);
if ($result === false) {
    // Fail loudly rather than appending nothing and decoding stale file data.
    throw new RuntimeException('cURL request failed: ' . curl_error($ch));
}
// NOTE(review): rj_array is a bare constant; presumably it is define()d
// elsewhere as the path of the raw-response log - confirm.
file_put_contents(rj_array, $result, FILE_APPEND);
// Kept in case later code reads the accumulated raw log from $server.
$server = file_get_contents(rj_array);
// Decode this response only. The log file is opened with FILE_APPEND, so once
// it holds more than one response it is no longer a single valid JSON
// document and json_decode() would return null for it.
$json_result = json_decode($result, true);
// null when the response has no "next" entry (or is not valid JSON).
$next = isset($json_result['next']) ? $json_result['next'] : null;
我正在考虑做这样的事情:
// Sketch of the intended pagination loop: iterate for as long as $next is non-empty.
while(!empty($next)){
// Do the curl request again with the new URL $next
// As written the body never reassigns $next, so a non-empty value would keep
// this loop running forever; the real body has to overwrite $next from each
// new response.
}
但是我不能每次都重置 $next 变量。有人有什么想法吗?
为什么不这样做:
// Page through the transactions API: request offset 0 first, then keep
// requesting whatever URL the previous response returned in "next" until a
// response comes back without one.
$base_url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?limit=1000";
// $next always holds the full URL of the page to fetch, or false when done.
$next = $base_url . "&offset=0";

// Every request uses the same options, so configure a single handle up front
// and only swap the URL inside the loop.
$ch = curl_init();
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status >= 400 as a failure, so curl_exec() returns false.
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; confirm it is
// really needed before using this against a production endpoint.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

while( $next !== false ) {
    $url = $next;
    curl_setopt($ch, CURLOPT_URL, $url);
    $result = curl_exec($ch);
    if ($result === false) {
        // Stop instead of looping on a failed page.
        throw new RuntimeException('cURL request failed for ' . $url . ': ' . curl_error($ch));
    }
    // NOTE(review): rj_array is a bare constant; presumably it is define()d
    // elsewhere as the path of the raw-response log - confirm.
    file_put_contents(rj_array, $result, FILE_APPEND);
    // Decode the current page only. The appended log file contains several
    // JSON documents back to back after the first iteration, which
    // json_decode() cannot parse.
    $json_result = json_decode($result, true);
    // A missing or empty "next" means this was the last page; false is the
    // loop's stop sentinel.
    $next = !empty($json_result['next']) ? $json_result['next'] : false;
}
// Raw log of every page fetched; read once after the loop instead of
// re-reading the growing file on each iteration.
$server = file_get_contents(rj_array);
我确实喜欢递归函数方法:
/**
 * Fetch a paginated JSON endpoint recursively, following each response's "next" URL.
 *
 * Each raw response body is appended to the file named by the rj_array
 * constant, and the decoded response is stored in the returned array under
 * the URL it was fetched from.
 *
 * @param string      $url    URL of the page to fetch.
 * @param array       $return Pages collected so far, keyed by source URL.
 * @param string|null $cookie Value for the Cookie request header. Function
 *                            scope cannot see the caller's $cookie variable,
 *                            so it has to be passed in; when null or empty
 *                            the header is not sent.
 *
 * @return array $return extended with the page for $url and every page
 *               reachable from it through "next".
 *
 * @throws RuntimeException When the cURL request fails (transport error or
 *                          HTTP status >= 400).
 */
function crawler($url, $return, $cookie = null) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL,$url);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Treat HTTP status >= 400 as a failure, so curl_exec() returns false.
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    if ($cookie !== null && $cookie !== '') {
        curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
    }
    // NOTE(review): this disables TLS certificate verification; confirm it is
    // really needed before using this against a production endpoint.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    $result = curl_exec($ch);
    if ($result === false) {
        throw new RuntimeException('cURL request failed for ' . $url . ': ' . curl_error($ch));
    }
    // NOTE(review): rj_array is a bare constant; presumably it is define()d
    // elsewhere as the path of the raw-response log - confirm.
    file_put_contents(rj_array, $result, FILE_APPEND);
    // Decode this page only. Re-reading the appended log would hand
    // json_decode() several concatenated documents from the second page on,
    // and it would return null.
    $json_result = json_decode($result, true);
    $return[$url] = $json_result; // keep each decoded page under the URL it came from
    $next = isset($json_result['next']) ? $json_result['next'] : null;
    if(!empty($next)) {
        // More pages: recurse with the next URL, carrying the collected data
        // and the cookie along.
        return crawler($next, $return, $cookie);
    }
    return $return;
}
$url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?offset=0&limit=1000";
$allJsonResults = crawler($url, [], $cookie);
我不建议预测试循环或递归,这是后测试循环的任务。
在进入循环之前,先设置 cURL 调用中不变的选项。在循环内部调整 offset 和 limit 的值,并检查 `next` 的值,以确定是否需要再次迭代。将 `array_push()` 与展开运算符(`...`)一起使用,把新的数据行追加到结果数组中。
代码:(模拟演示)
// Collect every row of a paginated endpoint with a post-test loop: the first
// request always happens, and another follows while the response reports a
// non-empty "next".
// sprintf() template; the two %d placeholders receive the offset and the limit.
$url = "https://example.com/process/api/transactions/"
    . "from/$yesterday/to/$today/for/company/$company?offset=%d&limit=%d";

// Options that never change are set once, before the loop.
$ch = curl_init();
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status >= 400 as a failure, so curl_exec() returns false.
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; confirm it is
// really needed before using this against a production endpoint.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

$offset = 0;
$limit = 1000;
$result = [];
do {
    curl_setopt($ch, CURLOPT_URL, sprintf($url, $offset, $limit));
    $json = curl_exec($ch);
    if ($json === false) {
        // Without this check the spread below would try to unpack null and
        // die with an unrelated-looking error.
        throw new RuntimeException('cURL request failed at offset ' . $offset . ': ' . curl_error($ch));
    }
    // save the returned json string to your system if you wish
    $response = json_decode($json, true);
    // Only unpack when the page actually carries rows; a missing or empty
    // "results" entry contributes nothing.
    if (!empty($response['results']) && is_array($response['results'])) {
        array_push($result, ...$response['results']);
    }
    $offset += $limit;
} while (!empty($response['next']));
var_export($result);