想请教一下各位大佬CURL的问题
以下就是我要请求的目标链接的php代码和python代码,php返回的是403,但是我使用简单的python请求有时正常有时403,但是用php的curl直接请求就是403了,请问各位大佬怎么解决,帮忙调试一下,谢谢
$url = 'https://hstock.org/ru'; $headers = [ "Content-type:application/json;", 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Language:ja,en-US;q=0.9,en;q=0.8', 'Accept-Encoding:gzip', 'Cache-Control:max-age=0', 'Sec-Ch-Ua-Mobile:?0', 'Sec-Ch-Ua-Platform:"Windows"', 'Sec-Fetch-Dest:document', 'Sec-Fetch-Mode:navigate', 'Sec-Fetch-Site:none', 'Sec-Fetch-User:?1', 'Upgrade-Insecure-Requests:1', 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31', ]; // 初始化cURL会话 $ch = curl_init(); // 设置cURL选项 curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); // 不自动处理重定向 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); //这个是重点,规避ssl的证书检查。 curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE); curl_setopt($ch, CURLOPT_HEADER, true); // 获取响应头信息 curl_setopt($ch, CURLOPT_NOBODY, false); // 获取响应体 curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); // 执行请求并获取响应 $response = curl_exec($ch); // 获取HTTP状态码 $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); // 关闭cURL会话 curl_close($ch); halt($response); // 分离响应头和响应体 list($header, $body) = explode("\r\n\r\n", $response, 1); // 提取重定向URL(如果有) $redirectUrl = null; if (preg_match('/Location: (.+?)\r\n/', $header, $matches)) { $redirectUrl = trim($matches[1]); } // 处理响应 if ($httpCode == 200) { echo '页面请求成功!' . PHP_EOL; echo '状态码:' . $httpCode . PHP_EOL; echo '请求URL:' . $url . PHP_EOL; // 解析HTML $html = str_get_html($body); echo $html; // 这里简单输出整个HTML对象,你可能需要更具体的操作 } elseif ($httpCode == 302) { echo '页面已重定向到新的URL地址:' . $redirectUrl . PHP_EOL; } else { echo '页面请求失败,状态码为:' . $httpCode . PHP_EOL; } import requests from bs4 import BeautifulSoup url = 'https://hstock.org/ru' headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'ja,en-US;q=0.9,en;q=0.8', 'Cache-Control': 'max-age=0', 'Sec-Ch-Ua-Mobile': '?0', 'Sec-Ch-Ua-Platform': '"Windows"', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Sec-Fetch-User': '?1', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31', } response = requests.get(url, headers=headers) if response.status_code == 200: print('页面请求成功!') print(response.status_code) print(response.url) soup = BeautifulSoup(response.content, 'html.parser') print(soup) elif response.status_code == 302: redirect_url = response.headers.get('Location') print('页面已重定向到新的URL地址:', redirect_url) else: print('页面请求失败,状态码为:', response.status_code)
关于 LearnKu
python 测试header 缺少了 "Content-type:application/json;" 加上看看
访问频率过高、访问量过大或者网站设置了访问限制,相应的增大对应的操作间隔
用tcpdump抓包详细比对下header头
我是想知道PHP的代码为什么不能请求,一请求就是403