HTTPError: HTTP Error 403: Forbidden(下载 CSV 文件时出现)

问题描述 投票:0回答:1

我已经被这个错误困扰了几天。我查了很多相关的解决方案,在 StackOverflow 和其他网站上都可以找到,但仍然没有得到解决。

我正在 Jupyter Notebook 中使用以下代码把 CSV 数据下载到 DataFrame 中(Python 3,pandas 版本 1.0.5)。

我参考了此视频(https://www.youtube.com/watch?v=tW1BWtQRZ2M),以下代码在视频的 28.26 处运行正常,但我运行时却收到错误。

非常感谢任何解决方案。

CSV 的 URL

# Load the tab-separated file straight from its URL into a DataFrame.
df = pd.read_csv(
    "https://download.bls.gov/pub/time.series/cu/cu.item",
    sep="\t",
)

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-6-68e8ad1d75d5> in <module>
     1 ## URL to CSV
     2 
----> 3 df=pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep= '\t')
     4 ## the main thing is when you downalod from internet and open in notepad it will have slash t as separtor

~\Anan\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
   674         )
   675 
--> 676         return _read(filepath_or_buffer, kwds)
   677 
   678     parser_f.__name__ = name

~\Anan\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
   428     # though mypy handling of conditional imports is difficult.
   429     # See https://github.com/python/mypy/issues/1297
--> 430     fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
   431         filepath_or_buffer, encoding, compression
   432     )

~\Anan\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
   170 
   171     if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
--> 172         req = urlopen(filepath_or_buffer)
   173         content_encoding = req.headers.get("Content-Encoding", None)
   174         if content_encoding == "gzip":

~\Anan\lib\site-packages\pandas\io\common.py in urlopen(*args, **kwargs)
   139     import urllib.request
   140 
--> 141     return urllib.request.urlopen(*args, **kwargs)
   142 
   143 

~\Anan\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
   220     else:
   221         opener = _opener
--> 222     return opener.open(url, data, timeout)
   223 
   224 def install_opener(opener):

~\Anan\lib\urllib\request.py in open(self, fullurl, data, timeout)
   529         for processor in self.process_response.get(protocol, []):
   530             meth = getattr(processor, meth_name)
--> 531             response = meth(req, response)
   532 
   533         return response

~\Anan\lib\urllib\request.py in http_response(self, request, response)
   638         # request was successfully received, understood, and accepted.
   639         if not (200 <= code < 300):
--> 640             response = self.parent.error(
   641                 'http', request, response, code, msg, hdrs)
   642 

~\Anan\lib\urllib\request.py in error(self, proto, *args)
   567         if http_err:
   568             args = (dict, 'default', 'http_error_default') + orig_args
--> 569             return self._call_chain(*args)
   570 
   571 # XXX probably also want an abstract factory that knows when it makes

~\Anan\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
   500         for handler in handlers:
   501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
   503             if result is not None:
   504                 return result

~\Anan\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
   647 class HTTPDefaultErrorHandler(BaseHandler):
   648     def http_error_default(self, req, fp, code, msg, hdrs):
--> 649         raise HTTPError(req.full_url, code, msg, hdrs, fp)
   650 
   651 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 403: Forbidden 
python python-3.x pandas web-scraping http-status-code-403
1个回答
0
投票

您可以尝试使用 `requests` 下载文件:

from io import StringIO
import pandas as pd

import requests

# The plain pd.read_csv(url) call in the question failed with HTTP 403;
# sending a browser-like User-Agent header is the workaround used here.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
}
url = "http://download.bls.gov/pub/time.series/cu/cu.item"

# A timeout keeps the notebook cell from hanging forever if the server
# never answers.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of handing an error page to the CSV parser,
# which would otherwise produce a meaningless DataFrame without any warning.
response.raise_for_status()

# Wrap the downloaded text in a file-like object so pandas can parse it.
f = StringIO(response.text)

# The file is tab-separated, hence sep="\t".
df = pd.read_csv(f, sep="\t")
print(df.head())

打印:

  item_code                                           item_name  display_level selectable  sort_sequence
0       AA0                                All items - old base              0          T              2
1      AA0R  Purchasing power of the consumer dollar - old base              0          T            400
2       SA0                                           All items              0          T              1
3      SA0E                                              Energy              1          T            375
4     SA0L1                                 All items less food              1          T            359
© www.soinside.com 2019 - 2024. All rights reserved.