我已经被这个错误困扰了好几天。我查阅了 StackOverflow 和其他网站上的许多相关解决方案,但问题仍然没有解决。
我正在 Jupyter Notebook 中使用以下 Python 代码(Python 3,pandas 版本 1.0.5)把网上的数据按 CSV 格式读入。
我参考了此视频(https://www.youtube.com/watch?v=tW1BWtQRZ2M):以下代码在视频的 28:26 处运行正常,但我运行时却收到了错误。
df=pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep= '\t')
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-6-68e8ad1d75d5> in <module>
1 ## URL to CSV
2
----> 3 df=pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep= '\t')
4 ## the main thing is when you downalod from internet and open in notepad it will have slash t as separtor
~\Anan\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
674 )
675
--> 676 return _read(filepath_or_buffer, kwds)
677
678 parser_f.__name__ = name
~\Anan\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
428 # though mypy handling of conditional imports is difficult.
429 # See https://github.com/python/mypy/issues/1297
--> 430 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
431 filepath_or_buffer, encoding, compression
432 )
~\Anan\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
170
171 if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
--> 172 req = urlopen(filepath_or_buffer)
173 content_encoding = req.headers.get("Content-Encoding", None)
174 if content_encoding == "gzip":
~\Anan\lib\site-packages\pandas\io\common.py in urlopen(*args, **kwargs)
139 import urllib.request
140
--> 141 return urllib.request.urlopen(*args, **kwargs)
142
143
~\Anan\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
~\Anan\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
~\Anan\lib\urllib\request.py in http_response(self, request, response)
638 # request was successfully received, understood, and accepted.
639 if not (200 <= code < 300):
--> 640 response = self.parent.error(
641 'http', request, response, code, msg, hdrs)
642
~\Anan\lib\urllib\request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
~\Anan\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
500 for handler in handlers:
501 func = getattr(handler, meth_name)
--> 502 result = func(*args)
503 if result is not None:
504 return result
~\Anan\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
服务器返回 403,很可能是因为它拒绝了 urllib 默认的 User-Agent(pandas 内部通过 urllib 发起请求)。您可以尝试使用 `requests` 并带上浏览器的 User-Agent 请求头来下载文件:
from io import StringIO

import pandas as pd
import requests

# Send a browser-like User-Agent header with the request instead of the
# HTTP client's default one.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
}
url = "http://download.bls.gov/pub/time.series/cu/cu.item"

# A timeout keeps the notebook cell from hanging forever if the server stalls.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of feeding an HTML error page to read_csv.
response.raise_for_status()

# Wrap the downloaded text in a file-like object so pandas can parse it
# as tab-separated data.
f = StringIO(response.text)
df = pd.read_csv(f, sep="\t")
print(df.head())
输出:
item_code item_name display_level selectable sort_sequence
0 AA0 All items - old base 0 T 2
1 AA0R Purchasing power of the consumer dollar - old base 0 T 400
2 SA0 All items 0 T 1
3 SA0E Energy 1 T 375
4 SA0L1 All items less food 1 T 359