我一直在尝试从此网站下载日前市场 ExPost LMPs csv 文件:
我需要 Excel 文件中各个列和行的信息,以便稍后在我的项目中使用。因此,我编写了下面这段代码来着手处理。但是,我收到了一些错误消息,而且由于我仍在学习 Python,我不确定该如何解决这些错误。我也在尽量避免使用 ChatGPT,以便更有效地学习,这就是我在这里提问的原因。如能得到任何帮助,我将不胜感激;谢谢。
这是我在 JupyterLab 中编写的代码片段:
import urllib3
import pandas as pd
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)  # silence urllib3's InsecureRequestWarning messages
url = 'https://www.misoenergy.org/markets-and-operations/real-time--market-data/market-reports#nt=/MarketReportType:Historical%20LMP/MarketReportName:Day-Ahead%20Market%20ExPost%20LMPs%20(csv)'  # NOTE(review): assigned but never referenced again in this snippet
df = pd.read_csv('https://www.misoenergy.org/markets-and-operations/real-time--market-data/market-reports#nt=/MarketReportType:Historical%20LMP/MarketReportName:Day-Ahead%20Market%20ExPost%20LMPs%20(csv)', skiprows = 5)  # same address repeated as a literal; skiprows=5 skips the first five lines before parsing
df.head()  # show the first rows of the parsed frame as the cell output
以下是我收到的错误消息:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File /lib/python311.zip/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
File /lib/python311.zip/http/client.py:1283, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1282 """Send a complete request to the server."""
-> 1283 self._send_request(method, url, body, headers, encode_chunked)
File /lib/python311.zip/http/client.py:1329, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1328 body = _encode(body, 'body')
-> 1329 self.endheaders(body, encode_chunked=encode_chunked)
File /lib/python311.zip/http/client.py:1278, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1277 raise CannotSendHeader()
-> 1278 self._send_output(message_body, encode_chunked=encode_chunked)
File /lib/python311.zip/http/client.py:1038, in HTTPConnection._send_output(self, message_body, encode_chunked)
1037 del self._buffer[:]
-> 1038 self.send(msg)
1040 if message_body is not None:
1041
1042 # create a consistent interface to message_body
File /lib/python311.zip/http/client.py:976, in HTTPConnection.send(self, data)
975 if self.auto_open:
--> 976 self.connect()
977 else:
File /lib/python311.zip/http/client.py:1448, in HTTPSConnection.connect(self)
1446 "Connect to a host on a given (SSL) port."
-> 1448 super().connect()
1450 if self._tunnel_host:
File /lib/python311.zip/http/client.py:942, in HTTPConnection.connect(self)
941 sys.audit("http.client.connect", self, self.host, self.port)
--> 942 self.sock = self._create_connection(
943 (self.host,self.port), self.timeout, self.source_address)
944 # Might fail in OSs that don't implement TCP_NODELAY
File /lib/python311.zip/socket.py:851, in create_connection(address, timeout, source_address, all_errors)
850 if not all_errors:
--> 851 raise exceptions[0]
852 raise ExceptionGroup("create_connection failed", exceptions)
File /lib/python311.zip/socket.py:836, in create_connection(address, timeout, source_address, all_errors)
835 sock.bind(source_address)
--> 836 sock.connect(sa)
837 # Break explicitly a reference cycle
OSError: [Errno 23] Host is unreachable
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
Cell In[26], line 5
3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
4 url = 'https://www.misoenergy.org/markets-and-operations/real-time--market-data/market-reports#nt=/MarketReportType:Historical%20LMP/MarketReportName:Day-Ahead%20Market%20ExPost%20LMPs%20(csv)'
----> 5 df = pd.read_csv('https://www.misoenergy.org/markets-and-operations/real-time--market-data/market-reports#nt=/MarketReportType:Historical%20LMP/MarketReportName:Day-Ahead%20Market%20ExPost%20LMPs%20(csv)', skiprows = 5)
6 df.head()
File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File /lib/python3.11/site-packages/pandas/io/common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
710 codecs.lookup_error(errors)
712 # open URLs
--> 713 ioargs = _get_filepath_or_buffer(
714 path_or_buf,
715 encoding=encoding,
716 compression=compression,
717 mode=mode,
718 storage_options=storage_options,
719 )
721 handle = ioargs.filepath_or_buffer
722 handles: list[BaseBuffer]
File /lib/python3.11/site-packages/pandas/io/common.py:363, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
361 # assuming storage_options is to be interpreted as headers
362 req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options)
--> 363 with urlopen(req_info) as req:
364 content_encoding = req.headers.get("Content-Encoding", None)
365 if content_encoding == "gzip":
366 # Override compression based on Content-Encoding header
File /lib/python3.11/site-packages/pandas/io/common.py:265, in urlopen(*args, **kwargs)
259 """
260 Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
261 the stdlib.
262 """
263 import urllib.request
--> 265 return urllib.request.urlopen(*args, **kwargs)
File /lib/python311.zip/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
214 else:
215 opener = _opener
--> 216 return opener.open(url, data, timeout)
File /lib/python311.zip/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout)
516 req = meth(req)
518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 519 response = self._open(req, data)
521 # post-process response
522 meth_name = protocol+"_response"
File /lib/python311.zip/urllib/request.py:536, in OpenerDirector._open(self, req, data)
533 return result
535 protocol = req.type
--> 536 result = self._call_chain(self.handle_open, protocol, protocol +
537 '_open', req)
538 if result:
539 return result
File /lib/python311.zip/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
494 for handler in handlers:
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
498 return result
File /lib/python311.zip/urllib/request.py:1391, in HTTPSHandler.https_open(self, req)
1390 def https_open(self, req):
-> 1391 return self.do_open(http.client.HTTPSConnection, req,
1392 context=self._context, check_hostname=self._check_hostname)
File /lib/python311.zip/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
-> 1351 raise URLError(err)
1352 r = h.getresponse()
1353 except:
URLError: <urlopen error [Errno 23] Host is unreachable>
尽管尝试了各种修复方法,但我尚未找到解决方案。我也参考了本站上的许多其他解决方案,但这些方案似乎都不适用于我的情况。任何提示/帮助将不胜感激。
这里有两件事:
1. 您传给 `pd.read_csv` 的地址是市场报告网页的地址,而不是 csv 文件本身的地址;
2. `url` 变量定义后并未被使用。

因此,考虑到这些事情,下面的代码应该可以工作。请注意,这里的 url 是从您最初使用的链接页面上任意选取的第一个 csv 文件的地址:
import urllib3
import pandas as pd
# Keep urllib3 from emitting InsecureRequestWarning messages.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Direct link to a single report csv file rather than the report listing page
# (the file name suggests the 2024-05-24 report).
csv_url = 'https://docs.misoenergy.org/marketreports/20240524_da_expost_lmp.csv'

# skiprows=5 drops the first five lines of the file before parsing.
df = pd.read_csv(csv_url, skiprows=5)
df.head()