使用Python获取网站的html代码并检查

Question

有没有一种解决方案，可以用 Python 打开某个网站，在浏览器“检查”（开发者工具）的控制台中运行脚本，并获取该网站的 HTML？（无法通过 GET 请求直接获取该网站的 HTML。）

我有这个脚本，我手动尝试过，我可以用它下载网站 html，但我想自动执行。

/**
 * Serializes the current document to an HTML string and asks the browser to
 * save it as a file named "dom".
 *
 * The direct children of `document` are walked from `document.firstChild`
 * through `nextSibling`; element, text, CDATA, comment and doctype nodes are
 * each converted to markup and concatenated. Other node types are skipped.
 * The result is wrapped in a `text/html` Blob and saved either through
 * `navigator.msSaveOrOpenBlob` (when present) or by clicking a temporary
 * `<a download>` element that points at an object URL.
 *
 * Relies on the browser globals `document`, `Node`, `Blob` and `window`.
 *
 * @returns {void}
 */
function myFunction() {
  // Declared locally on purpose: assigning to an undeclared `filename` leaks a
  // global in sloppy mode and throws a ReferenceError in strict mode / modules.
  var filename = 'dom';
  var html = '';
  var node = document.firstChild;

  while (node) {
    switch (node.nodeType) {
      case Node.ELEMENT_NODE:
        html += node.outerHTML;
        break;
      case Node.TEXT_NODE:
        html += node.nodeValue;
        break;
      case Node.CDATA_SECTION_NODE:
        html += '<![CDATA[' + node.nodeValue + ']]>';
        break;
      case Node.COMMENT_NODE:
        html += '<!--' + node.nodeValue + '-->';
        break;
      case Node.DOCUMENT_TYPE_NODE:
        // (X)HTML documents are identified by public identifiers; SYSTEM is
        // only emitted when there is a system id without a public id.
        html +=
          '<!DOCTYPE ' +
          node.name +
          (node.publicId ? ' PUBLIC "' + node.publicId + '"' : '') +
          (!node.publicId && node.systemId ? ' SYSTEM' : '') +
          (node.systemId ? ' "' + node.systemId + '"' : '') +
          '>\n';
        break;
    }
    node = node.nextSibling;
  }

  var file = new Blob([html], {
    type: 'text/html'
  });

  if (window.navigator.msSaveOrOpenBlob) {
    // IE10+
    window.navigator.msSaveOrOpenBlob(file, filename);
  } else {
    // Others: download through a temporary anchor pointing at an object URL.
    var a = document.createElement('a');
    var url = window.URL.createObjectURL(file);
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    // Clean up on the next tick, after the click has been dispatched.
    setTimeout(function () {
      document.body.removeChild(a);
      window.URL.revokeObjectURL(url);
    }, 0);
  }
}
// Re-run the DOM export every 5 seconds (delay is in milliseconds).
setInterval(myFunction, 5 * 1000);

Answer 1

您应该使用网页抓取工具，例如 Beautiful Soup。

使用Python获取网站的html代码并检查

问题描述 投票：0 回答：1

1个回答

最新问题

使用Python获取网站的html代码并检查

问题描述 投票：0 回答：1

1个回答

最新问题

问题描述 投票：0 回答：1