如何使用纯 JS 从此网页上带有 itemprop 属性的 span 行中提取值?

问题描述 投票:0回答:2

我正在尝试从 Winmo 个人资料页面(例如 https://open.winmo.com/open/decision_makers/ca/pasadena/jorge/garcia/489325)中解析某些没有 id 或 class 值的 span 标签,例如

<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>

我发现两个旧的 StackOverflow 示例很有用(这个和这个),但是使用以下代码时,网页上 9 个匹配 span itemprop 的行中,每一个得到的仍然都是空值(null):

// Walk every <span> on the page and, for those carrying an itemprop attribute,
// record the element's nodeName and nodeValue, logging the values array each time.
var nodes=[], values=[];
var els = document.getElementsByTagName('span'), i = 0, whatev;
for(i; i < els.length; i++) {
    // NOTE: `prop` is never declared with var/let/const, so in sloppy mode this
    // assignment creates an implicit global (and throws in strict mode).
    prop = els[i].getAttribute('itemprop');
    if(prop) {
        whatev = els[i];
        // nodeName of an element is its tag name ("SPAN"), not the itemprop name.
        nodes.push(whatev.nodeName); // provides attribute names, in all CAPS = "SPAN"
        // nodeValue is always null for Element nodes; the visible text belongs to the
        // span's child text node and is readable through textContent instead.
        values.push(whatev.nodeValue); // for attribute values, why saying null if els[i] is fine?
        console.log(values); // (whatev) outputs whole thing, but it seems values is what I need
       // break; // need this? seems to prevent values after first span from generating
    }
}

如何从这类页面中仅返回部分隐藏的电子邮件值(j****@***********.com)和 postalCode(91195)?我需要纯 JS 的解决方案,因为我要把它压缩进另一个书签小程序(bookmarklet)中。

javascript microdata
2个回答
0
投票

您可以根据 itemprop 属性获取对应的键值。

类似这样:

/**
 * Collect the text of every <span> that has an itemprop attribute.
 *
 * Despite the name, the result is a plain object, not a JSON string.
 *
 * @param {Document|Element} [root=document] - Node whose descendant spans are
 *   scanned. Defaults to the whole page, so existing zero-argument calls keep
 *   working; pass an element to restrict the scan to one subtree.
 * @returns {Object<string, string>} Map from itemprop name to the span's
 *   innerText. Spans with no itemprop or with empty text are skipped; when the
 *   same itemprop occurs more than once, the last span wins.
 */
function getItemPropsAsJSON(root = document) {
  const ob = {};
  for (const el of Array.from(root.getElementsByTagName('span'))) {
    const key = el.getAttribute('itemprop');
    const val = el.innerText;
    // Both checks are truthiness checks: a missing attribute (null) and an
    // empty string are dropped alike.
    if (key && val) ob[key] = val;
  }
  return ob;
}
/* expected output: 
    {
      "name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
      "email": "j****@***********.com",
      "telephone": "(347) ***-****",
      "streetAddress": "177 East West Colorado Boulevard",
      "addressLocality": "Pasadena",
      "addressRegion": "CA",
      "postalCode": "91195",
      "addressCountry": "USA"
    }
*/

如果您要在其他地方使用这些键,可能需要先将其规范化,因为 itemprop 属性值并不总能直接转换为理想的对象键名格式。为此,请使用以下代码:

/**
 * Convert an itemprop name into a lower-case, underscore-separated object key.
 *
 * - camelCase / PascalCase names made only of word characters are split before
 *   every capital letter: "streetAddress" -> "street_address".
 * - Any other string has each run of non-word characters replaced by a single
 *   underscore: "street address" -> "street_address".
 * - Leading digits are removed and the result is lower-cased in both cases.
 *
 * @param {*} key - The raw name; surrounding whitespace is ignored.
 * @returns {string} The normalized key, or 'failed_object' when `key` is not a
 *   non-empty string.
 */
function normalizeObjectNotation(key) {
  if (typeof key !== 'string' || key === '') return 'failed_object';

  // Trim BEFORE classifying: otherwise padding such as " postalCode " counts as
  // non-word characters and the camelCase split is silently skipped.
  const trimmed = key.trim();
  const isCamelCase = /[A-Z]/.test(trimmed) && !/\W/.test(trimmed);

  const separated = isCamelCase
    ? trimmed.split(/(?=[A-Z])/).join('_')
    : trimmed.replace(/\W+/g, '_');

  return separated.replace(/^\d+/, '').toLowerCase();
}

/**
 * Collect the text of every <span> that has an itemprop attribute, keyed by the
 * itemprop name after it has been passed through normalizeObjectNotation.
 *
 * Despite the name, the result is a plain object, not a JSON string.
 *
 * @param {Document|Element} [root=document] - Node whose descendant spans are
 *   scanned. Defaults to the whole page, so existing zero-argument calls keep
 *   working; pass an element to restrict the scan to one subtree.
 * @returns {Object<string, string>} Map from normalized itemprop name to the
 *   span's innerText. Spans with no itemprop or with empty text are skipped;
 *   when two spans normalize to the same key, the last one wins.
 */
function getItemPropsAsJSON(root = document) {
  const ob = {};
  for (const el of Array.from(root.getElementsByTagName('span'))) {
    const key = el.getAttribute('itemprop');
    const val = el.innerText;
    // Both checks are truthiness checks: a missing attribute (null) and an
    // empty string are dropped alike.
    if (key && val) ob[normalizeObjectNotation(key)] = val;
  }
  return ob;
}
// Run the collector against the current page's document.
getItemPropsAsJSON()

/* Expected Output:

{
  "name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
  "email": "j****@***********.com",
  "telephone": "(347) ***-****",
  "street_address": "177 East West Colorado Boulevard",
  "address_locality": "Pasadena",
  "address_region": "CA",
  "postal_code": "91195",
  "address_country": "USA"
}

*/

0
投票

您可以通过以下选择器获取电子邮件所在的 span 元素

span[itemprop="email"]

并用同样的方法获取 postalCode

span[itemprop="postalCode"]

使用这些选择器,通过 querySelector 获取对应的元素,然后提取其 textContent

// Look up the first <span> for each itemprop name and read its text.
// querySelector returns null when a page has no matching span, so guard the
// lookup: a missing field becomes null instead of throwing a TypeError.
const [email, postalCode] = ['email', 'postalCode'].map((prop) => {
  const el = document.querySelector(`span[itemprop="${prop}"]`);
  // textContent keeps the markup's raw whitespace; trim it to get the bare value.
  return el ? el.textContent.trim() : null;
});
console.log(email);
console.log(postalCode);
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>
© www.soinside.com 2019 - 2024. All rights reserved.