有没有更好的方法从字符串中提取信息?

问题描述 投票:17回答:4

假设我有一个字符串数组,需要从其中提取特定的信息,有没有简单的方法可以做到?

假设数组是这样的:

// Sample input: one record per string. Every record contains a single
// slash-separated date (month/day/four-digit year) between the name and the fruit.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

假设我想提取日期并将其保存到另一个数组中,我可以创建这样的函数

/**
 * Extracts the first date (digits/digits/four digits, e.g. "12/16/1988")
 * from every string in `arr`.
 *
 * @param {string[]} arr - Strings that may contain a date.
 * @returns {string[]} The first date of each string, in input order. Strings
 *   without a date contribute nothing to the result.
 */
function extractDates(arr)
{
  // No `g` flag on purpose: exec() on a global regex remembers lastIndex
  // between calls, which would make reusing one regex across strings unsafe.
  const dateRegex = /(\d{1,2}\/){2}\d{4}/;
  const dateArr = [];

  for (let i = 0; i < arr.length; i++)
  {
    const match = dateRegex.exec(arr[i]);

    // exec() returns null when the string has no date; skip it instead of
    // crashing on null.pop().
    if (match === null)
    {
      continue;
    }

    // match is [fullMatch, lastCaptureGroup], e.g. ["12/16/1988", "16/"];
    // drop the capture group so only the date remains.
    match.pop();
    dateArr.push(match);
  }

  // dateArr holds one-element arrays; flatten them into plain strings.
  return dateArr.flat();
}

虽然这很有效,但它很笨重并需要pop(),因为它将返回一个数组数组,即:["12/16/1988", "16/"],此外我需要调用flat()

另一种选择是按给定位置截取子字符串,为此我需要知道正则表达式匹配的位置。

/**
 * Extracts the first date-like token from every string in `arr` by locating
 * where the date starts and cutting the string at the next whitespace.
 *
 * @param {string[]} arr - Strings that may contain a date such as "12/16/1988".
 * @returns {string[]} One substring per string that contains a date, in input
 *   order. Strings without a date are skipped.
 */
function extractDates2(arr)
{
  const dates = [];

  for (const str of arr)
  {
    const begin = regexIndexOf(str, /(\d{1,2}\/){2}\d{4}/);

    // No date in this string: skip it rather than cutting at position -1.
    if (begin === -1)
    {
      continue;
    }

    // The token ends at the next whitespace. When the date is the last thing
    // in the string there is none, so cut at the end of the string instead of
    // letting substring() swap its arguments and return the wrong part.
    const nextSpace = regexIndexOf(str, /\s/, begin);
    const end = (nextSpace === -1) ? str.length : nextSpace;

    dates.push(str.substring(begin, end));
  }

  return dates;
}

当然,它用到了下面这个 regexIndexOf() 函数:

/**
 * Regex counterpart of String.prototype.indexOf: finds the position of the
 * first match of `regex` in `str`, searching from `start`.
 *
 * @param {string} str - The string to search.
 * @param {RegExp} regex - The pattern to look for.
 * @param {number} [start=0] - Position to start searching from. Clamped to
 *   the range [0, str.length] and truncated to an integer, mirroring how
 *   String.prototype.indexOf treats its position argument.
 * @returns {number} Index (relative to the whole string) of the first match
 *   at or after `start`, or -1 when there is none.
 */
function regexIndexOf(str, regex, start = 0)
{
  // Without clamping, a negative or oversized start would be added to the
  // relative offset below and yield an index that does not point into str.
  const from = Math.min(Math.max(Math.trunc(start) || 0, 0), str.length);

  // search() returns the offset inside the substring; shift it back.
  const offset = str.substring(from).search(regex);
  return (offset >= 0) ? (offset + from) : -1;
}

同样,这个函数也能工作,但对于这么简单的提取来说,这样做似乎太繁琐了。有没有更简单的方法把数据提取到数组中?

javascript arrays regex
4个回答
19
投票

一种选择是用一个不会被匹配到的分隔符(例如 `,`)把所有字符串连接起来,然后只需执行一次全局匹配,即可从中得到日期数组:

// Sample records; each one contains exactly one date.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

// Glue all records into one string using a separator that can never be part
// of a date, then let a single global match return every date at once.
const joinedRecords = infoArr.join(',');
const datePattern = /(\d{1,2}\/){2}\d{4}/g;
const result = joinedRecords.match(datePattern);

console.log(result);

21
投票

一种方法可能是在数组元素上使用map(),在每个元素上应用匹配,最后调用flat()以获得所需的结果:

// Sample records; each one contains exactly one date.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

// Step 1: one array of date matches per record.
const matchesPerRecord = infoArr.map((record) => record.match(/(\d{1,2}\/){2}\d{4}/g));

// Step 2: flatten the per-record arrays into a single list of dates.
const result = matchesPerRecord.flat();

console.log(result);

或者,您可以使用flatMap()

// Sample records; each one contains exactly one date.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

// Returns every date found in one record (an array of matches).
const findDates = (record) => record.match(/(\d{1,2}\/){2}\d{4}/g);

// flatMap() maps each record to its matches and flattens one level in one pass.
const result = infoArr.flatMap(findDates);

console.log(result);

此外,如果存在不含日期的字符串,而您需要从最终数组中去掉由此产生的 null 值,可以像下面这样应用 filter():

// Variant with map() + flat(): a record without a date yields null from
// match(), flat() keeps that null, and the final filter() drops it.
const result = infoArr
  .map((record) => record.match(/(\d{1,2}\/){2}\d{4}/g))
  .flat()
  .filter((date) => date !== null);

// Variant with flatMap(): a record without a date still yields null, which
// the final filter() removes.
const result = infoArr
  .flatMap((record) => record.match(/(\d{1,2}\/){2}\d{4}/g))
  .filter((date) => date !== null);

An example with conflicting data:

// Records chosen to stress the pattern: two dates in one record, no date at
// all, a five-digit year, and a dash instead of the second slash.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple 10/22/1922",
  "2 James Smith orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/19075 peach",
  "5 Doug Jones 11/10-1975 peach"
];

// Keeps real matches and rejects the null left by records without a date.
const isDate = (date) => date !== null; /* or simply: date => date */

const result = infoArr
  .flatMap((record) => record.match(/(\d{1,2}\/){2}\d{4}/g))
  .filter(isDate);

console.log(result);

Alternative without flat():

由于 flat() 和 flatMap() 目前仍处于“实验性”状态,可能会发生变化,并且某些浏览器(或某些版本)不支持它们,您可以使用下面的替代方案;其局限是每个字符串只能得到第一个匹配:

// Sample input with irregular records: the first holds two dates, the second
// holds none, the fourth has a five-digit year, and the fifth uses a dash
// where the second slash of a date would be.
const infoArr = [
  "1 Ben Howard 12/16/1988 apple 10/22/1922",
  "2 James Smith orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/19075 peach",
  "5 Doug Jones 11/10-1975 peach"
];

/**
 * Collects the first match of `regexp` from every string in `input`.
 *
 * @param {string[]} input - Strings to search.
 * @param {RegExp} regexp - Pattern to look for in each string.
 * @param {boolean} filterNulls - When truthy, falsy entries (the null left by
 *   a string without a match) are removed from the result.
 * @returns {Array<string|null>} First match per string, in input order.
 */
const getData = (input, regexp, filterNulls) =>
{
    // First match of a single string, or null when nothing matches.
    const firstMatchOf = (text) =>
    {
        const found = text.match(regexp);
        return found === null ? null : found[0];
    };

    const firstMatches = input.map((text) => firstMatchOf(text));

    if (filterNulls)
    {
        return firstMatches.filter(Boolean);
    }

    return firstMatches;
};

// Print the result twice: first keeping the null placeholders, then with them
// filtered out.
[false, true].forEach((filterNulls) =>
{
    console.log(getData(infoArr, /(\d{1,2}\/){2}\d{4}/g, filterNulls));
});

4
投票

虽然这很有效,但它很笨重并需要pop(),因为它将返回一个数组数组,即:["12/16/1988", "16/"],此外我需要调用flat

正则表达式的 exec 方法总是把完整匹配放在返回数组的 0 属性中(前提是确实匹配到了),您可以直接访问它并将其推入数组:

// Sample input: one record per string. Every record contains a single
// slash-separated date (month/day/four-digit year) between the name and the fruit.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

/**
 * Extracts the first date (digits/digits/four digits, e.g. "12/16/1988")
 * from every string in `arr`.
 *
 * @param {Iterable<string>} arr - Strings that may contain a date.
 * @returns {string[]} The first date of each string, in input order. Strings
 *   without a date are skipped.
 */
function extractDates(arr){
  // No `g` flag on purpose: exec() on a global regex remembers lastIndex
  // between calls, so one shared regex could miss matches in later strings.
  const dateRegex = /(\d{1,2}\/){2}\d{4}/;
  const dateArr = [];
  for (const str of arr){
    const date = dateRegex.exec(str);
    // exec() returns null when nothing matches; index 0 holds the full match.
    if (date !== null){
      dateArr.push(date[0]);
    }
  }
  return dateArr;
}

// Run the extraction on the sample records and print the resulting dates.
const extractedDates = extractDates(infoArr);
console.log(extractedDates);

(当然你也可以在map回调中做同样的事情)


2
投票

您可以使用 reduce() 代替循环来精简代码。注意在没有匹配时不要让 null 进入数组。

// Sample records; each one contains exactly one date.
let infoArr = [
  "1 Ben Howard 12/16/1988 apple",
  "2 James Smith 1/10/1999 orange",
  "3 Andy Bloss 10/25/1956 apple",
  "4 Carrie Walters 8/20/1975 peach",
  "5 Doug Jones 11/10/1975 peach"
];

let regex = /(\d{1,2}\/){2}\d{4}/g;

// Fold the records into one list of dates. A record without a match leaves
// the accumulator's contents unchanged, so null never ends up in the result.
let dates = infoArr.reduce((collected, line) => {
  const matches = line.match(regex);
  return matches === null ? collected : collected.concat(matches);
}, []);

console.log(dates);
© www.soinside.com 2019 - 2024. All rights reserved.