比较一个大字符串和一个大字符串数组,在 javascript 中进行 3 个或更多连续单词匹配

问题描述 投票:0回答:2

我有一个大字符串(约 1000 个单词),想把它与一个数组中的每个元素(同样是大字符串)逐一比较,找出所有由 3 个或更多连续单词组成的匹配片段。我已经用正则表达式实现了,但得到的匹配数组始终是空的。

较小文本的示例:

// Reference sentence: runs of consecutive words from this text are searched for
// in every candidate text.
let textToCompare = "Hello there how are you doing with your life";

// Candidate texts, each tagged with an id so results can be reported per text.
let textsToCompareWith = [
  {
    id: 1,
    text: "Hope you are doing good with your life",
  },
  {
    id: 2,
    text: "what are you doing with your life. hello there how are you",
  },
  {
    id: 3,
    text: "hello there mate",
  },
];

预期输出:

[
  {id:1, matchedText:["with your life"]}, 
  {id:2, matchedText:["are you doing with your life","hello there how are you"]},
  {id:3, matchedText:[]}
];

当前输出:

[
  {id:1, matchedText:[]}, 
  {id:2, matchedText:[]},
  {id:3, matchedText:[]}
];

我的代码:

// Build a single global, case-insensitive pattern from the whole reference
// sentence: every word of textToCompare, in order, joined by "\b.*\b".
// NOTE(review): such a pattern can only match a text that contains ALL of those
// words in that order. It never matches a shorter run of 3+ consecutive words,
// which is why none of the sample texts produces a match.
let regex = new RegExp("\\b" + textToCompare.split(" ").join("\\b.*\\b") + "\\b", "gi");

// Produce one { id, matchedText } record per candidate text.
let output = textsToCompareWith.map(textObj => {
  // Match against each element in the array
  // (with the "g" flag, match() yields an array of matched substrings, or null when nothing matches)
  let matchedText = textObj?.text.match(regex);
  console.log(matchedText);
  return {
    id: textObj.id,
    matchedText: matchedText ? matchedText : [] // Return an empty array if no match is found
  };
});
                                  
console.log(output);
javascript regex string-comparison
2个回答
2
投票

你可以将两段文本的单词逐个相互比较,并记录每个匹配片段的结束位置,以免重复输出以同一位置结尾的片段。

/**
 * Finds runs of consecutive words that occur in both word lists.
 *
 * For every end position in `w2`, only the longest qualifying run ending there
 * is reported, regardless of the order in which runs are discovered. Matches
 * are returned in the order their end position was first discovered while
 * scanning `w1` from left to right.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @param {number} [minWords=3] - Minimum number of consecutive words in a match.
 * @returns {string[]} Matched runs, each joined with single spaces.
 */
const compare = (w1, w2, minWords = 3) => {
    // Maps an end index in w2 to the smallest start index of a run ending there.
    // A Map keeps the position of a key when its value is updated, so the
    // output order stays tied to first discovery.
    const startByEnd = new Map();

    for (let i = 0; i < w1.length; i++) {
        for (let j = 0; j < w2.length; j++) {
            if (w1[i] !== w2[j]) continue;

            // Length of the common run starting at w1[i] / w2[j].
            let k = 0;
            while (i + k < w1.length && j + k < w2.length && w1[i + k] === w2[j + k]) {
                k++;
            }
            if (k < minWords) continue;

            const end = j + k;
            const knownStart = startByEnd.get(end);
            // Keep the longest run for this end position, even if a shorter
            // one happened to be found earlier in w1.
            if (knownStart === undefined || j < knownStart) {
                startByEnd.set(end, j);
            }
        }
    }

    return Array.from(startByEnd, ([end, start]) => w2.slice(start, end).join(' '));
};

/** Lower-cases a string. */
const lower = s => s.toLowerCase();

/**
 * Splits text into lower-cased words. A word is a run of Unicode letters,
 * combining marks, digits or underscores, so accented words stay intact.
 * Returns an empty array when the text contains no words (match() yields
 * null in that case).
 *
 * @param {string} s - Text to split.
 * @returns {string[]} Lower-cased words in order of appearance.
 */
const tokenize = s => (s.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []).map(lower);

const textToCompare = "Hello there how are you doing with your life";

const textsToCompareWith = [
    { id: 1, text: "Hope you are doing good with your life" },
    { id: 2, text: "what are you doing with your life. hello there how are you" },
    { id: 3, text: "hello there mate" },
];

// Tokenize the reference text once; it is reused for every candidate.
const words = tokenize(textToCompare);

const result = textsToCompareWith.map(({ id, text }) => ({
    id,
    matchedText: compare(words, tokenize(text)),
}));

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }

另一种略有不同的方法:跳过已经被匹配过的单词。

/**
 * Finds runs of consecutive words that occur in both word lists, skipping
 * start positions in `w2` that an earlier match has already claimed.
 *
 * NOTE: matches are claimed in discovery order (left to right over `w1`, then
 * over `w2`). Only the START of a candidate run is checked against claimed
 * words, so a run that begins on an unclaimed word may still extend over
 * words claimed by an earlier match.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @param {number} [minWords=3] - Minimum number of consecutive words in a match.
 * @returns {string[]} Matched runs in discovery order, joined with single spaces.
 */
const compare = (w1, w2, minWords = 3) => {
    const matches = [];
    // Indices of w2 that already belong to a reported match.
    const claimed = new Set();

    for (let i = 0; i < w1.length; i++) {
        for (let j = 0; j < w2.length; j++) {
            if (claimed.has(j) || w1[i] !== w2[j]) continue;

            // w1[i] === w2[j] is known here, so the run has length >= 1.
            let len = 1;
            while (i + len < w1.length && j + len < w2.length && w1[i + len] === w2[j + len]) {
                len++;
            }
            if (len < minWords) continue;

            matches.push(w2.slice(j, j + len).join(' '));
            for (let p = j; p < j + len; p++) {
                claimed.add(p);
            }
        }
    }

    return matches;
};

/** Lower-cases a string. */
const lower = s => s.toLowerCase();

/**
 * Splits text into lower-cased words. A word is a run of Unicode letters,
 * combining marks, digits or underscores, so accented words stay intact.
 * Returns an empty array when the text contains no words (match() yields
 * null in that case).
 *
 * @param {string} s - Text to split.
 * @returns {string[]} Lower-cased words in order of appearance.
 */
const tokenize = s => (s.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []).map(lower);

const textToCompare = "Hello there how are you doing with your life";

const textsToCompareWith = [
    { id: 1, text: "Hope you are doing good with your life" },
    { id: 2, text: "what are you doing with your life. hello there how are you" },
    { id: 3, text: "hello there mate" },
];

// Tokenize the reference text once; it is reused for every candidate.
const words = tokenize(textToCompare);

const result = textsToCompareWith.map(({ id, text }) => ({
    id,
    matchedText: compare(words, tokenize(text)),
}));

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }


1
投票

我创建了一个答案,只是为了我自己学习 JavaScript。把东西拼凑在一起,我想出了:

const textToCompare = "Hello there how are you doing with your life";

// Whitespace-separated words of the reference text (outer whitespace ignored).
const words = textToCompare.trim().split(/\s+/);

const textsToCompareWith = [
  { id: 1, text: "Hope you are doing good with your life" },
  { id: 2, text: "what are you doing with your life. hello there how are you" },
  { id: 3, text: "hello there mate" },
];

// Every phrase of 3+ consecutive words, longest string first, so a long match
// is found and removed before any of its shorter sub-phrases is tried.
const combos = [...chunks(words)].sort((a, b) => b.length - a.length);

console.log(textsToCompareWith.map(({ id, text }) => ({ id, matchedText: FindMatches(text) })));

/**
 * Yields every run of at least `minWords` consecutive items of `arr`,
 * joined with single spaces.
 *
 * @param {string[]} arr - Words to build phrases from.
 * @param {number} [minWords=3] - Minimum number of words per phrase.
 * @yields {string} One phrase per (start, end) pair, ordered by start then length.
 */
function* chunks(arr, minWords = 3) {
    for (let start = 0; start + minWords <= arr.length; start++) {
        for (let end = start + minWords; end <= arr.length; end++) {
            yield arr.slice(start, end).join(" ");
        }
    }
}

/**
 * Escapes regular-expression metacharacters so `text` is matched literally.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text safe to embed in a RegExp source.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Returns the phrases that occur in `s` as whole words, case-insensitively.
 * Phrases are tried in the given order; the first occurrence of each found
 * phrase is blanked out of the working copy so later (shorter) phrases cannot
 * match the same words again.
 *
 * @param {string} s - Text to search.
 * @param {string[]} [phrases=combos] - Candidate phrases, longest first.
 * @returns {string[]} Found phrases, spelled as in `phrases`.
 */
function FindMatches(s, phrases = combos) {
    const found = [];
    let remaining = s;
    for (const phrase of phrases) {
        // Lookarounds act like \b for phrases edged by word characters but
        // also work when a phrase starts or ends with punctuation.
        const re = new RegExp(`(?<!\\w)${escapeRegExp(phrase)}(?!\\w)`, "i");
        if (re.test(remaining)) {
            found.push(phrase);
            remaining = remaining.replace(re, " ");
        }
    }
    return found;
}

我很确定这段代码有不少缺陷,看起来也很笨拙。我的思路是:假设输入可以按空格分割,把它切分成所有由 3 个或更多连续单词组成的片段;然后按长度从长到短对这些片段排序,这样就不会先匹配到较短的子串。

谁知道,也许这里的东西真的有用。

© www.soinside.com 2019 - 2024. All rights reserved.