如何在javascript中按单词顺序匹配两个字符串?

问题描述 投票:0回答:3

需要找到与给定字符串最匹配的字符串。

// Candidate strings to search through.
const list = ['Mr. Adam Smith Junior', 'Anna Smith Jr.', 'Adam Jhon Smith', 'Smith Adam'];
// The string for which the best-matching entry of `list` is wanted.
const str = 'Adam Smith Jr.';

// Expected output: 'Mr. Adam Smith Junior'

我已经尝试过对单词进行分词,列表中的每一项都能与其中 2 个词(Adam / Smith / Jr.)匹配。但是我期望的输出是 list[0]。因此,我需要一种解决方案,能够按单词顺序找出匹配程度最高的字符串。

javascript string-matching
3个回答
-1
投票

我想知道您是否正在寻找这样的东西:

const list = ['Mr. Adam Smith Junior', 'Anna Smith Jr.', 'Adam Jhon Smith', 'Smith Adam'];
const str = 'Adam Smith Jr.';

/**
 * Finds the first element of `arr` that contains the longest possible
 * prefix of `s`, prints it to the console and returns it.
 *
 * Prefixes are tried from the full string down to a length of 2 characters;
 * a single-character prefix is never tried. The comparison is a literal,
 * case-sensitive substring test.
 *
 * @param {string[]} arr - Candidate strings.
 * @param {string} s - The string whose prefixes are searched for.
 * @returns {string|undefined} The matching element, or `undefined` when no
 *     prefix of at least 2 characters occurs in any element.
 */
function findBestMatch(arr, s) {
    for (let prefixLength = s.length; prefixLength > 1; prefixLength--) {
        const prefix = s.slice(0, prefixLength);
        const found = arr.find((candidate) => candidate.includes(prefix));
        if (found !== undefined) {
            console.info(found);
            return found;
        }
    }
    return undefined;
}

findBestMatch(list, str);

-1
投票

假设:

  • 匹配的str的顺序很重要,
  • 您希望列表中的项与搜索词的最长部分匹配,
  • 和最匹配str开头的项目;

下面这个函数应该可以做到:

    // Candidate strings that will be searched.
    var list = [
        'Mr. Adam Smith Junior',
        'Anna Smith Jr.',
        'Adam Jhon Smith',
        'Smith Adam'
    ];
    // Search term to find the best match for.
    var str = 'Adam Smith Jr.';

    /**
     * Filter predicate that is true only for the first occurrence of a value,
     * e.g. `items.filter(onlyUnique)` drops later duplicates.
     * See: https://stackoverflow.com/a/14438954/2142071
     *
     * @param {*} value - The element currently being tested.
     * @param {number} index - Position of that element in `self`.
     * @param {Array} self - The array being filtered.
     * @returns {boolean} Whether `index` is the first position of `value`.
     */
    function onlyUnique(value, index, self) {
        var firstPosition = self.indexOf(value);
        var isFirstOccurrence = firstPosition === index;
        return isFirstOccurrence;
    }

    /**
     * Finds the item of `list` that contains the longest contiguous part of
     * `word`.
     *
     * Every substring of `word` that is at least `minimal_length` characters
     * long is tried, longest first. Parts of equal length keep the order in
     * which they were generated (earliest start position in `word` first),
     * which relies on `Array.prototype.sort` being stable. The first list item
     * that literally contains the part (case-sensitive) is returned.
     *
     * Parts are compared as plain text, not as regular expressions, so
     * characters such as `.` or `(` in `word` only match themselves.
     *
     * @param {string} word - The search term.
     * @param {string[]} list - Candidate strings.
     * @param {number} minimal_length - Shortest part of `word` worth searching
     *     for; single-character searches are rarely meaningful.
     * @returns {[string, number]} `[item, index]` of the first match, or
     *     `['', -1]` when nothing matches or `word` is too short.
     */
    function getBestMatch(word, list, minimal_length) {
        if (word.length < minimal_length) {
            return ['', -1]; // None found, word is too short
        }

        // Collect every part of `word`: for each start position
        // ("Adam Smith Jr.", "dam Smith Jr.", "am Smith Jr.", ...) take every
        // prefix of the remainder that is long enough. The Set drops
        // duplicates while keeping first-insertion order.
        const parts = new Set();
        for (let start = 0; start <= word.length - minimal_length; start++) {
            const remainder = word.substring(start);
            for (let length = minimal_length; length <= remainder.length; length++) {
                parts.add(remainder.substring(0, length));
            }
        }

        // Longest parts first, since they are assumed to be a better match.
        const candidates = [...parts].sort((a, b) => b.length - a.length);

        for (const part of candidates) {
            const index = list.findIndex((item) => item.includes(part));
            if (index !== -1) {
                // Return the first match and its index
                return [list[index], index];
            }
        }

        return ['', -1]; // None found
    }

    // Search `list` for the best match of `str`, passing 2 as the minimal
    // length of a word part.
    var match = getBestMatch(str, list, 2);
    // The result is a pair: the matched item, then its index in the list.
    var matchedItem = match[0];
    var matchedIndex = match[1];
    console.log(matchedItem, 'at index', matchedIndex);

注:请记住,这个函数已经相当复杂,但仍然没有解决下面这类问题:

  • str中的单词应该整体匹配吗?
  • str的匹配部分可以按不同顺序排列吗?
  • 拼写错误的单词呢?
  • ..等..
  • 以及应该给一次匹配赋予怎样的分数/权重(例如,如何定义“最佳”匹配?)
  • 应该返回所有得分最高的匹配,还是仅返回一个?(返回哪一个?)
  • 性能:(对于list中的每个项目)它必须生成并搜索多少个单词片段?

因此,搜索本身就是一门独立的编程学问。 ;)


-1
投票
// Your code request - Updated code.
// Collects every entry of `list` that contains at least one of the
// whitespace-separated words of `str`.
const list = ['Mr. Adam Smith Junior', 'Anna Smith Jr.', 'Adam Jhon Smith', 'Smith Adam'];
const str = 'Adam Smith Jr.';

// Split the search string into words. `str` itself is left untouched, and
// empty pieces (from repeated spaces) are dropped because an empty word would
// match every entry.
const words = str.split(/\s+/).filter(Boolean);
const matched = [];

// Outer loop: each word of `str`. Inner loop: each entry of `list`.
for (const word of words) {
    for (const item of list) {
        // Plain substring test: characters such as '.' in "Jr." must match
        // literally instead of being interpreted as regex syntax.
        if (item.includes(word) && !matched.includes(item)) {
            matched.push(item);
        }
    }
}

// All entries of `list` that matched at least one word, in the order found.
console.log(matched);

0
投票
// Your code request.
// Reports, for every entry of `list`, which words of `str` it contains.
const list = ['Mr. Adam Smith Junior', 'Anna Smith Jr.', 'Adam Jhon Smith', 'Smith Adam'];
const str = 'Adam Smith Jr.';

// Split the search string into words; empty pieces (from repeated spaces) are
// dropped because an empty word would match every entry.
const words = str.split(/\s+/).filter(Boolean);

// One array per entry of `list`, holding the words of `str` found in that
// entry (in `str` order). Every entry is checked against every word; a shared
// word counter that is never reset would only ever test the first entry.
// The test is a plain substring check, so '.' in "Jr." matches literally.
const matchesByItem = list.map((item) => words.filter((word) => item.includes(word)));

// Find exact match - Your code goes here.
for (const foundWords of matchesByItem) {
    for (const word of foundWords) {
        console.log('String matched: ', word);
    }
}
© www.soinside.com 2019 - 2024. All rights reserved.