Swift:查找子字符串的所有出现位置

问题描述 投票:0回答:8

我在 Swift 中有一个 String 类的扩展,它返回给定子字符串的第一个字母的索引。

任何人都可以帮我实现它,以便它返回所有出现的数组,而不仅仅是第一个吗?

谢谢你。

extension String {
    /// Returns the character offset of the first occurrence of `string`.
    ///
    /// - Parameter string: The substring to look for.
    /// - Returns: The distance in characters from `startIndex` to the start of
    ///   the first match, or `-1` when no non-empty match exists.
    func indexOf(string : String) -> Int {
        guard let match = range(of: string), !match.isEmpty else {
            return -1
        }
        return distance(from: startIndex, to: match.lowerBound)
    }
}

例如,我想要类似

[50, 74, 91, 103]
 的返回值,而不是 
50

swift string swift3
8个回答
39
投票

您只需继续推进搜索范围,直到找不到该子字符串的更多实例:

extension String {
    /// Collects the character offset of every non-overlapping occurrence of `string`.
    ///
    /// The search restarts at the end of each match, so matches never overlap.
    ///
    /// - Parameter string: The substring to look for.
    /// - Returns: Offsets in characters from `startIndex`, in ascending order;
    ///   empty when nothing matches.
    func indicesOf(string: String) -> [Int] {
        var offsets: [Int] = []
        var cursor = startIndex

        while cursor < endIndex {
            guard let match = range(of: string, range: cursor..<endIndex), !match.isEmpty else {
                break
            }
            offsets.append(distance(from: startIndex, to: match.lowerBound))
            cursor = match.upperBound
        }

        return offsets
    }
}

// Demo: each "a" in "aaaa" is reported as its own match.
let keyword = "a"
let html = "aaaa"
let indices = html.indicesOf(string: keyword)
print(indices) // [0, 1, 2, 3]

10
投票

我知道我们不是在这里玩代码高尔夫,但对于任何对不使用

var
或循环的函数式单行实现感兴趣的人,这是另一种可能的解决方案:

extension String {
    /// Returns the character offset of every position at which `string` occurs,
    /// overlapping occurrences included.
    ///
    /// Offsets are counted in `Character`s from `startIndex`. The deprecated
    /// `String.Index.encodedOffset` is deliberately not used: it reports
    /// code-unit offsets, which differ from character offsets for non-ASCII text.
    ///
    /// - Parameter string: The substring to look for.
    /// - Returns: Ascending character offsets; empty when nothing matches.
    func indices(of string: String) -> [Int] {
        return indices.enumerated().compactMap { offset, index in
            self[index...].hasPrefix(string) ? offset : nil
        }
    }
}

8
投票

这里有 2 个函数。一个返回

[Range<String.Index>]
,另一个返回
[Range<Int>]
。如果您不需要前者,可以将其设为私有。我设计它是为了模仿
range(of:options:range:locale:)
方法,因此它支持所有相同的功能。

import Foundation

extension String {
    /// Finds every occurrence of `aString`, mirroring the parameters of
    /// `range(of:options:range:locale:)`.
    ///
    /// After each match the search resumes one character past the match's
    /// start, so overlapping occurrences are reported.
    ///
    /// - Parameters:
    ///   - aString: The substring to look for.
    ///   - options: Comparison options forwarded to `range(of:options:range:locale:)`.
    ///   - range: The part of the receiver to search; `nil` searches all of it.
    ///   - locale: The locale forwarded to `range(of:options:range:locale:)`.
    /// - Returns: The matched ranges in the order they were found; empty when
    ///   nothing matches.
    public func allRanges(
        of aString: String,
        options: String.CompareOptions = [],
        range: Range<String.Index>? = nil,
        locale: Locale? = nil
    ) -> [Range<String.Index>] {

        // The bounds within which to search: the caller's range, or the whole receiver.
        let bounds = range ?? startIndex..<endIndex

        var searchStart = bounds.lowerBound
        var ranges = [Range<String.Index>]()

        while let match = self.range(
            of: aString, options: options,
            range: searchStart ..< bounds.upperBound,
            locale: locale
        ) {
            ranges.append(match)
            // A match that starts at the upper bound can only be zero-length;
            // there is nothing left to step over, so stop instead of advancing past the end.
            guard match.lowerBound < bounds.upperBound else { break }
            searchStart = index(after: match.lowerBound)
        }

        return ranges
    }

    /// Same search as the `[Range<String.Index>]` overload, with every bound
    /// converted to a character offset from `startIndex`.
    ///
    /// - Returns: The matched ranges expressed as `Int` character offsets.
    public func allRanges(
        of aString: String,
        options: String.CompareOptions = [],
        range: Range<String.Index>? = nil,
        locale: Locale? = nil
    ) -> [Range<Int>] {
        return allRanges(of: aString, options: options, range: range, locale: locale)
            .map(indexRangeToIntRange)
    }


    /// Converts both bounds of `range` to character offsets.
    private func indexRangeToIntRange(_ range: Range<String.Index>) -> Range<Int> {
        return indexToInt(range.lowerBound) ..< indexToInt(range.upperBound)
    }

    /// Returns the distance in characters from `startIndex` to `index`.
    private func indexToInt(_ index: String.Index) -> Int {
        return self.distance(from: self.startIndex, to: index)
    }
}

// Sample input: five "abc" occurrences separated by runs of one to four spaces.
let s = "abc abc  abc   abc    abc"
// The `as` cast selects the overload that returns `[Range<String.Index>]`.
print(s.allRanges(of: "abc") as [Range<String.Index>])
print()
// Same search through the overload that returns `[Range<Int>]`.
print(s.allRanges(of: "abc") as [Range<Int>])

7
投票

实际上并没有内置函数可以执行此操作,但我们可以实现修改后的 Knuth-Morris-Pratt 算法 来获取我们想要匹配的字符串的所有索引。它也应该非常高效,因为我们不需要在字符串上重复调用 range

extension String {
    /// Finds every non-overlapping occurrence of `string` by comparing UTF-8 code units.
    ///
    /// - Note: The returned values are offsets into the receiver's UTF-8 view,
    ///   not `Character` offsets; the two only coincide for ASCII text.
    /// - Parameter string: The substring to look for.
    /// - Returns: Ascending UTF-8 offsets of each match; empty when `string`
    ///   is empty or does not occur.
    func indicesOf(string: String) -> [Int] {
        // Working on arrays of UTF-8 code units makes indexing and comparing a lot easier.
        let search = Array(utf8)
        let word = Array(string.utf8)

        // An empty pattern has no first code unit to compare (indexing `word[0]`
        // would trap), and a pattern longer than the text cannot match.
        guard !word.isEmpty, word.count <= search.count else { return [] }

        var indices = [Int]()

        // m - the beginning of the current candidate match in the search string
        // i - the position of the current code unit in the word we're trying to match
        var m = 0, i = 0
        while m + i < search.count {
            if word[i] == search[m + i] {
                if i == word.count - 1 {
                    // Whole word matched: record it and resume right after the match.
                    indices.append(m)
                    m += i + 1
                    i = 0
                } else {
                    i += 1
                }
            } else {
                // Mismatch: slide the candidate start forward by one and retry.
                m += 1
                i = 0
            }
        }

        return indices
    }
}

4
投票

请检查以下答案以在多个位置查找多个项目

extension String {
    /// Locates every non-overlapping occurrence of `string`, in order of appearance.
    private func rangesOf(string: String) -> [Range<String.Index>] {
        var ranges = [Range<String.Index>]()
        var searchStartIndex = startIndex

        while searchStartIndex < endIndex,
            let range = self.range(of: string, range: searchStartIndex..<endIndex),
            !range.isEmpty
        {
            ranges.append(range)
            searchStartIndex = range.upperBound
        }

        return ranges
    }

    /// Returns the character offset of every non-overlapping occurrence of `string`.
    ///
    /// - Parameter string: The substring to look for.
    /// - Returns: Offsets in characters from `startIndex`; empty when nothing matches.
    func indicesOf(string: String) -> [Int] {
        return rangesOf(string: string).map { distance(from: startIndex, to: $0.lowerBound) }
    }

    /// Builds an attributed copy of the receiver in which every occurrence of
    /// each entry of `strings` is drawn in `color`.
    ///
    /// - Parameters:
    ///   - strings: The substrings to colorize.
    ///   - color: The foreground color applied to each occurrence.
    ///   - characterSpacing: Optional kerning applied to the whole string;
    ///     `nil` leaves kerning untouched.
    /// - Returns: The attributed string.
    func attributedStringWithColor(_ strings: [String], color: UIColor, characterSpacing: UInt? = nil) -> NSAttributedString {
        let attributedString = NSMutableAttributedString(string: self)
        for string in strings {
            for match in rangesOf(string: string) {
                // NSAttributedString ranges are expressed in UTF-16 units, so convert the
                // String.Index range instead of reusing Character offsets and counts.
                let range = NSRange(match, in: self)
                attributedString.addAttribute(.foregroundColor, value: color, range: range)
            }
        }

        guard let characterSpacing = characterSpacing else { return attributedString }

        attributedString.addAttribute(.kern, value: characterSpacing, range: NSRange(location: 0, length: attributedString.length))

        return attributedString
    }
}

可以按如下方式使用:

// Color every "Item" and every "+" of the sample sentence red.
let message = "Item 1 + Item 2 + Item 3"
message.attributedStringWithColor(["Item", "+"], color: .red)

并得到结果


2
投票

Swift 5.7 - iOS 16.0+ - macOS 13.0+

Foundation
现在有
func ranges<C>(of other: C) -> [Range<Self.Index>] where C : Collection, Self.Element == C.Element

所以它变得更加简单:

extension String {
    /// Returns the character offset of every occurrence of `subString` found by `ranges(of:)`.
    ///
    /// - Parameters:
    ///   - subString: The substring to look for.
    ///   - caseSensitive: When `false`, both strings are lowercased before searching.
    /// - Returns: Offsets in characters from the start of the searched string;
    ///   empty when nothing matches.
    func allIndices(of subString: String, caseSensitive: Bool = true) -> [Int] {
        let haystack = caseSensitive ? self : lowercased()
        let needle = caseSensitive ? subString : subString.lowercased()
        // Indices returned by the search belong to `haystack`. Lowercasing can change a
        // string's underlying storage, so they must be measured in `haystack`, not in `self`.
        return haystack.ranges(of: needle).map {
            haystack.distance(from: haystack.startIndex, to: $0.lowerBound)
        }
    }
}

1
投票

这可以通过递归方法来完成。我用数字字符串来测试它。它返回一个可选的

Int
数组,这意味着如果找不到子字符串,它将返回 nil。

extension String {
    /// Recursively collects the character offset of every non-overlapping occurrence of `string`.
    ///
    /// - Parameters:
    ///   - string: The substring to look for.
    ///   - offset: Added to every reported offset. The recursion uses it to
    ///     translate offsets found in the remaining tail back into offsets of
    ///     the original string; callers normally leave it at `0`.
    /// - Returns: The offsets in ascending order, or `nil` when `string` does not occur.
    func indexes(of string: String, offset: Int = 0) -> [Int]? {
        guard let match = range(of: string), !match.isEmpty else {
            return nil
        }

        let index = distance(from: startIndex, to: match.lowerBound) + offset
        let matchLength = distance(from: match.lowerBound, to: match.upperBound)

        // Slice with a partial range instead of the deprecated `substring(from:)`.
        let remainder = String(self[match.upperBound...])
        let tail = remainder.indexes(of: string, offset: index + matchLength) ?? []

        return [index] + tail
    }
}

// Demo: look up every "3456" in a string made of repeated "0123456789" runs.
let digits = "01234567890123456789012345678901234567890123456789012345678901234567890123456789"
digits.indexes(of: "3456")

0
投票

我已经调整了接受的答案,以便可以配置

case sensitivity

extension String {
    /// Returns the character offset of every non-overlapping occurrence of `subString`.
    ///
    /// - Parameters:
    ///   - subString: The substring to look for.
    ///   - caseSensitive: When `false`, both strings are lowercased before searching.
    /// - Returns: Offsets in characters from the start of the searched string,
    ///   in ascending order; empty when nothing matches.
    func allIndexes(of subString: String, caseSensitive: Bool = true) -> [Int] {
        let subString = caseSensitive ? subString : subString.lowercased()
        let mainString = caseSensitive ? self : self.lowercased()
        var indices = [Int]()
        var searchStartIndex = mainString.startIndex
        while searchStartIndex < mainString.endIndex,
            let range = mainString.range(of: subString, range: searchStartIndex..<mainString.endIndex),
            !range.isEmpty
        {
            // `range` indexes `mainString`, which is a different string from `self` when
            // lowercased, so the distance must be measured in `mainString`.
            let index = mainString.distance(from: mainString.startIndex, to: range.lowerBound)
            indices.append(index)
            searchStartIndex = range.upperBound
        }

        return indices
    }
}
© www.soinside.com 2019 - 2024. All rights reserved.