如何比较 char(或 wchar_t)作为 C++ 中字符串的一部分

问题描述 投票:0回答:1

我有两个 C 风格的、以 null 结尾的(宽)字符串(不是

std::wstring
),我通过迭代器遍历它们,并希望逐个字符地比较它们(用于排序)。我不想使用标准运算符(
==
>
<
),因为我想按照区域设置(locale)的规则(已设置 _locale_t)来比较它们。

我只找到了

_wcsicmp_l(wchar_t*, wchar_t*, _locale_t)
,它可以比较两个字符串,而不是字符。

当我把一个指向

wchar_t
的指针(例如指向字符串中某个位置的迭代器)传递给比较函数时,该函数会把它当作指向字符串的指针,从该位置开始比较两个完整的字符串,而不只是比较单个字符。

我想举个例子会更能说明问题。这是一个好方法,还是太困难、太慢?有没有更简单的方法?

目标是实现自然排序——字符串中同时包含字母和数字。

inline bool compareChars(const wchar_t& lhs, const wchar_t& rhs)
{
    //I have to add terminate \0 here, in otherwise comparator compare string from lhs to end, not only chars
    wchar_t str_lhs[2]{ lhs, L'\0' }, str_rhs[2]{ rhs, L'\0' };
    _locale_t loc = _create_locale(LC_ALL, "cs-CZ.utf8");
    bool res = _wcsicoll_l(&str_lhs[0], &str_rhs[0], loc) < 0;
    _free_locale(loc);
    return res;
}

// Demo: orders two C-style wide strings character by character using
// compareChars. The strings are walked in lockstep; the first position where
// the characters are not equivalent decides the order, otherwise the shorter
// string sorts first.
int main()
{
    const wchar_t* firstStr = L"Test";
    const wchar_t* secondStr = L"Test2";

    const wchar_t* it1 = firstStr;
    const wchar_t* it2 = secondStr;

    // <0: firstStr sorts before secondStr, >0: after, 0: equivalent.
    int order = 0;
    for (; *it1 != L'\0' && *it2 != L'\0'; ++it1, ++it2)
    {
        // compareChars only answers "less than", so ask in both directions.
        if (compareChars(*it1, *it2))
        {
            order = -1;
            break;
        }
        if (compareChars(*it2, *it1))
        {
            order = 1;
            break;
        }
    }

    if (order == 0)
    {
        // The common prefix is equivalent: whichever string still has
        // characters left is the longer one and sorts last.
        if (*it1 == L'\0' && *it2 != L'\0')
            order = -1;
        else if (*it1 != L'\0')
            order = 1;
    }

    static_cast<void>(order); // the demo only computes the ordering
    return 0;
}
c++ string localization compare
1个回答
0
投票

结果在这里:

#include <clocale>
#include <cwchar>
#include <cwctype>
#include <locale>
#include <string>

using namespace std;

namespace MajCompare {

struct MajLoc
{
private:
    _locale_t m_loc;
public:
    MajLoc(){ m_loc = _create_locale(LC_COLLATE, "cs-CZ.utf8"); }
    ~MajLoc() { _free_locale(m_loc);}
    _locale_t& GetLocale() { return m_loc; }
};
static MajLoc s_cLoc;

//Vrací -1, 0 1
int CompareNatural(const wchar_t* lhsBegin, const wchar_t* lhsEnd
    , const wchar_t* rhsBegin, const wchar_t* rhsEnd)
{

    wstring wsPom1, wsPom2;
    const wchar_t* itCurLhs = lhsBegin, *itCurRhs = rhsBegin;
    while (!iswdigit(*itCurLhs) && !iswdigit(*itCurRhs) && itCurLhs != lhsEnd && itCurRhs != rhsEnd)
    {
        wsPom1.push_back(*itCurLhs);
        wsPom2.push_back(*itCurRhs);
        if (*itCurLhs != *itCurRhs)
            return _wcsicoll_l(lhsBegin, rhsBegin, s_cLoc.GetLocale());

        itCurLhs++, itCurRhs++;
    }
    if (itCurLhs == lhsEnd || itCurRhs == rhsEnd || iswdigit(*itCurLhs) != iswdigit(*itCurRhs)) //jeden je delší nebo kombinace čísla a písmena, porovnáme původní řetězce bez ohledu na čísla porovnáme standardně
        return _wcsicoll_l(lhsBegin, rhsBegin, s_cLoc.GetLocale());
    else //oba řetězce mají čísla a nejsou na konci
    {
        wstring wsNum1, wsNum2;
        while (iswdigit(*itCurLhs))
            wsNum1.push_back(*itCurLhs++);
        while (iswdigit(*itCurRhs))
            wsNum2.push_back(*itCurRhs++);
        int iNum1 = _wtoi(wsNum1.data());
        int iNum2 = _wtoi(wsNum2.data());
        if (iNum1 != iNum2)
            return iNum1 > iNum2 ? 1 : -1;
        return CompareNatural(itCurLhs, lhsEnd, itCurRhs, rhsEnd);
    }
}

int CompareNatural(const wchar_t* lsFirst, const wchar_t* lsSecond)
{
    const wchar_t* it1 = lsFirst;
    while (*it1 != '\0')it1++;
    const wchar_t* it2 = lsSecond;
    while (*it2 != '\0')it2++;
    return CompareNatural(lsFirst, it1, lsSecond, it2);
}
};
© www.soinside.com 2019 - 2024. All rights reserved.