字符串转整数 - 厚积薄发

字符串转整数

今天在微博上看到：http://weibo.com/1401880315/AaNkykg6J#_rnd1379945435634

左耳朵耗子：
“现在的程序员，连atoi()都不知道是什么了，没事，那我改，不叫atoi()改叫StrToInt()，却发现，好些人连ASCII码都不知道是怎么一回事，没事，我教会你。但最终却发现怎么有这么多人连这样简单的程序都写不好（包括有多年工作经验的人）。‘比技术更恐怖的是有一群不合格的程序员在使用这些技术’。”

好久没写这么基本的代码了，简单尝试写了下，结果花了半个小时，只写了一个最基本功能的，通过后面的测试用例还花了不少时间进行调试修改。

/*
 * StrToInt - convert a NUL-terminated string to an INT.
 *
 * Accepted format:  [+|-] [0x|0X] digits
 *   - Without a prefix the digits are read as base 10.
 *   - With a "0x"/"0X" prefix the digits are read as base 16
 *     (letters a-f / A-F are accepted in either case).
 *
 * Parameters:
 *   lpszValue  string to convert; must not be NULL (asserted in debug
 *              builds, treated as an empty string otherwise).
 *
 * Returns:
 *   The converted value, negated when the string starts with '-'.
 *   An empty string (or a lone sign / lone prefix) yields 0.
 *   When an invalid character is met the function asserts in debug
 *   builds; in release builds it stops there and returns the value of
 *   the digits read so far.
 *
 * Limitations: leading whitespace is not skipped and overflow is not
 * reported; the value is accumulated in unsigned arithmetic so that an
 * out-of-range input wraps instead of triggering signed-overflow
 * undefined behaviour.
 */
INT StrToInt(const TCHAR* lpszValue)
{
    assert(lpszValue != NULL);
    if(lpszValue == NULL)
    {
        return 0;
    }

    const TCHAR* pCurrent = lpszValue;

    /* Optional sign. */
    BOOL bNegative = FALSE;
    if(*pCurrent == _T('+'))
    {
        pCurrent += 1;
    }
    else if(*pCurrent == _T('-'))
    {
        bNegative = TRUE;
        pCurrent += 1;
    }

    /* Optional hexadecimal prefix.  Reading pCurrent[1] is safe: when
       pCurrent[0] is '0' the next element is at worst the terminator. */
    INT nBase = 10;
    if(pCurrent[0] == _T('0')
        && (pCurrent[1] == _T('x') || pCurrent[1] == _T('X')))
    {
        pCurrent += 2;
        nBase = 16;
    }

    /* Accumulate left to right: value = value * base + digit.  This
       keeps everything in integer arithmetic (no pow() rounding) and
       gives the right result even if the scan stops early. */
    unsigned int uRet = 0;
    for(; *pCurrent != _T('\0'); pCurrent += 1)
    {
        const TCHAR ch = *pCurrent;
        INT nValue = 0;

        if(ch >= _T('0') && ch <= _T('9'))
        {
            nValue = ch - _T('0');
        }
        else if(nBase == 16 && ch >= _T('A') && ch <= _T('F'))
        {
            nValue = 10 + (ch - _T('A'));
        }
        else if(nBase == 16 && ch >= _T('a') && ch <= _T('f'))
        {
            nValue = 10 + (ch - _T('a'));
        }
        else
        {
            /* Not a digit in the current base. */
            assert(FALSE);
            break;
        }

        uRet = uRet * (unsigned int)nBase + (unsigned int)nValue;
    }

    /* Negate in unsigned arithmetic so that the most negative value
       can be produced without overflowing a signed intermediate. */
    return bNegative ? (INT)(0u - uRet) : (INT)uRet;
}

/*
 * Basic self-test for StrToInt: decimal and hexadecimal input, with and
 * without an explicit sign, plus a few boundary cases.
 */
void test()
{
    /* Decimal, optional sign. */
    assert(StrToInt(_T("11")) == 11);
    assert(StrToInt(_T("+12")) == 12);
    assert(StrToInt(_T("-123")) == -123);

    /* Hexadecimal, both prefix spellings and both letter cases. */
    assert(StrToInt(_T("-0x1CF")) == -0x1CF);
    assert(StrToInt(_T("-0X123")) == -0x123);
    assert(StrToInt(_T("0X123")) == 0x123);
    assert(StrToInt(_T("0xff")) == 0xFF);

    /* Boundary cases. */
    assert(StrToInt(_T("")) == 0);
    assert(StrToInt(_T("0")) == 0);
}

感慨用惯了Windows API和STL，对于最基本的字符串处理代码反而写不好了。细想一下，这个基本的东西确实不好写：实际上我上面只考虑了10进制和16进制，没有考虑其他进制，也没有考虑小数、非法字符串或溢出等情况，而真正工业级的库要考虑所有的情况，另外还要考虑转换效率等问题。

实际上CRT源码中有这个函数的实现：

/***
*wcstol, wcstoul(nptr,endptr,ibase) - Convert ascii string to long un/signed
*       int.
*
*Purpose:
*       Convert an ascii string to a long 32-bit value.  The base
*       used for the calculations is supplied by the caller.  The base
*       must be in the range 0, 2-36.  If a base of 0 is supplied, the
*       ascii string must be examined to determine the base of the
*       number:
*           (a) First char = '0', second char = 'x' or 'X',
*               use base 16.
*           (b) First char = '0', use base 8
*           (c) First char in range '1' - '9', use base 10.
*
*       If the 'endptr' value is non-NULL, then wcstol/wcstoul places
*       a pointer to the terminating character in this value.
*       See ANSI standard for details
*
*Entry:
*       nptr == NEAR/FAR pointer to the start of string.
*       endptr == NEAR/FAR pointer to the end of the string.
*       ibase == integer base to use for the calculations.
*
*       string format: [whitespace] [sign] [0] [x] [digits/letters]
*
*Exit:
*       Good return:
*           result
*
*       Overflow return:
*           wcstol -- LONG_MAX or LONG_MIN
*           wcstoul -- ULONG_MAX
*           wcstol/wcstoul -- errno == ERANGE
*
*       No digits or bad base return:
*           0
*           endptr = nptr*
*
*Exceptions:
*       Input parameters are validated. Refer to the validation section of the function.
*
*******************************************************************************/

/* flag values: independent bits combined in the 'flags' word of wcstoxl */
#define FL_UNSIGNED   1       /* wcstoul called (result is unsigned) */
#define FL_NEG        2       /* negative sign found */
#define FL_OVERFLOW   4       /* overflow occurred */
#define FL_READDIGIT  8       /* we've read at least one correct digit */

/*
 * wcstoxl - common worker that converts a wide-character string to an
 * unsigned long bit pattern.
 *
 * Parameters:
 *   plocinfo  locale handed to _LocaleUpdate; used here for the
 *             whitespace test only.
 *   nptr      string to convert; validated to be non-NULL.
 *   endptr    if non-NULL, receives a pointer to the character that
 *             stopped the scan (or nptr when no digit was read).
 *   ibase     0 (auto-detect 8/10/16 from the prefix) or 2..36.
 *   flags     FL_* bits; FL_UNSIGNED in the incoming value selects the
 *             unsigned range check, the other bits are set internally.
 *
 * Returns:
 *   The converted value.  On overflow errno is set to ERANGE and the
 *   result is clamped (ULONG_MAX, LONG_MAX or LONG_MIN depending on
 *   FL_UNSIGNED / FL_NEG).  Returns 0 when no digit was read or when
 *   parameter validation fails.
 *
 * NOTE(review): _wchartodigit is not shown here; from its use below it
 * presumably returns the digit value 0..9, or -1 for a non-digit.
 */
static unsigned long __cdecl wcstoxl (
        _locale_t plocinfo,
        const wchar_t *nptr,
        const wchar_t **endptr,
        int ibase,
        int flags
        )
{
    const wchar_t *p;
    wchar_t c;
    unsigned long number;
    unsigned digval;
    unsigned long maxval;
    _LocaleUpdate _loc_update(plocinfo);

    /* validation section */
    if (endptr != NULL)
    {
        /* store beginning of string in endptr, so it is already
           meaningful if validation below returns early */
        *endptr = nptr;
    }
    _VALIDATE_RETURN(nptr != NULL, EINVAL, 0L);
    _VALIDATE_RETURN(ibase == 0 || (2 <= ibase && ibase <= 36), EINVAL, 0L);

    p = nptr;           /* p is our scanning pointer */
    number = 0;         /* start with zero */

    /* Throughout the function c holds the current character and p
       points one past it. */
    c = *p++;           /* read char */

    while ( _iswspace_l(c, _loc_update.GetLocaleT()) )
        c = *p++;       /* skip whitespace */

    if (c == '-') {
        flags |= FL_NEG;    /* remember minus sign */
        c = *p++;
    }
    else if (c == '+')
        c = *p++;       /* skip sign */

    if (ibase == 0) {
        /* determine base free-lance, based on first two chars of
           string: anything not starting with a '0' digit is base 10,
           "0x"/"0X" is base 16, any other leading '0' is base 8 */
        if (_wchartodigit(c) != 0)
            ibase = 10;
        else if (*p == L'x' || *p == L'X')
            ibase = 16;
        else
            ibase = 8;
    }

    if (ibase == 16) {
        /* we might have 0x in front of number; remove if there */
        if (_wchartodigit(c) == 0 && (*p == L'x' || *p == L'X')) {
            ++p;
            c = *p++;   /* advance past prefix */
        }
    }

    /* if our number exceeds this, we will overflow on multiply */
    maxval = ULONG_MAX / ibase;

    for (;;) {  /* exit in middle of loop */

        /* convert c to value: decimal digits first, then letters
           mapped to 10, 11, ... regardless of case */
        if ( (digval = _wchartodigit(c)) != -1 )
            ;
        else if ( __ascii_iswalpha(c))
            digval = __ascii_towupper(c) - L'A' + 10;
        else
            break;

        if (digval >= (unsigned)ibase)
            break;      /* exit loop if bad digit found */

        /* record the fact we have read one digit */
        flags |= FL_READDIGIT;

        /* we now need to compute number = number * base + digval,
           but we need to know if overflow occurred.  This requires
           a tricky pre-check: the multiply is safe below maxval, and
           exactly at maxval only if the digit fits in the remainder. */

        if (number < maxval || (number == maxval &&
        (unsigned long)digval <= ULONG_MAX % ibase)) {
            /* we won't overflow, go ahead and multiply */
            number = number * ibase + digval;
        }
        else {
            /* we would have overflowed -- set the overflow flag */
            flags |= FL_OVERFLOW;
            if (endptr == NULL) {
                /* no need to keep on parsing if we
                   don't have to return the endptr. */
                break;
            }
        }

        c = *p++;       /* read next digit */
    }

    /* p is one past the character held in c; step back onto it */
    --p;                /* point to place that stopped scan */

    if (!(flags & FL_READDIGIT)) {
        /* no number there; return 0 and point to beginning of
           string */
        if (endptr)
            /* store beginning of string in endptr later on */
            p = nptr;
        number = 0L;        /* return 0 */
    }
    /* NOTE(review): -LONG_MIN below negates the most negative long in
       signed arithmetic; this relies on the compiler wrapping it back
       to LONG_MIN -- confirm before porting to another toolchain. */
    else if ( (flags & FL_OVERFLOW) ||
          ( !(flags & FL_UNSIGNED) &&
            ( ( (flags & FL_NEG) && (number > -LONG_MIN) ) ||
              ( !(flags & FL_NEG) && (number > LONG_MAX) ) ) ) )
    {
        /* overflow or signed overflow occurred: report ERANGE and
           clamp to the limit of the requested result type */
        errno = ERANGE;
        if ( flags & FL_UNSIGNED )
            number = ULONG_MAX;
        else if ( flags & FL_NEG )
            number = (unsigned long)(-LONG_MIN);
        else
            number = LONG_MAX;
    }

    if (endptr != NULL)
        /* store pointer to char that stopped the scan */
        *endptr = p;

    if (flags & FL_NEG)
        /* negate result if there was a neg sign (applies to the
           unsigned variant as well) */
        number = (unsigned long)(-(long)number);

    return number;          /* done. */
}

透过这道题确实可以投射出一个程序员的计算机基本功，大家可以自己尝试下实现这个函数，看看自己的计算机基本功。

posted on 2013-09-23 22:39 Richard Wei 阅读(2747) 评论(0) 编辑收藏引用所属分类: C++

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！

相关文章: 客户端UI层设计的思考关于字符集，编码格式，大小端的简单总结 Stack的三种含义（转载）字符串转整数 64位平台C/C++开发注意事项(转载) 常见的数据枚举遍历方法 C++程序风格的思考 C++11标准之右值引用（ravalue reference）（转载）常见C++内存池技术代码自动生成-宏带来的奇技淫巧(转载)

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

常用链接

留言簿(40)

随笔分类

随笔档案

友情链接

最新评论

阅读排行榜

评论排行榜