forked from nuttx/nuttx-update
libc/wchar: support wchar
Implemented according to https://en.wikipedia.org/wiki/UTF-8
Signed-off-by: Jiuzhu Dong <dongjiuzhu1@xiaomi.com>
This commit is contained in:
parent
035840a770
commit
673a2e0136
10 changed files with 289 additions and 95 deletions
|
@ -60,7 +60,7 @@
|
|||
* the correct value.
|
||||
*/
|
||||
|
||||
#define MB_LEN_MAX 1
|
||||
#define MB_LEN_MAX 4
|
||||
|
||||
/* Configurable limits required by POSIX ****************************************
|
||||
*
|
||||
|
|
|
@ -56,7 +56,7 @@
|
|||
* character specified by the current locale.
|
||||
*/
|
||||
|
||||
#define MB_CUR_MAX 1
|
||||
#define MB_CUR_MAX 4
|
||||
|
||||
/* The environ variable, normally 'char **environ;' is not implemented as a
|
||||
* function call. However, get_environ_ptr() can be used in its place.
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Name: mbtowc.c
|
||||
* Name: mbtowc
|
||||
*
|
||||
* Description:
|
||||
* Minimal multibyte to wide char converter
|
||||
|
@ -59,10 +59,5 @@ int mbtowc(FAR wchar_t *pwc, FAR const char *s, size_t n)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (pwc)
|
||||
{
|
||||
*pwc = (wchar_t)*s;
|
||||
}
|
||||
|
||||
return (*s != '\0');
|
||||
return mbrtowc(pwc, s, n, NULL);
|
||||
}
|
||||
|
|
|
@ -34,10 +34,8 @@
|
|||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
#include <errno.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Public Functions
|
||||
|
@ -53,19 +51,5 @@
|
|||
|
||||
int wctomb(FAR char *s, wchar_t wc)
|
||||
{
|
||||
if (s == NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Verify that wchar is a valid single-byte character. */
|
||||
|
||||
if ((size_t) wc >= 0x100)
|
||||
{
|
||||
set_errno(EILSEQ);
|
||||
return -1;
|
||||
}
|
||||
|
||||
*s = (char)wc;
|
||||
return 1;
|
||||
return wcrtomb(s, wc, NULL);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ CSRCS += lib_wmemmove.c lib_wmemset.c lib_btowc.c lib_mbrtowc.c lib_wctob.c
|
|||
CSRCS += lib_wcslcpy.c lib_wcsxfrm.c lib_wcrtomb.c lib_wcsftime.c
|
||||
CSRCS += lib_wcscoll.c lib_wcstol.c lib_wcstoll.c lib_wcstoul.c
|
||||
CSRCS += lib_wcstoull.c lib_wcstold.c lib_wcstof.c lib_wcstod.c
|
||||
CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c
|
||||
CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c lib_mbsinit.c
|
||||
CSRCS += lib_mbrlen.c lib_mbsrtowcs.c lib_wcsrtombs.c
|
||||
|
||||
# Add the wchar directory to the build
|
||||
|
|
|
@ -32,12 +32,62 @@
|
|||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Pre-processor Definitions
|
||||
****************************************************************************/
|
||||
|
||||
/* Implemented according to https://en.wikipedia.org/wiki/UTF-8 */
|
||||
|
||||
#define SA 0xc2u
|
||||
#define SB 0xf4u
|
||||
|
||||
/* The upper 6 state bits are a negative integer offset to bound-check the next byte;
|
||||
* equivalent to: (((b) - 0x80) | ((b) + offset)) & ~0x3f
|
||||
*/
|
||||
|
||||
#define OOB(c, b) (((((b) >> 3) - 0x10) | \
|
||||
(((b) >> 3) + ((int32_t)(c) >> 26))) & ~7)
|
||||
|
||||
/* Interval [a,b). Either a must be 80 or b must be c0, lower 3 bits clear. */
|
||||
|
||||
#define R(a, b) ((uint32_t)((uint32_t)((a) == 0x80 ? 0x40u - (b) : \
|
||||
0u - (a)) << 23))
|
||||
|
||||
#define C(x) ((x) < 2 ? -1 : (R(0x80, 0xc0) | (x)))
|
||||
#define D(x) C((x) + 16)
|
||||
#define E(x) (((x) == 0 ? R(0xa0, 0xc0) : \
|
||||
(x) == 0xd ? R(0x80, 0xa0) : R(0x80, 0xc0)) \
|
||||
| (R(0x80, 0xc0) >> 6) \
|
||||
| (x))
|
||||
#define F(x) (((x) >= 5 ? 0 : \
|
||||
(x) == 0 ? R(0x90, 0xc0) : \
|
||||
(x) == 4 ? R(0x80, 0x90) : R(0x80, 0xc0)) \
|
||||
| (R(0x80, 0xc0) >> 6) \
|
||||
| (R(0x80, 0xc0) >> 12) \
|
||||
| (x))
|
||||
|
||||
/****************************************************************************
|
||||
* Private Data
|
||||
****************************************************************************/
|
||||
|
||||
/* The definition of g_bittab follows the table at this link:
|
||||
* https://en.wikipedia.org/wiki/UTF-8 [Codepage layout].
|
||||
*/
|
||||
|
||||
static const uint32_t g_bittab[] =
|
||||
{
|
||||
C(0x2), C(0x3), C(0x4), C(0x5), C(0x6), C(0x7),
|
||||
C(0x8), C(0x9), C(0xa), C(0xb), C(0xc), C(0xd), C(0xe), C(0xf),
|
||||
D(0x0), D(0x1), D(0x2), D(0x3), D(0x4), D(0x5), D(0x6), D(0x7),
|
||||
D(0x8), D(0x9), D(0xa), D(0xb), D(0xc), D(0xd), D(0xe), D(0xf),
|
||||
E(0x0), E(0x1), E(0x2), E(0x3), E(0x4), E(0x5), E(0x6), E(0x7),
|
||||
E(0x8), E(0x9), E(0xa), E(0xb), E(0xc), E(0xd), E(0xe), E(0xf),
|
||||
F(0x0), F(0x1), F(0x2), F(0x3), F(0x4)
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
* Public Functions
|
||||
****************************************************************************/
|
||||
|
@ -53,20 +103,86 @@
|
|||
size_t mbrtowc(FAR wchar_t *pwc, FAR const char *s,
|
||||
size_t n, FAR mbstate_t *ps)
|
||||
{
|
||||
FAR const char *e = s;
|
||||
size_t retval = 0;
|
||||
FAR const unsigned char *src = (FAR const void *)s;
|
||||
static mbstate_t state;
|
||||
size_t num = n;
|
||||
wchar_t dummy;
|
||||
uint32_t c;
|
||||
|
||||
if (s == NULL)
|
||||
if (ps == NULL)
|
||||
{
|
||||
s = e = "";
|
||||
n = 1;
|
||||
ps = &state;
|
||||
}
|
||||
|
||||
retval = mbsnrtowcs(pwc, &e, 1, n, ps);
|
||||
if (retval == 1)
|
||||
c = *(FAR uint32_t *)ps;
|
||||
if (src == NULL)
|
||||
{
|
||||
retval = e - s;
|
||||
if (c != 0)
|
||||
{
|
||||
goto ilseq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if (pwc == NULL)
|
||||
{
|
||||
pwc = &dummy;
|
||||
}
|
||||
|
||||
return retval;
|
||||
if (n == 0)
|
||||
{
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
if (*src < 0x80)
|
||||
{
|
||||
return !!(*pwc = *src);
|
||||
}
|
||||
|
||||
if (*src - SA > SB - SA)
|
||||
{
|
||||
goto ilseq;
|
||||
}
|
||||
|
||||
c = g_bittab[*src++ - SA];
|
||||
n--;
|
||||
}
|
||||
|
||||
if (n != 0)
|
||||
{
|
||||
if (OOB(c, *src) != 0)
|
||||
{
|
||||
goto ilseq;
|
||||
}
|
||||
|
||||
loop:
|
||||
c = (c << 6) | (*src++ - 0x80);
|
||||
n--;
|
||||
if ((c >> 31) == 0)
|
||||
{
|
||||
*(FAR uint32_t *)ps = 0;
|
||||
*pwc = c;
|
||||
return num - n;
|
||||
}
|
||||
|
||||
if (n != 0)
|
||||
{
|
||||
if (*src - 0x80u >= 0x40)
|
||||
{
|
||||
goto ilseq;
|
||||
}
|
||||
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
*(FAR uint32_t *)ps = c;
|
||||
return -2;
|
||||
|
||||
ilseq:
|
||||
*(FAR uint32_t *)ps = 0;
|
||||
set_errno(EILSEQ);
|
||||
return -1;
|
||||
}
|
||||
|
|
42
libs/libc/wchar/lib_mbsinit.c
Normal file
42
libs/libc/wchar/lib_mbsinit.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
/****************************************************************************
|
||||
* libs/libc/wchar/lib_mbsinit.c
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership. The
|
||||
* ASF licenses this file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the
|
||||
* License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <wchar.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Public Functions
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Name: mbsinit
|
||||
*
|
||||
* Description:
|
||||
* Test whether the conversion state is the initial shift state.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
int mbsinit(FAR const mbstate_t *st)
|
||||
{
|
||||
/* A NULL pointer is reported as the initial state; otherwise the result is
 * non-zero exactly when the 32-bit word read from the start of *st is zero.
 */
return st == NULL || !*(FAR uint32_t *)st;
|
||||
}
|
|
@ -22,8 +22,6 @@
|
|||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/****************************************************************************
|
||||
|
@ -68,23 +66,59 @@
|
|||
size_t mbsnrtowcs(FAR wchar_t *dst, FAR const char **src, size_t nms,
|
||||
size_t len, FAR mbstate_t *ps)
|
||||
{
|
||||
size_t i;
|
||||
FAR const char *s = *src;
|
||||
FAR wchar_t *ws = dst;
|
||||
size_t cnt = 0;
|
||||
size_t l;
|
||||
|
||||
if (dst == NULL)
|
||||
{
|
||||
return strnlen(*src, nms);
|
||||
len = SIZE_MAX;
|
||||
}
|
||||
|
||||
for (i = 0; i < nms && i < len; i++)
|
||||
if (s != NULL)
|
||||
{
|
||||
dst[i] = (wchar_t)(*src)[i];
|
||||
if (dst[i] == L'\0')
|
||||
while (len > 0 && nms > 0)
|
||||
{
|
||||
*src = NULL;
|
||||
return i;
|
||||
l = mbrtowc(ws, s, nms, ps);
|
||||
if ((ssize_t)l <= 0)
|
||||
{
|
||||
if ((ssize_t)l == -2)
|
||||
{
|
||||
/* If the input buffer ends with an incomplete character,
|
||||
* conversion stops at the end of the input buffer.
|
||||
*/
|
||||
|
||||
s += nms;
|
||||
}
|
||||
else if (l == 0)
|
||||
{
|
||||
s = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
cnt = l;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
s += l;
|
||||
nms -= l;
|
||||
if (ws != NULL)
|
||||
{
|
||||
ws++;
|
||||
}
|
||||
|
||||
len--;
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
*src += i;
|
||||
return i;
|
||||
if (dst != NULL)
|
||||
{
|
||||
*src = s;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
|
|
@ -34,10 +34,7 @@
|
|||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
/****************************************************************************
|
||||
|
@ -54,17 +51,37 @@
|
|||
|
||||
size_t wcrtomb(FAR char *s, wchar_t wc, FAR mbstate_t *ps)
|
||||
{
|
||||
int retval = 0;
|
||||
char buf[MB_LEN_MAX];
|
||||
|
||||
if (s == NULL)
|
||||
{
|
||||
retval = wctomb(buf, wc);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
else if ((unsigned)wc < 0x80)
|
||||
{
|
||||
retval = wctomb(s, wc);
|
||||
*s = wc;
|
||||
return 1;
|
||||
}
|
||||
else if ((unsigned)wc < 0x800)
|
||||
{
|
||||
*s++ = 0xc0 | (wc >> 6);
|
||||
*s = 0x80 | (wc & 0x3f);
|
||||
return 2;
|
||||
}
|
||||
else if ((unsigned)wc < 0xd800 || (unsigned)wc <= 0xffff)
|
||||
{
|
||||
*s++ = 0xe0 | (wc >> 12);
|
||||
*s++ = 0x80 | ((wc >> 6) & 0x3f);
|
||||
*s = 0x80 | (wc & 0x3f);
|
||||
return 3;
|
||||
}
|
||||
else if ((unsigned long)wc < 0x110000)
|
||||
{
|
||||
*s++ = 0xf0 | ((unsigned long)wc >> 18);
|
||||
*s++ = 0x80 | ((wc >> 12) & 0x3f);
|
||||
*s++ = 0x80 | ((wc >> 6) & 0x3f);
|
||||
*s = 0x80 | (wc & 0x3f);
|
||||
return 4;
|
||||
}
|
||||
|
||||
return retval;
|
||||
set_errno(EILSEQ);
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -22,12 +22,9 @@
|
|||
* Included Files
|
||||
****************************************************************************/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Public Functions
|
||||
|
@ -70,48 +67,57 @@
|
|||
size_t wcsnrtombs(FAR char *dst, FAR const wchar_t **src, size_t nwc,
|
||||
size_t len, FAR mbstate_t *ps)
|
||||
{
|
||||
size_t i;
|
||||
FAR const wchar_t *ws = *src;
|
||||
size_t cnt = 0;
|
||||
|
||||
if (dst == NULL)
|
||||
{
|
||||
for (i = 0; i < nwc; i++)
|
||||
{
|
||||
wchar_t wc = (*src)[i];
|
||||
|
||||
if (wc < 0 || wc > 0xff)
|
||||
{
|
||||
set_errno(EILSEQ);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (wc == L'\0')
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
len = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < nwc && i < len; i++)
|
||||
while (ws != NULL && nwc != 0)
|
||||
{
|
||||
wchar_t wc = (*src)[i];
|
||||
char tmp[MB_LEN_MAX];
|
||||
size_t res;
|
||||
|
||||
if (wc < 0 || wc > 0xff)
|
||||
if (*ws == 0)
|
||||
{
|
||||
*src += i;
|
||||
set_errno(EILSEQ);
|
||||
return -1;
|
||||
ws = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
dst[i] = wc;
|
||||
if (wc == L'\0')
|
||||
res = wcrtomb(len < MB_LEN_MAX ? tmp : dst, *ws, ps);
|
||||
if ((ssize_t)res < 0)
|
||||
{
|
||||
*src = NULL;
|
||||
return i;
|
||||
cnt = res;
|
||||
break;
|
||||
}
|
||||
|
||||
if (dst != NULL)
|
||||
{
|
||||
if (len < MB_LEN_MAX)
|
||||
{
|
||||
if (res > len)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(dst, tmp, res);
|
||||
}
|
||||
|
||||
dst += res;
|
||||
len -= res;
|
||||
}
|
||||
|
||||
ws++;
|
||||
nwc--;
|
||||
cnt += res;
|
||||
}
|
||||
|
||||
*src += i;
|
||||
return i;
|
||||
if (dst != NULL)
|
||||
{
|
||||
*src = ws;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue