1
0
Fork 0
forked from nuttx/nuttx-update

libc/wchar: support wchar

Implemented according to https://en.wikipedia.org/wiki/UTF-8

Signed-off-by: Jiuzhu Dong <dongjiuzhu1@xiaomi.com>
This commit is contained in:
Jiuzhu Dong 2022-05-29 16:27:52 +08:00 committed by Xiang Xiao
parent 035840a770
commit 673a2e0136
10 changed files with 289 additions and 95 deletions

View file

@ -60,7 +60,7 @@
* the correct value.
*/
#define MB_LEN_MAX 1
#define MB_LEN_MAX 4
/* Configurable limits required by POSIX ****************************************
*

View file

@ -56,7 +56,7 @@
* character specified by the current locale.
*/
#define MB_CUR_MAX 1
#define MB_CUR_MAX 4
/* The environ variable, normally 'char **environ;' is not implemented as a
* function call. However, get_environ_ptr() can be used in its place.

View file

@ -40,7 +40,7 @@
****************************************************************************/
/****************************************************************************
* Name: mbtowc.c
* Name: mbtowc
*
* Description:
* Minimal multibyte to wide char converter
@ -59,10 +59,5 @@ int mbtowc(FAR wchar_t *pwc, FAR const char *s, size_t n)
return -1;
}
if (pwc)
{
*pwc = (wchar_t)*s;
}
return (*s != '\0');
return mbrtowc(pwc, s, n, NULL);
}

View file

@ -34,10 +34,8 @@
* Included Files
****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <errno.h>
/****************************************************************************
* Public Functions
@ -53,19 +51,5 @@
/* wctomb: convert one wide character to its multibyte representation.
 *
 * This is a thin wrapper that forwards to wcrtomb() with a NULL conversion
 * state, so the accepted range of wc, the number of bytes written to s and
 * the error reporting are exactly those of wcrtomb().
 *
 *   s  - Destination buffer (sized for the longest multibyte character),
 *        or NULL.
 *   wc - Wide character to convert.
 *
 * Returns whatever wcrtomb(s, wc, NULL) returns, narrowed to int: the
 * number of bytes stored, or -1 when wc cannot be encoded.
 */

int wctomb(FAR char *s, wchar_t wc)
{
  return wcrtomb(s, wc, NULL);
}

View file

@ -25,7 +25,7 @@ CSRCS += lib_wmemmove.c lib_wmemset.c lib_btowc.c lib_mbrtowc.c lib_wctob.c
CSRCS += lib_wcslcpy.c lib_wcsxfrm.c lib_wcrtomb.c lib_wcsftime.c
CSRCS += lib_wcscoll.c lib_wcstol.c lib_wcstoll.c lib_wcstoul.c
CSRCS += lib_wcstoull.c lib_wcstold.c lib_wcstof.c lib_wcstod.c
CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c
CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c lib_mbsinit.c
CSRCS += lib_mbrlen.c lib_mbsrtowcs.c lib_wcsrtombs.c
# Add the wchar directory to the build

View file

@ -32,12 +32,62 @@
* Included Files
****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <wchar.h>
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
/* Implemented according to https://en.wikipedia.org/wiki/UTF-8 */
/* The decoder keeps its progress in one 32-bit "state word".  The macros
 * below build the initial state word for every possible lead byte and test
 * a following byte against it.  mbrtowc() shifts the word left by 6 for
 * every continuation byte it consumes and treats the character as complete
 * once bit 31 of the word is clear.
 */
/* SA and SB are the smallest and largest byte values accepted as the first
 * byte of a multi-byte sequence; the state table is indexed by (byte - SA).
 */
#define SA 0xc2u
#define SB 0xf4u
/* Upper 6 state bits are a negative integer offset to bound-check next byte
* equivalent to: (((b) - 0x80) | ((b) + offset)) & ~0x3f
*
* OOB(c, b) is non-zero when byte b lies outside the range that state word
* c permits for the next byte.
*/
#define OOB(c, b) (((((b) >> 3) - 0x10) | \
(((b) >> 3) + ((int32_t)(c) >> 26))) & ~7)
/* Interval [a,b). Either a must be 80 or b must be c0, lower 3 bits clear. */
/* R(a, b) packs that interval into the high bits of a state word (the
 * value is shifted left by 23).
 */
#define R(a, b) ((uint32_t)((uint32_t)((a) == 0x80 ? 0x40u - (b) : \
0u - (a)) << 23))
/* C(x): state for lead byte 0xc0 + x.  x < 2 yields -1 (no valid state);
 * otherwise one following byte in [0x80,0xc0) is expected and x is the
 * payload carried by the lead byte.
 */
#define C(x) ((x) < 2 ? -1 : (R(0x80, 0xc0) | (x)))
/* D(x): same as C() for lead byte 0xd0 + x. */
#define D(x) C((x) + 16)
/* E(x): state for lead byte 0xe0 + x.  The first following byte is limited
 * to [0xa0,0xc0) for x == 0 (this excludes overlong forms) and to
 * [0x80,0xa0) for x == 0xd (this excludes the surrogate range), else
 * [0x80,0xc0).  The range for the second following byte is stored shifted
 * right by 6 so that it moves into place after one byte is consumed.
 */
#define E(x) (((x) == 0 ? R(0xa0, 0xc0) : \
(x) == 0xd ? R(0x80, 0xa0) : R(0x80, 0xc0)) \
| (R(0x80, 0xc0) >> 6) \
| (x))
/* F(x): state for lead byte 0xf0 + x.  The first following byte is limited
 * to [0x90,0xc0) for x == 0 and to [0x80,0x90) for x == 4, else
 * [0x80,0xc0); x >= 5 contributes no range bits.  The ranges for the second
 * and third following bytes are stored shifted right by 6 and 12.
 */
#define F(x) (((x) >= 5 ? 0 : \
(x) == 0 ? R(0x90, 0xc0) : \
(x) == 4 ? R(0x80, 0x90) : R(0x80, 0xc0)) \
| (R(0x80, 0xc0) >> 6) \
| (R(0x80, 0xc0) >> 12) \
| (x))
/****************************************************************************
* Private Data
****************************************************************************/
/* The layout of g_bittab follows the "Codepage layout" table at
 * https://en.wikipedia.org/wiki/UTF-8.
 *
 * The table holds one initial decoder state word per lead byte, from 0xc2
 * (first entry, C(0x2)) through 0xf4 (last entry, F(0x4)), i.e. 51 entries.
 * mbrtowc() indexes it with (lead byte - SA).
 */
static const uint32_t g_bittab[] =
{
/* Lead bytes 0xc2 - 0xcf */
C(0x2), C(0x3), C(0x4), C(0x5), C(0x6), C(0x7),
C(0x8), C(0x9), C(0xa), C(0xb), C(0xc), C(0xd), C(0xe), C(0xf),
/* Lead bytes 0xd0 - 0xdf */
D(0x0), D(0x1), D(0x2), D(0x3), D(0x4), D(0x5), D(0x6), D(0x7),
D(0x8), D(0x9), D(0xa), D(0xb), D(0xc), D(0xd), D(0xe), D(0xf),
/* Lead bytes 0xe0 - 0xef */
E(0x0), E(0x1), E(0x2), E(0x3), E(0x4), E(0x5), E(0x6), E(0x7),
E(0x8), E(0x9), E(0xa), E(0xb), E(0xc), E(0xd), E(0xe), E(0xf),
/* Lead bytes 0xf0 - 0xf4 */
F(0x0), F(0x1), F(0x2), F(0x3), F(0x4)
};
/****************************************************************************
* Public Functions
****************************************************************************/
@ -53,20 +103,86 @@
/* mbrtowc: decode one UTF-8 character from at most n bytes at s.
 *
 *   pwc - Receives the decoded wide character; may be NULL, in which case
 *         the result is discarded.
 *   s   - Input bytes.  NULL only queries the conversion state.
 *   n   - Number of bytes available at s.
 *   ps  - Conversion state carrying a partially decoded character between
 *         calls; when NULL a state private to this function is used.
 *
 * Returns:
 *   0           - s is NULL and the state is initial, or the decoded
 *                 character is the null character.
 *   1 .. n      - number of bytes consumed by this call to complete one
 *                 character.
 *   (size_t)-2  - the bytes seen so far (possibly none, when n is 0) form
 *                 an incomplete but so far valid character; the progress
 *                 is saved in the state.
 *   (size_t)-1  - invalid sequence (or s is NULL while a character is
 *                 pending); errno is set to EILSEQ and the state is reset.
 */

size_t mbrtowc(FAR wchar_t *pwc, FAR const char *s,
               size_t n, FAR mbstate_t *ps)
{
  FAR const unsigned char *src = (FAR const void *)s;
  static mbstate_t state;
  size_t num = n;
  wchar_t dummy;
  uint32_t c;

  /* Fall back to the function-private state when none is supplied */

  if (ps == NULL)
    {
      ps = &state;
    }

  /* The first 32-bit word of the state holds the decoder state word;
   * zero means no character is in progress.
   */

  c = *(FAR uint32_t *)ps;

  if (src == NULL)
    {
      if (c != 0)
        {
          goto ilseq;
        }

      return 0;
    }
  else if (pwc == NULL)
    {
      pwc = &dummy;
    }

  if (n == 0)
    {
      return (size_t)-2;
    }

  if (c == 0)
    {
      /* Starting a new character: plain ASCII is returned directly (0 for
       * the null character, 1 otherwise).
       */

      if (*src < 0x80)
        {
          return !!(*pwc = *src);
        }

      /* Unsigned wrap-around rejects every byte outside [SA, SB] */

      if (*src - SA > SB - SA)
        {
          goto ilseq;
        }

      c = g_bittab[*src++ - SA];
      n--;
    }

  if (n != 0)
    {
      /* The first following byte has a range that depends on the lead
       * byte; that range is encoded in the state word.
       */

      if (OOB(c, *src) != 0)
        {
          goto ilseq;
        }

loop:

      /* Append the 6 payload bits of the continuation byte */

      c = (c << 6) | (*src++ - 0x80);
      n--;

      /* Bit 31 clear: every expected byte has been consumed */

      if ((c >> 31) == 0)
        {
          *(FAR uint32_t *)ps = 0;
          *pwc = c;
          return num - n;
        }

      if (n != 0)
        {
          /* Remaining bytes must be plain continuation bytes 0x80..0xbf */

          if (*src - 0x80u >= 0x40)
            {
              goto ilseq;
            }

          goto loop;
        }
    }

  /* Input exhausted in the middle of a character: remember the progress */

  *(FAR uint32_t *)ps = c;
  return (size_t)-2;

ilseq:
  *(FAR uint32_t *)ps = 0;
  set_errno(EILSEQ);
  return (size_t)-1;
}

View file

@ -0,0 +1,42 @@
/****************************************************************************
* libs/libc/wchar/lib_mbsinit.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <wchar.h>
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: mbsinit
 *
 * Description:
 *   Test whether a conversion state object describes the initial conversion
 *   state, i.e. whether no multibyte character is partially decoded.
 *
 * Input Parameters:
 *   st - Conversion state to inspect; may be NULL.
 *
 * Returned Value:
 *   Non-zero if st is NULL or the leading 32-bit word of *st is zero,
 *   zero otherwise.
 *
 ****************************************************************************/

int mbsinit(FAR const mbstate_t *st)
{
  /* The state is only read here, so the cast keeps the const qualifier */

  return st == NULL || *(FAR const uint32_t *)st == 0;
}

View file

@ -22,8 +22,6 @@
* Included Files
****************************************************************************/
#include <sys/types.h>
#include <string.h>
#include <wchar.h>
/****************************************************************************
@ -68,23 +66,59 @@
/* mbsnrtowcs: convert a multibyte string to a wide-character string,
 * reading at most nms bytes and producing at most len wide characters.
 *
 *   dst - Output array, or NULL to only count the characters (len is then
 *         ignored and *src is left untouched).
 *   src - In: address of the input pointer.  Out (dst != NULL only): NULL
 *         when the terminating null was converted, otherwise the position
 *         where conversion stopped.
 *   nms - Maximum number of input bytes to examine.
 *   len - Maximum number of wide characters to store.
 *   ps  - Conversion state handed through to mbrtowc().
 *
 * Returns the number of wide characters converted, not counting the
 * terminating null, or (size_t)-1 when mbrtowc() reports an invalid
 * sequence.
 */

size_t mbsnrtowcs(FAR wchar_t *dst, FAR const char **src, size_t nms,
                  size_t len, FAR mbstate_t *ps)
{
  FAR const char *s = *src;
  FAR wchar_t *ws = dst;
  size_t cnt = 0;
  size_t l;

  /* Counting mode: no output limit applies */

  if (dst == NULL)
    {
      len = SIZE_MAX;
    }

  if (s != NULL)
    {
      while (len > 0 && nms > 0)
        {
          l = mbrtowc(ws, s, nms, ps);
          if ((ssize_t)l <= 0)
            {
              if ((ssize_t)l == -2)
                {
                  /* The input buffer ends with an incomplete character:
                   * stop at the end of the input buffer.
                   */

                  s += nms;
                }
              else if (l == 0)
                {
                  /* Terminating null converted: conversion is complete */

                  s = NULL;
                }
              else
                {
                  /* Invalid sequence: propagate (size_t)-1 */

                  cnt = l;
                }

              break;
            }

          s += l;
          nms -= l;
          if (ws != NULL)
            {
              ws++;
            }

          len--;
          cnt++;
        }
    }

  if (dst != NULL)
    {
      *src = s;
    }

  return cnt;
}

View file

@ -34,10 +34,7 @@
* Included Files
****************************************************************************/
#include <string.h>
#include <wchar.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/****************************************************************************
@ -54,17 +51,37 @@
/* wcrtomb: encode the wide character wc as UTF-8 and store it at s.
 *
 *   s  - Destination buffer with room for up to 4 bytes.  When NULL,
 *        nothing is converted and 0 is returned.
 *   wc - Wide character (Unicode code point) to encode.
 *   ps - Conversion state; not consulted, the encoding is stateless.
 *
 * Returns the number of bytes stored (1 to 4), 0 when s is NULL, or
 * (size_t)-1 with errno set to EILSEQ when wc is not a Unicode scalar
 * value: a surrogate (0xd800-0xdfff), a value above 0x10ffff, or a
 * negative value.
 */

size_t wcrtomb(FAR char *s, wchar_t wc, FAR mbstate_t *ps)
{
  if (s == NULL)
    {
      return 0;
    }
  else if ((unsigned)wc < 0x80)
    {
      *s = wc;
      return 1;
    }
  else if ((unsigned)wc < 0x800)
    {
      *s++ = 0xc0 | (wc >> 6);
      *s = 0x80 | (wc & 0x3f);
      return 2;
    }
  else if ((unsigned)wc < 0xd800 || (unsigned)wc - 0xe000 < 0x2000)
    {
      /* 0x0800-0xd7ff and 0xe000-0xffff; the surrogate block in between
       * is not encodable and falls through to EILSEQ.
       */

      *s++ = 0xe0 | (wc >> 12);
      *s++ = 0x80 | ((wc >> 6) & 0x3f);
      *s = 0x80 | (wc & 0x3f);
      return 3;
    }
  else if ((unsigned long)wc - 0x10000 < 0x100000)
    {
      /* 0x10000-0x10ffff only: the lower bound keeps surrogates from
       * being picked up by this branch.
       */

      *s++ = 0xf0 | ((unsigned long)wc >> 18);
      *s++ = 0x80 | ((wc >> 12) & 0x3f);
      *s++ = 0x80 | ((wc >> 6) & 0x3f);
      *s = 0x80 | (wc & 0x3f);
      return 4;
    }

  set_errno(EILSEQ);
  return (size_t)-1;
}

View file

@ -22,12 +22,9 @@
* Included Files
****************************************************************************/
#include <sys/types.h>
#include <wchar.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <limits.h>
/****************************************************************************
* Public Functions
@ -70,48 +67,57 @@
/* wcsnrtombs: convert a wide-character string to a multibyte string,
 * reading at most nwc wide characters and storing at most len bytes.
 *
 *   dst - Output buffer, or NULL to only count the bytes (len is then
 *         ignored and *src is left untouched).
 *   src - In: address of the input pointer.  Out (dst != NULL only): NULL
 *         when the terminating null was converted and stored, otherwise
 *         the position where conversion stopped.
 *   nwc - Maximum number of wide characters to examine.
 *   len - Maximum number of bytes to store in dst.
 *   ps  - Conversion state handed through to wcrtomb().
 *
 * Returns the number of bytes produced, not counting the terminating null
 * byte, or (size_t)-1 when wcrtomb() rejects a character.
 */

size_t wcsnrtombs(FAR char *dst, FAR const wchar_t **src, size_t nwc,
                  size_t len, FAR mbstate_t *ps)
{
  FAR const wchar_t *ws = *src;
  size_t cnt = 0;

  /* Counting mode: force every character through the scratch buffer */

  if (dst == NULL)
    {
      len = 0;
    }

  while (ws != NULL && nwc != 0)
    {
      char tmp[MB_LEN_MAX];
      size_t res;

      if (*ws == 0)
        {
          if (dst != NULL)
            {
              if (len == 0)
                {
                  /* No room for the terminator: stop with *src left
                   * pointing at the null wide character.
                   */

                  break;
                }

              /* The terminating null is part of the converted output */

              *dst = '\0';
            }

          ws = NULL;
          break;
        }

      /* Encode into the scratch buffer whenever the remaining room might
       * be too small for the longest possible character.
       */

      res = wcrtomb(len < MB_LEN_MAX ? tmp : dst, *ws, ps);
      if ((ssize_t)res < 0)
        {
          cnt = res;
          break;
        }

      if (dst != NULL)
        {
          if (len < MB_LEN_MAX)
            {
              if (res > len)
                {
                  break;
                }

              memcpy(dst, tmp, res);
            }

          dst += res;
          len -= res;
        }

      ws++;
      nwc--;
      cnt += res;
    }

  if (dst != NULL)
    {
      *src = ws;
    }

  return cnt;
}