From 1104db8328a197c7ccf6959a238277f416a2113a Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Fri, 21 Nov 2014 15:50:27 -0800 Subject: [PATCH] libpayload: UTF-16LE to ASCII conversion This patch adds a simple function to convert a string in UTF-16LE to ASCII. TEST=Ran against a string found in a GPT with the intended outcome BRANCH=none BUG=none Signed-off-by: Dan Ehrenberg Change-Id: I50ca5bfdfbef9e084321b2beb1b8d4194ca5af9c Reviewed-on: https://chromium-review.googlesource.com/231456 Reviewed-by: Julius Werner --- payloads/libpayload/include/string.h | 7 +++++++ payloads/libpayload/libc/string.c | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/payloads/libpayload/include/string.h b/payloads/libpayload/include/string.h index 9e4f791a7d..b75128a5ee 100644 --- a/payloads/libpayload/include/string.h +++ b/payloads/libpayload/include/string.h @@ -67,6 +67,13 @@ char* strtok(char *str, const char *delim); char* strtok_r(char *str, const char *delim, char **ptr); /** @} */ +/** + * @defgroup string Unicode functions + * @{ + */ +char *utf16le_to_ascii(uint16_t *utf16_string, int maxlen); +/** @} */ + /** * @defgroup string OpenBSD based safe string functions * @{ diff --git a/payloads/libpayload/libc/string.c b/payloads/libpayload/libc/string.c index cfa0b4ada4..883d158222 100644 --- a/payloads/libpayload/libc/string.c +++ b/payloads/libpayload/libc/string.c @@ -643,3 +643,22 @@ void perror(const char *s) { printf("%s: %d\n", s?s:"(none)", errno); } + +/** + * Simple routine to convert UTF-16 to ASCII, giving up with ? if too high. + * A single code point may convert to ?? if it is not in the BMP. 
+ * @param utf16_string UTF-16LE string; units are read in host (assumed LE) byte order
+ * @param maxlen Length of the string in UTF-16 code units; negative counts as 0
+ * @return Newly allocated ASCII string of that length plus '\0'; owned by the caller
+ */
+char *utf16le_to_ascii(uint16_t *utf16_string, int maxlen)
+{
+	int len = maxlen > 0 ? maxlen : 0; /* a negative maxlen would index before the buffer */
+	char *ascii_string = xmalloc(len + 1); /* +1 for trailing \0 */
+	ascii_string[len] = '\0';
+	for (int i = 0; i < len; i++) {
+		uint16_t wchar = utf16_string[i];
+		ascii_string[i] = wchar > 0x7f ? '?' : (char)wchar; /* non-ASCII unit -> '?' */
+	}
+	return ascii_string;
+}