Harden string processing during parsing in lexer.l, config_gram.y and otherwise

- Replace strdup(s) with cfg_strdup(funname, s) that exits on out of mem
 - Replace malloc(n) with cfg_malloc(funname, n) that exits on out of mem
 - Change multiline string scanning in lexer.l to avoid core dump
 - Remove global variables string_buf and string_buf_ptr
 - Ensure reading strings unescapes strings C-Style
 - Ensure writing strings escapes strings C-Style again

Commit looks longer than needed as unescape() and auxiliary functions needed
to be moved from term.c (not in libavrdude) to config.c (in libavrdude).
This commit is contained in:
Stefan Rueger
2022-08-09 21:20:44 +01:00
parent 8a717987ec
commit 22c4dbf23e
14 changed files with 355 additions and 402 deletions

View File

@@ -20,7 +20,6 @@
#include "ac_cfg.h"
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
@@ -346,178 +345,6 @@ static int cmd_dump(PROGRAMMER * pgm, struct avrpart * p,
}
/*
 * Convert the next n hex digits of s to a number.
 *
 * The caller must make sure that s[0..n-1] are all hex digits; other
 * characters are not rejected and yield meaningless values. The result is
 * accumulated in unsigned arithmetic so that digit runs too long for an
 * unsigned int wrap around (keeping the low-order digits) instead of
 * overflowing a signed int, which would be undefined behaviour.
 */
static unsigned int tohex(const unsigned char *s, unsigned int n) {
  unsigned int ret = 0;

  while(n--) {
    int c = *s++;
    int digit = c >= '0' && c <= '9'? c - '0': c >= 'a' && c <= 'f'? c - 'a' + 10: c - 'A' + 10;

    ret = ret*16 + (unsigned int) digit;
  }

  return ret;
}
/*
 * Encode the single character code wc as a UTF-8 byte sequence at str.
 *
 * Uses the historic 1-6 byte UTF-8 layout, so every value up to 0x7fffffff
 * is accepted, including some that are not valid unicode; values with the
 * high bit (bit 31) set are rejected. The output is not NUL-terminated.
 * Returns the number of bytes written (0-6); 0 means wc could not be encoded
 * and nothing was written.
 */
static int wc_to_utf8str(unsigned int wc, unsigned char *str) {
  // Marker bits of the leading byte, indexed by sequence length
  static const unsigned char lead[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  int len, i;

  if(wc <= 0x7fu)
    len = 1;
  else if(wc <= 0x7ffu)
    len = 2;
  else if(wc <= 0xffffu)
    len = 3;
  else if(wc <= 0x1fffffu)
    len = 4;
  else if(wc <= 0x3ffffffu)
    len = 5;
  else if(wc <= 0x7fffffffu)
    len = 6;
  else
    return 0;

  // Leading byte carries the topmost payload bits
  str[0] = (unsigned char) ((wc >> 6*(len - 1)) | lead[len]);

  // Each continuation byte carries the next lower 6 bits
  for(i = 1; i < len; i++)
    str[i] = (unsigned char) (((wc >> 6*(len - 1 - i)) & 0x3f) | 0x80);

  return len;
}
/*
 * Unescape the C-style string s into d and return d.
 *
 * The output is never longer than the input, so d needs strlen(s)+1 bytes
 * and may be the same buffer as s (in-place operation). Recognised are the
 * standard single-character escapes, the non-standard \e (ESC) and \`, 1-3
 * octal digits, \x with any number of hex digits (only the low 8 bits are
 * stored), \uXXXX and \UXXXXXX (written as UTF-8). Unknown or malformed
 * escape sequences are copied verbatim including the backslash. A lone
 * backslash at the very end of s is kept as is.
 */
static unsigned char *unescape(unsigned char *d, const unsigned char *s) {
  unsigned char *ret = d;
  int n, k;

  while(*s) {
    switch (*s) {
    case '\\':
      switch (*++s) {
      case 0:                   // Lone backslash ends the string: keep it and stop
        // Must not fall through to the s++ below, which would step over the
        // terminating NUL and make the loop read beyond the end of s
        *d++ = '\\';
        *d = 0;
        return ret;
      case 'n':
        *d = '\n';
        break;
      case 't':
        *d = '\t';
        break;
      case 'a':
        *d = '\a';
        break;
      case 'b':
        *d = '\b';
        break;
      case 'e':                 // Non-standard ESC
        *d = 27;
        break;
      case 'f':
        *d = '\f';
        break;
      case 'r':
        *d = '\r';
        break;
      case 'v':
        *d = '\v';
        break;
      case '?':
        *d = '?';
        break;
      case '`':
        *d = '`';
        break;
      case '"':
        *d = '"';
        break;
      case '\'':
        *d = '\'';
        break;
      case '\\':
        *d = '\\';
        break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':                 // 1-3 octal digits
        n = *s - '0';
        for(k = 0; k < 2 && s[1] >= '0' && s[1] <= '7'; k++) // Max 2 more octal characters
          n *= 8, n += s[1] - '0', s++;
        *d = n;                 // Values above 0377 are truncated to 8 bits
        break;
      case 'x':                 // Unlimited hex digits
        for(k = 0; isxdigit(s[k + 1]); k++)
          continue;
        if(k > 0) {
          *d = tohex(s + 1, k); // Only the low 8 bits are kept
          s += k;
        } else {                // No hex digits after \x? copy \x
          *d++ = '\\';
          *d = 'x';
        }
        break;
      case 'u':                 // Exactly 4 hex digits and valid unicode
        if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]) &&
          (n = wc_to_utf8str(tohex(s+1, 4), d))) {
          d += n - 1;           // Final d++ below accounts for the last byte
          s += 4;
        } else {                // Invalid \u sequence? copy \u
          *d++ = '\\';
          *d = 'u';
        }
        break;
      case 'U':                 // Exactly 6 hex digits and valid unicode
        if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]) && isxdigit(s[5]) && isxdigit(s[6]) &&
          (n = wc_to_utf8str(tohex(s+1, 6), d))) {
          d += n - 1;           // Final d++ below accounts for the last byte
          s += 6;
        } else {                // Invalid \U sequence? copy \U
          *d++ = '\\';
          *d = 'U';
        }
        break;
      default:                  // Keep the escape sequence (C would warn and remove \)
        *d++ = '\\';
        *d = *s;
      }
      break;

    default:                    // Not an escape sequence: just copy the character
      *d = *s;
    }
    d++;
    s++;
  }
  *d = *s;                      // Terminate

  return ret;
}
static size_t maxstrlen(int argc, char **argv) {
size_t max = 0;
@@ -800,7 +627,7 @@ static int cmd_write(PROGRAMMER * pgm, struct avrpart * p,
}
// Strip start and end quotes, and unescape C string
strncpy(s, argi+1, arglen-2);
unescape((unsigned char *) s, (unsigned char *) s);
cfg_unescape(s, s);
if (*argi == '\'') { // Single C-style character
if(*s && s[1])
terminal_message(MSG_INFO, "%s (write): only using first character of %s\n",