Harden string processing during parsing in lexer.l, config_gram.y and otherwise

- Replace strdup(s) with cfg_strdup(funname, s) that exits on out of memory
- Replace malloc(n) with cfg_malloc(funname, n) that exits on out of memory
- Change multiline string scanning in lexer.l to avoid core dump
- Remove global variables string_buf and string_buf_ptr
- Ensure reading strings unescapes strings C-style
- Ensure writing strings escapes strings C-style again

The commit looks longer than needed because unescape() and its auxiliary functions had to be moved from term.c (not in libavrdude) to config.c (in libavrdude).
2025-12-13 17:34:56 +00:00 · 2022-08-09 21:20:44 +01:00
parent 8a717987ec
commit 22c4dbf23e
14 changed files with 355 additions and 402 deletions
--- a/src/term.c
+++ b/src/term.c
@@ -20,7 +20,6 @@

 #include "ac_cfg.h"

-#include <ctype.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -346,178 +345,6 @@ static int cmd_dump(PROGRAMMER * pgm, struct avrpart * p,
 }


-// Convert the next n hex digits of s to a hex number
-static unsigned int tohex(const unsigned char *s, unsigned int n) {
-  int ret, c;
-
-  ret = 0;
-  while(n--) {
-    ret *= 16;
-    c = *s++;
-    ret += c >= '0' && c <= '9'? c - '0': c >= 'a' && c <= 'f'? c - 'a' + 10: c - 'A' + 10;
-  }
-
-  return ret;
-}
-
-/*
- * Create a utf-8 character sequence from a single unicode character.
- * Permissive for some invalid unicode sequences but not for those with
- * high bit set). Returns numbers of characters written (0-6).
- */
-static int wc_to_utf8str(unsigned int wc, unsigned char *str) {
-  if(!(wc & ~0x7fu)) {
-    *str = (char) wc;
-    return 1;
-  }
-  if(!(wc & ~0x7ffu)) {
-    *str++ = (char) ((wc >> 6) | 0xc0);
-    *str++ = (char) ((wc & 0x3f) | 0x80);
-    return 2;
-  }
-  if(!(wc & ~0xffffu)) {
-    *str++ = (char) ((wc >> 12) | 0xe0);
-    *str++ = (char) (((wc >> 6) & 0x3f) | 0x80);
-    *str++ = (char) ((wc & 0x3f) | 0x80);
-    return 3;
-  }
-  if(!(wc & ~0x1fffffu)) {
-    *str++ = (char) ((wc >> 18) | 0xf0);
-    *str++ = (char) (((wc >> 12) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 6) & 0x3f) | 0x80);
-    *str++ = (char) ((wc & 0x3f) | 0x80);
-    return 4;
-  }
-  if(!(wc & ~0x3ffffffu)) {
-    *str++ = (char) ((wc >> 24) | 0xf8);
-    *str++ = (char) (((wc >> 18) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 12) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 6) & 0x3f) | 0x80);
-    *str++ = (char) ((wc & 0x3f) | 0x80);
-    return 5;
-  }
-  if(!(wc & ~0x7fffffffu)) {
-    *str++ = (char) ((wc >> 30) | 0xfc);
-    *str++ = (char) (((wc >> 24) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 18) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 12) & 0x3f) | 0x80);
-    *str++ = (char) (((wc >> 6) & 0x3f) | 0x80);
-    *str++ = (char) ((wc & 0x3f) | 0x80);
-    return 6;
-  }
-  return 0;
-}
-
-// Unescape C-style strings, destination d must hold enough space (and can be source s)
-static unsigned char *unescape(unsigned char *d, const unsigned char *s) {
-  unsigned char *ret = d;
-  int n, k;
-
-  while(*s) {
-    switch (*s) {
-    case '\\':
-      switch (*++s) {
-      case 'n':
-        *d = '\n';
-        break;
-      case 't':
-        *d = '\t';
-        break;
-      case 'a':
-        *d = '\a';
-        break;
-      case 'b':
-        *d = '\b';
-        break;
-      case 'e':                 // Non-standard ESC
-        *d = 27;
-        break;
-      case 'f':
-        *d = '\f';
-        break;
-      case 'r':
-        *d = '\r';
-        break;
-      case 'v':
-        *d = '\v';
-        break;
-      case '?':
-        *d = '?';
-        break;
-      case '`':
-        *d = '`';
-        break;
-      case '"':
-        *d = '"';
-        break;
-      case '\'':
-        *d = '\'';
-        break;
-      case '\\':
-        *d = '\\';
-        break;
-      case '0':
-      case '1':
-      case '2':
-      case '3':
-      case '4':
-      case '5':
-      case '6':
-      case '7':                 // 1-3 octal digits
-        n = *s - '0';
-        for(k = 0; k < 2 && s[1] >= '0' && s[1] <= '7'; k++)  // Max 2 more octal characters
-          n *= 8, n += s[1] - '0', s++;
-        *d = n;
-        break;
-      case 'x':                 // Unlimited hex digits
-        for(k = 0; isxdigit(s[k + 1]); k++)
-          continue;
-        if(k > 0) {
-          *d = tohex(s + 1, k);
-          s += k;
-        } else {                // No hex digits after \x? copy \x
-          *d++ = '\\';
-          *d = 'x';
-        }
-        break;
-      case 'u':                 // Exactly 4 hex digits and valid unicode
-        if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]) &&
-          (n = wc_to_utf8str(tohex(s+1, 4), d))) {
-          d += n - 1;
-          s += 4;
-        } else {                // Invalid \u sequence? copy \u
-          *d++ = '\\';
-          *d = 'u';
-        }
-        break;
-      case 'U':                 // Exactly 6 hex digits and valid unicode
-        if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]) && isxdigit(s[5]) && isxdigit(s[6]) &&
-          (n = wc_to_utf8str(tohex(s+1, 6), d))) {
-          d += n - 1;
-          s += 6;
-        } else {                // Invalid \U sequence? copy \U
-          *d++ = '\\';
-          *d = 'U';
-        }
-        break;
-      default:                  // Keep the escape sequence (C would warn and remove \)
-        *d++ = '\\';
-        *d = *s;
-      }
-      break;
-
-    default:                    // Not an escape sequence: just copy the character
-      *d = *s;
-    }
-    d++;
-    s++;
-  }
-  *d = *s;                      // Terminate
-
-  return ret;
-}
-
-
 static size_t maxstrlen(int argc, char **argv) {
  size_t max = 0;

@@ -800,7 +627,7 @@ static int cmd_write(PROGRAMMER * pgm, struct avrpart * p,
          }
          // Strip start and end quotes, and unescape C string
          strncpy(s, argi+1, arglen-2);
-          unescape((unsigned char *) s, (unsigned char *) s);
+          cfg_unescape(s, s);
          if (*argi == '\'') { // Single C-style character
            if(*s && s[1])
              terminal_message(MSG_INFO, "%s (write): only using first character of %s\n",