init_utils_string.c (3824B)
/*
 * Copyright (C) 2004-2005 Kay Sievers <kay.sievers@vrfy.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */


/*
 * Strip every trailing occurrence of 'c' from the NUL-terminated string
 * 'path', in place. A string made up only of 'c' becomes the empty string.
 */
static void remove_trailing_chars(char *path, char c)
{
	size_t len;

	len = strlen(path);
	while (len > 0 && path[len-1] == c)
		path[--len] = '\0';
}

/*
 * Count of bytes used to encode one unicode char, derived from the lead
 * byte str[0] only. Returns 1..6, or 0 if str[0] is not a valid lead byte
 * (e.g. a continuation byte, 0xfe or 0xff).
 */
static int utf8_encoded_expected_len(const char *str)
{
	unsigned char c = (unsigned char)str[0];

	if (c < 0x80)
		return 1;
	if ((c & 0xe0) == 0xc0)
		return 2;
	if ((c & 0xf0) == 0xe0)
		return 3;
	if ((c & 0xf8) == 0xf0)
		return 4;
	if ((c & 0xfc) == 0xf8)
		return 5;
	if ((c & 0xfe) == 0xfc)
		return 6;
	return 0;
}

/*
 * Decode one unicode char starting at str.
 *
 * Returns the decoded value, or -1 if the lead byte is invalid or one of
 * the following bytes is not a continuation byte (10xxxxxx). Overlong
 * encodings and out-of-range values are NOT rejected here; see
 * utf8_encoded_valid_unichar() for full validation.
 */
static int utf8_encoded_to_unichar(const char *str)
{
	/* read bytes as unsigned so the result never depends on the
	 * signedness of plain char */
	const unsigned char *s = (const unsigned char *)str;
	int unichar;
	int len;
	int i;

	len = utf8_encoded_expected_len(str);
	switch (len) {
	case 1:
		return (int)s[0];
	case 2:
		unichar = s[0] & 0x1f;
		break;
	case 3:
		unichar = s[0] & 0x0f;
		break;
	case 4:
		unichar = s[0] & 0x07;
		break;
	case 5:
		unichar = s[0] & 0x03;
		break;
	case 6:
		unichar = s[0] & 0x01;
		break;
	default:
		return -1;
	}

	/* each continuation byte contributes its low 6 bits */
	for (i = 1; i < len; i++) {
		if ((s[i] & 0xc0) != 0x80)
			return -1;
		unichar <<= 6;
		unichar |= s[i] & 0x3f;
	}

	return unichar;
}

/* expected size (1..6 bytes) used to encode one unicode char */
static int utf8_unichar_to_encoded_len(int unichar)
{
	if (unichar < 0x80)
		return 1;
	if (unichar < 0x800)
		return 2;
	if (unichar < 0x10000)
		return 3;
	if (unichar < 0x200000)
		return 4;
	if (unichar < 0x4000000)
		return 5;
	return 6;
}

/*
 * Check if unicode char has a valid numeric range.
 *
 * Returns 1 if valid, 0 otherwise. Rejected are: negative values, values
 * above U+10FFFF, the UTF-16 surrogate area U+D800..U+DFFF, and the
 * noncharacters U+FDD0..U+FDEF and U+nFFFE / U+nFFFF of every plane.
 */
static int utf8_unichar_valid_range(int unichar)
{
	/* a negative value is never a code point */
	if (unichar < 0)
		return 0;
	if (unichar > 0x10ffff)
		return 0;
	if ((unichar & 0xfffff800) == 0xd800)
		return 0;
	if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
		return 0;
	/* the last two code points of each plane are noncharacters;
	 * masking with 0xfffe matches both xFFFE and xFFFF */
	if ((unichar & 0xfffe) == 0xfffe)
		return 0;
	return 1;
}

/*
 * Validate one encoded unicode char and return its length in bytes.
 *
 * Returns 1 for a plain ascii byte, 2..6 for a valid multi-byte sequence,
 * or -1 if the sequence is malformed, overlong, or decodes to a value
 * outside the valid range.
 */
static int utf8_encoded_valid_unichar(const char *str)
{
	int len;
	int unichar;
	int i;

	len = utf8_encoded_expected_len(str);
	if (len == 0)
		return -1;

	/* ascii is valid */
	if (len == 1)
		return 1;

	/* check if expected encoded chars are available; every byte of a
	 * multi-byte sequence has the high bit set, so this also stops at
	 * a terminating NUL before anything behind it is read */
	for (i = 0; i < len; i++) {
		if ((str[i] & 0x80) != 0x80)
			return -1;
	}

	unichar = utf8_encoded_to_unichar(str);

	/* check if encoded length matches encoded value (rejects overlong
	 * encodings and the -1 decode error) */
	if (utf8_unichar_to_encoded_len(unichar) != len)
		return -1;

	/* check if value has valid range */
	if (!utf8_unichar_valid_range(unichar))
		return -1;

	return len;
}

/*
 * Replace everything but whitelisted plain ascii and valid utf8.
 *
 * Walks the NUL-terminated string 'str' in place. Bytes that are ascii
 * digits, ascii letters or one of " #$%+-./:=?@_," are kept, as are
 * complete valid multi-byte utf8 sequences. Every other byte is
 * overwritten with '_'.
 *
 * Returns the number of bytes that were replaced.
 */
static int replace_untrusted_chars(char *str)
{
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		/* valid printable ascii char */
		if ((str[i] >= '0' && str[i] <= '9') ||
		    (str[i] >= 'A' && str[i] <= 'Z') ||
		    (str[i] >= 'a' && str[i] <= 'z') ||
		    strchr(" #$%+-./:=?@_,", str[i])) {
			i++;
			continue;
		}

		/* valid utf8 is accepted */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/* everything else is garbage */
		str[i] = '_';
		i++;
		replaced++;
	}

	return replaced;
}