Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "replace.h"
22 : #include "system/locale.h"
23 : #include "charset.h"
24 : #include "lib/util/byteorder.h"
25 : #include "lib/util/fault.h"
26 :
27 : /**
28 : String replace.
29 : NOTE: oldc and newc must be 7 bit characters
30 : **/
31 5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
32 : {
33 5 : struct smb_iconv_handle *ic = get_iconv_handle();
34 19 : while (s && *s) {
35 14 : size_t size;
36 14 : codepoint_t c = next_codepoint_handle(ic, s, &size);
37 14 : if (c == oldc) {
38 5 : *s = newc;
39 : }
40 14 : s += size;
41 : }
42 5 : }
43 :
44 : /**
45 : Convert a string to lower case, allocated with talloc
46 : **/
47 5814525 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
48 : TALLOC_CTX *ctx, const char *src)
49 : {
50 5814525 : size_t size=0;
51 18037 : char *dest;
52 :
53 5814525 : if(src == NULL) {
54 0 : return NULL;
55 : }
56 :
57 : /* this takes advantage of the fact that upper/lower can't
58 : change the length of a character by more than 1 byte */
59 5814525 : dest = talloc_array(ctx, char, 2*(strlen(src))+1);
60 5814525 : if (dest == NULL) {
61 0 : return NULL;
62 : }
63 :
64 129232125 : while (*src) {
65 414760 : size_t c_size;
66 123417600 : codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
67 123417600 : src += c_size;
68 :
69 123417600 : c = tolower_m(c);
70 :
71 123417600 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
72 123417600 : if (c_size == -1) {
73 0 : talloc_free(dest);
74 0 : return NULL;
75 : }
76 123417600 : size += c_size;
77 : }
78 :
79 5814525 : dest[size] = 0;
80 :
81 : /* trim it so talloc_append_string() works */
82 5814525 : dest = talloc_realloc(ctx, dest, char, size+1);
83 :
84 5814525 : talloc_set_name_const(dest, dest);
85 :
86 5814525 : return dest;
87 : }
88 :
89 5814519 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
90 : {
91 5814519 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
92 5814519 : return strlower_talloc_handle(iconv_handle, ctx, src);
93 : }
94 :
95 : /**
96 : Convert a string to UPPER case, allocated with talloc
97 : source length limited to n bytes, iconv handle supplied
98 : **/
99 698815251 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
100 : TALLOC_CTX *ctx, const char *src, size_t n)
101 : {
102 698815251 : size_t size=0;
103 12931226 : char *dest;
104 :
105 698815251 : if (!src) {
106 192013 : return NULL;
107 : }
108 :
109 : /* this takes advantage of the fact that upper/lower can't
110 : change the length of a character by more than 1 byte */
111 698585561 : dest = talloc_array(ctx, char, 2*(n+1));
112 698585561 : if (dest == NULL) {
113 0 : return NULL;
114 : }
115 :
116 10692946973 : while (n && *src) {
117 118604017 : size_t c_size;
118 9994361414 : codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
119 : CH_UNIX, &c_size);
120 9994361414 : src += c_size;
121 9994361414 : n -= c_size;
122 :
123 9994361414 : c = toupper_m(c);
124 :
125 9994361414 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
126 9994361414 : if (c_size == -1) {
127 2 : talloc_free(dest);
128 2 : return NULL;
129 : }
130 9994361412 : size += c_size;
131 : }
132 :
133 698585559 : dest[size] = 0;
134 :
135 : /* trim it so talloc_append_string() works */
136 698585559 : dest = talloc_realloc(ctx, dest, char, size+1);
137 :
138 698585559 : talloc_set_name_const(dest, dest);
139 :
140 698585559 : return dest;
141 : }
142 :
143 : /**
144 : Convert a string to UPPER case, allocated with talloc
145 : source length limited to n bytes
146 : **/
147 698815245 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
148 : {
149 698815245 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
150 698815245 : return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
151 : }
152 : /**
153 : Convert a string to UPPER case, allocated with talloc
154 : **/
155 5948676 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
156 : {
157 5948676 : return strupper_talloc_n(ctx, src, src?strlen(src):0);
158 : }
159 :
160 : /**
161 : talloc_strdup() a unix string to upper case.
162 : **/
163 3050621 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
164 : {
165 3050621 : return strupper_talloc(ctx, src);
166 : }
167 :
168 : /**
169 : Find the number of 'c' chars in a string
170 : **/
171 4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
172 : {
173 4 : struct smb_iconv_handle *ic = get_iconv_handle();
174 4 : size_t count = 0;
175 :
176 13 : while (*s) {
177 9 : size_t size;
178 9 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
179 9 : if (c2 == c) count++;
180 9 : s += size;
181 : }
182 :
183 4 : return count;
184 : }
185 :
186 3017519 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
187 : {
188 3017519 : if (flags & (STR_NOALIGN|STR_ASCII)) {
189 128545 : return 0;
190 : }
191 2888880 : return PTR_DIFF(p, base_ptr) & 1;
192 : }
193 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format

 The buffer is scanned 16 bits at a time until a zero value is
 read; that terminator is not included in the result.
**/
size_t utf16_len(const void *buf)
{
	size_t len = 0;

	while (PULL_LE_U16(buf, len) != 0) {
		len += 2;
	}

	return len;
}
205 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 the result includes the null termination, i.e. it is utf16_len()
 plus two bytes
**/
size_t utf16_null_terminated_len(const void *buf)
{
	const size_t terminator_size = 2;

	return utf16_len(buf) + terminator_size;
}
214 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 limited by 'n' bytes

 Scanning stops at the first zero 16 bit value, or as soon as a
 complete 16 bit value no longer fits within n bytes.  The
 terminator is not included in the result.
**/
size_t utf16_len_n(const void *src, size_t n)
{
	size_t len = 0;

	while (len + 2 <= n) {
		if (PULL_LE_U16(src, len) == 0) {
			break;
		}
		len += 2;
	}

	return len;
}
227 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 the result includes the null termination
 limited by 'n' bytes

 The two terminator bytes are only added when they still fit
 within n bytes.
**/
size_t utf16_null_terminated_len_n(const void *src, size_t n)
{
	const size_t len = utf16_len_n(src, n);

	return (len + 2 <= n) ? len + 2 : len;
}
245 :
246 5 : uint16_t *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
247 : {
248 5 : uint16_t *new_str = NULL;
249 :
250 : /* Check for overflow. */
251 5 : if (len > SIZE_MAX - 2) {
252 0 : return NULL;
253 : }
254 :
255 : /*
256 : * Allocate the new string, including space for the
257 : * UTF‐16 null terminator.
258 : */
259 5 : new_str = talloc_size(mem_ctx, len + 2);
260 5 : if (new_str == NULL) {
261 0 : return NULL;
262 : }
263 :
264 5 : memcpy(new_str, str, len);
265 :
266 : {
267 : /*
268 : * Ensure that the UTF‐16 string is
269 : * null‐terminated.
270 : */
271 :
272 5 : char *new_bytes = (char *)new_str;
273 :
274 5 : new_bytes[len] = '\0';
275 5 : new_bytes[len + 1] = '\0';
276 : }
277 :
278 5 : return new_str;
279 : }
280 :
281 0 : uint16_t *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
282 : {
283 0 : if (str == NULL) {
284 0 : return NULL;
285 : }
286 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len(str));
287 : }
288 :
289 0 : uint16_t *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
290 : {
291 0 : if (str == NULL) {
292 0 : return NULL;
293 : }
294 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len_n(str, n));
295 : }
296 :
/**
 * Determine the length and validity of a utf-8 string.
 *
 * Scanning stops at maxlen bytes, at a '\0' byte, or at the first
 * invalid sequence. Sequences are rejected when they are truncated
 * by maxlen, have a bad lead or continuation byte, are overlong,
 * encode a UTF-16 surrogate (U+D800..U+DFFF) or exceed U+10FFFF.
 *
 * The three output values are written on failure as well, and then
 * describe the valid section in front of the offending sequence.
 *
 * @param input the string pointer
 * @param maxlen maximum size of the string
 * @param byte_len receives the length in bytes of the valid section
 * @param char_len receives the number of unicode characters in the valid section
 * @param utf16_len receives the number of 16 bit units the valid
 *                  section would need in UTF16 encoding (characters
 *                  encoded in four utf-8 bytes count twice).
 *
 * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
 */
bool utf8_check(const char *input, size_t maxlen,
		size_t *byte_len,
		size_t *char_len,
		size_t *utf16_len)
{
	const uint8_t *bytes = (const uint8_t *)input;
	size_t pos = 0;
	size_t n_chars = 0;
	size_t n_pairs = 0;
	bool valid = true;

	while (pos < maxlen && bytes[pos] != 0) {
		const uint8_t lead = bytes[pos];
		size_t seq_len;
		size_t k;
		uint32_t cp;
		uint32_t smallest;

		if (lead < 0x80) {
			/* plain 7 bit character */
			pos++;
			n_chars++;
			continue;
		}

		/*
		 * The lead byte gives the sequence length, the payload
		 * bits it carries, and the smallest codepoint that
		 * legitimately needs a sequence of this length.
		 */
		if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			seq_len = 2;
			cp = lead & 31;
			smallest = 0x80;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			seq_len = 3;
			cp = lead & 15;
			smallest = 0x800;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			seq_len = 4;
			cp = lead & 7;
			smallest = 0x10000;
		} else {
			/* stray continuation byte or unsupported lead */
			valid = false;
			break;
		}

		if (maxlen - pos < seq_len) {
			/* sequence is cut off by maxlen */
			valid = false;
			break;
		}

		for (k = 1; k < seq_len; k++) {
			const uint8_t cont = bytes[pos + k];

			if ((cont & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (cp << 6) | (cont & 63);
		}
		if (!valid) {
			break;
		}

		if (cp < smallest) {
			/* overlong encoding */
			valid = false;
			break;
		}
		if (cp >= 0xd800 && cp <= 0xdfff) {
			/*
			 * Invalid per RFC3629: this is half of a UTF-16
			 * surrogate pair, and the character it belongs
			 * to should have been encoded as a four byte
			 * utf-8 sequence.
			 */
			valid = false;
			break;
		}
		if (cp > 0x10ffff) {
			valid = false;
			break;
		}

		if (seq_len == 4) {
			/* this one will need two UTF16 characters */
			n_pairs++;
		}

		pos += seq_len;
		n_chars++;
	}

	*byte_len = pos;
	*char_len = n_chars;
	*utf16_len = n_chars + n_pairs;

	return valid;
}
415 :
416 :
417 : /**
418 : * Copy a string from a char* unix src to a dos codepage string destination.
419 : *
420 : * @converted_size the number of bytes occupied by the string in the destination.
421 : * @return bool true if success.
422 : *
423 : * @param flags can include
424 : * <dl>
425 : * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
426 : * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
427 : * </dl>
428 : *
429 : * @param dest_len the maximum length in bytes allowed in the
430 : * destination. If @p dest_len is -1 then no maximum is used.
431 : **/
432 4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
433 : {
434 144 : size_t src_len;
435 144 : bool ret;
436 :
437 4123 : if (flags & STR_UPPER) {
438 4 : char *tmpbuf = strupper_talloc(NULL, src);
439 4 : if (tmpbuf == NULL) {
440 0 : return false;
441 : }
442 4 : ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
443 4 : talloc_free(tmpbuf);
444 4 : return ret;
445 : }
446 :
447 4119 : src_len = strlen(src);
448 :
449 4119 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
450 4075 : src_len++;
451 :
452 4119 : return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
453 : }
454 :
455 : /**
456 : * Copy a string from a dos codepage source to a unix char* destination.
457 : *
458 : * The resulting string in "dest" is always null terminated.
459 : *
460 : * @param flags can have:
461 : * <dl>
462 : * <dt>STR_TERMINATE</dt>
463 : * <dd>STR_TERMINATE means the string in @p src
464 : * is null terminated, and src_len is ignored.</dd>
465 : * </dl>
466 : *
467 : * @param src_len is the length of the source area in bytes.
468 : * @returns the number of bytes occupied by the string in @p src.
469 : **/
470 172 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
471 : {
472 172 : size_t size = 0;
473 :
474 172 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
475 0 : if (src_len == (size_t)-1) {
476 0 : src_len = strlen((const char *)src) + 1;
477 : } else {
478 0 : size_t len = strnlen((const char *)src, src_len);
479 0 : if (len < src_len)
480 0 : len++;
481 0 : src_len = len;
482 : }
483 : }
484 :
485 : /* We're ignoring the return here.. */
486 172 : (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
487 :
488 172 : if (dest_len)
489 172 : dest[MIN(size, dest_len-1)] = 0;
490 :
491 172 : return src_len;
492 : }
493 :
494 : /**
495 : * Copy a string from a char* src to a unicode destination.
496 : *
497 : * @returns the number of bytes occupied by the string in the destination.
498 : *
499 : * @param flags can have:
500 : *
501 : * <dl>
502 : * <dt>STR_TERMINATE <dd>means include the null termination.
503 : * <dt>STR_UPPER <dd>means uppercase in the destination.
504 : * <dt>STR_NOALIGN <dd>means don't do alignment.
505 : * </dl>
506 : *
507 : * @param dest_len is the maximum length allowed in the
508 : * destination. If dest_len is -1 then no maximum is used.
509 : **/
510 456661 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
511 : {
512 456661 : size_t len=0;
513 456661 : size_t src_len = strlen(src);
514 456661 : size_t size = 0;
515 11140 : bool ret;
516 :
517 456661 : if (flags & STR_UPPER) {
518 2952 : char *tmpbuf = strupper_talloc(NULL, src);
519 144 : ssize_t retval;
520 2952 : if (tmpbuf == NULL) {
521 0 : return -1;
522 : }
523 2952 : retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
524 2952 : talloc_free(tmpbuf);
525 2952 : return retval;
526 : }
527 :
528 453709 : if (flags & STR_TERMINATE)
529 314129 : src_len++;
530 :
531 453709 : if (ucs2_align(NULL, dest, flags)) {
532 148588 : *(char *)dest = 0;
533 148588 : dest = (void *)((char *)dest + 1);
534 148588 : if (dest_len) dest_len--;
535 143519 : len++;
536 : }
537 :
538 : /* ucs2 is always a multiple of 2 bytes */
539 453709 : dest_len &= ~1;
540 :
541 453709 : ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
542 453709 : if (ret == false) {
543 0 : return 0;
544 : }
545 :
546 453709 : len += size;
547 :
548 453709 : return (ssize_t)len;
549 : }
550 :
551 :
552 : /**
553 : Copy a string from a ucs2 source to a unix char* destination.
554 : Flags can have:
555 : STR_TERMINATE means the string in src is null terminated.
556 : STR_NOALIGN means don't try to align.
557 : if STR_TERMINATE is set then src_len is ignored if it is -1.
558 : src_len is the length of the source area in bytes
559 : Return the number of bytes occupied by the string in src.
560 : The resulting string in "dest" is always null terminated.
561 : **/
562 :
563 0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
564 : {
565 0 : size_t size = 0;
566 :
567 0 : if (ucs2_align(NULL, src, flags)) {
568 0 : src = (const void *)((const char *)src + 1);
569 0 : if (src_len > 0)
570 0 : src_len--;
571 : }
572 :
573 0 : if (flags & STR_TERMINATE) {
574 0 : if (src_len == (size_t)-1) {
575 0 : src_len = utf16_null_terminated_len(src);
576 : } else {
577 0 : src_len = utf16_null_terminated_len_n(src, src_len);
578 : }
579 : }
580 :
581 : /* ucs2 is always a multiple of 2 bytes */
582 0 : if (src_len != (size_t)-1)
583 0 : src_len &= ~1;
584 :
585 : /* We're ignoring the return here.. */
586 0 : (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
587 0 : if (dest_len)
588 0 : dest[MIN(size, dest_len-1)] = 0;
589 :
590 0 : return src_len;
591 : }
592 :
593 : /**
594 : Copy a string from a char* src to a unicode or ascii
595 : dos codepage destination choosing unicode or ascii based on the
596 : flags in the SMB buffer starting at base_ptr.
597 : Return the number of bytes occupied by the string in the destination.
598 : flags can have:
599 : STR_TERMINATE means include the null termination.
600 : STR_UPPER means uppercase in the destination.
601 : STR_ASCII use ascii even with unicode packet.
602 : STR_NOALIGN means don't do alignment.
603 : dest_len is the maximum length allowed in the destination. If dest_len
604 : is -1 then no maximum is used.
605 : **/
606 :
607 457828 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
608 : {
609 457828 : if (flags & STR_ASCII) {
610 4119 : size_t size = 0;
611 4119 : if (push_ascii_string(dest, src, dest_len, flags, &size)) {
612 4119 : return (ssize_t)size;
613 : } else {
614 0 : return (ssize_t)-1;
615 : }
616 453709 : } else if (flags & STR_UNICODE) {
617 453709 : return push_ucs2(dest, src, dest_len, flags);
618 : } else {
619 0 : smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
620 : return -1;
621 : }
622 : }
623 :
624 :
625 : /**
626 : Copy a string from a unicode or ascii source (depending on
627 : the packet flags) to a char* destination.
628 : Flags can have:
629 : STR_TERMINATE means the string in src is null terminated.
630 : STR_UNICODE means to force as unicode.
631 : STR_ASCII use ascii even with unicode packet.
632 : STR_NOALIGN means don't do alignment.
633 : if STR_TERMINATE is set then src_len is ignored is it is -1
634 : src_len is the length of the source area in bytes.
635 : Return the number of bytes occupied by the string in src.
636 : The resulting string in "dest" is always null terminated.
637 : **/
638 :
639 172 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
640 : {
641 172 : if (flags & STR_ASCII) {
642 172 : return pull_ascii_string(dest, src, dest_len, src_len, flags);
643 0 : } else if (flags & STR_UNICODE) {
644 0 : return pull_ucs2(dest, src, dest_len, src_len, flags);
645 : } else {
646 0 : smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
647 : return -1;
648 : }
649 : }
|