NeoMutt  2024-04-25-76-g20fe7b
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <ctype.h>
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 
#define HSPACE(ch)   (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))
 
#define CONTINUATION_BYTE(ch)   (((ch) & 0xc0) == 0x80)
 

Typedefs

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -.
 
static size_t q_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -.
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements.
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text.
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder.
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted.
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering.
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string.
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string.
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string.
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields.
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list.
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list.
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope.
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope.
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Federico Kircheis
  • Pietro Cerutti
  • Richard Russon
  • Anna Figueiredo Gomes
  • наб

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Definition at line 47 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Definition at line 48 of file rfc2047.c.

◆ HSPACE

#define HSPACE (   ch)    (((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Definition at line 50 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE (   ch)    (((ch) & 0xc0) == 0x80)

Definition at line 52 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)

Definition at line 65 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char * parse_encoded_word ( char *  str,
enum ContentEncoding *  enc,
char **  charset,
size_t *  charsetlen,
char **  text,
size_t *  textlen 
)
static

Parse a string and report RFC2047 elements.

Parameters
[in] str: String to parse
[out] enc: Content encoding found in the first RFC2047 word
[out] charset: Charset found in the first RFC2047 word
[out] charsetlen: Length of the charset string found
[out] text: Start of the first RFC2047 encoded text
[out] textlen: Length of the encoded text found
Return values
ptr: Start of the RFC2047 encoded word
NULL: None was found

Definition at line 148 of file rfc2047.c.

150{
151 regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
152 if (!match)
153 return NULL;
154
155 const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
156 const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
157 const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
158 const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
159
160 /* Charset */
161 *charset = str + mutt_regmatch_start(mcharset);
162 *charsetlen = mutt_regmatch_len(mcharset);
163
164 /* Encoding: either Q or B */
165 *enc = (tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE : ENC_BASE64;
166
167 *text = str + mutt_regmatch_start(mtext);
168 *textlen = mutt_regmatch_len(mtext);
169 return str + mutt_regmatch_start(mfull);
170}
@ ENC_BASE64
Base-64 encoded text.
Definition: mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition: mime.h:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition: prex.c:296
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition: prex.h:98
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition: prex.h:99
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition: prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition: prex.h:96
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition: prex.h:36
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition: regex3.h:76
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition: regex3.h:56
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char *  d,
size_t  dlen,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Attempt to convert a block of text.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] fromcode: Original encoding
[in] tocode: New encoding
[out] encoder: Encoding function chosen
[out] wlen: Length of the encoded word
Return values
0: Success, string converted
>0: Error, upper bound on the number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 191 of file rfc2047.c.

193{
194 char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
195 const char *ib = NULL;
196 char *ob = NULL;
197 size_t ibl, obl;
198 int count, len, len_b, len_q;
199
200 if (fromcode)
201 {
202 iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
203 ASSERT(iconv_t_valid(cd));
204 ib = d;
205 ibl = dlen;
206 ob = buf;
207 obl = sizeof(buf) - strlen(tocode);
208 if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) ||
209 (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ))
210 {
211 ASSERT(errno == E2BIG);
212 ASSERT(ib > d);
213 return ((ib - d) == dlen) ? dlen : ib - d + 1;
214 }
215 }
216 else
217 {
218 if (dlen > (sizeof(buf) - strlen(tocode)))
219 return sizeof(buf) - strlen(tocode) + 1;
220 memcpy(buf, d, dlen);
221 ob = buf + dlen;
222 }
223
224 count = 0;
225 for (char *p = buf; p < ob; p++)
226 {
227 unsigned char c = *p;
228 ASSERT(strchr(MimeSpecials, '?'));
229 if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
230 ((c != ' ') && strchr(MimeSpecials, *p)))
231 {
232 count++;
233 }
234 }
235
236 len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
237 len_b = len + (((ob - buf) + 2) / 3) * 4;
238 len_q = len + (ob - buf) + 2 * count;
239
240 /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
241 if (mutt_istr_equal(tocode, "ISO-2022-JP"))
242 len_q = ENCWORD_LEN_MAX + 1;
243
244 if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
245 {
246 *encoder = b_encoder;
247 *wlen = len_b;
248 return 0;
249 }
250 else if (len_q <= ENCWORD_LEN_MAX)
251 {
252 *encoder = q_encoder;
253 *wlen = len_q;
254 return 0;
255 }
256 else
257 {
258 return dlen;
259 }
260}
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition: rfc2047.c:70
static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition: rfc2047.c:103
const char MimeSpecials[]
Characters that need special treatment in MIME.
Definition: mime.c:67
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition: charset.c:594
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition: charset.h:73
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition: charset.h:105
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition: charset.h:114
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition: string.c:672
#define ENCWORD_LEN_MIN
Definition: rfc2047.c:48
#define ENCWORD_LEN_MAX
Definition: rfc2047.c:47
#define ASSERT(COND)
Definition: signal2.h:58
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char *  str,
char *  buf,
size_t  buflen,
const char *  fromcode,
const char *  tocode,
encoder_t  encoder 
)
static

Encode a block of text using an encoder.

Parameters
[out] str: Buffer for the encoded result
[in] buf: Data to encode
[in] buflen: Length of the data
[in] fromcode: Original encoding
[in] tocode: New encoding
[in] encoder: Encoding function
Return values
num: Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 274 of file rfc2047.c.

276{
277 if (!fromcode)
278 {
279 return (*encoder)(str, buf, buflen, tocode);
280 }
281
282 const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
283 ASSERT(iconv_t_valid(cd));
284 const char *ib = buf;
285 size_t ibl = buflen;
286 char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
287 char *ob = tmp;
288 size_t obl = sizeof(tmp) - strlen(tocode);
289 const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
290 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
291 ASSERT((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ));
292 return (*encoder)(str, tmp, ob - tmp, tocode);
293}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const char *  tocode,
encoder_t *  encoder,
size_t *  wlen 
)
static

Calculate how much data can be converted.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] tocode: New encoding
[out] encoder: Encoding function chosen
[out] wlen: Length of the encoded word
Return values
num: Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 311 of file rfc2047.c.

313{
314 const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
315
316 size_t n = dlen;
317 while (true)
318 {
319 ASSERT(n > 0);
320 const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
321 if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
322 break;
323 n = ((nn != 0) ? nn : n) - 1;
324 ASSERT(n > 0);
325 if (utf8)
326 while ((n > 1) && CONTINUATION_BYTE(d[n]))
327 n--;
328 }
329 return n;
330}
#define CONTINUATION_BYTE(ch)
Definition: rfc2047.c:52
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition: rfc2047.c:191
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer *  res,
struct Buffer *  buf,
char *  charset,
size_t  charsetlen 
)
static

Perform charset conversion and filtering.

Parameters
[out] res: Buffer where the resulting string is appended
[in] buf: Buffer with the input string
[in] charset: Charset to use for the conversion
[in] charsetlen: Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 341 of file rfc2047.c.

342{
343 if (!charset)
344 return;
345 char end = charset[charsetlen];
346 charset[charsetlen] = '\0';
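347 mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM);
348 charset[charsetlen] = end;
349 mutt_mb_filter_unprintable(&buf->data);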
350 buf_addstr(res, buf->data);
351 FREE(&buf->data);
352 buf_init(buf);
353}
struct Buffer * buf_init(struct Buffer *buf)
Initialise a new Buffer.
Definition: buffer.c:61
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition: buffer.c:226
const char * cc_charset(void)
Get the cached value of $charset.
Definition: config_cache.c:116
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition: mbyte.c:423
#define FREE(x)
Definition: memory.h:45
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition: charset.c:831
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition: charset.h:74
char * data
Pointer to data.
Definition: buffer.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char * decode_word ( const char *  s,
size_t  len,
enum ContentEncoding  enc 
)
static

Decode an RFC2047-encoded string.

Parameters
[in] s: String to decode
[in] len: Length of the string
[in] enc: Encoding type
Return values
ptr: Decoded string
NULL: Base64 decoding failed
Note
The input string must be null-terminated; the len parameter is an optimization. The caller must free the returned string.

Definition at line 365 of file rfc2047.c.

366{
367 const char *it = s;
368 const char *end = s + len;
369
370 ASSERT(*end == '\0');
371
372 if (enc == ENC_QUOTED_PRINTABLE)
373 {
374 struct Buffer *buf = buf_pool_get();
375 for (; it < end; it++)
376 {
377 if (*it == '_')
378 {
379 buf_addch(buf, ' ');
380 }
381 else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
382 (!(it[2] & ~127) && (hexval(it[2]) != -1)))
383 {
384 buf_addch(buf, (hexval(it[1]) << 4) | hexval(it[2]));
385 it += 2;
386 }
387 else
388 {
389 buf_addch(buf, *it);
390 }
391 }
392 char *str = buf_strdup(buf);
393 buf_pool_release(&buf);
394 return str;
395 }
396 else if (enc == ENC_BASE64)
397 {
398 const int olen = 3 * len / 4 + 1;
399 char *out = mutt_mem_malloc(olen);
400 int dlen = mutt_b64_decode(it, out, olen);
401 if (dlen == -1)
402 {
403 FREE(&out);
404 return NULL;
405 }
406 out[dlen] = '\0';
407 return out;
408 }
409
410 ASSERT(0); /* The enc parameter has an invalid value */
411 return NULL;
412}
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert null-terminated base64 string to raw bytes.
Definition: base64.c:135
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition: buffer.c:241
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition: buffer.c:571
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Definition: memory.c:91
#define hexval(ch)
Definition: mime.h:80
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition: pool.c:81
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition: pool.c:94
String manipulation buffer.
Definition: buffer.h:36
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char *  d,
size_t  dlen,
int  col,
const char *  fromcode,
const struct Slist *  charsets,
char **  e,
size_t *  elen,
const char *  specials 
)
static

RFC2047-encode a string.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] charsets: List of allowable encodings (colon separated)
[out] e: Encoded string
[out] elen: Length of encoded string
[in] specials: Special characters to be encoded
Return values
0: Success
1: The input could not be converted to UTF-8
2: No target charset could be chosen from charsets

Definition at line 426 of file rfc2047.c.

428{
429 int rc = 0;
430 char *buf = NULL;
431 size_t bufpos, buflen;
432 char *t0 = NULL, *t1 = NULL, *t = NULL;
433 char *s0 = NULL, *s1 = NULL;
434 size_t ulen, r, wlen = 0;
435 encoder_t encoder = NULL;
436 char *tocode1 = NULL;
437 const char *tocode = NULL;
438 const char *icode = "utf-8";
439
440 /* Try to convert to UTF-8. */
441 char *u = mutt_strn_dup(d, dlen);
442 if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
443 {
444 rc = 1;
445 icode = 0;
446 }
447 ulen = mutt_str_len(u);
448
449 /* Find earliest and latest things we must encode. */
450 s0 = 0;
451 s1 = 0;
452 t0 = 0;
453 t1 = 0;
454 for (t = u; t < (u + ulen); t++)
455 {
456 if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
457 {
458 if (!t0)
459 t0 = t;
460 t1 = t;
461 }
462 else if (specials && *t && strchr(specials, *t))
463 {
464 if (!s0)
465 s0 = t;
466 s1 = t;
467 }
468 }
469
470 /* If we have something to encode, include RFC822 specials */
471 if (t0 && s0 && (s0 < t0))
472 t0 = s0;
473 if (t1 && s1 && (s1 > t1))
474 t1 = s1;
475
476 if (!t0)
477 {
478 /* No encoding is required. */
479 *e = u;
480 *elen = ulen;
481 return rc;
482 }
483
484 /* Choose target charset. */
485 tocode = fromcode;
486 if (icode)
487 {
488 tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
489 if (tocode1)
490 {
491 tocode = tocode1;
492 }
493 else
494 {
495 rc = 2;
496 icode = 0;
497 }
498 }
499
500 /* Hack to avoid labelling 8-bit data as us-ascii. */
501 if (!icode && mutt_ch_is_us_ascii(tocode))
502 tocode = "unknown-8bit";
503
504 /* Adjust t0 for maximum length of line. */
505 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
506 if (t < u)
507 t = u;
508 if (t < t0)
509 t0 = t;
510
511 /* Adjust t0 until we can encode a character after a space. */
512 for (; t0 > u; t0--)
513 {
514 if (!HSPACE(*(t0 - 1)))
515 continue;
516 t = t0 + 1;
517 if (icode)
518 while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
519 t++;
520 if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
521 ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
522 {
523 break;
524 }
525 }
526
527 /* Adjust t1 until we can encode a character before a space. */
528 for (; t1 < (u + ulen); t1++)
529 {
530 if (!HSPACE(*t1))
531 continue;
532 t = t1 - 1;
533 if (icode)
534 while (CONTINUATION_BYTE(*t))
535 t--;
536 if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
537 ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
538 {
539 break;
540 }
541 }
542
543 /* We shall encode the region [t0,t1). */
544
545 /* Initialise the output buffer with the us-ascii prefix. */
546 buflen = 2 * ulen;
547 buf = mutt_mem_malloc(buflen);
548 bufpos = t0 - u;
549 memcpy(buf, u, t0 - u);
550
551 col += t0 - u;
552
553 t = t0;
554 while (true)
555 {
556 /* Find how much we can encode. */
557 size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
558 if (n == (t1 - t))
559 {
560 /* See if we can fit the us-ascii suffix, too. */
561 if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
562 break;
563 n = t1 - t - 1;
564 if (icode)
565 while (CONTINUATION_BYTE(t[n]))
566 n--;
567 if (n == 0)
568 {
569 /* This should only happen in the really stupid case where the
570 * only word that needs encoding is one character long, but
571 * there is too much us-ascii stuff after it to use a single
572 * encoded word. We add the next word to the encoded region
573 * and try again. */
574 ASSERT(t1 < (u + ulen));
575 for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
576 ; // do nothing
577
578 continue;
579 }
580 n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
581 }
582
583 /* Add to output buffer. */
584 const char *line_break = "\n\t";
585 const int lb_len = 2; /* strlen(line_break) */
586
587 if ((bufpos + wlen + lb_len) > buflen)
588 {
589 buflen = bufpos + wlen + lb_len;
590 mutt_mem_realloc(&buf, buflen);
591 }
592 r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
593 ASSERT(r == wlen);
594 bufpos += wlen;
595 memcpy(buf + bufpos, line_break, lb_len);
596 bufpos += lb_len;
597
598 col = 1;
599
600 t += n;
601 }
602
603 /* Add last encoded word and us-ascii suffix to buffer. */
604 buflen = bufpos + wlen + (u + ulen - t1);
605 mutt_mem_realloc(&buf, buflen + 1);
606 r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
607 ASSERT(r == wlen);
608 bufpos += wlen;
609 memcpy(buf + bufpos, t1, u + ulen - t1);
610
611 FREE(&tocode1);
612 FREE(&u);
613
614 buf[buflen] = '\0';
615
616 *e = buf;
617 *elen = buflen + 1;
618 return rc;
619}
void mutt_mem_realloc(void *ptr, size_t size)
Resize a block of memory on the heap.
Definition: memory.c:115
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition: charset.c:1111
#define mutt_ch_is_us_ascii(str)
Definition: charset.h:99
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition: string.c:380
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition: string.c:496
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition: rfc2047.c:311
size_t(* encoder_t)(char *res, const char *buf, size_t buflen, const char *tocode)
Definition: rfc2047.c:65
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition: rfc2047.c:274
#define HSPACE(ch)
Definition: rfc2047.c:50
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char **  pd,
const char *  specials,
int  col,
const struct Slist *  charsets
)

RFC-2047-encode a string.

Parameters
[in,out] pd: String to be encoded, and resulting encoded string
[in] specials: Special characters to be encoded
[in] col: Starting column of the string
[in] charsets: List of charsets to choose from

Definition at line 628 of file rfc2047.c.

629{
630 if (!pd || !*pd)
631 return;
632
633 const char *const c_charset = cc_charset();
634 if (!c_charset)
635 return;
636
637 struct Slist *fallback = NULL;
638 if (!charsets)
639 {
640 fallback = slist_parse("utf-8", D_SLIST_SEP_COLON);
641 charsets = fallback;
642 }
643
644 char *e = NULL;
645 size_t elen = 0;
646 encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
647
648 slist_free(&fallback);
649 FREE(pd);
650 *pd = e;
651}
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition: slist.c:175
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition: slist.c:122
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition: rfc2047.c:426
String list.
Definition: slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition: types.h:112
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char **  pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd: String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 661 of file rfc2047.c.

662{
663 if (!pd || !*pd)
664 return;
665
666 struct Buffer *buf = buf_pool_get(); // Output buffer
667 char *s = *pd; // Read pointer
668 char *beg = NULL; // Begin of encoded word
669 enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE
670 char *charset = NULL; // Which charset
671 size_t charsetlen; // Length of the charset
672 char *text = NULL; // Encoded text
673 size_t textlen = 0; // Length of encoded text
674
675 /* Keep some state in case the next decoded word is using the same charset
676 * and it happens to be split in the middle of a multibyte character.
677 * See https://github.com/neomutt/neomutt/issues/1015 */
678 struct Buffer *prev = buf_pool_get(); /* Previously decoded word */
679 char *prev_charset = NULL; /* Previously used charset */
680 size_t prev_charsetlen = 0; /* Length of the previously used charset */
681
682 const struct Slist *c_assumed_charset = cc_assumed_charset();
683 const char *c_charset = cc_charset();
684 while (*s)
685 {
686 beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
687 if (beg != s)
688 {
689 /* Some non-encoded text was found */
690 size_t holelen = beg ? beg - s : mutt_str_len(s);
691
692 /* Ignore whitespace between encoded words */
693 if (beg && (mutt_str_lws_len(s, holelen) == holelen))
694 {
695 s = beg;
696 continue;
697 }
698
699 /* If we have some previously decoded text, add it now */
700 if (!buf_is_empty(prev))
701 {
702 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
703 }
704
705 /* Add non-encoded part */
706 if (slist_is_empty(c_assumed_charset))
707 {
708 buf_addstr_n(buf, s, holelen);
709 }
710 else
711 {
712 char *conv = mutt_strn_dup(s, holelen);
713 mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
714 buf_addstr(buf, conv);
715 FREE(&conv);
716 }
717 s += holelen;
718 }
719 if (beg)
720 {
721 /* Some encoded text was found */
722 text[textlen] = '\0';
723 char *decoded = decode_word(text, textlen, enc);
724 if (!decoded)
725 {
726 goto done;
727 }
728 if (!buf_is_empty(prev) && ((prev_charsetlen != charsetlen) ||
729 !mutt_strn_equal(prev_charset, charset, charsetlen)))
730 {
731 /* Different charset, convert the previous chunk and add it to the
732 * final result */
733 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
734 }
735
736 buf_addstr(prev, decoded);
737 FREE(&decoded);
738 prev_charset = charset;
739 prev_charsetlen = charsetlen;
740 s = text + textlen + 2; /* Skip final ?= */
741 }
742 }
743
744 /* Save the last chunk */
745 if (!buf_is_empty(prev))
746 {
747 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
748 }
749
750 FREE(pd);
751 *pd = buf_strdup(buf);
752
753done:
754 buf_pool_release(&buf);
755 buf_pool_release(&prev);
756}
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition: buffer.c:96
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition: buffer.c:291
const struct Slist * cc_assumed_charset(void)
Get the cached value of $assumed_charset.
Definition: config_cache.c:101
ContentEncoding
Content-Transfer-Encoding.
Definition: mime.h:47
@ ENC_OTHER
Encoding unknown.
Definition: mime.h:48
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition: charset.c:331
bool slist_is_empty(const struct Slist *list)
Is the slist empty?
Definition: slist.c:138
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition: string.c:628
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition: string.c:425
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition: rfc2047.c:148
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition: rfc2047.c:365
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition: rfc2047.c:341
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList *  al,
const char *  tag 
)

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al: AddressList
tag: Header tag (used for wrapping calculation)
Note
rfc2047_encode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer.

Definition at line 766 of file rfc2047.c.

767{
768 if (!al)
769 return;
770
771 int col = tag ? strlen(tag) + 2 : 32;
772 struct Address *a = NULL;
773 char *data = NULL;
774 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
775 TAILQ_FOREACH(a, al, entries)
776 {
777 if (a->personal)
778 {
779 data = buf_strdup(a->personal);
780 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
781 buf_strcpy(a->personal, data);
782 FREE(&data);
783 }
784 else if (a->group && a->mailbox)
785 {
786 data = buf_strdup(a->mailbox);
787 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
788 buf_strcpy(a->mailbox, data);
789 FREE(&data);
790 }
791 }
792}
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition: address.c:45
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition: buffer.c:395
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition: helpers.c:242
#define TAILQ_FOREACH(var, head, field)
Definition: queue.h:725
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition: rfc2047.c:628
An email address.
Definition: address.h:36
struct Buffer * personal
Real name of address.
Definition: address.h:37
bool group
Group mailbox?
Definition: address.h:39
struct Buffer * mailbox
Mailbox and host address.
Definition: address.h:38
Container for Accounts, Notifications.
Definition: neomutt.h:42
struct ConfigSubset * sub
Inherited config items.
Definition: neomutt.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList *  al)

Decode any RFC2047 headers in an Address list.

Parameters
al: AddressList
Note
rfc2047_decode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer.

Definition at line 801 of file rfc2047.c.

802{
803 if (!al)
804 return;
805
806 const bool assumed = !slist_is_empty(cc_assumed_charset());
807 struct Address *a = NULL;
808 char *data = NULL;
809 TAILQ_FOREACH(a, al, entries)
810 {
811 if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed))
812 {
813 data = buf_strdup(a->personal);
814 rfc2047_decode(&data);
815 buf_strcpy(a->personal, data);
816 FREE(&data);
817 }
818 else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?"))
819 {
820 data = buf_strdup(a->mailbox);
821 rfc2047_decode(&data);
822 buf_strcpy(a->mailbox, data);
823 FREE(&data);
824 }
825 }
826}
const char * buf_find_string(const struct Buffer *buf, const char *s)
Return a pointer to a substring found in the buffer.
Definition: buffer.c:640
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition: rfc2047.c:661
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope *  env)

Decode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 832 of file rfc2047.c.

833{
834 if (!env)
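835 return;
836 rfc2047_decode_addrlist(&env->from);
837 rfc2047_decode_addrlist(&env->to);
838 rfc2047_decode_addrlist(&env->cc);
839 rfc2047_decode_addrlist(&env->bcc);
840 rfc2047_decode_addrlist(&env->reply_to);
841 rfc2047_decode_addrlist(&env->mail_followup_to);
842 rfc2047_decode_addrlist(&env->return_path);
843 rfc2047_decode_addrlist(&env->sender);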
844 rfc2047_decode(&env->x_label);
845
846 char *subj = env->subject;
847 *(char **) &env->subject = NULL;
848 rfc2047_decode(&subj);
849 mutt_env_set_subject(env, subj);
850 FREE(&subj);
851}
void mutt_env_set_subject(struct Envelope *env, const char *subj)
Set both subject and real_subj to subj.
Definition: envelope.c:69
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition: rfc2047.c:801
struct AddressList return_path
Return path for the Email.
Definition: envelope.h:58
char *const subject
Email's subject.
Definition: envelope.h:70
struct AddressList to
Email's 'To' list.
Definition: envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition: envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition: envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition: envelope.h:61
struct AddressList sender
Email's sender.
Definition: envelope.h:63
struct AddressList bcc
Email's 'Bcc' list.
Definition: envelope.h:62
char * x_label
X-Label.
Definition: envelope.h:76
struct AddressList from
Email's 'From' list.
Definition: envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope *  env)

Encode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 857 of file rfc2047.c.

858{
859 if (!env)
860 return;
861 rfc2047_encode_addrlist(&env->from, "From");
862 rfc2047_encode_addrlist(&env->to, "To");
863 rfc2047_encode_addrlist(&env->cc, "Cc");
864 rfc2047_encode_addrlist(&env->bcc, "Bcc");
865 rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
866 rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
867 rfc2047_encode_addrlist(&env->sender, "Sender");
868 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
869 rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
870
871 char *subj = env->subject;
872 *(char **) &env->subject = NULL;
873 rfc2047_encode(&subj, NULL, sizeof("Subject:"), c_send_charset);
874 mutt_env_set_subject(env, subj);
875 FREE(&subj);
876}
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition: rfc2047.c:766
+ Here is the call graph for this function:
+ Here is the caller graph for this function: