Features:
- supports decoding of values that are folded across multiple lines
- supports both Q- and base64 encoding
- passes all samples in RFC 2047, section "8. Examples"
/// <summary>
/// Decodes RFC 2047 "encoded-word" tokens of the form
/// <c>=?charset?encoding?text?=</c>, as found in MIME message headers.
/// </summary>
public class EncodedWordEncoding
{
    // Built once instead of on every call / loop iteration.
    private static readonly Regex EncodedWordRegex =
        new Regex(@"=\?(?<charset>.*?)\?(?<encoding>[qQbB])\?(?<value>.*?)\?=");

    /// <summary>
    /// Replaces every encoded-word in <paramref name="encodedText"/> with its decoded
    /// text. Text outside encoded-words is copied through unchanged, except that
    /// linear whitespace separating two adjacent encoded-words is dropped.
    /// </summary>
    /// <param name="encodedText">Header text that may contain encoded-words; may be null.</param>
    /// <returns>
    /// The decoded text, or <c>null</c> when <paramref name="encodedText"/> is <c>null</c>.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// Thrown by <see cref="Encoding.GetEncoding(string)"/> when the charset name is not recognised.
    /// </exception>
    /// <exception cref="FormatException">
    /// Thrown by <see cref="Convert.FromBase64String(string)"/> when a B-encoded value is not valid base64.
    /// </exception>
    public static string Decode(string encodedText)
    {
        if (encodedText == null)
        {
            return null;
        }

        var output = new StringBuilder(encodedText.Length);
        var position = 0;
        var previousWasEncodedWord = false;

        while (position < encodedText.Length)
        {
            var match = EncodedWordRegex.Match(encodedText, position);
            if (!match.Success)
            {
                // No further encoded-words: the remainder is plain text.
                output.Append(encodedText, position, encodedText.Length - position);
                break;
            }

            // Text between the previous position and this encoded-word. Whitespace that
            // only separates two encoded-words is not part of the decoded value.
            var gap = encodedText.Substring(position, match.Index - position);
            if (!(previousWasEncodedWord && IsLinearWhitespace(gap)))
            {
                output.Append(gap);
            }

            var contentEncoding = Encoding.GetEncoding(match.Groups["charset"].Value);
            var value = match.Groups["value"].Value;

            // The regex only admits q/Q/b/B, so anything that is not "B" is "Q".
            if (string.Equals(match.Groups["encoding"].Value, "B", StringComparison.OrdinalIgnoreCase))
            {
                output.Append(contentEncoding.GetString(Convert.FromBase64String(value)));
            }
            else
            {
                // In Q-encoding a literal '_' stands for byte 0x20. Rewrite it to "=20"
                // BEFORE hex decoding so that an encoded underscore ("=5F") survives
                // as '_' instead of being turned into a space afterwards.
                output.Append(QuotedPrintableEncoding.Decode(value.Replace("_", "=20"), contentEncoding));
            }

            // Continue scanning right after this encoded-word.
            position = match.Index + match.Length;
            previousWasEncodedWord = true;
        }

        return output.ToString();
    }

    // True when text consists solely of spaces, tabs, CR and LF and ends in a space or tab,
    // i.e. it is (possibly folded) whitespace separating two header tokens.
    private static bool IsLinearWhitespace(string text)
    {
        if (text.Length == 0)
        {
            return false;
        }

        var last = text[text.Length - 1];
        if (last != ' ' && last != '\t')
        {
            return false;
        }

        foreach (var c in text)
        {
            if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
            {
                return false;
            }
        }

        return true;
    }
}

/// <summary>
/// Decodes quoted-printable text: removes soft line breaks and converts runs of
/// <c>=XX</c> hex escapes into characters using a caller-supplied encoding.
/// </summary>
public static class QuotedPrintableEncoding
{
    // One or more consecutive "=XX" escapes. A whole run is decoded at once so that
    // multi-byte characters are handed to the encoding as a single byte sequence.
    private static readonly Regex HexRunRegex = new Regex(@"(=[0-9A-Fa-f][0-9A-Fa-f])+");

    /// <summary>
    /// Decodes the quoted-printable <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Quoted-printable text to decode.</param>
    /// <param name="encoding">Encoding used to turn the escaped bytes into characters.</param>
    /// <returns>The decoded text; characters that are not part of an escape are left as they are.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> or <paramref name="encoding"/> is <c>null</c>.
    /// </exception>
    public static string Decode(string text, Encoding encoding)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        if (text.Length == 0)
        {
            return text;
        }

        // A '=' directly before a line break is a soft line break and carries no data.
        // Bare LF is accepted as well as CRLF for input whose line endings were normalised.
        text = text.Replace("=\r\n", "").Replace("=\n", "");

        return HexRunRegex.Replace(text, m =>
        {
            // Each escape is exactly three characters: '=' followed by two hex digits.
            var bytes = new byte[m.Length / 3];
            for (var i = 0; i < bytes.Length; i++)
            {
                bytes[i] = Convert.ToByte(m.Value.Substring(i * 3 + 1, 2), 16);
            }

            return encoding.GetString(bytes);
        });
    }
}
Based on code from klaas114's post and Dave's post.
UPDATE 31 Jan 2010: implemented my own quoted-printable decoder.
No comments:
Post a Comment