Features:
- supports decoding of multi-line (folded) values
- supports both Q- and base64 encoding
- passes all samples in RFC 2047, section "8. Examples"
/// <summary>
/// Decodes MIME "encoded-word" tokens (RFC 2047), such as
/// <c>=?utf-8?Q?caf=C3=A9?=</c> or <c>=?utf-8?B?Y2Fmw6k=?=</c>, embedded in a header value.
/// </summary>
public class EncodedWordEncoding
{
    // One encoded-word: =?charset?encoding?encoded-text?=  (encoding is Q or B, any case).
    private static readonly Regex EncodedWordRegex = new Regex(
        @"=\?(?<charset>.*?)\?(?<encoding>[qQbB])\?(?<value>.*?)\?=",
        RegexOptions.Compiled);

    /// <summary>
    /// Replaces every encoded-word in <paramref name="encodedText"/> with its decoded text.
    /// Text outside encoded-words is copied through unchanged, except that whitespace
    /// which only separates two adjacent encoded-words is dropped.
    /// </summary>
    /// <param name="encodedText">The raw value to decode; may be <c>null</c>.</param>
    /// <returns>
    /// The decoded string, or <c>null</c> when <paramref name="encodedText"/> is <c>null</c>.
    /// Input that contains no encoded-word is returned as is.
    /// </returns>
    /// <remarks>
    /// Nothing is caught here: exceptions raised by <c>Encoding.GetEncoding</c> for the
    /// charset name or by <c>Convert.FromBase64String</c> for the payload propagate to the caller.
    /// </remarks>
    public static string Decode(string encodedText)
    {
        if (encodedText == null)
        {
            return null;
        }

        var decoded = new StringBuilder(encodedText.Length);
        var position = 0;
        var previousWasEncodedWord = false;

        while (position < encodedText.Length)
        {
            var match = EncodedWordRegex.Match(encodedText, position);
            if (!match.Success)
            {
                break;
            }

            // Literal text between the previous encoded-word (or the start) and this one.
            var gap = encodedText.Substring(position, match.Index - position);

            // Whitespace that merely separates two encoded-words is not part of the value.
            if (!(previousWasEncodedWord && IsLinearWhiteSpace(gap)))
            {
                decoded.Append(gap);
            }

            var contentEncoding = Encoding.GetEncoding(match.Groups["charset"].Value);
            var payload = match.Groups["value"].Value;

            if (string.Equals(match.Groups["encoding"].Value, "B", StringComparison.OrdinalIgnoreCase))
            {
                decoded.Append(contentEncoding.GetString(Convert.FromBase64String(payload)));
            }
            else
            {
                // Q encoding (the regex admits only Q or B). "_" stands for byte 0x20 and must be
                // translated BEFORE hex decoding; doing it afterwards would also turn a literal
                // underscore that was sent as "=5F" into a space.
                decoded.Append(QuotedPrintableEncoding.Decode(payload.Replace("_", "=20"), contentEncoding));
            }

            position = match.Index + match.Length;
            previousWasEncodedWord = true;
        }

        // Whatever follows the last encoded-word (or the whole input when there was none).
        decoded.Append(encodedText, position, encodedText.Length - position);
        return decoded.ToString();
    }

    // True when text is non-empty, consists only of space/tab/CR/LF, and ends with a space or tab.
    private static bool IsLinearWhiteSpace(string text)
    {
        if (text.Length == 0)
        {
            return false;
        }

        var last = text[text.Length - 1];
        if (last != ' ' && last != '\t')
        {
            return false;
        }

        foreach (var c in text)
        {
            if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Minimal quoted-printable decoder: removes soft line breaks and turns <c>=XX</c> hex
/// escapes back into characters using a caller-supplied encoding.
/// </summary>
public static class QuotedPrintableEncoding
{
    // A soft line break: "=" immediately followed by CRLF, a bare CR, or a bare LF.
    private static readonly Regex SoftLineBreakRegex =
        new Regex(@"=(?:\r\n|\r|\n)", RegexOptions.Compiled);

    // A run of one or more consecutive "=XX" escapes (hex digits in either case).
    private static readonly Regex EscapeRunRegex =
        new Regex(@"(?:=[0-9A-Fa-f]{2})+", RegexOptions.Compiled);

    /// <summary>
    /// Decodes quoted-printable <paramref name="text"/>. Characters that are not part of an
    /// <c>=XX</c> escape or a soft line break are returned unchanged.
    /// </summary>
    /// <param name="text">The quoted-printable text.</param>
    /// <param name="encoding">The encoding used to turn the escaped bytes into characters.</param>
    /// <returns>The decoded text; an empty input is returned as is.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> or <paramref name="encoding"/> is <c>null</c>.
    /// </exception>
    public static string Decode(string text, Encoding encoding)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        if (text.Length == 0)
        {
            return text;
        }

        // Accept CRLF as well as bare CR/LF so input with normalised line endings still unfolds.
        var unfolded = SoftLineBreakRegex.Replace(text, string.Empty);

        // Each run of adjacent escapes is decoded as one byte sequence so that multi-byte
        // characters (e.g. "=C3=A9" in UTF-8) are not split.
        return EscapeRunRegex.Replace(unfolded, run =>
        {
            var escapes = run.Value;
            var bytes = new byte[escapes.Length / 3];
            for (var i = 0; i < bytes.Length; i++)
            {
                // Skip the '=' and parse the two hex digits, independent of the current culture.
                bytes[i] = Convert.ToByte(escapes.Substring(i * 3 + 1, 2), 16);
            }

            return encoding.GetString(bytes);
        });
    }
}
Based on code from klaas114's post and Dave's post.
UPDATE 31 Jan 2010: implemented my own quoted-printable decoder.