Sunday, November 14, 2010

How to decode Encoded-Word header values in emails

Here is C# code to decode Encoded-Word header values in emails.
Features:

  • supports decoding of multi-line (folded) values
  • supports both Q- and base64 encoding
  • passes all samples in RFC 2047, section "8. Examples"
public class EncodedWordEncoding
    {
        // One RFC 2047 encoded-word: =?charset?encoding?encoded-text?=
        // Built once; the pattern never changes between calls.
        private static readonly Regex EncodedWordRegex =
            new Regex(@"=\?(?<charset>.*?)\?(?<encoding>[qQbB])\?(?<value>.*?)\?=");

        // Text made up solely of linear white space: one or more runs of
        // (optional line breaks followed by at least one space or tab).
        private static readonly Regex LinearWhiteSpaceRegex =
            new Regex(@"\A(?:(?:\r?\n|\r)*[ \t]+)+\z");

        /// <summary>
        /// Decodes every RFC 2047 encoded-word ("=?charset?Q|B?text?=") found in a
        /// header value. Text outside encoded-words is copied through unchanged,
        /// except that linear white space separating two adjacent encoded-words is
        /// dropped, as the RFC requires.
        /// </summary>
        /// <param name="encodedText">The raw header value; may be null.</param>
        /// <returns>
        /// The decoded text, or null when <paramref name="encodedText"/> is null.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown by <see cref="Encoding.GetEncoding(string)"/> when the charset
        /// name of an encoded-word is not recognised.
        /// </exception>
        /// <exception cref="FormatException">
        /// Thrown by <see cref="Convert.FromBase64String(string)"/> when a
        /// B-encoded payload is not valid base64.
        /// </exception>
        public static string Decode(string encodedText)
        {
            if (encodedText == null)
                return null;

            var decoded = new StringBuilder(encodedText.Length);
            var position = 0;
            var encodedWordBefore = false;

            while (position < encodedText.Length)
            {
                var match = EncodedWordRegex.Match(encodedText, position);
                if (!match.Success)
                {
                    // No further encoded-words: the remainder is plain text.
                    decoded.Append(encodedText, position, encodedText.Length - position);
                    break;
                }

                // Plain text between the previous encoded-word (or the start) and this one.
                var beforeMatch = encodedText.Substring(position, match.Index - position);

                // White space that only separates two encoded-words is not part of the value.
                var isSeparator = encodedWordBefore && LinearWhiteSpaceRegex.IsMatch(beforeMatch);
                if (!isSeparator)
                    decoded.Append(beforeMatch);

                // RFC 2231 allows "charset*language"; the language tag is irrelevant for decoding.
                var charset = match.Groups["charset"].Value;
                var languageSeparator = charset.IndexOf('*');
                if (languageSeparator >= 0)
                    charset = charset.Substring(0, languageSeparator);

                var contentEncoding = Encoding.GetEncoding(charset);
                var value = match.Groups["value"].Value;

                // The regex only admits q/Q/b/B, so anything that is not "B" is "Q".
                if (match.Groups["encoding"].Value.ToUpperInvariant() == "B")
                {
                    decoded.Append(contentEncoding.GetString(Convert.FromBase64String(value)));
                }
                else
                {
                    // "_" stands for a space in the *encoded* text, so it must be mapped
                    // before hex decoding; otherwise an encoded underscore ("=5F") would
                    // be turned into a space as well.
                    decoded.Append(QuotedPrintableEncoding.Decode(value.Replace("_", " "), contentEncoding));
                }

                // Continue scanning right after this encoded-word.
                position = match.Index + match.Length;
                encodedWordBefore = true;
            }

            return decoded.ToString();
        }
    }

    public static class QuotedPrintableEncoding
    {
        /// <summary>
        /// Decodes quoted-printable text into a string using the given character
        /// encoding. Soft line breaks ("=" followed by CRLF or LF) are removed,
        /// "=XX" hex escapes become the octet XX, and literal ASCII characters
        /// stand for their own octet. Consecutive octets are decoded together so
        /// that multi-byte characters survive even when only some of their bytes
        /// are written as escapes. An "=" that is not followed by two hex digits,
        /// and any non-ASCII literal character, is passed through unchanged.
        /// </summary>
        /// <param name="text">The quoted-printable text to decode.</param>
        /// <param name="encoding">The character encoding of the decoded octets.</param>
        /// <returns>The decoded text; the input itself when it is empty.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> or <paramref name="encoding"/> is null.
        /// </exception>
        public static string Decode(string text, Encoding encoding)
        {
            if (text == null)
                throw new ArgumentNullException("text");
            if (encoding == null)
                throw new ArgumentNullException("encoding");

            if (text.Length == 0)
                return text;

            // Remove soft line breaks; accept bare LF as well as CRLF, since line
            // endings are frequently normalised before the text gets here.
            text = Regex.Replace(text, "=\r?\n", string.Empty);

            var result = new StringBuilder(text.Length);

            // Every input char yields at most one octet, so this buffer cannot overflow.
            var octets = new byte[text.Length];
            var octetCount = 0;

            for (var i = 0; i < text.Length; i++)
            {
                var current = text[i];

                if (current == '=' && i + 2 < text.Length)
                {
                    var high = HexValue(text[i + 1]);
                    var low = HexValue(text[i + 2]);
                    if (high >= 0 && low >= 0)
                    {
                        octets[octetCount++] = (byte)((high << 4) | low);
                        i += 2; // skip the two hex digits just consumed
                        continue;
                    }
                }

                if (current <= 0x7F)
                {
                    // Literal ASCII (including a stray '=') represents its own octet.
                    octets[octetCount++] = (byte)current;
                    continue;
                }

                // A non-ASCII literal is not valid quoted-printable; keep it verbatim,
                // decoding whatever octets were collected before it first.
                result.Append(encoding.GetString(octets, 0, octetCount));
                octetCount = 0;
                result.Append(current);
            }

            result.Append(encoding.GetString(octets, 0, octetCount));
            return result.ToString();
        }

        // Returns the numeric value of a hex digit (either case), or -1 if c is not one.
        private static int HexValue(char c)
        {
            if (c >= '0' && c <= '9')
                return c - '0';
            if (c >= 'A' && c <= 'F')
                return c - 'A' + 10;
            if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
            return -1;
        }
    }

Code used from klaas114's post and Dave's post.

UPDATE 31 Jan 2010: implemented my own quoted-printable decoder.

Monday, October 4, 2010

How to move Microsoft Help Library (Visual Studio 2010)

On the Help Library Manager page you can read that you cannot move the content of the library.
However, I've found a couple of ways to do it.


Hacking windows registry
You can change this setting in the registry key:
HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Help\v1.0\LocalStore

These are general instructions and may change in future versions of the Microsoft Help Library.
  1. Copy your files to new location
  2. Change the key value
  3. Start the Help Library Manager, go to Settings and check the path is correct
  4. Click the Check for updates online link to check your content


First time run... Again
Alternatively, you can make the Help Library Manager behave as if it were running for the first time and then set a new location.
  1. Start notepad or your favorite editor with administrator privileges
  2. Open the file "C:\Program Files\Microsoft Help Viewer\v1.0\HelpLibManager.exe.config" and change the FirstTimeRun key value to True
  3. Start the Help Library Manager and set new location
Finally
Open the file "HelpLibrary\manifest\queryManifest.N.xml" (mine is queryManifest.4.xml) and correct the catalogPath and contentPath values.
Close the Help Library Agent if it's running (look at the tray area).