C# Strip HTML and Attributes in MVC

If you are receiving this error message when posting content that contains HTML:

A potentially dangerous Request.Form value was detected from the client

Add ValidateInput(false) to your controller’s HttpPost method:

[HttpPost, ValidateInput(false)]

Take a look at this blog article for the class that does the HTML strip work, http://www.dijksterhuis.org/safely-cleaning-html-with-strip_tags-in-csharp/.

In my implementation, the list of allowed tags is a string in the web.config:

<configuration>
	<appSettings>
		<add key="HtmlAllowed" value="a,b,br,em,i,p,strong"/>
	</appSettings>
</configuration>

The settings class contains this line, which exposes the HtmlAllowed web.config appSetting as a string array (the value is split on commas):

/// <summary>
/// Tag names allowed in posted HTML, read from the comma-separated
/// "HtmlAllowed" appSetting.
/// </summary>
/// <returns>
/// The trimmed, non-empty entries of the setting; an empty array when the
/// setting is missing or blank, so that nothing is allowed by default.
/// </returns>
public static string[] HtmlAllowed
{
    get
    {
        // AppSettings returns null for a missing key; avoid a NullReferenceException.
        string raw = ConfigurationManager.AppSettings["HtmlAllowed"];
        if (string.IsNullOrEmpty(raw))
        {
            return new string[0];
        }

        // Trim each entry and drop empty ones so a value such as "a, b,,p"
        // yields "a", "b", "p" rather than " b" and "".
        var tags = new System.Collections.Generic.List<string>();
        foreach (string entry in raw.Split(','))
        {
            string tag = entry.Trim();
            if (tag.Length > 0)
            {
                tags.Add(tag);
            }
        }

        return tags.ToArray();
    }
}

Here is an example call that strips the HTML posted from a form input named “MyTextarea”, passing the array of allowed tags as the second argument:

StripHtml.StripTagsAndAttributes(collection["MyTextarea"].Trim(), Settings.HtmlAllowed );

Source code

using System;
using System.Text.RegularExpressions;

namespace StripHTML
{
    class MainClass
    {
        
        /// <summary>
        /// Replaces the first occurrence of <paramref name="needle"/> in
        /// <paramref name="haystack"/> with <paramref name="replacement"/>.
        /// </summary>
        /// <param name="haystack">Text to search.</param>
        /// <param name="needle">Exact text to look for.</param>
        /// <param name="replacement">Text inserted in place of the first match.</param>
        /// <returns>
        /// <paramref name="haystack"/> unchanged when <paramref name="needle"/> does not
        /// occur; otherwise a copy with only the first occurrence replaced.
        /// </returns>
        private static string ReplaceFirst(string haystack, string needle, string replacement)
        {
            // Ordinal search: a culture-sensitive IndexOf may skip characters such as
            // "\n" or match a span whose length differs from needle.Length, which would
            // make the Substring arithmetic below cut at the wrong place.
            int pos = haystack.IndexOf(needle, StringComparison.Ordinal);
            if (pos < 0)
            {
                return haystack;
            }

            return haystack.Substring(0, pos) + replacement + haystack.Substring(pos + needle.Length);
        }

        /// <summary>
        /// Replaces every occurrence of <paramref name="needle"/> in
        /// <paramref name="haystack"/> with <paramref name="replacement"/>.
        /// </summary>
        /// <remarks>
        /// The text is scanned once, left to right, using ordinal comparison. Text
        /// produced by a replacement is not rescanned, so the call always terminates,
        /// even when <paramref name="replacement"/> contains <paramref name="needle"/>.
        /// A match at index 0 is replaced like any other.
        /// </remarks>
        /// <param name="haystack">Text to search.</param>
        /// <param name="needle">Exact text to look for.</param>
        /// <param name="replacement">Text inserted in place of each match.</param>
        /// <returns>
        /// The text with all matches replaced; <paramref name="haystack"/> unchanged when
        /// it or <paramref name="needle"/> is null or empty.
        /// </returns>
        private static string ReplaceAll(string haystack, string needle, string replacement)
        {
            // Nothing to search in, or nothing to search for (string.Replace rejects "").
            if (string.IsNullOrEmpty(haystack) || string.IsNullOrEmpty(needle))
            {
                return haystack;
            }

            return haystack.Replace(needle, replacement);
        }

        // NOTE(review): this listing is damaged. The StripTags method that starts here is
        // cut off in the middle of its inner loop, and the text continues with the tail of
        // what looks like a second method (presumably StripTagsAndAttributes, which the
        // article calls — confirm against the linked article). As shown it does not compile.
        //
        // Visible part of StripTags: finds every "<...>" or "</...>" tag in Input and,
        // for each tag, starts comparing its lower-cased text against each entry of
        // AllowedTags.
        public static string StripTags(string Input, string[] AllowedTags)
        {
            // Matches any opening or closing tag: "<", optional "/", one or more non-">" chars, ">".
            Regex StripHTMLExp = new Regex(@"(<\/?[^>]+>)");
            string Output = Input;

            foreach(Match Tag in StripHTMLExp.Matches(Input))
            {
                // Lower-cased copy of the tag text, used for the IndexOf checks below.
                string HTMLTag = Tag.Value.ToLower();
                bool IsAllowed = false;
                
                foreach(string AllowedTag in AllowedTags)
                {
                    // An offset of 0 means one of the checks matched at the very start of the tag.
                    int offset = -1;

                    // Determine if it is an allowed tag 
                    // NOTE(review): the rest of the original comment, and apparently at least
                    // one check, were lost here — restore from the linked article.
                    if (offset!=0) offset = HTMLTag.IndexOf('<'+AllowedTag+' ');
                    // NOTE(review): the next line is truncated. The end of StripTags and the
                    // start of the following method are missing; it appears to resume inside
                    // a lambda declaration (presumably HrefMatch, which is used further down).
                    if (offset!=0) offset = HTMLTag.IndexOf(" m.Groups[1].Value + "href..;,;.." + m.Groups[2].Value;
            // Rebuilds a match as group 1 + "class..;,;.." + group 2; the "..;,;.." marker
            // is turned back into "=" at the end of the method.
            MatchEvaluator ClassMatch = m => m.Groups[1].Value + "class..;,;.." + m.Groups[2].Value;
            // Keeps only groups 1 and 4 of a match, dropping what was matched between them.
            MatchEvaluator UnsafeMatch = m => m.Groups[1].Value + m.Groups[4].Value;
            
            /* Allow the "href" attribute */
            // NOTE(review): the pattern "()" is presumably a remnant of a longer regex whose
            // angle-bracket content was stripped; as written it has no group 2 — confirm.
            Output = new Regex("()").Replace(Output,HrefMatch);

            /* Allow the "class" attribute */
            // NOTE(review): same apparent damage as the "href" pattern above.
            Output = new Regex("()").Replace(Output,ClassMatch);

            /* Remove unsafe attributes in any of the remaining tags */
            Output = new Regex(@"(<.*) .*=(\'|\""|\w)[\w|.|(|)]*(\'|\""|\w)(.*>)").Replace(Output,UnsafeMatch);

            /* Return the allowed tags to their proper form */
            // Turns every "..;,;.." marker back into "=".
            Output = ReplaceAll(Output,"..;,;..", "=");
            
            return Output;
        }
    }
}

Published by

Jim Frenette

Web Developer - views here are my own except those taken from people more clever than me.