# SARE HTML Ruleset for SpamAssassin - ruleset eng
# Version: 01.03.07
# Created: 2004-03-31
# Modified: 2005-07-02
# Usage instructions, documentation, and change history in 70_sare_html0.cf

#@@# Revision History: Full Revision History stored in 70_sare_html.log
#@@# 01.03.07: July 2 2005
#@@#           Minor score tweaks based on recent mass-checks

# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Bob Menschel - RMSA@Menschel.net
# Current Home: http://www.rulesemporium.com/rules/70_sare_html_eng.cf
#
######## ###################### ##################################################
######## ###################### ##################################################
# Use of accented HTML entities
######## ###################### ##################################################
# Accented HTML entities are generally not used in English-language emails.
# Note: OE will render these entities without the trailing ; so we must not check for this!
# Taken from: http://nwalsh.com/perl/dtdparse/html40/dtdent/
# & aacute; = á
# & eacute; = é
# & iacute; = í
# & oacute; = ó
# & uacute; = ú
# & yacute; = ý
# & acirc; = â
# & ecirc; = ê
# & icirc; = î
# & ocirc; = ô
# & ucirc; = û
# & agrave; = à
# & egrave; = è
# & igrave; = ì
# & ograve; = ò
# & ugrave; = ù
# & atilde; = ã
# & ntilde; = ñ
# & otilde; = õ
# & auml; = ä
# & euml; = ë
# & iuml; = ï
# & ouml; = ö
# & uuml; = ü
# & yuml; = ÿ
# & aring; = å
# & ccedil; = ç
# & cent; = ¢
# & aelig; = æ
# & micro; = µ
# & yen; = ¥
# & iexcl; = ¡
# & iquest; = ¿
# & oslash; = ø

# One sub-rule per entity family listed above. These are rawbody rules so that
# the patterns see the entity names as written in the HTML source. The patterns
# deliberately stop before the trailing ';' (see the OE note above).
rawbody   __SARE_HTML_ENT_ACUTE   /&[aeiouy]acute/i
rawbody   __SARE_HTML_ENT_CIRC    /&[aeiou]circ/i
rawbody   __SARE_HTML_ENT_GRAVE   /&[aeiou]grave/i
rawbody   __SARE_HTML_ENT_TILDE   /&[ano]tilde/i
rawbody   __SARE_HTML_ENT_UML     /&[aeiouy]uml/i
rawbody   __SARE_HTML_ENT_OTHER   /&(?:aring|ccedil|cent|aelig|micro|yen|iexcl|iquest|oslash)/i

# Sums the six sub-rules above and hits when the total is 4 or more.
# The whole expression is kept on ONE line: each directive in this file
# occupies a single line, and splitting the expression breaks the rule.
meta      SARE_HTML_ENT_4         ((__SARE_HTML_ENT_ACUTE + __SARE_HTML_ENT_CIRC + __SARE_HTML_ENT_GRAVE + __SARE_HTML_ENT_TILDE + __SARE_HTML_ENT_UML + __SARE_HTML_ENT_OTHER) >= 4)
describe  SARE_HTML_ENT_4         email contains a number of diff types of accents
score     SARE_HTML_ENT_4         0.615
#counts   SARE_HTML_ENT_4         198s/6h of 269462 corpus (128310s/141152h RM) 06/17/05
#max      SARE_HTML_ENT_4         643s/0h of 84989 corpus (62394s/22595h RM) 06/07/04
#counts   SARE_HTML_ENT_4         35s/0h of 13446 corpus (11335s/2111h MY) 06/02/04
#counts   SARE_HTML_ENT_4         6s/16h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
#max      SARE_HTML_ENT_4         227s/11h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_ENT_4         2s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
#counts   SARE_HTML_ENT_4         0s/0h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_ENT_4         5s/0h of 11260 corpus (6568s/4692h CT) 06/17/05

# EOF