# SARE HTML Ruleset for SpamAssassin - ruleset eng
# Version: 01.03.07
# Created: 2004-03-31
# Modified: 2005-07-02
# Usage instructions, documentation, and change history in 70_sare_html0.cf
#@@# Revision History: Full Revision History stored in 70_sare_html.log
#@@# 01.03.07: July 2 2005
#@@# Minor score tweaks based on recent mass-checks
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Bob Menschel - RMSA@Menschel.net
# Current Home: http://www.rulesemporium.com/rules/70_sare_html_eng.cf
#
######## ###################### ##################################################
######## ###################### ##################################################
# Use of accented HTML entities
######## ###################### ##################################################
# Accented HTML entities are generally not used in English-language emails.
# Note: OE will render these entities even without the trailing ';', so the rules below must not require it.
# Taken from: http://nwalsh.com/perl/dtdparse/html40/dtdent/
# & aacute; = á # & eacute; = é # & iacute; = í # & oacute; = ó # & uacute; = ú # & yacute; = ý
# & acirc; = â # & ecirc; = ê # & icirc; = î # & ocirc; = ô # & ucirc; = û
# & agrave; = à # & egrave; = è # & igrave; = ì # & ograve; = ò # & ugrave; = ù
# & atilde; = ã # & ntilde; = ñ # & otilde; = õ
# & auml; = ä # & euml; = ë # & iuml; = ï # & ouml; = ö # & uuml; = ü # & yuml; = ÿ
# & aring; = å # & ccedil; = ç # & cent; = ¢ # & aelig; = æ # & micro; = µ # & yen; = ¥
# & iexcl; = ¡ # & iquest; = ¿ # & oslash; = ø
# Sub-rules: one per family of accented-entity names. Each is a
# case-insensitive (/i) rawbody match on '&' followed by the entity name.
# The patterns are open on the right (no ';' and no word boundary), so they
# still match when the entity is written without its terminating ';'.
# The leading '__' follows the SpamAssassin sub-rule naming convention; these
# rules are given no score here and are only combined by the meta rule below.
#
# Acute accents: a, e, i, o, u, y
rawbody __SARE_HTML_ENT_ACUTE /&[aeiouy]acute/i
# Circumflex accents: a, e, i, o, u
rawbody __SARE_HTML_ENT_CIRC /&[aeiou]circ/i
# Grave accents: a, e, i, o, u
rawbody __SARE_HTML_ENT_GRAVE /&[aeiou]grave/i
# Tilde: a, n, o
rawbody __SARE_HTML_ENT_TILDE /&[ano]tilde/i
# Umlaut / diaeresis: a, e, i, o, u, y
rawbody __SARE_HTML_ENT_UML /&[aeiouy]uml/i
# Remaining entities that do not fit one of the families above
rawbody __SARE_HTML_ENT_OTHER /&(?:aring|ccedil|cent|aelig|micro|yen|iexcl|iquest|oslash)/i
# Scored rule: adds up the six sub-rules above and fires when the total is at
# least 4, i.e. when at least four different entity families are present.
meta SARE_HTML_ENT_4 ((__SARE_HTML_ENT_ACUTE + __SARE_HTML_ENT_CIRC + __SARE_HTML_ENT_GRAVE + __SARE_HTML_ENT_TILDE + __SARE_HTML_ENT_UML + __SARE_HTML_ENT_OTHER) >= 4)
# Description text attached to SARE_HTML_ENT_4.
describe SARE_HTML_ENT_4 email contains a number of diff types of accents
# Score assigned to SARE_HTML_ENT_4; the mass-check counts it relates to follow below.
score SARE_HTML_ENT_4 0.615
#counts SARE_HTML_ENT_4 198s/6h of 269462 corpus (128310s/141152h RM) 06/17/05
#max SARE_HTML_ENT_4 643s/0h of 84989 corpus (62394s/22595h RM) 06/07/04
#counts SARE_HTML_ENT_4 35s/0h of 13446 corpus (11335s/2111h MY) 06/02/04
#counts SARE_HTML_ENT_4 6s/16h of 54067 corpus (16890s/37177h JH-3.01) 06/18/05
#max SARE_HTML_ENT_4 227s/11h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts SARE_HTML_ENT_4 2s/0h of 47221 corpus (42968s/4253h MY) 06/18/05
#counts SARE_HTML_ENT_4 0s/0h of 4676 corpus (808s/3868h ft) 05/28/05
#counts SARE_HTML_ENT_4 5s/0h of 11260 corpus (6568s/4692h CT) 06/17/05
# EOF