# SARE HTML Ruleset for SpamAssassin - Archived Rules
# Version: 01.03.07
# Created: 2004-03-31
# Modified: 2005-07-02
# Usage instructions, documentation, and change history in 70_sare_html0.cf
#@@# Revision History: Full Revision History stored in 70_sare_html.log
#@@# 01.03.07: July 2 2005
#@@#     Minor score tweaks based on recent mass-checks
#@@#     Archived from file 4: SARE_HTML_TAB_EHTML
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Bob Menschel - RMSA@Menschel.net
# Current Home: http://www.rulesemporium.com/rules/70_sare_html_arc.cf
#
######## ###################### ##################################################
######## ###################### ##################################################
# and
# tag spamsign
# NOTE(review): this section heading continues from the previous comment line;
# it appears to have lost the HTML tag names it referred to -- confirm upstream.
######## ###################### ##################################################

# NOTE(review): the rule definition below is truncated in this copy. The rule
# name (SARE_HTML_HTML_4) does not match the describe/score/counts that follow
# (SARE_HTML_URI_MANYP2), and "/){2}/i" has an unmatched ")" so it cannot
# compile. It is kept verbatim but disabled; restore the real pattern(s) from
# the upstream ruleset before re-enabling.
#rawbody  SARE_HTML_HTML_4          /){2}/i
describe  SARE_HTML_URI_MANYP2      Too many empty paragraph tags in a row
score     SARE_HTML_URI_MANYP2      0.100
#overlap  SARE_HTML_URI_MANYP2      Significant overlap with SARE_HTML_URI_MANYP3
#counts   SARE_HTML_URI_MANYP2      587s/1051h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_MANYP2      89s/70h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_MANYP2      9s/2h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_MANYP2      73s/3h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_MANYP2      1s/107h of 4676 corpus (808s/3868h ft) 05/28/05

# URI that contains no dot at all, excluding file:/javascript:/mailto: schemes
# and http(s)://localhost, http(s)://' or a bare http(s)://.
uri       SARE_HTML_URI_NODOT2      m{^(?!(?:file|javascript|mailto):|https?://(?:localhost|'|$))[^\.]+$}i
describe  SARE_HTML_URI_NODOT2      URI found with no Dots
score     SARE_HTML_URI_NODOT2      0.100
#hist     SARE_HTML_URI_NODOT2      archived Aug 19 2004; too many ham hits
#counts   SARE_HTML_URI_NODOT2      6359s/3950h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_NODOT2      344s/47h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_NODOT2      1717s/277h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_NODOT2      701s/159h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_NODOT2      12s/111h of 4676 corpus (808s/3868h ft) 05/28/05

# URI containing a percent-encoded "http://" (an optional space is allowed
# between the two encoded slashes).
uri       SARE_HTML_URI_OBFU1       /http\%3A\%2F ?\%2F/i
describe  SARE_HTML_URI_OBFU1       text references URI with coded ://
score     SARE_HTML_URI_OBFU1       0.100
#ham      SARE_HTML_URI_OBFU1       drugstore.com, OmPlace Altwire Newsletter - April 25, 2004,
#ham      SARE_HTML_URI_OBFU1       redirecting URI copied into valid and personal ham email,
#ham      SARE_HTML_URI_OBFU1       Salary.com Newswire - Salary Hikes Lowest in 30 Years
#counts   SARE_HTML_URI_OBFU1       1343s/949h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_OBFU1       190s/20h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_OBFU1       5s/12h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_OBFU1       8s/29h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_OBFU1       74s/33h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_OBFU1       0s/95h of 4676 corpus (808s/3868h ft) 05/28/05

# URI containing a run of at least nine 3-digit numeric character references
# ("&#ddd;"). The "#" is backslash-escaped so the config parser does not treat
# it as the start of a comment.
uri       SARE_HTML_URI_OBFU4       /(&\#(\d){3};){9,15}/i
describe  SARE_HTML_URI_OBFU4       URI with obfuscated destination
score     SARE_HTML_URI_OBFU4       0.100
#hist     SARE_HTML_URI_OBFU4       Mike Kuentz
#overlap  SARE_HTML_URI_OBFU4       Removed SARE_HTML_URI_OBFU5 due to complete overlap: /(&\#(\d){3};){16,26}/i
#counts   SARE_HTML_URI_OBFU4       0s/0h of 56828 corpus (32234s/24594h RM) 07/25/04
#max      SARE_HTML_URI_OBFU4       39s/0h of 97268 corpus (79437s/17831h RM) 01/24/04
#counts   SARE_HTML_URI_OBFU4       0s/0h of 29366 corpus (5882s/23484h JH) 07/23/04 TM2 SA3.0-pre2
#max      SARE_HTML_URI_OBFU4       11s/0h of 32906 corpus (9660s/23246h JH) 05/24/04
#counts   SARE_HTML_URI_OBFU4       0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# Same test as SARE_HTML_URI_OBFU4 with a fixed repeat count of nine; since the
# pattern is unanchored it matches the same URIs.
uri       SARE_HTML_URI_OBFU4a      /(&\#(\d){3};){9}/i
describe  SARE_HTML_URI_OBFU4a      URI with obfuscated destination
score     SARE_HTML_URI_OBFU4a      0.100
#hist     SARE_HTML_URI_OBFU4a      Mike Kuentz
#overlap  SARE_HTML_URI_OBFU4a      Removed SARE_HTML_URI_OBFU5 due to complete overlap: /(&\#(\d){3};){16,26}/i
#counts   SARE_HTML_URI_OBFU4a      0s/0h of 65984 corpus (40739s/25245h RM) 08/21/04
#counts   SARE_HTML_URI_OBFU4a      0s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_URI_OBFU4a      0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# Redirect-script test: fires on a "redir.asp" URI unless one of the three
# exclusion sub-rules (IncrediMail content marker, incredimail.com URI,
# ancestry.com /rd/ URI) also matches.
# NOTE(review): __SARE_HTML_INCREDML looks for the quoted-printable form "=3D";
# confirm that rawbody text still carries that encoding when this rule runs.
rawbody   __SARE_HTML_INCREDML      m{content=3D"IncrediMail}
# The dot in "redir.asp" is escaped so it only matches a literal ".",
# consistent with spacer\.gif and unsubscribe\.asp elsewhere in this file.
uri       __SARE_HTML_URI_REDASP    /.{1,20}redir\.asp.{0,20}/i
uri       __SARE_HTML_URI_INCRED    /www\.incredimail\.com/
uri       __SARE_HTML_URI_ANCEST    m'www\.ancestry\.com/rd/'
meta      SARE_HTML_URI_REDASP      __SARE_HTML_URI_REDASP && !__SARE_HTML_INCREDML && !__SARE_HTML_URI_INCRED && !__SARE_HTML_URI_ANCEST
describe  SARE_HTML_URI_REDASP      body link apparently includes html redirect
score     SARE_HTML_URI_REDASP      0.100
#counts   SARE_HTML_URI_REDASP      270s/91h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_REDASP      1438s/20h of 84988 corpus (62393s/22595h RM) 06/08/04
#counts   SARE_HTML_URI_REDASP      0s/13h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_REDASP      287s/1h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_REDASP      53s/1h of 10826 corpus (6364s/4462h CT) 05/28/05

# URI query string starting "?rid" followed by either "=1000" or the byte
# sequence 0x10 "0" "0".
# NOTE(review): \x10 is a control character, not "="; confirm against upstream
# whether that first alternative is intentional.
uri       SARE_HTML_URI_RID         /\?rid(?:\x10\x30\x30|=1000)/i
describe  SARE_HTML_URI_RID         Spammer signature in URL
score     SARE_HTML_URI_RID         1.666
#hist     SARE_HTML_URI_RID         LW_URI_RID
#counts   SARE_HTML_URI_RID         4s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_RID         202s/0h of 85789 corpus (63590s/22199h RM) 05/30/04
#counts   SARE_HTML_URI_RID         0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_URI_RID         196s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_URI_RID         0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
#max      SARE_HTML_URI_RID         9s/0h of 13454 corpus (11339s/2115h MY) 06/02/04

# URI that references a file named spacer.gif.
uri       SARE_HTML_URI_SPACER      /spacer\.gif/i
describe  SARE_HTML_URI_SPACER      URI with common spammer graphic name
score     SARE_HTML_URI_SPACER      0.100
#counts   SARE_HTML_URI_SPACER      3984s/3340h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_SPACER      111s/112h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_SPACER      163s/113h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_SPACER      2230s/224h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_SPACER      8s/452h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_URI_SPACER      178s/61h of 10826 corpus (6364s/4462h CT) 05/28/05

# URI that references unsubscribe.asp.
uri       SARE_HTML_URI_UNSASP      /unsubscribe\.asp/i
describe  SARE_HTML_URI_UNSASP      text uri to unsubscribe link
score     SARE_HTML_URI_UNSASP      0.100
#counts   SARE_HTML_URI_UNSASP      393s/139h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_UNSASP      10s/30h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_UNSASP      8s/1h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_UNSASP      87s/2h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_UNSASP      2s/4h of 4676 corpus (808s/3868h ft) 05/28/05

######## ###################### ##################################################
#
# NOTE(review): the definition of SARE_HTML_P_BREAK is incomplete in this copy.
# The rule keyword, rule name and the start of the pattern are missing, and the
# surviving text looks like it lost HTML tag names and an entity (see "\ \;").
# The fragments are preserved verbatim below; restore the full rule from the
# upstream ruleset. Until then the describe/score lines refer to a rule that
# is not defined here.
#   .+)(?!\ "?\w.+)
#   \s{0,50}(?:\ \;)+\s*\W.{0,20}/i
describe  SARE_HTML_P_BREAK         Bad HTML form. Breaking lines with paragraphs.
score     SARE_HTML_P_BREAK         0.200
#stype    SARE_HTML_P_BREAK         max:0.2
# NOTE(review): the #ham/#hist comments below also appear to have lost HTML tag
# names at the points marked [?].
#ham      SARE_HTML_P_BREAK         [?] name or signature goes here
#hist     SARE_HTML_P_BREAK         2004-06-07: Added [?] exclusion, common signature exclusion, and exclusion for text following [?]
#hist     SARE_HTML_P_BREAK         2004-06-07: Allow for multiple following [?]
#counts   SARE_HTML_P_BREAK         5538s/2325h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_P_BREAK         1226s/83h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_P_BREAK         2417s/37h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_P_BREAK         1379s/155h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_P_BREAK         18s/36h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_P_BREAK         171s/91h of 10826 corpus (6364s/4462h CT) 05/28/05

# NOTE(review): the pattern text of SARE_HTML_SPACES1 is entirely missing in
# this copy (only the "/" delimiters and the /i flag survive). The definition
# is disabled rather than left as an empty pattern; restore it from upstream.
#rawbody  SARE_HTML_SPACES1         /<pattern lost>/i
describe  SARE_HTML_SPACES1         body has strange html spacing pattern
score     SARE_HTML_SPACES1         0.167
#ham      SARE_HTML_SPACES1         confirmed (1)
#counts   SARE_HTML_SPACES1         0s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_SPACES1         3s/0h of 97268 corpus (79437s/17831h RM) 01/24/04
#counts   SARE_HTML_SPACES1         0s/0h of 32900 corpus (9656s/23244h JH) 05/24/04
#counts   SARE_HTML_SPACES1         0s/1h of 26326 corpus (22886s/3440h MY) 02/15/05

######## ###################### ##################################################
# Javascript and object tests
######## ###################### ##################################################

# An HTML tag carrying an on<event>= attribute whose value references
# location.href or location.replace. The meta rule additionally requires
# HTML_MESSAGE or MIME_HTML_ONLY (defined outside this file).
# The former "{1}" after the (?:href|replace) group was a no-op and is dropped.
rawbody   __SARE_HTML_JVS_HREF      /\<[^>]+on[a-z]+ ?=[^>]?location\.(?:href|replace)[^>]*\>/i
meta      SARE_HTML_JVS_HREF        ((HTML_MESSAGE || MIME_HTML_ONLY) && __SARE_HTML_JVS_HREF)
describe  SARE_HTML_JVS_HREF        location in javascript event
score     SARE_HTML_JVS_HREF        0.100
#hist     SARE_HTML_JVS_HREF        From Jesse Houwing May 14 2004
#counts   SARE_HTML_JVS_HREF        0s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_JVS_HREF        1s/0h of 175738 corpus (98979s/76759h RM) 02/14/05
#counts   SARE_HTML_JVS_HREF        0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_JVS_HREF        0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

######## ###################### ##################################################
# Suspicious tag combinations
######## ###################### ##################################################

# Shared sub-rules: true when the message contains a body / html tag.
# They were previously defined twice with identical text; one definition is
# enough for every meta rule that uses them.
body      __TAG_EXISTS_BODY         eval:html_tag_exists('body')
body      __TAG_EXISTS_HTML         eval:html_tag_exists('html')

# NOTE(review): the __SARE_HTML_HAS_* sub-rules used by the meta rules below
# are not defined in the visible part of this file; confirm they are provided
# by another SARE HTML rules file.
meta      SARE_HTML_NO_BODY1        (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_TITLE)
describe  SARE_HTML_NO_BODY1        No body tag found in HTML email
score     SARE_HTML_NO_BODY1        0.100
#counts   SARE_HTML_NO_BODY1        26675s/14533h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY1        3278s/1717h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_NO_BODY1        2s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_NO_BODY1        12555s/1533h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY1        98s/734h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_NO_BODY1        774s/448h of 10826 corpus (6364s/4462h CT) 05/28/05

meta      SARE_HTML_NO_BODY2        (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_IMG)
score     SARE_HTML_NO_BODY2        0.077
#counts   SARE_HTML_NO_BODY2        45924s/19390h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY2        5056s/14737h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_BODY2        743s/2h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_NO_BODY2        12697s/627h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY2        1854s/344h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_BODY2        151s/995h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_BODY3        (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_FONT)
score     SARE_HTML_NO_BODY3        0.247
#counts   SARE_HTML_NO_BODY3        44782s/31577h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY3        6855s/1741h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_BODY3        352s/58h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_BODY3        19008s/2078h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY3        1989s/1349h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_BODY3        136s/1543h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML2        (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_BR)
score     SARE_HTML_NO_HTML2        0.100
#counts   SARE_HTML_NO_HTML2        51239s/33234h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML2        9486s/16140h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML2        304s/167h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML2        21230s/1903h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML2        2848s/1141h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML2        210s/2141h of 4676 corpus (808s/3868h ft) 05/28/05

# The meta rules below fire when the message has no html tag
# (__TAG_EXISTS_HTML, defined earlier in this file) but a given
# __SARE_HTML_HAS_* sub-rule matches.
# NOTE(review): the __SARE_HTML_HAS_* sub-rules are not defined in the visible
# part of this file; confirm they are provided by another SARE HTML rules file.
meta      SARE_HTML_NO_HTML3        (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_IMG)
score     SARE_HTML_NO_HTML3        0.074
#counts   SARE_HTML_NO_HTML3        45924s/19390h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML3        5056s/14737h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML3        178s/8h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML3        12697s/627h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML3        1854s/344h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML3        151s/995h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML4        (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_FONT)
score     SARE_HTML_NO_HTML4        0.200
#stype    SARE_HTML_NO_HTML4        max:0.2
#counts   SARE_HTML_NO_HTML4        44782s/31577h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML4        6855s/1741h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML4        168s/54h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML4        19008s/2078h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML4        1989s/1349h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML4        136s/1543h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML5        (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_TITLE)
score     SARE_HTML_NO_HTML5        0.212
#counts   SARE_HTML_NO_HTML5        26675s/14533h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML5        3278s/1717h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML5        11s/7h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML5        12555s/1533h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML5        774s/448h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML5        98s/734h of 4676 corpus (808s/3868h ft) 05/28/05

######## ###################### ##################################################
# Discontinued Rules
######## ###################### ##################################################

# URI whose host begins with "remove" directly after "http://".
uri       SARE_HTML_URI_REMOVE      /http:\/\/remove/i
describe  SARE_HTML_URI_REMOVE      URI with common spammer email remove page name
score     SARE_HTML_URI_REMOVE      0.611
#hist     SARE_HTML_URI_REMOVE      Replaced by REMOVE_PAGE version 2.6x and 3.0.0
#counts   SARE_HTML_URI_REMOVE      25s/8h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_REMOVE      8s/0h of 85861 corpus (63662s/22199h RM) 05/30/04
#counts   SARE_HTML_URI_REMOVE      0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#max      SARE_HTML_URI_REMOVE      1s/0h of 32900 corpus (9656s/23244h JH) 05/24/04
#counts   SARE_HTML_URI_REMOVE      0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_REMOVE      1s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_REMOVE      0s/1h of 4676 corpus (808s/3868h ft) 05/28/05

# EOF