# SARE HTML Ruleset for SpamAssassin - Archived Rules
# Version:  01.03.07
# Created:  2004-03-31
# Modified: 2005-07-02
# Usage instructions, documentation, and change history in 70_sare_html0.cf

#@@# Revision History:  Full Revision History stored in 70_sare_html.log
#@@# 01.03.07: July 2 2005
#@@#           Minor score tweaks based on recent mass-checks
#@@#           Archived from file 4: SARE_HTML_TAB_EHTML

# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Bob Menschel - RMSA@Menschel.net
# Current Home: http://www.rulesemporium.com/rules/70_sare_html_arc.cf
#
######## ###################### ##################################################
######## ###################### ##################################################
# and tag spamsign
######## ###################### ##################################################

# NOTE(review): the pattern for SARE_HTML_HTML_4 is empty ("//i") in this copy,
# and the section title above appears to be missing words; both look like
# markup that was stripped when the file was extracted.  In Perl an empty
# pattern re-uses the last successfully matched regex, so the rule would match
# unpredictably.  It is kept here, disabled, until the original pattern is
# restored from the upstream ruleset.
#rawbody  SARE_HTML_HTML_4  //i
#describe SARE_HTML_HTML_4  First three HTML tags in a row
#score    SARE_HTML_HTML_4  0.091
#counts   SARE_HTML_HTML_4  2257s/1967h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_HTML_4  427s/185h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_HTML_4  198s/42h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_HTML_4  38s/136h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_HTML_4  166s/136h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_HTML_4  3s/50h of 4676 corpus (808s/3868h ft) 05/28/05

#@@# Archived from file 4: SARE_HTML_TAB_EHTML
# Matches a literal TAB character immediately followed by a closing </html> tag.
rawbody   SARE_HTML_TAB_EHTML  /\t<\/html>/i
describe  SARE_HTML_TAB_EHTML  Message body has very strange HTML sequence
score     SARE_HTML_TAB_EHTML  0.008
#hist     SARE_HTML_TAB_EHTML  Matt K: MK_BAD_HTML_03
#counts   SARE_HTML_TAB_EHTML  42s/64h of 269462 corpus (128310s/141152h RM) 06/17/05
#counts   SARE_HTML_TAB_EHTML  1s/1h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_TAB_EHTML  9s/2h of 47221 corpus (42968s/4253h MY) 06/18/05
#counts   SARE_HTML_TAB_EHTML  0s/3h of 11260 corpus (6568s/4692h CT) 06/17/05
#max      SARE_HTML_TAB_EHTML  2s/1h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_TAB_EHTML  1s/3h of 6804 corpus (1336s/5468h ft) 06/17/05

###################################################################
# Invalid or Suspicious URI Tests
######## ###################### ##################################################

# Footer text matched in the rendered body.
body      SARE_HTML_HOTCAST  /HotCast Mass E-Mailer/i
describe  SARE_HTML_HOTCAST  Ratware mailer 3
score     SARE_HTML_HOTCAST  0.100
#note     SARE_HTML_HOTCAST  Shows up at bottom of email when you use unregistered version.
#counts   SARE_HTML_HOTCAST  0s/0h of 65861 corpus (40624s/25237h RM) 08/18/04
#counts   SARE_HTML_HOTCAST  0s/0h of 38374 corpus (14893s/23481h JH-SA3.0rc1) 08/18/04
#counts   SARE_HTML_HOTCAST  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# Case-sensitive match on the trial-version banner text.
body      SARE_HTML_URI_ASNTVO  /http:\/\/www\.adminsystem\.net \(Trial Version Only\)/
describe  SARE_HTML_URI_ASNTVO  Ratware mailer.
score     SARE_HTML_URI_ASNTVO  1.000
#counts   SARE_HTML_URI_ASNTVO  0s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_ASNTVO  163s/0h of 115439 corpus (94250s/21189h RM) 04/30/04
#counts   SARE_HTML_URI_ASNTVO  0s/0h of 38374 corpus (14893s/23481h JH-SA3.0rc1) 08/18/04
#counts   SARE_HTML_URI_ASNTVO  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# URI containing "@www." (userinfo-style address in front of a www host).
uri       SARE_HTML_URI_ATWWW  /\@www\./i
describe  SARE_HTML_URI_ATWWW  Funny WWW address.
score     SARE_HTML_URI_ATWWW  0.036
#counts   SARE_HTML_URI_ATWWW  43s/471h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_ATWWW  121s/16h of 60248 corpus (35274s/24974h RM) 08/10/04
#counts   SARE_HTML_URI_ATWWW  7s/4h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_URI_ATWWW  79s/4h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_URI_ATWWW  3s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_ATWWW  0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_ATWWW  34s/1h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_ATWWW  0s/5h of 4676 corpus (808s/3868h ft) 05/28/05

# Case-sensitive "?axel=" query marker.
uri       SARE_HTML_URI_AXEL  /\?axel\=/
describe  SARE_HTML_URI_AXEL  Spamsign found in URI
score     SARE_HTML_URI_AXEL  1.000
#stype    SARE_HTML_URI_AXEL  spam
#hist     SARE_HTML_URI_AXEL  From Chris Santerre <csanterre@MerchantsOverseas.com>, Tue, 30 Mar 2004 12:32:44 -0500
#counts   SARE_HTML_URI_AXEL  0s/0h of 175738 corpus (98979s/76759h RM) 02/14/05
#max      SARE_HTML_URI_AXEL  756s/0h of 125070 corpus (104883s/20187h RM) 03/30/04
#counts   SARE_HTML_URI_AXEL  0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_URI_AXEL  7s/0h of 29366 corpus (5882s/23484h JH) 07/23/04 TM2 SA3.0-pre2
#counts   SARE_HTML_URI_AXEL  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# Script URL whose query string starts with 44 characters from [a-z0-9.]
# (so no "=" or "&" in that span).  Case-sensitive.
uri       SARE_HTML_URI_BADQRY  /\.(?:php|asp|cgi)\?[a-z0-9\.]{44}/
describe  SARE_HTML_URI_BADQRY  Page has long query string, no = or &
score     SARE_HTML_URI_BADQRY  0.001
#hist     SARE_HTML_URI_BADQRY  Fred T, was: FR_PAGE_Q_NO_EQ
#counts   SARE_HTML_URI_BADQRY  0s/5h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_BADQRY  0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_URI_BADQRY  30s/0h of 29366 corpus (5882s/23484h JH) 07/23/04 TM2 SA3.0-pre2
#counts   SARE_HTML_URI_BADQRY  1s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_BADQRY  0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_BADQRY  1s/0h of 6944 corpus (3188s/3756h CT) 05/19/04

#@@# Archive from file 3: SARE_HTML_URI_ENC_AT
# Line starting with http:// that later contains "%40" (encoded "@") between
# host-like tokens.  Scored negative and flagged "nice" (ham indicator).
rawbody   SARE_HTML_URI_ENC_AT  m{^http://.{15,99}[a-z0-9-]{3}\%40[a-z0-9-]{3,24}\.[a-z]{2}}i
describe  SARE_HTML_URI_ENC_AT  at sign encoded in http
score     SARE_HTML_URI_ENC_AT  -0.141
#stype    SARE_HTML_URI_ENC_AT  ham
tflags    SARE_HTML_URI_ENC_AT  nice
#counts   SARE_HTML_URI_ENC_AT  1147s/1747h of 269462 corpus (128310s/141152h RM) 06/17/05
#counts   SARE_HTML_URI_ENC_AT  0s/2h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_ENC_AT  458s/12h of 47221 corpus (42968s/4253h MY) 06/18/05
#counts   SARE_HTML_URI_ENC_AT  0s/5h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_ENC_AT  1s/11h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_ENC_AT  11s/26h of 6804 corpus (1336s/5468h ft) 06/17/05

# Two adjacent <p> tags with nothing between them.
rawbody   SARE_HTML_URI_MANYP2  /(?:<p>){2}/i
describe  SARE_HTML_URI_MANYP2  Too many empty paragraph tags in a row
score     SARE_HTML_URI_MANYP2  0.100
#overlap  SARE_HTML_URI_MANYP2  Significant overlap with SARE_HTML_URI_MANYP3
#counts   SARE_HTML_URI_MANYP2  587s/1051h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_MANYP2  89s/70h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_MANYP2  9s/2h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_MANYP2  73s/3h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_MANYP2  1s/107h of 4676 corpus (808s/3868h ft) 05/28/05

# Whole URI contains no "." at all; file:/javascript:/mailto: URIs and
# http(s)://localhost, http(s)://' and bare http(s):// are excluded.
uri       SARE_HTML_URI_NODOT2  m{^(?!(?:file|javascript|mailto):|https?://(?:localhost|'|$))[^\.]+$}i
describe  SARE_HTML_URI_NODOT2  URI found with no Dots
score     SARE_HTML_URI_NODOT2  0.100
#hist     SARE_HTML_URI_NODOT2  archived Aug 19 2004; too many ham hits
#counts   SARE_HTML_URI_NODOT2  6359s/3950h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_NODOT2  344s/47h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_NODOT2  1717s/277h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_NODOT2  701s/159h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_NODOT2  12s/111h of 4676 corpus (808s/3868h ft) 05/28/05

# Percent-encoded "http://" ("http%3A%2F%2F", optional space between slashes).
uri       SARE_HTML_URI_OBFU1  /http\%3A\%2F ?\%2F/i
describe  SARE_HTML_URI_OBFU1  text references URI with coded ://
score     SARE_HTML_URI_OBFU1  0.100
#ham      SARE_HTML_URI_OBFU1  drugstore.com, OmPlace Altwire Newsletter - April 25, 2004,
#ham      SARE_HTML_URI_OBFU1  redirecting URI copied into valid and personal ham email,
#ham      SARE_HTML_URI_OBFU1  Salary.com Newswire - Salary Hikes Lowest in 30 Years
#counts   SARE_HTML_URI_OBFU1  1343s/949h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_OBFU1  190s/20h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_OBFU1  5s/12h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_OBFU1  8s/29h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_OBFU1  74s/33h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_OBFU1  0s/95h of 4676 corpus (808s/3868h ft) 05/28/05

# Run of 9 or more three-digit numeric character references ("&#nnn;").
# The "#" is escaped so SpamAssassin does not treat it as a comment start.
uri       SARE_HTML_URI_OBFU4  /(&\#(\d){3};){9,15}/i
describe  SARE_HTML_URI_OBFU4  URI with obfuscated destination
score     SARE_HTML_URI_OBFU4  0.100
#hist     SARE_HTML_URI_OBFU4  Mike Kuentz
#overlap  SARE_HTML_URI_OBFU4  Removed SARE_HTML_URI_OBFU5 due to complete overlap: /(&\#(\d){3};){16,26}/i
#counts   SARE_HTML_URI_OBFU4  0s/0h of 56828 corpus (32234s/24594h RM) 07/25/04
#max      SARE_HTML_URI_OBFU4  39s/0h of 97268 corpus (79437s/17831h RM) 01/24/04
#counts   SARE_HTML_URI_OBFU4  0s/0h of 29366 corpus (5882s/23484h JH) 07/23/04 TM2 SA3.0-pre2
#max      SARE_HTML_URI_OBFU4  11s/0h of 32906 corpus (9660s/23246h JH) 05/24/04
#counts   SARE_HTML_URI_OBFU4  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

uri       SARE_HTML_URI_OBFU4a  /(&\#(\d){3};){9}/i
describe  SARE_HTML_URI_OBFU4a  URI with obfuscated destination
score     SARE_HTML_URI_OBFU4a  0.100
#hist     SARE_HTML_URI_OBFU4a  Mike Kuentz
#overlap  SARE_HTML_URI_OBFU4a  Removed SARE_HTML_URI_OBFU5 due to complete overlap: /(&\#(\d){3};){16,26}/i
#counts   SARE_HTML_URI_OBFU4a  0s/0h of 65984 corpus (40739s/25245h RM) 08/21/04
#counts   SARE_HTML_URI_OBFU4a  0s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_URI_OBFU4a  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

# "redir.asp" in a URI, unless the message is IncrediMail-generated or the
# link goes to incredimail.com or ancestry.com/rd/.
# The dot in "redir\.asp" is escaped so that only a literal "redir.asp"
# matches (it was previously an any-character wildcard).
rawbody   __SARE_HTML_INCREDML    m{content=3D"IncrediMail}
uri       __SARE_HTML_URI_REDASP  /.{1,20}redir\.asp.{0,20}/i
uri       __SARE_HTML_URI_INCRED  /www\.incredimail\.com/
uri       __SARE_HTML_URI_ANCEST  m'www\.ancestry\.com/rd/'
meta      SARE_HTML_URI_REDASP  __SARE_HTML_URI_REDASP && !__SARE_HTML_INCREDML && !__SARE_HTML_URI_INCRED && !__SARE_HTML_URI_ANCEST
describe  SARE_HTML_URI_REDASP  body link apparently includes html redirect
score     SARE_HTML_URI_REDASP  0.100
#counts   SARE_HTML_URI_REDASP  270s/91h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_REDASP  1438s/20h of 84988 corpus (62393s/22595h RM) 06/08/04
#counts   SARE_HTML_URI_REDASP  0s/13h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_REDASP  287s/1h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_REDASP  53s/1h of 10826 corpus (6364s/4462h CT) 05/28/05

# NOTE(review): the first alternative matches the bytes 0x10 0x30 0x30
# (a control character followed by "00"); confirm this is the intended
# signature and not a mis-typed escape.
uri       SARE_HTML_URI_RID  /\?rid(?:\x10\x30\x30|=1000)/i
describe  SARE_HTML_URI_RID  Spammer signature in URL
score     SARE_HTML_URI_RID  1.666
#hist     SARE_HTML_URI_RID  LW_URI_RID
#counts   SARE_HTML_URI_RID  4s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_RID  202s/0h of 85789 corpus (63590s/22199h RM) 05/30/04
#counts   SARE_HTML_URI_RID  0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_URI_RID  196s/0h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_URI_RID  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
#max      SARE_HTML_URI_RID  9s/0h of 13454 corpus (11339s/2115h MY) 06/02/04

uri       SARE_HTML_URI_SPACER  /spacer\.gif/i
describe  SARE_HTML_URI_SPACER  URI with common spammer graphic name
score     SARE_HTML_URI_SPACER  0.100
#counts   SARE_HTML_URI_SPACER  3984s/3340h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_SPACER  111s/112h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_SPACER  163s/113h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_SPACER  2230s/224h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_SPACER  8s/452h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_URI_SPACER  178s/61h of 10826 corpus (6364s/4462h CT) 05/28/05

uri       SARE_HTML_URI_UNSASP  /unsubscribe\.asp/i
describe  SARE_HTML_URI_UNSASP  text uri to unsubscribe link
score     SARE_HTML_URI_UNSASP  0.100
#counts   SARE_HTML_URI_UNSASP  393s/139h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_URI_UNSASP  10s/30h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_URI_UNSASP  8s/1h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_URI_UNSASP  87s/2h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_URI_UNSASP  2s/4h of 4676 corpus (808s/3868h ft) 05/28/05

######## ###################### ##################################################
# <TITLE> Tag Tests
######## ###################### ##################################################

# <title> containing "Free" or "Approved"; the leading lookahead rejects
# a match position from which "Freedom" appears later in the same text.
rawbody   SARE_HTML_TITLE_FREE  /(?!.*Freedom.*)<title>.*(?:Free|Approved).*<\/title>/i
describe  SARE_HTML_TITLE_FREE  HTML Title contains spammer word or phrase
score     SARE_HTML_TITLE_FREE  0.100
#counts   SARE_HTML_TITLE_FREE  283s/99h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_TITLE_FREE  13s/3h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_TITLE_FREE  7s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_TITLE_FREE  382s/7h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_TITLE_FREE  1s/19h of 4676 corpus (808s/3868h ft) 05/28/05

# <title> containing "your" within 100 characters of either tag.
# A describe line is supplied here; the rule previously had none.
rawbody   SARE_HTML_TITLE_YR  /<title>.{0,100}your.{0,100}<\/title>/i
describe  SARE_HTML_TITLE_YR  HTML title contains "your"
score     SARE_HTML_TITLE_YR  0.100
#hist     SARE_HTML_TITLE_YR  Fred T: FR_YOUR_TITLE
#counts   SARE_HTML_TITLE_YR  369s/343h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_TITLE_YR  13s/11h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_TITLE_YR  7s/9h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_TITLE_YR  1126s/60h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_TITLE_YR  0s/50h of 4676 corpus (808s/3868h ft) 05/28/05

######## ###################### ##################################################
# <tag ... ALT= ...> tag tests
######## ###################### ##################################################

rawbody   SARE_HTML_ALT_WAIT2  /alt=Loading/i
describe  SARE_HTML_ALT_WAIT2  Asks you to wait while SPAM loads.
score     SARE_HTML_ALT_WAIT2  0.400
#counts   SARE_HTML_ALT_WAIT2  0s/0h of 98542 corpus (76935s/21607h RM) 05/12/04
#counts   SARE_HTML_ALT_WAIT2  0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_ALT_WAIT2  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_ALT_WAIT2  0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_ALT_WAIT2  12s/0h of 6944 corpus (3188s/3756h CT) 05/19/04

######## ###################### ##################################################
# Obviously invalid html tag
######## ###################### ##################################################

# "<" followed by four or more whitespace-separated alphanumeric words and
# then ">"; only scored when HTML_MESSAGE also fires.
rawbody   __SARE_HTML_BADOPEN  /<\s*([a-z0-9]+\s+){4,}>/i
meta      SARE_HTML_BADOPEN  __SARE_HTML_BADOPEN && HTML_MESSAGE
describe  SARE_HTML_BADOPEN  Illegal Tag with lots of words
score     SARE_HTML_BADOPEN  0.100
#note     SARE_HTML_BADOPEN  Remove HTML_MESSAGE test, and adds only 2 spam but adds 4 ham.
#hist     SARE_HTML_BADOPEN  Contrib by Matt Keller June 7 2004
#counts   SARE_HTML_BADOPEN  0s/0h of 114422 corpus (81069s/33353h RM) 01/16/05
#max      SARE_HTML_BADOPEN  578s/0h of 66351 corpus (40971s/25380h RM) 08/21/04
#counts   SARE_HTML_BADOPEN  0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_BADOPEN  235s/7h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_BADOPEN  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

######## ###################### ##################################################
# Useless tags (tag structures that do nothing)
# Largely submitted by Matt Yackley, with contributions by
# Carl Friend, Jennifer Wheeler, Scott Sprunger, Larry Gilson
######## ###################### ##################################################

# Empty <b></b>, <i></i>, <p></p> or <u></u> pair (same letter on both
# sides via the backreference).  The meta suppresses the hit when
# SARE_HTML_USL_1CHAR (not defined in this file) or SARE_HTML_USL_MULT
# has already fired.
rawbody   __SARE_HTML_USL_1CHAR2  /<([bipu])><\/\1>/i
meta      SARE_HTML_USL_1CHAR2  __SARE_HTML_USL_1CHAR2 && !SARE_HTML_USL_1CHAR && !SARE_HTML_USL_MULT
describe  SARE_HTML_USL_1CHAR2  Apparently empty 1-char tag and /tag combination
score     SARE_HTML_USL_1CHAR2  0.114
#counts   SARE_HTML_USL_1CHAR2  2241s/2434h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_USL_1CHAR2  2714s/1441h of 175738 corpus (98979s/76759h RM) 02/14/05
#counts   SARE_HTML_USL_1CHAR2  490s/184h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_USL_1CHAR2  423s/62h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_USL_1CHAR2  554s/118h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_USL_1CHAR2  135s/86h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_USL_1CHAR2  27s/297h of 4676 corpus (808s/3868h ft) 05/28/05

# Anchor tag with attributes but no content: "<a ...></a>".
rawbody   SARE_HTML_USL_A  /<a [^>]*><\/a>/i
describe  SARE_HTML_USL_A  Apparently valid but empty tag found in message
score     SARE_HTML_USL_A  0.118
#counts   SARE_HTML_USL_A  1136s/1833h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_USL_A  1675s/831h of 175738 corpus (98979s/76759h RM) 02/14/05
#counts   SARE_HTML_USL_A  520s/175h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_USL_A  306s/50h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_USL_A  271s/116h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_USL_A  101s/29h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_USL_A  5s/69h of 4676 corpus (808s/3868h ft) 05/28/05

# Three empty one-character tag pairs, each at most 10 characters apart.
rawbody   SARE_HTML_USL_MULT  m'<.></.>.{0,10}<.></.>.{0,10}<.></.>.{0,10}'i
describe  SARE_HTML_USL_MULT  Message contains multiple useless html tags
score     SARE_HTML_USL_MULT  0.100
#counts   SARE_HTML_USL_MULT  4s/181h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_USL_MULT  169s/0h of 93179 corpus (71385s/21794h RM) 05/16/04
#counts   SARE_HTML_USL_MULT  18s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_USL_MULT  66s/15h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_USL_MULT  0s/1h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_USL_MULT  1s/0h of 10826 corpus (6364s/4462h CT) 05/28/05

######## ###################### ##################################################
# Image tag tests
######## ###################### ##################################################

# Path segment of 1-3 characters followed by .jpg/.gif, excluding the listed
# short names (bg, go9, icq, pg1, top, yg).  Case-sensitive.
# The dot before the extension in the exclusion lookahead is escaped so it
# only matches a literal "." (it was previously an any-character wildcard).
rawbody   SARE_HTML_GIF_SHORT  m'(?!/(?:bg|go9|icq|pg1|top|yg)\.(?:gif|jpg))/.{1,3}\.(jpg|gif)'
describe  SARE_HTML_GIF_SHORT  HTML references graphic with short name
score     SARE_HTML_GIF_SHORT  0.469
#stype    SARE_HTML_GIF_SHORT  max:0.5
#counts   SARE_HTML_GIF_SHORT  12233s/3419h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_GIF_SHORT  1586s/151h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_GIF_SHORT  1509s/58h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_GIF_SHORT  3959s/117h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_GIF_SHORT  349s/84h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_GIF_SHORT  517s/99h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_GIF_SHORT  28s/295h of 4676 corpus (808s/3868h ft) 05/28/05

######## ######################
##################################################
# Paragraphs, breaks, and spacings
######## ###################### ##################################################

# <p ...> followed by one or more &nbsp; entities and then a non-word
# character.  The two leading lookaheads skip <PRE> blocks and the common
# "<P>&nbsp;<B><I>name" signature layout.
# NOTE(review): in the copy this was restored from, each "&nbsp" had been
# reduced to a single blank ("\ \;" and "<P>\ <B><I>"); the entity text is
# reinstated here on the evidence of the surviving "\;" and the #hist notes
# below.  Confirm against the upstream ruleset.
rawbody   SARE_HTML_P_BREAK  /(?!<PRE>.+)(?!<P>\&nbsp;<B><I>"?\w.+)<p\s{0,50}\S{0,50}>\s{0,50}(?:\&nbsp\;)+\s*\W.{0,20}/i
describe  SARE_HTML_P_BREAK  Bad HTML form. Breaking lines with paragraphs.
score     SARE_HTML_P_BREAK  0.200
#stype    SARE_HTML_P_BREAK  max:0.2
#ham      SARE_HTML_P_BREAK  <P>&nbsp;<B><I>name or signature goes here
#hist     SARE_HTML_P_BREAK  2004-06-07: Added <PRE> exclusion, common signature exclusion, and exclusion for text following <P>&nbsp;
#hist     SARE_HTML_P_BREAK  2004-06-07: Allow for multiple &nbsp; following <P>
#counts   SARE_HTML_P_BREAK  5538s/2325h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_P_BREAK  1226s/83h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_P_BREAK  2417s/37h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_P_BREAK  1379s/155h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_P_BREAK  18s/36h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_P_BREAK  171s/91h of 10826 corpus (6364s/4462h CT) 05/28/05

# NOTE(review): the three-blank runs on each side of <BR><BR> may originally
# have been &nbsp; entities; the pattern is left exactly as found - confirm
# against the upstream ruleset.
rawbody   SARE_HTML_SPACES1  /   <BR><BR>   /i
describe  SARE_HTML_SPACES1  body has strange html spacing pattern
score     SARE_HTML_SPACES1  0.167
#ham      SARE_HTML_SPACES1  confirmed (1)
#counts   SARE_HTML_SPACES1  0s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_SPACES1  3s/0h of 97268 corpus (79437s/17831h RM) 01/24/04
#counts   SARE_HTML_SPACES1  0s/0h of 32900 corpus (9656s/23244h JH) 05/24/04
#counts   SARE_HTML_SPACES1  0s/1h of 26326 corpus (22886s/3440h MY) 02/15/05

######## ###################### ##################################################
# Javascript and object tests
######## ###################### ##################################################

# Tag with an on<event>= attribute that assigns/calls location.href or
# location.replace; only scored for HTML messages.
rawbody   __SARE_HTML_JVS_HREF  /\<[^>]+on[a-z]+ ?=[^>]?location\.(?:href|replace)[^>]*\>/i
meta      SARE_HTML_JVS_HREF  ((HTML_MESSAGE || MIME_HTML_ONLY) && __SARE_HTML_JVS_HREF)
describe  SARE_HTML_JVS_HREF  location in javascript event
score     SARE_HTML_JVS_HREF  0.100
#hist     SARE_HTML_JVS_HREF  From Jesse Houwing May 14 2004
#counts   SARE_HTML_JVS_HREF  0s/0h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_JVS_HREF  1s/0h of 175738 corpus (98979s/76759h RM) 02/14/05
#counts   SARE_HTML_JVS_HREF  0s/0h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_JVS_HREF  0s/0h of 26326 corpus (22886s/3440h MY) 02/15/05

######## ###################### ##################################################
# Suspicious tag combinations
######## ###################### ##################################################

# Sub-rules shared by every NO_BODY* / NO_HTML* meta below (defined once).
# The __SARE_HTML_HAS_* sub-rules used by the metas are not defined in this
# file and must be supplied by another ruleset file.
body      __TAG_EXISTS_BODY  eval:html_tag_exists('body')
body      __TAG_EXISTS_HTML  eval:html_tag_exists('html')

meta      SARE_HTML_NO_BODY1  (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_TITLE)
describe  SARE_HTML_NO_BODY1  No body tag found in HTML email
score     SARE_HTML_NO_BODY1  0.100
#counts   SARE_HTML_NO_BODY1  26675s/14533h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY1  3278s/1717h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#counts   SARE_HTML_NO_BODY1  2s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_NO_BODY1  12555s/1533h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY1  98s/734h of 4676 corpus (808s/3868h ft) 05/28/05
#counts   SARE_HTML_NO_BODY1  774s/448h of 10826 corpus (6364s/4462h CT) 05/28/05

meta      SARE_HTML_NO_BODY2  (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_IMG)
describe  SARE_HTML_NO_BODY2  No body tag found in HTML email
score     SARE_HTML_NO_BODY2  0.077
#counts   SARE_HTML_NO_BODY2  45924s/19390h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY2  5056s/14737h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_BODY2  743s/2h of 38858 corpus (15368s/23490h JH-SA3.0rc1) 08/22/04
#counts   SARE_HTML_NO_BODY2  12697s/627h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY2  1854s/344h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_BODY2  151s/995h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_BODY3  (!__TAG_EXISTS_BODY && __SARE_HTML_HAS_FONT)
describe  SARE_HTML_NO_BODY3  No body tag found in HTML email
score     SARE_HTML_NO_BODY3  0.247
#counts   SARE_HTML_NO_BODY3  44782s/31577h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_BODY3  6855s/1741h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_BODY3  352s/58h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_BODY3  19008s/2078h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_BODY3  1989s/1349h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_BODY3  136s/1543h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML2  (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_BR)
describe  SARE_HTML_NO_HTML2  No html tag found in HTML email
score     SARE_HTML_NO_HTML2  0.100
#counts   SARE_HTML_NO_HTML2  51239s/33234h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML2  9486s/16140h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML2  304s/167h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML2  21230s/1903h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML2  2848s/1141h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML2  210s/2141h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML3  (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_IMG)
describe  SARE_HTML_NO_HTML3  No html tag found in HTML email
score     SARE_HTML_NO_HTML3  0.074
#counts   SARE_HTML_NO_HTML3  45924s/19390h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML3  5056s/14737h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML3  178s/8h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML3  12697s/627h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML3  1854s/344h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML3  151s/995h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML4  (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_FONT)
describe  SARE_HTML_NO_HTML4  No html tag found in HTML email
score     SARE_HTML_NO_HTML4  0.200
#stype    SARE_HTML_NO_HTML4  max:0.2
#counts   SARE_HTML_NO_HTML4  44782s/31577h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML4  6855s/1741h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML4  168s/54h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML4  19008s/2078h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML4  1989s/1349h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML4  136s/1543h of 4676 corpus (808s/3868h ft) 05/28/05

meta      SARE_HTML_NO_HTML5  (!__TAG_EXISTS_HTML && __SARE_HTML_HAS_TITLE)
describe  SARE_HTML_NO_HTML5  No html tag found in HTML email
score     SARE_HTML_NO_HTML5  0.212
#counts   SARE_HTML_NO_HTML5  26675s/14533h of 258858 corpus (114246s/144612h RM) 05/27/05
#counts   SARE_HTML_NO_HTML5  3278s/1717h of 54283 corpus (17106s/37177h JH-3.01) 02/13/05
#max      SARE_HTML_NO_HTML5  11s/7h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#counts   SARE_HTML_NO_HTML5  12555s/1533h of 26326 corpus (22886s/3440h MY) 02/15/05
#counts   SARE_HTML_NO_HTML5  774s/448h of 10826 corpus (6364s/4462h CT) 05/28/05
#counts   SARE_HTML_NO_HTML5  98s/734h of 4676 corpus (808s/3868h ft) 05/28/05

######## ###################### ##################################################
# Discontinued Rules
######## ###################### ##################################################

uri       SARE_HTML_URI_REMOVE  /http:\/\/remove/i
describe  SARE_HTML_URI_REMOVE  URI with common spammer email remove page name
score     SARE_HTML_URI_REMOVE  0.611
#hist     SARE_HTML_URI_REMOVE  Replaced by REMOVE_PAGE version 2.6x and 3.0.0
#counts   SARE_HTML_URI_REMOVE  25s/8h of 258858 corpus (114246s/144612h RM) 05/27/05
#max      SARE_HTML_URI_REMOVE  8s/0h of 85861 corpus (63662s/22199h RM) 05/30/04
#counts   SARE_HTML_URI_REMOVE  0s/0h of 29365 corpus (5882s/23483h JH) 08/14/04 TM2 SA3.0-pre2
#max      SARE_HTML_URI_REMOVE  1s/0h of 32900 corpus (9656s/23244h JH) 05/24/04
#counts   SARE_HTML_URI_REMOVE  0s/0h of 10826 corpus (6364s/4462h CT) 05/28/05
#max      SARE_HTML_URI_REMOVE  1s/0h of 6944 corpus (3188s/3756h CT) 05/19/04
#counts   SARE_HTML_URI_REMOVE  0s/1h of 4676 corpus (808s/3868h ft) 05/28/05

# EOF