# SARE Spammer URI Rule Set for SpamAssassin - file 0
# Version: 01.01.02
# Created: 2004-09-13
# Modified: 2005-07-02
# Usage instructions and documentation are found in 70_sare_uri0.cf
#@@# Revision History: Full Revision History stored in 70_sare_uri.log
#@@# 01.01.02: July 02 2005
#@@#           Moved from file 0 to 1: SARE_URI_ANUMA
#@@#           Moved from file 0 to 1: SARE_URI_OFF
#
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Bob Menschel - uri@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_sare_uri0.cf
# Usage: This family of files, 70_sare_uri*.cf, contains rules that test uri strings within emails
#
# These files are not intended to replace or supplement SURBL, nor its BigEvil
# predecessor. We assume that systems that are interested in blocking spam that
# identifies itself by referencing spammer domains will implement the SURBL
# functionality within SpamAssassin to do so.
#
# These files aim to identify URI links that cannot be tested by SURBL or similar
# methods because they do not reference any specific domain name.
#
# File 0: 70_sare_uri0.cf -- These are uri rules that hit at least 10 spam and no ham.
#         While SARE cannot guarantee they never will hit ham, they have not hit ham in any SARE mass-check, against tens of thousands of ham.
#         This is a rules file we expect any/all email systems using SpamAssassin to benefit from.
#
# File 1: 70_sare_uri1.cf -- These are uri rules that meet one of the following criteria:
#         a) Rules that do, or in the past have hit ham during SARE mass-check tests
#         b) Rules that hit no ham and currently do not hit more than 10 spam in any single mass-check run.
#         If the rules hit ham, they hit at least 10 spam to each 1 ham.
#         With few exceptions these rules score significantly less than the rules in file 0.
#         Systems which are very sensitive to false positives and/or need to be very careful about resource use may want to exclude this ruleset,
#         pick and choose among its rules, or lower their scores.
#         Systems that use this file 1 should ALSO use file 0.
#
# File 2: 70_sare_uri2.cf -- (reserved, not currently used)
#
# File 3: 70_sare_uri3.cf -- These are uri rules that hit a significant amount of ham during SARE mass-check tests.
#         Systems which are very sensitive to false positives or to SA resource usage should NOT install this ruleset.
#
# File 4: 70_sare_uri4.cf -- (not currently used)
#
# eng: 70_sare_uri_eng.cf -- These are uri rules which work well within the English language, but are liable to cause false
#         positives in other languages. They include rules which test for letter combinations. Systems that
#         receive ham in languages other than English should NOT use this file.
#
# arc: 70_sare_uri_arc.cf -- These are uri rules that once were published in other files, but which have since lost all value.
#         They either hit too much ham (without hitting enough spam to make it worthwhile), or they don't hit any spam.
#         SARE regularly runs mass-checks on these rules to see if any of them are worth reviving, but
#         we expect that nobody will be running these rules in any production system.
######## ###################### ##################################################
# Category: Sub-rules needed by others
######## ###################### ##################################################

# Helper sub-rules (double-underscore prefix, no score of their own).
# __SARE_URI_ANY hits any URI that contains at least one character.
uri       __SARE_URI_ANY  /./
#hist     __SARE_URI_ANY  Murty Rompalli, 2005-01-03

# Blank-line ratio test, evaluated with the arguments '5' and '100'.
body      __SARE_BODY_BLNK_5_100  eval:check_blank_line_ratio('5','100')
#hist     __SARE_BODY_BLNK_5_100  Murty Rompalli, 2005-01-03

# True only when both helper sub-rules above hit.
meta      __SARE_META_MURTY3  (__SARE_URI_ANY && __SARE_BODY_BLNK_5_100)
#hist     __SARE_META_MURTY3  Murty Rompalli, 2005-01-03

# Retired or relocated rule names, each pinned to the constant meta expression 0
# so the name can never hit from this file.
# NOTE(review): presumably this neutralises stale definitions left over from
# older copies of the rule set -- confirm with the maintainer.
meta      SARE_URI_H0  0
meta      SARE_URI_PORTD4  0       # Archived, Oct 2004
meta      SARE_URI_DIG_LET_PIC  0  # Archived, Oct 2004
meta      SARE_URI_SUCCEZZ  0      # Archived, 01.01.00, Feb 2005
meta      SARE_URI_HOUSE  0        # Archived, 01.01.00, Feb 2005
meta      SARE_URI_P8  0           # Archived, 01.01.00, Feb 2005
meta      SARE_URI_MRTG  0         # Archived, 01.01.00, Feb 2005
meta      SARE_URI_REFID2  0       # Archived, 01.01.00, Feb 2005
meta      SARE_URI_REFID3  0       # Archived, 01.01.00, Feb 2005
meta      SARE_URI_AFF_DIG  0      # Archived, 01.01.00, Feb 2005
meta      SARE_URI_IPPORT3333  0   # Archived, 01.01.00, Feb 2005
meta      SARE_URI_SQUARE  0       # Archived, 01.01.00, Feb 2005
meta      SARE_URI_OPTOUT  0       # Moved from file 0 to file 3, 01.01.01, Mar 2005
meta      SARE_URI_DIET  0         # Moved from file 1 to file 3, 01.01.01, Mar 2005
meta      SARE_URI_DOM_ENDU  0     # Moved from file 1 to file 3, 01.01.01, Mar 2005

######## ###################### ##################################################
# Category: URI links identified by spammer words
######## ###################### ##################################################

# Host part (everything before the first "/") contains "med" or "medz" with
# digits directly before or after it, followed by a dot.
uri       SARE_URI_DMEDZDc  m'http://[^/]*(?:\d+medz?|medz?\d+)\.'i
describe  SARE_URI_DMEDZDc  body contains link to likely spammer
score     SARE_URI_DMEDZDc  2.222
#stype    SARE_URI_DMEDZDc  spamp
#hist     SARE_URI_DMEDZDc  Created by Bob Menschel Apr 23 2004; opt leading/trailing digits expanded Feb 2005
#counts   SARE_URI_DMEDZDc  708s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#counts   SARE_URI_DMEDZDc  72s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_DMEDZDc  4s/0h of 27707 corpus (24264s/3443h MY) 02/27/05
#counts   SARE_URI_DMEDZDc  3s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_DMEDZDc  36s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# URI contains the path segment "/hgh/" (case-insensitive).
uri       SARE_URI_HGH  m{/hgh/}i
describe  SARE_URI_HGH  body link suggests spammer web page
score     SARE_URI_HGH  1.111
#stype    SARE_URI_HGH  spamp
#hist     SARE_URI_HGH  Fred Tarasevicius - FU_HG_PATH
#counts   SARE_URI_HGH  2s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_HGH  61s/0h of 114212 corpus (81067s/33145h RM) 01/19/05
#counts   SARE_URI_HGH  15s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_HGH  3s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_HGH  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_HGH  1s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Body text "no thank..." followed within one to four whitespace-separated
# tokens by "http://", "https://" or "www.". The scored meta additionally
# requires __SARE_META_MURTY3 to hit.
body      __SARE_URI_NO_THANKS  /\bn(?:o|0)+[_\W]+thank(?:\S+\s+){1,4}(?:https?\:\/\/|www\.)/i
meta      SARE_URI_NO_THANKS  (__SARE_URI_NO_THANKS && __SARE_META_MURTY3)
describe  SARE_URI_NO_THANKS  Unsubscribe at this link
score     SARE_URI_NO_THANKS  3.333
#stype    SARE_URI_NO_THANKS  spamg
#hist     SARE_URI_NO_THANKS  Murty Rompalli, 2005-01-03
#counts   SARE_URI_NO_THANKS  10211s/0h of 261530 corpus (118674s/142856h RM) 03/09/05
#max      SARE_URI_NO_THANKS  11124s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_NO_THANKS  1045s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_NO_THANKS  84s/0h of 31513 corpus (27912s/3601h MY) 03/09/05
#counts   SARE_URI_NO_THANKS  22s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_NO_THANKS  9s/0h of 11015 corpus (6587s/4428h CT) 03/10/05
#counts   SARE_URI_NO_THANKS  22s/0h of 682 corpus (290s/392h CRF) 03/11/05

# URI contains the path segment "/prime/" (case-sensitive: no /i modifier).
uri       SARE_URI_PRIME  m'/prime/'
describe  SARE_URI_PRIME  body contains link to known spammer
score     SARE_URI_PRIME  1.666
#hist     SARE_URI_PRIME  Created by Bob Menschel Aug 09 2004
#counts   SARE_URI_PRIME  92s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_PRIME  191s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_PRIME  92s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_PRIME  191s/0h of 31513 corpus (27912s/3601h MY) 03/09/05
#counts   SARE_URI_PRIME  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_PRIME  17s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

######## ###################### ##################################################
# Category: URI links identified by spammer names
######## ###################### ##################################################

# Query string starts with "hehkruto" or "giggles" (case-sensitive).
uri       SARE_URI_GIGGLES  /\?(?:hehkruto|giggles)/
describe  SARE_URI_GIGGLES  body contains link to known spammer
score     SARE_URI_GIGGLES  1.628
#hist     SARE_URI_GIGGLES  LW_URI_GIGGLES
#counts   SARE_URI_GIGGLES  0s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_GIGGLES  123s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_GIGGLES  0s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#max      SARE_URI_GIGGLES  5s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_GIGGLES  31s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_GIGGLES  63s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_GIGGLES  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_GIGGLES  2s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

uri       SARE_URI_HARRYDAV  /\bharryanddavid\b/i
describe  SARE_URI_HARRYDAV  body contains link to known spammer
score     SARE_URI_HARRYDAV  3.333
#stype    SARE_URI_HARRYDAV  spamgg
#hist     SARE_URI_HARRYDAV  Created by Bob Menschel Aug 26 2004
#counts   SARE_URI_HARRYDAV  4s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_HARRYDAV  14s/0h of 70699 corpus (43133s/27566h RM) 10/02/04
#counts   SARE_URI_HARRYDAV  0s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_HARRYDAV  0s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_HARRYDAV  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_HARRYDAV  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

uri       SARE_URI_IHIRE  /\biHire\w+\.com/i
describe  SARE_URI_IHIRE  body contains link to known spammer
score     SARE_URI_IHIRE  3.333
#stype    SARE_URI_IHIRE  spamgg
#hist     SARE_URI_IHIRE  Created by Bob Menschel Jul 17 2004
#counts   SARE_URI_IHIRE  32s/0h of 271461 corpus (129860s/141601h RM) 06/12/05
#counts   SARE_URI_IHIRE  0s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_IHIRE  0s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_IHIRE  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_IHIRE  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Word characters, one digit, then the literal "rneds." (case-insensitive).
uri       SARE_URI_NORDTECHS  /\b\w+\drneds\./i
describe  SARE_URI_NORDTECHS  body contains link to probable spammer
score     SARE_URI_NORDTECHS  3.333
#stype    SARE_URI_NORDTECHS  spamgg
#hist     SARE_URI_NORDTECHS  Created by Bob Menschel Aug 18 2004
#counts   SARE_URI_NORDTECHS  0s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_NORDTECHS  96s/0h of 70699 corpus (43133s/27566h RM) 10/02/04
#counts   SARE_URI_NORDTECHS  16s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_NORDTECHS  12s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_NORDTECHS  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_NORDTECHS  2s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

uri       SARE_URI_SEABOURN  /\bseabourn\b/i
describe  SARE_URI_SEABOURN  body contains link to known spammer
score     SARE_URI_SEABOURN  2.500
#stype    SARE_URI_SEABOURN  spamgg
#hist     SARE_URI_SEABOURN  Created by Bob Menschel Jul 24 2004
#counts   SARE_URI_SEABOURN  14s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_SEABOURN  18s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_SEABOURN  0s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_SEABOURN  0s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_SEABOURN  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_SEABOURN  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# URI contains "stox", one or more digits, then "@yahoo" (case-insensitive).
uri       SARE_URI_STOX  /stox\d+\@yahoo/i
describe  SARE_URI_STOX  URI contains stox+digits @yahoo address
score     SARE_URI_STOX  1.666
#hist     SARE_URI_STOX  Bob Menschel, Feb 28 2005, from idea posted by Duncan Hill, Feb 24 2005
#counts   SARE_URI_STOX  203s/0h of 238366 corpus (112473s/125893h RM) 02/28/05
#counts   SARE_URI_STOX  0s/0h of 54806 corpus (17633s/37173h JH-3.01) 03/13/05

######## ###################### ##################################################
# Category: URI links identified by technical attributes
######## ###################### ##################################################

# An all-digit label immediately before ".biz".
uri       SARE_URI_DIG_BIZ  /\b\d+\.biz/i
describe  SARE_URI_DIG_BIZ  body contains link to probable spammer
score     SARE_URI_DIG_BIZ  1.467
#hist     SARE_URI_DIG_BIZ  Created by Bob Menschel Jul 17 2004
#counts   SARE_URI_DIG_BIZ  6s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_DIG_BIZ  147s/0h of 92181 corpus (67808s/24373h RM) 07/18/04
#counts   SARE_URI_DIG_BIZ  0s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#max      SARE_URI_DIG_BIZ  9s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_DIG_BIZ  2s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#max      SARE_URI_DIG_BIZ  5s/0h of 44759 corpus (16528s/28231h JH-SA3.0rc1) 09/06/04
#counts   SARE_URI_DIG_BIZ  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_DIG_BIZ  3s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# Path segment of "_" plus exactly 32 characters from [a-z0-9] (any case).
# Despite the rule name, the character class is not restricted to hex digits.
uri       SARE_URI_HEX32  m{^http://.{5,80}/_[a-z0-9]{32}/}i
describe  SARE_URI_HEX32  Spammer web page name pattern
score     SARE_URI_HEX32  1.666
#hist     SARE_URI_HEX32  Fred Tarasevicius - FU_LONG_HEX_32
#counts   SARE_URI_HEX32  173s/0h of 261530 corpus (118674s/142856h RM) 03/09/05
#max      SARE_URI_HEX32  279s/0h of 114212 corpus (81067s/33145h RM) 01/19/05
#counts   SARE_URI_HEX32  103s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_HEX32  7s/0h of 27707 corpus (24264s/3443h MY) 02/27/05
#counts   SARE_URI_HEX32  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_HEX32  7s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

######## ###################### ##################################################
# Category: URI links identified by use of randomizing characters
######## ###################### ##################################################

# Note: this is a rawbody rule, not a uri rule, even though its name carries
# the SARE_URI_ prefix. The pattern is anchored at both ends (^ ... $).
rawbody   SARE_URI_RAW_ONLY  m{^http://[^.]{2,10}\.[^.]{6,9}\.(?:info|biz)/\?[^=./&]{15,30}$}i
describe  SARE_URI_RAW_ONLY  URL contains apparent random name
score     SARE_URI_RAW_ONLY  1.666
#hist     SARE_URI_RAW_ONLY  Fred Tarasevicius - FU_RAW_ONLY_URI
#counts   SARE_URI_RAW_ONLY  731s/0h of 238365 corpus (112478s/125887h RM) 02/28/05
#max      SARE_URI_RAW_ONLY  828s/0h of 175589 corpus (98978s/76611h RM) 02/14/05
#counts   SARE_URI_RAW_ONLY  218s/0h of 54828 corpus (17650s/37178h JH-3.01) 03/13/05
#counts   SARE_URI_RAW_ONLY  1s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_RAW_ONLY  9s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_RAW_ONLY  79s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

uri       SARE_URI_SHARE_DIG  /\d\.share\d\.(?:us|biz|info)/i
describe  SARE_URI_SHARE_DIG  Domain is one of several, likely spammer
score     SARE_URI_SHARE_DIG  0.622
#hist     SARE_URI_SHARE_DIG  Fred Tarasevicius - FU_SHARE_DIGIT
#counts   SARE_URI_SHARE_DIG  0s/0h of 196626 corpus (96197s/100429h RM) 02/22/05
#max      SARE_URI_SHARE_DIG  10s/0h of 114212 corpus (81067s/33145h RM) 01/19/05
#counts   SARE_URI_SHARE_DIG  0s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_SHARE_DIG  2s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#counts   SARE_URI_SHARE_DIG  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_SHARE_DIG  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

######## ###################### ##################################################
# Category: URI links identified by web page/file names
######## ###################### ##################################################

# Query string starts with "oc=" followed by at least four digits.
uri       SARE_URI_OC  /\?oc=\d{4,10}/
describe  SARE_URI_OC  Possible spammer sign in URL
score     SARE_URI_OC  1.306
#counts   SARE_URI_OC  2s/0h of 261530 corpus (118674s/142856h RM) 03/09/05
#max      SARE_URI_OC  31s/0h of 66947 corpus (41732s/25215h RM) 09/06/04
#counts   SARE_URI_OC  4s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_OC  0s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#max      SARE_URI_OC  100s/0h of 19447 corpus (16862s/2585h MY) 09/06/04
#counts   SARE_URI_OC  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_OC  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# URI references a file named "c2.gif" or "a3.gif" (case-sensitive).
uri       SARE_URI_VDRUG_GIF  /\/(?:c2|a3)\.gif/
describe  SARE_URI_VDRUG_GIF  Random Domain maker Vdrug seller
score     SARE_URI_VDRUG_GIF  1.666
#hist     SARE_URI_VDRUG_GIF  CS_uwm_VDRUG_RANDOM1
#counts   SARE_URI_VDRUG_GIF  3s/0h of 196626 corpus (96197s/100429h RM) 02/22/05
#max      SARE_URI_VDRUG_GIF  360s/0h of 115925 corpus (94616s/21309h RM) 05/01/04
#counts   SARE_URI_VDRUG_GIF  3s/0h of 26190 corpus (22790s/3400h MY) 02/15/05
#max      SARE_URI_VDRUG_GIF  7s/0h of 19448 corpus (16863s/2585h MY) 09/06/04
#counts   SARE_URI_VDRUG_GIF  7s/0h of 54103 corpus (16925s/37178h JH-3.01) 02/15/05
#counts   SARE_URI_VDRUG_GIF  0s/0h of 682 corpus (290s/392h CRF) 02/16/05
#counts   SARE_URI_VDRUG_GIF  0s/0h of 11015 corpus (6587s/4428h CT) 03/10/05

# EOF