# SARE "General Subject" Ruleset for SpamAssassin
# Version: 00.03.00
# Created: 04/19/2004
# Modified: 04/21/2004
# Changes: Beta Release
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj1.cf
#
# Related rules files:
# 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham
# 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900
# 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails
# 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use
#
# Unlike 70_sare_genlsubj0.cf, this ...genlsubj1.cf ruleset contains rules which DO hit ham,
# or have done so in the past, during SARE mass-check tests. The S/O values calculated by
# SA's hit-frequencies scripts are all at or above 0.900.
# Systems which are excessively sensitive to false positives may want to exclude this ruleset,
# pick and choose among its rules, or lower their scores.
#
# SARE_SUB_BETTER needs to have more common phrases added to exclusion list
# Please send copies of ham hit by these rules to genlsubj@rulesemporium.com
#
# Layout: one directive per line. SpamAssassin configuration is line-oriented
# and a '#' comment runs to the end of the line, so every header/meta/describe/
# score directive must start its own line to take effect.
#
# The "#counts", "#max", "#total", "#ham", "#stype" and "#hist" lines are
# bookkeeping comments. Presumably "Ns/Mh" means N spam hits / M ham hits out
# of the stated corpus -- confirm against the SARE mass-check tooling.

# Category: Adult, porn
header SARE_SUB_NEW_CLUB Subject =~ /(?:dat(?:e|ing)|free|havana|jupiter|leads|new)\W*club|club\W*(?:casino|date|dice)/i
describe SARE_SUB_NEW_CLUB Subject contains spammer subject - adult or porn
score SARE_SUB_NEW_CLUB 0.675
#counts SARE_SUB_NEW_CLUB 43s/1h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_NEW_CLUB Beverages & More

# "[i|]" is a character class: it accepts a literal "|" standing in for "i".
header SARE_SUB_PARIS_HILTON Subject =~ /\b(?:(?:P|Paris)\W*h[i|]lt[o0]n|hilton\W*sister)/i
describe SARE_SUB_PARIS_HILTON Subject contains spammer subject - adult or porn
score SARE_SUB_PARIS_HILTON 0.185
#counts SARE_SUB_PARIS_HILTON 98s/0h of 113374 corpus (92402s/20972h) 04/18/04
#counts SARE_SUB_PARIS_HILTON 2s/2h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_PARIS_HILTON 100s/2h of 134581 corpus

# Category: Black market items, services, activities, scams, frauds

# Category: Business
header SARE_SUB_BOSS Subject =~ /(?:y.?[o0].?u.?r ([o0]wn )?|fired my)\W*b[o0]ss/i
describe SARE_SUB_BOSS Subject contains spammer subject - business
score SARE_SUB_BOSS 1.588
#counts SARE_SUB_BOSS 186s/0h of 113393 corpus (92421s/20972h) 04/18/04
#counts SARE_SUB_BOSS 0s/1h of 21207 corpus (6695s/14512h) 04/20/04

header SARE_SUB_HEALTH_CARE Subject =~ /(?:Health\W*(?:Care\W*(?:aff[o0]rdable|f[o0]r\W*(?:all\W*americans|y[o0]u\W*and\W*y[o0]ur\W*family)|insurance\W*leads|savings|sect[o0]r|st[o0]ck|technology|y[o0]u\W*can\W*aff[o0]rd)|risk\W*reducti[o0]n|screening.{1,10}testing|st[o0]re|update)|(?:(?:impr[o0]ve|restore)\W*y[o0]ur|l[o0]se\W*p[o0]unds\W*f[o0]r|pay.{1,30}too\W*much.{0,30}|sexual)\W*health|safe\W*and\W*healthy|with[o0]ut\W*health\W*insurance|(?:affordable|need|new)\W*health\W*(?:care|insurance|plan)|online\W*health.{1,30}store|(?:prevent|reduce|solve)\W*(?:your\W*)?health\W*(?:problems|risks)|american\W*medical\W*directory|the\W*(?:(?:daily|natural)\W*)?health\W*(?:e.?mail|e.?zine|chann?el|magazine|newsletter)|younger\W*and\W*healthier|health\W*is\W*your\W*wealth)/i
describe SARE_SUB_HEALTH_CARE Subject contains spammer subject - business
score SARE_SUB_HEALTH_CARE 1.366
#counts SARE_SUB_HEALTH_CARE 292s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_HEALTH_CARE "Health Screening and Testing" (1) drugstore.com

header SARE_SUB_INCOME Subject =~ /\bincome\b/i
describe SARE_SUB_INCOME Subject contains spammer subject - business
score SARE_SUB_INCOME 0.816
#counts SARE_SUB_INCOME 188s/3h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_INCOME auto-responder income tax forms from CA FTB (2), personal email (1)

# Category: Credit, debt, lending, mortgage, borrowing, investment, financing
header SARE_SUB_MORTGAGE Subject =~ /(?:(?:\%|2nd|best|competitive|easy|EZ|fixed|for\W*your|great|home|instant|loans\W*and|lowest|\bno|online|rate|second)..?mortgage|mortgages?\W*(?:broker|gone|hunt|interest|lead|loan|manager|notif(?:ication|y)|quote|r.?[a\@].?t.?e.?s?|refinanc(?:e|ing)|shopping|too\W*high|verification)|mortgage.{1,30}reduced|(?:\$\d|compete|find|pay(ing|ment)|qualify|search|shopping).{1,30}mortgage)/i
describe SARE_SUB_MORTGAGE Subject contains spammer subject - credit or money
score SARE_SUB_MORTGAGE 1.133
#counts SARE_SUB_MORTGAGE 520s/4h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_MORTGAGE "mortgage manager" from user's bank (4)

# Category: Marketing, Pricing, Selling, Buying
header SARE_SUB_AS_LOW_AS Subject =~ /(?:as\W*low\W*as|(?:at\W*low|smart)\W*prices?|\blow(?:est)?.?(?:cost|point|price[ds]?|rates?))/i
describe SARE_SUB_AS_LOW_AS Subject contains spammer subject - marketing
score SARE_SUB_AS_LOW_AS 1.591
#counts SARE_SUB_AS_LOW_AS 1679s/8h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_AS_LOW_AS (8 verified:) Marriott Rewards, Drugstore.com, Beverages & More, United Airlines, TiVo

header SARE_SUB_GASOLINE Subject =~ /\bgas(oline)?\b/i
describe SARE_SUB_GASOLINE Subject contains spammer subject - marketing
score SARE_SUB_GASOLINE 0.752
#counts SARE_SUB_GASOLINE 248s/6h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_GASOLINE 336s/5h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_GASOLINE gas prices, gas chambers

header SARE_SUB_GIFT_CARD Subject =~ /Gift\W*Card/i
describe SARE_SUB_GIFT_CARD Subject contains spammer subject - marketing
score SARE_SUB_GIFT_CARD 0.758
#stype SARE_SUB_GIFT_CARD spam
#counts SARE_SUB_GIFT_CARD 73s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_GIFT_CARD Clark for President newsletter
#hist SARE_SUB_GIFT_CARD Obfu tested 01/24/04 -- no obfu

header SARE_SUB_PAYING Subject =~ /Paying/i
describe SARE_SUB_PAYING Subject contains spammer subject - marketing
score SARE_SUB_PAYING 0.784
#counts SARE_SUB_PAYING 206s/4h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_PAYING 229s/4h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_PAYING paying for classes, paying your taxes, paying attention

# Category: Medical
header SARE_SUB_MEDICAT Subject =~ /medication/i
describe SARE_SUB_MEDICAT Subject contains spammer subject - medical
score SARE_SUB_MEDICAT 1.666
#counts SARE_SUB_MEDICAT 540s/1h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_MEDICAT 830s/1h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_MEDICAT personal medication info

header SARE_SUB_MEDS Subject =~ /\bmeds\b/i
describe SARE_SUB_MEDS Subject contains spammer subject - medical
score SARE_SUB_MEDS 1.666
#counts SARE_SUB_MEDS 1465s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_MEDS About.com Health

# Category: Generic words and phrases
# SARE_SUB_BETTER is a meta rule: it fires when the subject contains "better"
# and none of the three __SARE_SUB_BETTER_H* exclusion phrases match.
header __SARE_SUB_BETTER Subject =~ /BETTER/i
header __SARE_SUB_BETTER_H1 Subject =~ /better late than never/i
header __SARE_SUB_BETTER_H2 Subject =~ /try to do better/i
header __SARE_SUB_BETTER_H3 Subject =~ /better fit/i
meta SARE_SUB_BETTER __SARE_SUB_BETTER && !__SARE_SUB_BETTER_H1 && !__SARE_SUB_BETTER_H2 && !__SARE_SUB_BETTER_H3
describe SARE_SUB_BETTER Subject contains likely spammer phrase or word
score SARE_SUB_BETTER 1.160
#counts SARE_SUB_BETTER 871s/4h of 113305 corpus (92399s/20906h) 04/22/04
#ham SARE_SUB_BETTER about.com newsletters, mysurvey.com, "better" web page mentioned in mailing list, valid marketing emails, "better late than never"

header SARE_SUB_BIGGER Subject =~ /bigger/i
describe SARE_SUB_BIGGER Subject contains likely spammer phrase or word
score SARE_SUB_BIGGER 1.205
#counts SARE_SUB_BIGGER 234s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_BIGGER About.com Health

header SARE_SUB_FOREVER Subject =~ /for\W*?ever\b/i
describe SARE_SUB_FOREVER Subject contains likely spammer phrase or word
score SARE_SUB_FOREVER 0.655
#counts SARE_SUB_FOREVER 146s/0h of 111251 corpus (90481s/20770h) 04/15/04
#counts SARE_SUB_FOREVER 13s/9h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_FOREVER 179s/9h of 132458 corpus
# NOTE(review): the two #counts lines above add up to 159s/9h, not 179s/9h --
# confirm which figure is correct before relying on the total.

header SARE_SUB_INCHES Subject =~ /(?:(?:\d.*|add?|enlarge|gain|in.?crease|lose|more|shed)(?:ed|s)?\b.{1,30}\binch(?:es)?\b|inches\W*added)/i
describe SARE_SUB_INCHES Subject contains likely spammer phrase or word
score SARE_SUB_INCHES 1.666
#counts SARE_SUB_INCHES 441s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_INCHES price of a "7 inch saw blade"

# The leading (?!looking for help) stops a match from starting at the literal
# phrase "looking for help".
header SARE_SUB_LOOKING_FOR Subject =~ /(?!looking for help)(?:(?:they\W*are|We(?:\W*a|')re|wives|y[o0\@]u(?:(?:\W*a|')[rv]e(?:\W*been)?))\W*l[o0\@][o0\@]king\W*f[o0\@]r|l[o0\@][o0\@]king\W*f[o0\@]r.{0,30}(?:career|c[o0\@]nsultants|cust[o0\@]mers|empl[o0\@]yees|entrepreneur|free\W*lunch|help|h[o0\@]nest|investment|loan|l[o0\@]ve|med(?:s|icati[o0\@]ns)|m[o0\@]rtgage|pe[o0\@]ple|perfect|sex|s[o0\@]me[o0\@]ne|work|y[o0\@]u))\b/i
describe SARE_SUB_LOOKING_FOR Subject contains likely spammer phrase or word
score SARE_SUB_LOOKING_FOR 0.825
#counts SARE_SUB_LOOKING_FOR 92s/1h of 113305 corpus (92399s/20906h) 04/22/04
#ham SARE_SUB_LOOKING_FOR "Looking for artist for CD artwork"

header SARE_SUB_MINUTES Subject =~ /\d.?minutes/i
describe SARE_SUB_MINUTES Subject contains likely spammer phrase or word
score SARE_SUB_MINUTES 0.787
#counts SARE_SUB_MINUTES 123s/0h of 113374 corpus (92402s/20972h) 04/18/04
#counts SARE_SUB_MINUTES 2s/2h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_MINUTES 125s/2h of 134581 corpus

header SARE_SUB_MONEY Subject =~ /(?:(?:save|make)[ -].{0,30}money[ -](?:in|on)|(?:free|grant|saving|with our|worth|(?:claim|keep) your) money|money machine)/i
describe SARE_SUB_MONEY Subject contains likely spammer phrase or word
score SARE_SUB_MONEY 0.772
#counts SARE_SUB_MONEY 78s/1h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_MONEY 102s/1h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_MONEY business email

# Obfuscated "online": (?!\bONLINE\b) rejects the plainly spelled word (in any
# case, because of /i). The rest matches six letter-lookalike groups
# (o, n, l, i, n, e), each optionally followed by one separator character from
# the shared punctuation/high-byte class.
header SARE_SUB_ONLINE_OB1 Subject =~ /(?!\bONLINE\b)(?:\b[o0]|\B(?:[\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
describe SARE_SUB_ONLINE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_ONLINE_OB1 1.666 # type=obfu
#stype SARE_SUB_ONLINE_OB1 obfu
#counts SARE_SUB_ONLINE_OB1 363s/7h of 113272 corpus (92366s/20906h) 04/23/04

header SARE_SUB_SEEN_ON Subject =~ /as\W*seen\W*on\W*(20\/20|abc|cbs|cnn|dateline|google|oprah|television|tv)/i
describe SARE_SUB_SEEN_ON Subject contains likely spammer phrase or word
score SARE_SUB_SEEN_ON 0.797
#counts SARE_SUB_SEEN_ON 87s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_SEEN_ON drugstore.com

# Obfuscated "tion": the four leading negative lookaheads reject the plain
# spellings "tion", "t-ion", "ti o n" (spaces optional) and "t lon" before the
# lookalike pattern is tried.
header SARE_SUB_TION_OB1 Subject =~ /(?!tion)(?!t-ion)(?!ti ?o ?n)(?!t lon)t[\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]/i
describe SARE_SUB_TION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_TION_OB1 1.666 # type=obfu
#stype SARE_SUB_TION_OB1 obfu
#counts SARE_SUB_TION_OB1 824s/1h of 113373 corpus (92402s/20971h) 04/18/04
# ham: t1-on (a t1 on something)

# Category: Technical spamsign
# The rules in this category carry no /i flag; their patterns are punctuation
# and \w based, apart from SARE_SUB_COMMA_LEAD which does use /i.
header SARE_SUB_BAD_PUNCT Subject =~ /\w[\;\)\~\"]\w/
describe SARE_SUB_BAD_PUNCT Subject contains parentheses or similar chars apparently embedded within a word
score SARE_SUB_BAD_PUNCT 0.943
#counts SARE_SUB_BAD_PUNCT 2167s/30h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_BAD_PUNCT avoid: _ -- used in URLs which can be valid in subjects
#ham SARE_SUB_BAD_PUNCT avoid: : -- FP with re:subject (no space)
#ham SARE_SUB_BAD_PUNCT avoid: ( -- often used for plural(s)

header SARE_SUB_COMMA Subject =~ /\w,\w/
describe SARE_SUB_COMMA Subject contains comma apparently embedded within a word
score SARE_SUB_COMMA 0.500
#stype SARE_SUB_COMMA max:0.5
#counts SARE_SUB_COMMA 1465s/32h of 113393 corpus (92421s/20972h) 04/18/04

# The lookahead sits before "^": at the start of the subject it rejects the
# listed salutation words (followed by ", ") before "^\w+, " is tried.
header SARE_SUB_COMMA_LEAD Subject =~ /(?!(greetings|help|please|reminder|Surplus), )^\w+, /i
describe SARE_SUB_COMMA_LEAD Subject contains one word, possibly name or salutation, then comma.
score SARE_SUB_COMMA_LEAD 0.923
#counts SARE_SUB_COMMA_LEAD 3182s/47h of 113374 corpus (92402s/20972h) 04/18/04

# (?! :[osvdp]\b) skips " :o", " :s", " :v", " :d" and " :p" (see the smilies
# note below).
header SARE_SUB_LEAD_PUNCT Subject =~ /(?! :[osvdp]\b) [\;\:\)\,]\w/
describe SARE_SUB_LEAD_PUNCT Subject contains word which begins with non-word character
score SARE_SUB_LEAD_PUNCT 1.036
#counts SARE_SUB_LEAD_PUNCT 433s/4h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_LEAD_PUNCT avoid "(" -- used for "(no subject)"
#ham SARE_SUB_LEAD_PUNCT avoid "." -- Used for things like "... .zip file"
#ham SARE_SUB_LEAD_PUNCT ham: " :RE: ", paypal response (2)
#ham SARE_SUB_LEAD_PUNCT avoid ":o" and other smilies

header SARE_SUB_LONG_SUBJ_140 Subject =~ /.{140,}/
describe SARE_SUB_LONG_SUBJ_140 Subject is excessively long -- more than 139 chars
score SARE_SUB_LONG_SUBJ_140 1.000 # type=max:1.0
#stype SARE_SUB_LONG_SUBJ_140 max:1.0
#counts SARE_SUB_LONG_SUBJ_140 302s/0h of 113393 corpus (92421s/20972h) 04/18/04
#counts SARE_SUB_LONG_SUBJ_140 17s/3h of 16618 corpus (14409s/2209h)
#total SARE_SUB_LONG_SUBJ_140 319s/3h of 130011 corpus

# EOF