# SARE "General Subject" Ruleset for SpamAssassin
# Version: 00.09.00
# Created: 04/24/2004
# Modified: 04/24/2004
# Changes: Final version before 4/25 release. Includes many rules later moved to other rulesets.
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj.cf
#
# Related rules files:
# 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham
# 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900
# 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails
# 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use
# Rules to be wary of:
#
# Financial and investment companies will want to lower some scores in the Business section.
# Credit, mortgage, and similar companies will want to lower some scores in the Credit section.
# Schools will want to lower some scores in the Education section.
# Insurance companies will want to lower some scores in the Insurance section.
# Marketing companies and services will want to lower some scores in the Marketing section.
# Medical professionals and companies will want to lower some scores in the Medical section.
# Real estate companies may want to lower some scores in the Real Estate section.
# Software companies may want to lower scores in the Software section # Category: Adult, porn header SARE_SUB_ABOUT_TODAY Subject =~ /about\W*today/i describe SARE_SUB_ABOUT_TODAY Subject contains spammer subject - adult or porn score SARE_SUB_ABOUT_TODAY 0.222 #counts SARE_SUB_ABOUT_TODAY 1s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_ABOUT_TODAY 4s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_ADULT_MOVIE Subject =~ /(?:[a\@]dult|free|h[a\@]rdc[o0]re.{0,30}|h[i|]lton.{0,30}|incest|n[o0]c[o0]st|sex|xxx).(?:dvd|feeds|pic|tv|m[o0]vie|video)/i describe SARE_SUB_ADULT_MOVIE Subject contains spammer subject - adult or porn score SARE_SUB_ADULT_MOVIE 1.666 # type=spamp #stype SARE_SUB_ADULT_MOVIE spamp #counts SARE_SUB_ADULT_MOVIE 180s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_BEDROOM_SEC Subject =~ /(?:(?:Bedroom|family|seduction|sex.{0,30}).Secret|satisfaction.{1,30}bedroom|secret.{1,10}(?:dating|fantas(?:y|ies)|sex))/i describe SARE_SUB_BEDROOM_SEC Subject contains spammer subject - adult or porn score SARE_SUB_BEDROOM_SEC 0.677 #counts SARE_SUB_BEDROOM_SEC 22s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_BOYS_OB1 Subject =~ /(?!\bboys?)\b[b8\xDF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[y\xA5\xDD\xFD][\W_]?[s5\$\xA7]?/i describe SARE_SUB_BOYS_OB1 Subject contains obfuscated spammer word score SARE_SUB_BOYS_OB1 1.666 # type=obfu #stype SARE_SUB_BOYS_OB1 OBFU #counts SARE_SUB_BOYS_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_GIRLS_OB1 Subject =~ /(?!\bgirls?)\b[g6][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[r\xAE][\W_]?[l1I\|\xA3][\W_]?[s5\$\xA7]?/i describe SARE_SUB_GIRLS_OB1 Subject contains obfuscated spammer word score SARE_SUB_GIRLS_OB1 2.500 # type=obfu #stype SARE_SUB_GIRLS_OB1 OBFU #counts SARE_SUB_GIRLS_OB1 61s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_HORNY_WOMEN Subject =~ /Horn(y|iest).{1,30}(?:alone|amateur|babe|swinger|wives|Women)/i describe 
SARE_SUB_HORNY_WOMEN Subject contains spammer subject - adult or porn score SARE_SUB_HORNY_WOMEN 1.111 # type=spamp #stype SARE_SUB_HORNY_WOMEN spamp #counts SARE_SUB_HORNY_WOMEN 26s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_LONELY Subject =~ /\bl[o0]nely\b/i describe SARE_SUB_LONELY Subject contains spammer subject - adult or porn score SARE_SUB_LONELY 0.683 #counts SARE_SUB_LONELY 23s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_PERVERT Subject =~ /pervert/i describe SARE_SUB_PERVERT Subject contains spammer subject - adult or porn score SARE_SUB_PERVERT 1.000 # type=spamp #stype SARE_SUB_PERVERT spamp #counts SARE_SUB_PERVERT 10s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PORN Subject =~ /(?:(?:big|black|dog(?:gy|gies)?fat|horse|massive|suck(?:ing)?|\d+\").?(?:bod(?:y|ies)|cock|dick)s?\b|violent.perv|\brap(?:e[sd]?|ing)\b)/i describe SARE_SUB_PORN Subject contains spammer subject - adult or porn score SARE_SUB_PORN 0.688 #counts SARE_SUB_PORN 24s/0h of 113373 corpus (92402s/20971h) 04/20/04 #hist SARE_SUB_PORN Apr 21 2003 added \b at end of first regex segment to avoid ham hits header SARE_SUB_SEX_OB3 Subject =~ /\bS_?eks\b/i describe SARE_SUB_SEX_OB3 Subject is intentionally misspelled word score SARE_SUB_SEX_OB3 2.500 # type=obfu #stype SARE_SUB_SEX_OB3 obfu #counts SARE_SUB_SEX_OB3 12s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_SEX_OB3 14s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_SINGLES Subject =~ /(?:(?:Christian|find(?:\W\w+)?|local|meet(?:\W\w+)?|millions.of|mingle.with|photos.of|real|view(?:\W\w+)?).singles|singles.(?:date|looking))/i describe SARE_SUB_SINGLES Subject contains spammer subject - adult or porn score SARE_SUB_SINGLES 0.877 #counts SARE_SUB_SINGLES 58s/0h of 111217 corpus (90485s/20732h) 04/17/04 header SARE_SUB_SOUL_MATE Subject =~ /soul\W*mate/i describe SARE_SUB_SOUL_MATE Subject contains spammer subject - adult or porn score SARE_SUB_SOUL_MATE 0.611 
#counts SARE_SUB_SOUL_MATE 10s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_XRATED Subject =~ /x-?rated/i describe SARE_SUB_XRATED Subject contains spammer subject - adult or porn score SARE_SUB_XRATED 0.444 #counts SARE_SUB_XRATED 8s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_XRATED 10s/0h of 91714 corpus (74113s/17601h) 01/23/04 # Category: Black market items, services, activities, scams, frauds header SARE_SUB_BANNED_CD Subject =~ /b.?a.?n.?n.?e.?d.?c.?d/i describe SARE_SUB_BANNED_CD Subject contains spammer subject - black market or scam score SARE_SUB_BANNED_CD 3.333 # type=spamggg #stype SARE_SUB_BANNED_CD spamggg #counts SARE_SUB_BANNED_CD 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_BANNED_CD 55s/0h of 63143 corpus header SARE_SUB_CARD_BILLED Subject =~ /(?:account|card).{1,30}(?:billed|charged)/i describe SARE_SUB_CARD_BILLED Subject contains spammer subject - black market or scam score SARE_SUB_CARD_BILLED 1.111 # type=spamp #stype SARE_SUB_CARD_BILLED spamp #counts SARE_SUB_CARD_BILLED 23s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_FREE_PPV Subject =~ /(?:(?:f.?r.?e.?e+|pay(?:ing)?.for(?:.your)?|unlimited).?(?:PPV|p[a\@]y.?per.?view)|(?:PPV|p[a\@]y.?per.?view).{0,30}free|ppv\'s)/i describe SARE_SUB_FREE_PPV Subject contains spammer subject - black market or scam score SARE_SUB_FREE_PPV 1.400 #counts SARE_SUB_FREE_PPV 152s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_KICKBACK Subject =~ /kick.{0,2}back/i describe SARE_SUB_KICKBACK Subject contains spammer subject - black market or scam score SARE_SUB_KICKBACK 0.166 #counts SARE_SUB_KICKBACK 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_KICKBACK 4s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_NAME_STAR Subject =~ /Name\W*A\W*Star/i describe SARE_SUB_NAME_STAR Subject contains spammer subject - black market or scam score SARE_SUB_NAME_STAR 1.111 # type=spamp #stype SARE_SUB_NAME_STAR spamp 
#counts SARE_SUB_NAME_STAR 12s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_TAXES Subject =~ /(?:T[a\@]x(?:es)?.{0,30}(?:assistance|havenn?s|dollars|foreclose|information|legally|money\!|paid.for|problems|quick.and.easy|refund|this\W*year|write\W?off)|(?:avoid|doo?n't.pay|eliminate.(?:back|delinquent)|overpaid.your|paid.(?:for.with.your|too?.much)).tax(?:es)?|cigarette.tax.saving)/i describe SARE_SUB_TAXES Subject mentions taxes score SARE_SUB_TAXES 0.772 #counts SARE_SUB_TAXES 39s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_WINNING_NOT Subject =~ /(?:(?:Final|WINNING)(?:.award)?\s*NOTIFICATION|^NOTIFICATION\s*$|(?:auction|lucky).winning|notification.of.(?:an.instant|bequest|intent|unclaimed|multi.?item|promotion|winning)|notification.{1,30}final.notice|contrat.{1,30}winning.{1,30}promotion)/i describe SARE_SUB_WINNING_NOT Subject contains spammer subject - black market or scam score SARE_SUB_WINNING_NOT 1.444 #counts SARE_SUB_WINNING_NOT 106s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Business header SARE_SUB_ADVERTISER_DB Subject =~ /(?:(?:Advertisers|comprehensive|webmaster)\W*Database|(?:emails|Database)\W*of.{0,20}(?:Advertisers|Business|bidders|ebay)|email\W*database)/i describe SARE_SUB_ADVERTISER_DB Subject contains spammer subject - business score SARE_SUB_ADVERTISER_DB 0.555 # type=spamp #stype SARE_SUB_ADVERTISER_DB spamp #counts SARE_SUB_ADVERTISER_DB 9s/0h of 111253 corpus (90483s/20770h) 04/15/04 #hist SARE_SUB_ADVERTISER_DB 04/01/04 -- added "webmaster database" option #hist SARE_SUB_ADVERTISER_DB 03/28/04 -- added "email database" option #hist SARE_SUB_ADVERTISER_DB 02/17/04 -- added ebay #hist SARE_SUB_ADVERTISER_DB 08/23/03 -- Created header SARE_SUB_ANIM_LOGO Subject =~ /(?:(?:Animated|unique|impressive|custom|flash|personal(:?ized)?)\W*Logo|Logo\W*(?:Animation|Creation))/i describe SARE_SUB_ANIM_LOGO Subject contains spammer subject - business score SARE_SUB_ANIM_LOGO 1.111 # type=spamp #stype 
SARE_SUB_ANIM_LOGO spamp #counts SARE_SUB_ANIM_LOGO 47s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_BUS_PROMOTE Subject =~ /(?:Business|daily|lottery|special|website|WhereCanIBet).Promotion/i describe SARE_SUB_BUS_PROMOTE Subject contains spammer subject - business score SARE_SUB_BUS_PROMOTE 0.788 #counts SARE_SUB_BUS_PROMOTE 42s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_GET_PAID Subject =~ /get\W*p[a\@d]id/i describe SARE_SUB_GET_PAID Subject contains spammer subject - business score SARE_SUB_GET_PAID 1.022 #stype SARE_SUB_GET_PAID spam #counts SARE_SUB_GET_PAID 84s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_GET_PAID 104s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_INCOME2 Subject =~ /(?:(?:2nd|double|earn|excellent|expand|extra|(?:full|part).time|great|guaranteed|high(?:er)?|home|huge|increase|incredible|insure|monthly|\bnet|\bnew|online|residual|(?:six|6).figure|streams.of|substantial)(?:.your).?.?income|income.(?:opportunity|tip|to.you))/i describe SARE_SUB_INCOME2 Subject contains spammer subject - business score SARE_SUB_INCOME2 0.638 #counts SARE_SUB_INCOME2 15s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_INCOME_OB1 Subject =~ /(?!\bincome\b)\bi.?n.?c.?o.?m.?e\b/i describe SARE_SUB_INCOME_OB1 Subject contains obfuscated spammer word score SARE_SUB_INCOME_OB1 1.666 # type=obfu #stype SARE_SUB_INCOME_OB1 obfu #counts SARE_SUB_INCOME_OB1 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_INCOME_OB1 11s/0h of 91714 corpus (74113s/17601h) 01/23/04 header SARE_SUB_INCOME_OB2 Subject =~ 
/(?!\bincome\b)(?:\b[il1]|\B(?:[:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i describe SARE_SUB_INCOME_OB2 Subject contains obfuscated spammer word score SARE_SUB_INCOME_OB2 1.666 # type=obfu #stype SARE_SUB_INCOME_OB2 obfu #counts SARE_SUB_INCOME_OB2 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_INCOME_OB2 19s/0h of 91714 corpus (74113s/17601h) 01/23/04 header SARE_SUB_KS_CLIENTS Subject =~ /(?:Thousands\W*of|reach\W*your)\W*Clients/i describe SARE_SUB_KS_CLIENTS Subject contains spammer subject - business score SARE_SUB_KS_CLIENTS 0.388 #counts SARE_SUB_KS_CLIENTS 7s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MILLIONS Subject =~ /(?!million 
dollars)(?:million.{0,30}(?:dollars|e.?mail)|million.?s?.(?:Americans|bottles|dollar.(?:chance|jackpot)|kids|of.(?:customers|singles)|on.the.line|(?:in.)?(?:assets|revenues|sales)|ladies|peoples|regain|times|to.be.made|waiting.for.you)|win.{0,30}million|millionaire.mak(?:er|ing)|(?:make|your).million)/i describe SARE_SUB_MILLIONS Subject contains spammer subject - business score SARE_SUB_MILLIONS 1.066 #counts SARE_SUB_MILLIONS 92s/0h of 113393 corpus (92421s/20972h) 04/18/04 #note SARE_SUB_MILLIONS "million dollars" appears too frequently in normal emails (esp political) header SARE_SUB_PATENTS Subject =~ /(?:Patents\W*Filed|patent\W*it|our\W*new\W*patent)/i describe SARE_SUB_PATENTS Subject contains spammer subject - business score SARE_SUB_PATENTS 0.750 #counts SARE_SUB_PATENTS 35s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_PRICES_NOLAST Subject =~ /prices\W*(?:won\'t|will\W*not)\W*last/i describe SARE_SUB_PRICES_NOLAST Subject contains spammer subject - business score SARE_SUB_PRICES_NOLAST 0.500 #counts SARE_SUB_PRICES_NOLAST 9s/0h of 111253 corpus (90483s/20770h) 04/15/04 #hist SARE_SUB_PRICES_NOLAST Created by Bob Menschel Apr 04 2004 header SARE_SUB_SALARY_ADV Subject =~ /(?:Salary\W*(?:Advancement|increase)|(?:(?:increased?|advance)(?:\W*your)?|better|higher|earns?\W*more)\W*salary)/i describe SARE_SUB_SALARY_ADV Subject contains spammer subject - business score SARE_SUB_SALARY_ADV 0.655 #counts SARE_SUB_SALARY_ADV 18s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_SALARY_ADV 03/28/04 -- expanded to catch more variations from very simple rule # Category: Credit, debt, lending, mortgage, borrowing, investment, financing header SARE_SUB_ACCEPT_CCARDS Subject =~ /(?!processing credit card)(?:(?:Accept(?:ing)?|Process.{0,20})\W*credit\W*c[aâ\@]rds?|credit\W*card\W*(chargebacks?|terminals?|vendor))/i describe SARE_SUB_ACCEPT_CCARDS Subject contains spammer subject - credit or money score SARE_SUB_ACCEPT_CCARDS 0.661 #counts 
SARE_SUB_ACCEPT_CCARDS 19s/0h of 111251 corpus (90481s/20770h) 04/15/04 header SARE_SUB_DEBT Subject =~ /\bdebt\b/i describe SARE_SUB_DEBT Subject contains spammer subject - credit or money score SARE_SUB_DEBT 1.666 #counts SARE_SUB_DEBT 368s/0h of 113393 corpus (92421s/20972h) 04/18/04 #max SARE_SUB_DEBT 609s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_DEBT_OB1 Subject =~ /(?!\bdebt\b)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_DEBT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_DEBT_OB1 2.500 # type=obfu #stype SARE_SUB_DEBT_OB1 obfu #counts SARE_SUB_DEBT_OB1 23s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_DEBT_OB2 Subject =~ /(?!deb[iu]?t)\bd.?e.?b.?t\b/i describe SARE_SUB_DEBT_OB2 Subject contains obfuscated spammer topic score SARE_SUB_DEBT_OB2 2.500 # type=obfu #stype SARE_SUB_DEBT_0B2 obfu #counts SARE_SUB_DEBT_OB2 21s/0h of 113373 corpus (92402s/20971h) 04/20/04 #hist SARE_SUB_DEBT_OB2 Apr 21 2004 - exclude debut as well as debit header SARE_SUB_GRANT Subject =~ /(?:(?:cash|collect\W*your|dollar|free(?:dom)?|get\W*a|government|gov't|qualify\W*for\W*a|taxes\W*paid\W*for\W*these)\W*grants?|grant\W*money\W*for\W*you|grants.{1,30}paid\W*for\W*with\W*your\W*taxes)/i describe SARE_SUB_GRANT Subject contains spammer subject - credit or money score SARE_SUB_GRANT 1.011 #stype SARE_SUB_GRANT spam #counts SARE_SUB_GRANT 82s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_INVESTMENTS Subject =~ 
/(?:(?:invest(?:ing|ments?|or)|promotion|stock\W*market).(?:alert|assistance|bulletin|data|forecast|funds|insight|knowledge|like|member|news|opp|option|profile|program|proposal|rewards|surprise|update|workshop)|(?:\$\d+.{0,10}|better.{0,30}|business|easy|fund.{0,30}|joint|make\W*an|proven|real\W*estate|secrets?.{0,30}|secured|smart|stock|time\W*to|your|zero)\W*invest(?:ing|ments?)|help.{1,10}invest)/i describe SARE_SUB_INVESTMENTS Subject contains spammer subject - credit or money score SARE_SUB_INVESTMENTS 1.666 #counts SARE_SUB_INVESTMENTS 351s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_INVESTORS Subject =~ /investors/i describe SARE_SUB_INVESTORS Subject contains spammer subject - credit or money score SARE_SUB_INVESTORS 1.211 #counts SARE_SUB_INVESTORS 118s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MORTGAGE_OB1 Subject =~ /(?!mortgage)(?:\bm|\B(?:rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|
(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i describe SARE_SUB_MORTGAGE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MORTGAGE_OB1 1.666 # type=obfu #stype SARE_SUB_MORTGAGE_OB1 obfu #counts SARE_SUB_MORTGAGE_OB1 362s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_MORTGAGE_OB2 Subject =~ /(?!mortgage)\bm.o.r.t.g.a.g.e\b/i describe SARE_SUB_MORTGAGE_OB2 Subject contains obfuscated spammer topic score SARE_SUB_MORTGAGE_OB2 1.666 # type=obfu #stye SARE_SUB_MORTGAGE_OB2 obfu #counts SARE_SUB_MORTGAGE_OB2 2s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_NEW_CREDIT Subject =~ /(?:(?:all|any)\W*(?:credit.(?:accepted|.{0,30}loan)|loan.{1,30}credit)|\b(?:easy|EZ)\W*(credit|home\W*loan|mortgage)|(?:best|get.{0,30}|right)\W*creditvcard|get\W*cash\W*out|(?:home|m.?[o0].?r.?t.?g.?[a\@].?g.?e)\W*loan.{1,30}credit|lines?\W*of\W*credit|(?:new|your.{0,30})\W*credit\W*line)/i describe SARE_SUB_NEW_CREDIT Subject contains spammer subject - credit or money score SARE_SUB_NEW_CREDIT 1.338 #counts SARE_SUB_NEW_CREDIT 141s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_POOR_CREDIT Subject =~ /(?!credit card (?:bill|declined))(?:(?:bad|poor|less\W*than\W*perfect|fix\W*your)\W*cr[eé]d[iï]t|cr[eé]d[iï]t.{1,20}declined|declined.{1,20}cr[eé]d[iï]t|cr[eé]d[iï]t\W*(?:bad|can\W*be\W*fix|card\W*(?:balances?|bills?|debt|elimination)|Counseling|profiles?|rating)|no\W*cr[eé]d[iï]t.check)/i describe SARE_SUB_POOR_CREDIT Subject contains spammer subject - credit or money score SARE_SUB_POOR_CREDIT 1.666 #counts SARE_SUB_POOR_CREDIT 358s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_REFINANCE Subject =~ /refinance/i describe SARE_SUB_REFINANCE Subject contains spammer subject - credit or money score SARE_SUB_REFINANCE 1.666 #counts SARE_SUB_REFINANCE 562s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_VISA_CARD Subject =~ 
/Visa\W*(?:card\W*easy|approve\W*all)/i describe SARE_SUB_VISA_CARD Subject contains spammer subject - credit or money score SARE_SUB_VISA_CARD 0.222 #counts SARE_SUB_VISA_CARD 4s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_VISA_CARD Created by Bob Menschel Mar 30 2004 # Cateogry: Education, Education-related scams header SARE_SUB_COLLEGE_OB1 Subject =~ /(?!\bcollege\b)\b[c\xC7\xE7\xA2\xA9][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[l1I\|\xA3][\W_]?[l1I\|\xA3][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[g6][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i describe SARE_SUB_COLLEGE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_COLLEGE_OB1 1.666 # type=obfu #stype SARE_SUB_COLLEGE_OB1 obfu #counts SARE_SUB_COLLEGE_OB1 5s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_DIPLOMA Subject =~ /(?:(?:Bachelor'?s?|buy\W*a|cheap|degree.{1,10}|don't\W*have\W*a|earn.{1,30}with|\bg[e3]t.{1,30}|give\W*you\W*a|got\W*no|great|help\W*you.{1,30}|if\W*you\W*had\W*a|instant|life\W*experience{1,30}|need\W*a|real|rec(?:ei|ie)ve\W*a|wait\W*no\W*longer{1,30}|we\W*promise.{1,30}|\b(?:yo)?ur\W*(?:college|own))\W*Diploma|diplomas?\W*(?:can\W*bevyours|for\W*sale)|(?:college|university)\W*diplomas)/i describe SARE_SUB_DIPLOMA Subject contains spammer subject - education score SARE_SUB_DIPLOMA 1.666 #counts SARE_SUB_DIPLOMA 252s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_DOCTORATE Subject =~ /Doctorate/i describe SARE_SUB_DOCTORATE Subject contains spammer subject - education score SARE_SUB_DOCTORATE 0.638 #counts SARE_SUB_DOCTORATE 15s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_MBA Subject =~ /\bMBA\b/i describe SARE_SUB_MBA Subject contains spammer subject - education score SARE_SUB_MBA 0.744 #counts SARE_SUB_MBA 34s/0h of 113374 corpus (92402s/20972h) 04/18/04 #max SARE_SUB_MBA 38s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_NO_CLASSES Subject =~ /(?:No\W*Classes\W*(?:Necessary|needed)|.{0,5}No\W*Books)/i describe 
SARE_SUB_NO_CLASSES Subject contains spammer subject - education score SARE_SUB_NO_CLASSES 1.205 #counts SARE_SUB_NO_CLASSES 117s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Gambling, Lotto, Sweepstakes, Winnings, Losses header SARE_SUB_CASINO_OB1 Subject =~ /(?!\bcasino)(?:\bc|\B(?:[\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)/i describe SARE_SUB_CASINO_OB1 Subject contains obfuscated spammer topic score SARE_SUB_CASINO_OB1 2.500 # type=obfu #stype SARE_SUB_CASINO_OB1 obfu #counts SARE_SUB_CASINO_OB1 12s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_CASINO_OB1 22s/0h of 91714 corpus (74113s/17601h) 01/23/04 header SARE_SUB_CASINO_OB2 Subject =~ /(?!\bcasino)\bc.?a.?s.?i.?n.?o/i describe SARE_SUB_CASINO_OB2 Subject contains obfuscated spammer topic score SARE_SUB_CASINO_OB2 1.666 # type=obfu #stype SARE_SUB_CASINO_OB2 obfu #counts SARE_SUB_CASINO_OB2 
3s/0h of 111253 corpus (90483s/20770h) 04/15/04 # Category: Insurance header SARE_SUB_INSURANCE Subject =~ /(?:(?:aff[o0]rdable|cheap(?:est)?|free|good\W*news|l[o0]w\W*c[o0]st|(?:over)?pay(?:ing)?\W*t[o0][o0]\W*much|reduce|save|sell).{1,30}insurance|insurance.{1,30}(?:available|everyone|f[o0]r\W*less|leads|[o0]ffers|[o0]pti[o0]ns?|qu[o0]tes?)|(?:FYI:?|new|special|sub|update(?:\W*sub)?)\W*construction\W*insurance|new\W*insurnace\W*product)/i describe SARE_SUB_INSURANCE Subject contains spammer subject - insurance score SARE_SUB_INSURANCE 1.666 #counts SARE_SUB_INSURANCE 505s/0h of 113374 corpus (92402s/20972h) 04/18/04 #note SARE_SUB_INSURANCE "insurance coverage" hits too much ham #note SARE_SUB_INSURANCE "term life" covered by SARE_SUB_TERM_LIFE header SARE_SUB_CAR_INSURANCE Subject =~ /(?:car|auto(?:mobile)?) insurance/i describe SARE_SUB_CAR_INSURANCE Subject contains spammer subject - insurance score SARE_SUB_CAR_INSURANCE 0.672 #counts SARE_SUB_CAR_INSURANCE 21s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_LT_CARE Subject =~ /Long Term C[a\@]re/i describe SARE_SUB_LT_CARE Subject contains spammer subject - insurance score SARE_SUB_LT_CARE 0.661 #stype SARE_SUB_LT_CARE spam #counts SARE_SUB_LT_CARE 19s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_REPAIR_BILLS Subject =~ /(?:large\W*repair\W*bills|(?:(?:costly|major)\W*auto|m[o0]ney\W*for|pay(?:ing)?\W*for|save\b.{1,30}\bon)\W*repairs?)/i describe SARE_SUB_REPAIR_BILLS Subject contains spammer subject - insurance score SARE_SUB_REPAIR_BILLS 0.877 #counts SARE_SUB_REPAIR_BILLS 58s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_REPAIR_BILLS Created by Bob Menschel Mar 22 2004 header SARE_SUB_PROTECT_FAM Subject =~ /(?:Protect\W*your\W*famil(?:y|ies)|protect(?:ion)?(?:\W*for)?\W*your\W*(?:vehicle|car)|secure\W*your\W*future|protect.{1,10}from.{1,10}repair\W*bills?|extended\W*warranty\W*protection)/i describe SARE_SUB_PROTECT_FAM Subject contains spammer subject - 
insurance score SARE_SUB_PROTECT_FAM 1.072 #counts SARE_SUB_PROTECT_FAM 93s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_ROADSIDE_AID Subject =~ /(?:Roadside\W*Assistance\W*24\/7|24\W*hour\W*roadside\W*assistance)/i describe SARE_SUB_ROADSIDE_AID Subject contains spammer subject - insurance score SARE_SUB_ROADSIDE_AID 0.627 #counts SARE_SUB_ROADSIDE_AID 13s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_ROADSIDE_AID Created by Bob Menschel Mar 22 2004 header SARE_SUB_TERM_LIFE Subject =~ /Term\W*Life/i describe SARE_SUB_TERM_LIFE Subject contains spammer subject - insurance score SARE_SUB_TERM_LIFE 1.666 #counts SARE_SUB_TERM_LIFE 214s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Marketing, Pricing, Selling, Buying header SARE_SUB_AFFORDABLE Subject =~ /\baffordable\b/i describe SARE_SUB_AFFORDABLE Subject contains spammer subject - marketing score SARE_SUB_AFFORDABLE 1.666 #counts SARE_SUB_AFFORDABLE 261s/0h of 113393 corpus (92421s/20972h) 04/18/04 #max SARE_SUB_AFFORDABLE 305s/0h of 125093 corpus (104905s/20188h) 03/28/04 #hist SARE_SUB_AFFORDABLE 03/28/04 -- generalized to one-word rule from low-scoring "made affordable" rule header SARE_SUB_DISCOUNT_OB1 Subject =~ 
/(?!discount)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_DISCOUNT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_DISCOUNT_OB1 1.666 # type=obfu #stype SARE_SUB_DISCOUNT_OB1 obfu #counts SARE_SUB_DISCOUNT_OB1 200s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_FREE_OB1 Subject =~ 
/(?!free)(?:\bf|\B(?:\xC5\xBF|\xC6\x92|\xD2[\x92-\x93]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i describe SARE_SUB_FREE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_FREE_OB1 3.333 # type=obfu #stype SARE_SUB_FREE_OB1 obfu #counts SARE_SUB_FREE_OB1 334s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_FREE_OB2 Subject =~ /(?!free(?:ze)?)\bf.?r.?e.?e\b/i describe SARE_SUB_FREE_OB2 Subject includes word suggesting spammer score SARE_SUB_FREE_OB2 3.333 # type=obfu #counts SARE_SUB_FREE_OB2 281s/0h of 113373 corpus (92402s/20971h) 04/20/04 #hist SARE_SUB_FREE_OB2 Apr 21 2004 - Added exclusion for freeze header SARE_SUB_TOO_HIGH Subject =~ /(?:(?:all\W*time|too)\W*high|high\W*(costs?|payments?))/i describe SARE_SUB_TOO_HIGH Subject contains spammer subject - marketing score SARE_SUB_TOO_HIGH 1.044 #counts SARE_SUB_TOO_HIGH 88s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MORE_TRAFFIC Subject =~ /(?:(?:more|engine|targeted|web)\W*traffic|traffic\W*(?:online|partner|volume))/ describe SARE_SUB_MORE_TRAFFIC Subject contains spammer subject - marketing score SARE_SUB_MORE_TRAFFIC 0.655 #counts SARE_SUB_MORE_TRAFFIC 18s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_MORE_TRAFFIC Created by Bob Menschel Mar 25 2004 header SARE_SUB_SAVE_OB1 Subject 
=~/(?!\bsave\b)(?:\b[s5]|\B(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[vu]|\\\/|\xCE\xBD])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i describe SARE_SUB_SAVE_OB1 Subject contains spammer subject - marketing score SARE_SUB_SAVE_OB1 3.333 # type=obfu #stype SARE_SUB_SAVE_OB1 obfu #counts SARE_SUB_SAVE_OB1 121s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_SAVE_OB2 Subject =~/(?!\bs[hlt]?ave\b)\bs.?a.?v.?e\b/i describe SARE_SUB_SAVE_OB2 Subject contains spammer subject - marketing score SARE_SUB_SAVE_OB2 3.333 # type=obfu - 57s/0h of 91714 corpus (74113s/17601h) 01/24/04 #counts SARE_SUB_SAVE_OB2 105s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_SPENDING2MUCH Subject =~ /(?:Spending\W*(?:TOO|so)\W*MUCH|(?:control\W*your|no\W*limit\W*on)\W*spending)/i describe SARE_SUB_SPENDING2MUCH Subject contains spammer subject - marketing score SARE_SUB_SPENDING2MUCH 0.683 #counts SARE_SUB_SPENDING2MUCH 23s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_WORTH_CASH Subject =~ /(?!take a look)\b(?:Worth|Win|take|extra|earn|dollars|Short|need|claim|free|get|opinions?|surveys?)\b.{0,30}(?:fast)?(?:C[a\@]sh|M[0o]ney|a\W*(?:look|raise)|of\W*sports?\W*tickets|the\W*price)\b/i describe SARE_SUB_WORTH_CASH Subject contains spammer subject - marketing score SARE_SUB_WORTH_CASH 1.666 #counts SARE_SUB_WORTH_CASH 546s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_YOUR_AFFILIATE Subject =~ /(?:your 
affiliate|affiliate\W*(?:program|software)|affiliates\W*make\W*\$)/i
describe SARE_SUB_YOUR_AFFILIATE Subject contains spammer subject - marketing
score SARE_SUB_YOUR_AFFILIATE 0.916
#counts SARE_SUB_YOUR_AFFILIATE 65s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Category: Medical

# The whole word "aging", any case.
header SARE_SUB_AGING Subject =~ /\bAging\b/i
describe SARE_SUB_AGING Subject contains spammer subject - medical
score SARE_SUB_AGING 1.516
#counts SARE_SUB_AGING 173s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Look-alike spellings of "aging" with an optional separator byte between
# letters. The first "g" group formerly read \xC4[\x9C-\xA3]] with a stray "]",
# so the two-byte forms only matched when a literal "]" followed them.
header SARE_SUB_AGING_OB1 Subject =~ /(?!\bAging\b)(?:\b[a4]|\B(?:[\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]\b|(?:\xC4[\x9C-\xA3])\B)/i
describe SARE_SUB_AGING_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_AGING_OB1 1.666 # type=obfu
#stype SARE_SUB_AGING_OB1 obfu
#counts SARE_SUB_AGING_OB1 1s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "aging" with at most one arbitrary character between letters; the plain word
# is rejected by the lookahead.
header SARE_SUB_AGING_OB2 Subject =~ /(?!\bAging\b)\bA.?g.?i.?n.?g\b/i
describe SARE_SUB_AGING_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_AGING_OB2 2.500 # type=obfu
#stype SARE_SUB_AGING_OB2 obfu
#counts SARE_SUB_AGING_OB2 37s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_AM_MED_DICT Subject =~ /American Medical Directory/i
describe SARE_SUB_AM_MED_DICT 
Subject contains spammer subject - medical score SARE_SUB_AM_MED_DICT 0.905 #counts SARE_SUB_AM_MED_DICT 63s/0h of 113373 corpus (92402s/20971h) 04/18/04 header SARE_SUB_BUY_MEDS subject =~ /(?:b[uv]y|p.?[uv].?r.?c.?h.?[a\@].?s.?e|get)\W*(?:[a\@]ll\W*)(?:y[o0\@][uv]r\W*)?(?:c.?h.?e.?[a\@].?p\W*)?(?:[a\@].?[l|].?p.?r.?[a\@].?z.?[o0\@].?[l|]|B.?[o0\@].?n.?t.?r.?i.?[l|]|c.?i.?[a\@].?[l|].?i.?s|C.?[o0\@].?d.?e.?i.?n.?e|D.?i.?d.?r.?e.?x|d.?i.?e.?t|F.?[l|].?e.?x.?e.?r.?i.?[l|]|g.?e.?n.?e.?r.?i.?c|h.?g.?h|H.?y.?d.?r.?[o0\@].?c.?[o0\@].?d.?[o0\@].?n.?e|[l|].?e.?v.?i.?t.?r.?[a\@]|m.?e.?d.?(?:i.?c.?[a\@].?t.?i.?[o0\@].?n.?)?s|M.?[uv].?s.?c.?[l|].?e.?R.?e.?[l|].?[a\@].?x.?[a\@].?n.?t.?s?|p.?[a\@].?i.?n|P.?[a\@].?x.?i.?[l|]|P.?h.?e.?n.?t.?e.?r.?m.?i.?n.?e|P.?r.?e.?s.?c.?r.?i.?p.?t.?i.?[o0\@].?n.?s?|P.?r.?[o0\@].?z.?[a\@].?c|S.?i.?[l|].?d.?e.?n.?[a\@].?f.?i.?[l|]|S.?k.?e.?[l|].?[a\@].?x.?i.?n|s.?[l|].?e.?e.?p.?i.?n.?g|s.?[o0\@].?m.?[a\@]|T.?r.?[a\@].?m.?[a\@].?d.?[o0\@].?[l|]|v.?[a\@].?[l|].?i.?[uv].?m|v.?i.?[a\@].?g.?r.?[a\@]|V.?i.?c.?[o0\@].?d.?i.?n|V.?i.?[o0\@].?x.?x|x.?[a\@].?n.?[a\@].?x|Z.?[o0\@].?[l|].?[o0\@].?f.?t)\b/i describe SARE_SUB_BUY_MEDS Subject contains spammer subject - medical score SARE_SUB_BUY_MEDS 1.261 #counts SARE_SUB_BUY_MEDS 127s/0h of 113267 corpus (92361s/20906h) 04/24/04 #hist SARE_SUB_BUY_MEDS Created by Bob Menschel April 24 2004 header SARE_SUB_COLLAGEN Subject =~ /Collagen/i describe SARE_SUB_COLLAGEN Subject contains spammer subject - medical score SARE_SUB_COLLAGEN 0.672 #counts SARE_SUB_COLLAGEN 21s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_CONSULTATION Subject =~ /\bconsultations?\b/i describe SARE_SUB_CONSULTATION Subject contains spammer subject - medical score SARE_SUB_CONSULTATION 0.705 #counts SARE_SUB_CONSULTATION 27s/0h of 113272 corpus (92366s/20906h) 04/23/04 header SARE_SUB_CONSULTN_OB1 Subject =~ 
/(?!consultations?)(?:\bc|\B(?:[\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-
\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
describe SARE_SUB_CONSULTN_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CONSULTN_OB1 1.666 # type=obfu
#stype SARE_SUB_CONSULTN_OB1 obfu
#counts SARE_SUB_CONSULTN_OB1 9s/0h of 113272 corpus (92366s/20906h) 04/23/04

# "improve" followed within 30 characters by one of the listed targets.
header SARE_SUB_IMPROVE Subject =~ /improve.{1,30}(?:cell\W*phone|cholesterol|credit|desire|hair|health|home|kisser|love\W*life|memory|performance|possibilities|self\W*image|sex(?:\W*life|ual\W*(?:endurance|health))|signal|sleep|stamina|stock\W*market|vision)/i
describe SARE_SUB_IMPROVE Subject contains spammer subject - medical
score SARE_SUB_IMPROVE 1.450
#counts SARE_SUB_IMPROVE 161s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Pharmacy come-ons; a match starting at "Pharmacy selection" is excluded (see
# the #ham note). The "e-" alternative was written "\e-", which in a Perl regex
# is the ESC control character, so "e-pharmacy" never matched.
# NOTE(review): as parenthesised, cheap/cyber/.../Internet only match after
# "by" or "from" - confirm that is intended.
header SARE_SUB_INET_PHARM Subject =~ /(?!Pharmacy selection)(?:(?:American|best|(?:by|from)\W*(?:a\W*_?US|cheap|cyber|discreet|e-|FDA|free|generic|genuine|Internet|low\W*cost|new|off\W*shore|on\W*line(?:.{1,5}USA)?|overnight|perfect|smart|super|US\W*doctors\W*US)|(?:discreet|no\W*doctor).{1,30})\W*Pharmacy|Pharmacy.{1,30}(?:deals|sale|prices?|related\W*drugs|selection|verification)|your\W*pharmacy\W*order)/i
describe SARE_SUB_INET_PHARM Common spammer subject header -- Medical
score SARE_SUB_INET_PHARM 1.666
#counts SARE_SUB_INET_PHARM 340s/0h of 113373 corpus (92402s/20971h) 04/18/04
#hist SARE_SUB_INET_PHARM Created by Bob Menschel Apr 09 2004
#ham SARE_SUB_INET_PHARM "Pharmacy selection" in email discussing employee's health benefits

header SARE_SUB_MALE_MUSCLE Subject =~ /Male muscle/i
describe SARE_SUB_MALE_MUSCLE Subject contains spammer subject - medical
score SARE_SUB_MALE_MUSCLE 0.222
#stype SARE_SUB_MALE_MUSCLE 
spam
#counts SARE_SUB_MALE_MUSCLE 4s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Look-alike spellings of "medication" with an optional separator byte between
# letters; the plain spelling is rejected by the lookahead. The "m" group
# formerly ended \xD0\xBC] with a stray "]", so that form needed a literal "]".
header SARE_SUB_MEDICAT_OB1 Subject =~ /(?!medication)(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|
\xD5\xB2|\xD5\xB8)/i describe SARE_SUB_MEDICAT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MEDICAT_OB1 3.333 # type=obfu #stype SARE_SUB_MEDICAT_OB1 obfu #counts SARE_SUB_MEDICAT_OB1 172s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDICAT_OB2 Subject =~ /(?!medication)m.?e.?d.?i.?c.?a.?t.?i.?o.?n/i describe SARE_SUB_MEDICAT_OB2 Subject contains obfuscated spammer topic score SARE_SUB_MEDICAT_OB2 3.333 # type=obfu #stype SARE_SUB_MEDICAT_OB2 obfu #counts SARE_SUB_MEDICAT_OB2 117s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDICAL_NEWS Subject =~ /(?:medical\W*(?:announcement|breakthrough|discover|info|innovation|marvel|miracle|news|post|update)|(?:news|notice).{1,3}medical)/i describe SARE_SUB_MEDICAL_NEWS Subject contains spammer subject - medical score SARE_SUB_MEDICAL_NEWS 1.016 #counts SARE_SUB_MEDICAL_NEWS 83s/0h of 113374 corpus (92402s/20972h) 04/18/04 #hist SARE_SUB_MEDICAL_NEWS Created by Bob Menschel Apr 05 2004 header SARE_SUB_MEDS_OB1 Subject =~ /(?!\bmeds\b)(?:\bm|\B(?:rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5]\b|(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)\B)/i describe SARE_SUB_MEDS_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MEDS_OB1 3.333 # type=obfu #stype SARE_SUB_MEDS_OB1 obfu #counts SARE_SUB_MEDS_OB1 244s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDS_OB2 Subject =~ /(?!\bmeds\b)\bm.?e.?d.?s\b/i describe SARE_SUB_MEDS_OB2 Subject contains obfuscated spammer topic score SARE_SUB_MEDS_OB2 2.500 # type=obfu #stype SARE_SUB_MEDS_OB2 obfu #counts SARE_SUB_MEDS_OB2 
71s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "Mens Health" with an optional apostrophe on either side of the "s".
header SARE_SUB_MENS_HEALTH Subject =~ /Men'?s'? Health/i
describe SARE_SUB_MENS_HEALTH Subject contains spammer subject - medical
score SARE_SUB_MENS_HEALTH 0.500
#counts SARE_SUB_MENS_HEALTH 9s/0h of 111253 corpus (90483s/20770h) 04/15/04

# "online" and "drugs" in either order, 1-30 characters apart, each tolerating
# one arbitrary character between letters.
header SARE_SUB_ONLINE_DRUGS Subject =~ /(?:[o0].?n.?l.?i.?n.?e.{1,30}d.?r.?u.?g.?s|d.?r.?u.?g.?s.{1,30}[o0].?n.?l.?i.?n.?e)/i
describe SARE_SUB_ONLINE_DRUGS Subject contains spammer subject - medical
score SARE_SUB_ONLINE_DRUGS 1.666
#counts SARE_SUB_ONLINE_DRUGS 268s/0h of 113374 corpus (92402s/20972h) 04/18/04
#hist SARE_SUB_ONLINE_DRUGS Created by Bob Menschel Apr 07 2004

# The whole word "physician" or "physicians".
header SARE_SUB_PHYSICIAN Subject =~ /\bphysicians?\b/i
describe SARE_SUB_PHYSICIAN Subject contains spammer subject - medical
score SARE_SUB_PHYSICIAN 0.955
#counts SARE_SUB_PHYSICIAN 72s/0h of 113272 corpus (92366s/20906h) 04/23/04
#hist SARE_SUB_PHYSICIAN 04/23/2004 - Added to testing

# complimentary / day / free / trial followed by "sample(s)", with a/@ and l/|
# substitutions accepted.
header SARE_SUB_SAMPLES subject =~ /\b(?:c[o0]mp[l|]iment[a\@]ry|d[a\@]y|free|tri[a\@][l|])\W*s[a\@]mp[l|]es?\b/i
describe SARE_SUB_SAMPLES Subject contains spammer subject - medical
score SARE_SUB_SAMPLES 0.744
#counts SARE_SUB_SAMPLES 34s/0h of 113267 corpus (92361s/20906h) 04/24/04
#hist SARE_SUB_SAMPLES Created by Bob Menschel April 24 2004

# "stretch mark", any non-word characters between the two words.
header SARE_SUB_STRETCH_MARK Subject =~ /stretch\W*mark/i
describe SARE_SUB_STRETCH_MARK Subject contains spammer subject - medical
score SARE_SUB_STRETCH_MARK 0.650
#counts SARE_SUB_STRETCH_MARK 17s/0h of 113374 corpus (92402s/20972h) 04/18/04

# "valium" anywhere in the subject.
header SARE_SUB_VALIUM Subject =~ /Valium/i
describe SARE_SUB_VALIUM Subject contains spammer subject - medical
score SARE_SUB_VALIUM 2.222
#counts SARE_SUB_VALIUM 1099s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_WEIGHT_OB1 Subject =~ 
/(?!weight)(?:\bw|\B(?:\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i
# ^ Look-alike spellings of "weight". The "g" and "h" groups each formerly ended
#   with a stray "]" (\xC4[\x9C-\xA3]] and \xD5\xB0]), so those byte forms only
#   matched when a literal "]" followed them.
describe SARE_SUB_WEIGHT_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_WEIGHT_OB1 3.333 # type=obfu
#stype SARE_SUB_WEIGHT_OB1 obfu
#counts SARE_SUB_WEIGHT_OB1 148s/0h of 113374 corpus (92402s/20972h) 04/18/04

# The whole word "younger", any case.
header SARE_SUB_YOUNGER Subject =~ /\bYOUNGER\b/i
describe SARE_SUB_YOUNGER Subject contains spammer subject - medical
score SARE_SUB_YOUNGER 1.588
#counts SARE_SUB_YOUNGER 186s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_YOUNGER_OB1 Subject =~ 
/(?!\bYOUNGER\b)(?:\by|\B(?:[\xA5\xDD\xFD]|\xC5[\xB6-\xB8]|\xCE\x8E|\xCE\xA5|\xCE\xA8|\xCE\xAB|\xCE\xB3|\xD0\xA3|\xD1\x83|\xD1\x9E|\xD2[\xAE-\xB1]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:r\b|(?:[\xAE]|\xC5[\x94-\x99]|\xD1\x93)\B)/i
# ^ Look-alike spellings of "younger". The "g" group formerly read
#   \xC4[\x9C-\xA3]] with a stray "]", so the two-byte forms only matched when a
#   literal "]" followed them.
describe SARE_SUB_YOUNGER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_YOUNGER_OB1 2.500 # type=obfu
#stype SARE_SUB_YOUNGER_OB1 obfu
#counts SARE_SUB_YOUNGER_OB1 20s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "younger" with at most one arbitrary character between letters; the plain
# word is rejected by the lookahead.
header SARE_SUB_YOUNGER_OB2 Subject =~ /(?!\bYOUNGER\b)\by.?o.?u.?n.?g.?e.?r\b/i
describe SARE_SUB_YOUNGER_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_YOUNGER_OB2 2.500 # type=obfu
#stype SARE_SUB_YOUNGER_OB2 obfu
#counts SARE_SUB_YOUNGER_OB2 29s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Category: Real Estate

# "foreclosure" anywhere in the subject.
header SARE_SUB_FORECLOSURE Subject =~ /Foreclosure/i
describe SARE_SUB_FORECLOSURE Subject contains spammer subject - real estate
score SARE_SUB_FORECLOSURE 0.700
#counts SARE_SUB_FORECLOSURE 
26s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_FORECLOSURE 29s/0h of 97268 corpus (79437s/17831h) 01/24/04

# "homeowner" anywhere in the subject.
header SARE_SUB_HOMEOWNER Subject =~ /homeowner/i
describe SARE_SUB_HOMEOWNER Subject contains spammer subject - real estate
score SARE_SUB_HOMEOWNER 1.183
#counts SARE_SUB_HOMEOWNER 113s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Look-alike spellings of "homeowner"; the plain spelling is rejected by the
# lookahead. The "h", "m" and "w" groups each formerly ended with a stray "]"
# (\xD5\xB0], \xD0\xBC], \xCF\x8E]), so those byte forms needed a literal "]".
header SARE_SUB_HOMEOWNER_OB1 Subject =~ /(?!homeowner)(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:w|\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5
[\x94-\x99]|\xD1\x93)/i describe SARE_SUB_HOMEOWNER_OB1 Subject contains obfuscated spammer topic score SARE_SUB_HOMEOWNER_OB1 1.666 # type=obfu #stype SARE_SUB_HOMEOWNER_OB1 obfu #counts SARE_SUB_HOMEOWNER_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_HOMEOWNER_OB2 Subject =~ /(?!homeowner)h.?o.?m.?e.?o.?w.?n.?e.?r/i describe SARE_SUB_HOMEOWNER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_HOMEOWNER_OB2 1.666 # type=obfu #stype SARE_SUB_HOMEOWNER_OB2 obfu #counts SARE_SUB_HOMEOWNER_OB2 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_TIMESHARE Subject =~ /timeshare/i describe SARE_SUB_TIMESHARE Subject contains spammer subject - real estate score SARE_SUB_TIMESHARE 0.711 #counts SARE_SUB_TIMESHARE 5s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_TIMESHARE 28s/0h of 91714 corpus (74113s/17601h) 01/24/04 # Category: Religious, including religious scams header SARE_SUB_CHRISTIAN Subject =~ /\bchristian\b/i describe SARE_SUB_CHRISTIAN Subject contains spammer subject - religion score SARE_SUB_CHRISTIAN 0.766 #counts SARE_SUB_CHRISTIAN 38s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_LEGAL_ORDIN Subject =~ /(?:(?:LEGAL|online)\W*ORDINATION|proceed\W*with.{1,30}ordination)/i describe SARE_SUB_LEGAL_ORDIN Subject contains spammer subject - religion score SARE_SUB_LEGAL_ORDIN 0.333 #counts SARE_SUB_LEGAL_ORDIN 6s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Software header SARE_SUB_CHEAP_SW Subject =~ /(?:(?:bargain|bucks|C.?h.?e.?a.?p|discount|expensive|p.?r.?i.?c.?e|s.?a.?v.?e|special\W*offer|spend).{1,30}software|s.?o.?f.?t.?w.?a.?r.?e.{1,30}(?:\%.off|at\W*only|bargain|bucks|c.?h.?e.?a.?p|deal|loww?.c.?o.?s.?t|price))/i describe SARE_SUB_CHEAP_SW Subject contains spammer subject - software score SARE_SUB_CHEAP_SW 1.666 #counts SARE_SUB_CHEAP_SW 482s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_CHEAP_SW Created by Bob Menschel Apr 09 2004 header SARE_SUB_DOWNLOAD 
Subject =~ /(?:downloadable\W*software|(?:available\W*for|cds\W*(?:and|or)|easy|free\W*to)\W*download|download(?:ing)\W*(?:(?:for\W*)?free|games|movies|music|now|software|under|video))/i
# NOTE(review): "(?:ing)" above is mandatory, so "download now" does not hit
# while "downloading now" does - confirm whether "(?:ing)?" was intended.
describe SARE_SUB_DOWNLOAD Subject contains spammer subject - software
score SARE_SUB_DOWNLOAD 0.705
#counts SARE_SUB_DOWNLOAD 27s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Look-alike spellings of "download" (no separator bytes allowed between
# letters in this rule). The "w" group formerly ended \xCF\x8E] with a stray
# "]", so that byte form only matched when a literal "]" followed it.
header SARE_SUB_DOWNLOAD_OB1 Subject =~ /(?!\bdownload)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)(?:w|\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E)(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)(?:[d\xD0]|\xC4[\x8E-\x91])/i
describe SARE_SUB_DOWNLOAD_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_DOWNLOAD_OB1 1.666 # type=obfu
#stype SARE_SUB_DOWNLOAD_OB1 obfu
#counts SARE_SUB_DOWNLOAD_OB1 6s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "software on CD"; the word "on" is required.
header SARE_SUB_SW_ON_CD Subject =~ /software\W*(?:on\W*)CD/i
describe SARE_SUB_SW_ON_CD Subject contains spammer subject - software
score SARE_SUB_SW_ON_CD 0.222
#stype SARE_SUB_SW_ON_CD spam
#hist SARE_SUB_SW_ON_CD Created by Bob Menschel Apr 09 2004
#counts SARE_SUB_SW_ON_CD 4s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Speech-to-text software pitches.
header SARE_SUB_SWTYPES Subject =~ /(?:hate\W*typing|it\W*types|never\W*type|no\W*typing\W*required|Talk\W*It\W*Type\W*It|voice\W*recognition)/i
describe SARE_SUB_SWTYPES Subject contains a spammer 
subject - Software
score SARE_SUB_SWTYPES 0.622
#counts SARE_SUB_SWTYPES 12s/0h of 111217 corpus (90485s/20732h) 04/17/04
#note SARE_SUB_SWTYPES beware: "attachment type" in virus bounce subject headings.

# get / save / saving, then within 30 characters "system works".
header SARE_SUB_SYSTEMWORKS Subject =~ /(?:get|sav(?:e|ing)).{1,30}system\W*works/i
describe SARE_SUB_SYSTEMWORKS Subject contains a spammer subject - Software
score SARE_SUB_SYSTEMWORKS 0.622
#counts SARE_SUB_SYSTEMWORKS 12s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "%" or save/saving, then Corel or WordPerfect, then Office, each gap 1-30 chars.
header SARE_SUB_WP_OFFICE Subject =~ /(?:\%|Sav(?:e|ing)).{1,30}(?:Corel|WordPerfect).{1,30}Office/i
describe SARE_SUB_WP_OFFICE Subject contains spammer subject - software
score SARE_SUB_WP_OFFICE 0.388
#counts SARE_SUB_WP_OFFICE 7s/0h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_WP_OFFICE 8s/0h of 58857 corpus

# Category: Spamming

# "email(s)" followed within 30 characters by a search/auction brand name.
header SARE_SUB_ADV_SEARCH Subject =~ /emails?.{1,30}(?:7Search|ebay|google|goClick|yahoo)/i
describe SARE_SUB_ADV_SEARCH Subject contains spammer subject - spamming
score SARE_SUB_ADV_SEARCH 0.555 # type=spamp
#stype SARE_SUB_ADV_SEARCH spamp
#counts SARE_SUB_ADV_SEARCH 2s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_ADV_SEARCH 4s/0h of 81383 corpus ??/??/03
#hist SARE_SUB_ADV_SEARCH 03/28/04 -- added "ebay" option

# junk/bulk e-mail or fax numbers, and "fax bulk numbers".
header SARE_SUB_BULK_EMAIL Subject =~ /(?:(?:junk|bulk)\W*(?:e\W*mail|fax\W*numbers)|fax\W*bulk\W*numbers)/i
describe SARE_SUB_BULK_EMAIL Subject contains spammer subject - spamming
score SARE_SUB_BULK_EMAIL 0.772
#counts SARE_SUB_BULK_EMAIL 39s/0h of 113373 corpus (92402s/20971h) 04/20/04

# "commission(s)" near mailer(s)/web site, or bulk/google near "commission".
header SARE_SUB_COMM_MAILERS Subject =~ /(?:commissions?.{1,30}(?:mailers?|web\W*site)|(?:bulk|google).{1,30}commission)/i
describe SARE_SUB_COMM_MAILERS Subject contains spammer subject - spamming
score SARE_SUB_COMM_MAILERS 0.627
#counts SARE_SUB_COMM_MAILERS 13s/0h of 113374 corpus (92402s/20972h) 04/18/04

# "internet connection problem" or a frequent/slow internet connection.
header SARE_SUB_INET_CONN Subject =~ /(?:internet\W*connection\W*problem|(?:frequent|slow)\W*internet\W*connection)/i
describe 
SARE_SUB_INET_CONN Subject contains spammer subject - spamming
score SARE_SUB_INET_CONN 0.677
#counts SARE_SUB_INET_CONN 22s/0h of 113374 corpus (92402s/20972h) 04/18/04

# The whole word "WSEAS", any case.
header SARE_SUB_WSEAS Subject =~ /\bWSEAS\b/i
describe SARE_SUB_WSEAS Subject contains spammer subject - spamming
score SARE_SUB_WSEAS 1.666 # type=spamg
#stype SARE_SUB_WSEAS spamg
#counts SARE_SUB_WSEAS 4s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Category: Generic words and phrases

# Look-alike spellings of "action", with one optional non-word character or
# underscore between letters; the plain word is rejected by the lookahead.
header SARE_SUB_ACTION_OB1 Subject =~ /(?!\baction\b)\b[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[c\xC7\xE7\xA2\xA9][\W_]?t[\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]\b/i
describe SARE_SUB_ACTION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_ACTION_OB1 1.666 # type=obfu
#stype SARE_SUB_ACTION_OB1 obfu
#counts SARE_SUB_ACTION_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04

# The phrase "be here" anywhere in the subject.
header SARE_SUB_BE_HERE Subject =~ /be here/i
describe SARE_SUB_BE_HERE Subject contains likely spammer phrase or word
score SARE_SUB_BE_HERE 0.500
#counts SARE_SUB_BE_HERE 9s/0h of 111253 corpus (90483s/20770h) 04/15/04

# "bigger" with at most one arbitrary character between letters; a match
# starting at the plain spelling is rejected by the lookahead.
header SARE_SUB_BIGGER_OB1 Subject =~ /(?!bigger)b.?i.?g.?g.?e.?r/i
describe SARE_SUB_BIGGER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_BIGGER_OB1 1.666 # type=obfu - 23s/0h of 91714 corpus (74113s/17601h) 01/23/04
#counts SARE_SUB_BIGGER_OB1 9s/0h of 111251 corpus (90481s/20770h) 04/15/04

header SARE_SUB_BIGGER_OB2 Subject =~ 
/(?!bigger)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i
# ^ Look-alike spellings of "bigger". Both "g" groups formerly read
#   \xC4[\x9C-\xA3]] with a stray "]", so the two-byte forms only matched when a
#   literal "]" followed them.
describe SARE_SUB_BIGGER_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_BIGGER_OB2 2.500 # type=obfu - 30s/0h of 91714 corpus (74113s/17601h) 01/23/04
#counts SARE_SUB_BIGGER_OB2 10s/0h of 111251 corpus (90481s/20770h) 04/15/04

header SARE_SUB_BETTER_OB2 Subject =~ 
/(?!BETTER)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i describe SARE_SUB_BETTER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_BETTER_OB2 2.500 # type=obfu - 30s/0h of 91714 corpus (74113s/17601h) 01/23/04 #counts SARE_SUB_BETTER_OB2 55s/0h of 111251 corpus (90481s/20770h) 04/15/04 header SARE_SUB_BOOST Subject =~ /(?:boost.{1,20}(?:(?:cable|PC).{1,10}speed|confidence|in\W*bed|(?:love|se.?x)\W*life|mileage|size|stamina)|(?:manhood|muscle|sex|super).{0,30}boost)/i describe SARE_SUB_BOOST Subject contains likely spammer phrase or word score SARE_SUB_BOOST 1.666 #counts SARE_SUB_BOOST 243s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_BOOST_OB1 Subject =~ 
/(?!\bboost\b)(?:\b[b8]|\B(?:[\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_BOOST_OB1 Subject contains obfuscated spammer topic score SARE_SUB_BOOST_OB1 1.666 # type=obfu - 20s/0h of 97268 corpus (79437s/17831h) 01/24/04 #stype SARE_SUB_BOOST_OB1 obfu #counts SARE_SUB_BOOST_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_BOOST_OB2 Subject =~ /(?!\bboost\b)\bb.?o.?o.?s.?t\b/i describe SARE_SUB_BOOST_OB2 Subject contains obfuscated spammer topic score SARE_SUB_BOOST_OB2 1.666 # type=obfu #stype SARE_SUB_BOOST_OB2 obfu #counts SARE_SUB_BOOST_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_BREAKTHRU Subject =~ /Breakthrough/i describe SARE_SUB_BREAKTHRU Subject contains likely spammer phrase or word score SARE_SUB_BREAKTHRU 1.200 #counts SARE_SUB_BREAKTHRU 116s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_BREAKTHRU_OB1 Subject =~ 
/(?!Breakthrough)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\x
B0])/i describe SARE_SUB_BREAKTHRU_OB1 Subject contains obfuscated spammer topic score SARE_SUB_BREAKTHRU_OB1 1.666 # type=obfu #stype SARE_SUB_BREAKTHRU_OB1 obfu #counts SARE_SUB_BREAKTHRU_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_BREAKTHRU_OB1 5s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_BREAKTHRU_OB2 Subject =~ /(?!Breakthrough)B.?r.?e.?a.?k.?t.?h.?r.?o.?u.?g.?h/i describe SARE_SUB_BREAKTHRU_OB2 Subject contains obfuscated spammer topic score SARE_SUB_BREAKTHRU_OB2 1.666 # type=obfu #stype SARE_SUB_BREAKTHRU_OB2 obfu #counts SARE_SUB_BREAKTHRU_OB2 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_BREAKTHRU_OB2 6s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_BUY_OB1 Subject =~ /\bbvy\b/i describe SARE_SUB_BUY_OB1 Subject contains obfuscated spammer topic score SARE_SUB_BUY_OB1 1.666 # type=obfu #stype SARE_SUB_BUY_OB1 obfu #counts SARE_SUB_BUY_OB1 3s/0h of 113272 corpus (92366s/20906h) 04/23/04 header SARE_SUB_CARTRIDGE_OB1 Subject 
=~/(?!Cartridge)(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)/i describe SARE_SUB_CARTRIDGE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_CARTRIDGE_OB1 1.666 # type=obfu #stype SARE_SUB_CARTRIDGE_OB1 obfu #counts SARE_SUB_CARTRIDGE_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_CARTRIDGE_OB2 Subject =~/(?!Cartridge)C.?a.?r.?t.?r.?i.?d.?g.?e/i describe SARE_SUB_CARTRIDGE_OB2 Subject contains obfuscated spammer topic score SARE_SUB_CARTRIDGE_OB2 1.666 # type=obfu #stype SARE_SUB_CARTRIDGE_OB2 obfu #counts SARE_SUB_CARTRIDGE_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_CHANGE_LIFE subject =~ /changed? 
(?:my|your) (?:love\W*)life/i describe SARE_SUB_CHANGE_LIFE Subject contains likely spammer phrase or word score SARE_SUB_CHANGE_LIFE 0.900 #hist SARE_SUB_CHANGE_LIFE Created by Bob Menschel Mar 31 2004 #counts SARE_SUB_CHANGE_LIFE 62s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_CHARGE_OB1 Subject =~ /(?!\bcharge\b)\b[c\xC7\xE7\xA2\xA9][\W_]?h[\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[r\xAE][\W_]?[g6][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i describe SARE_SUB_CHARGE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_CHARGE_OB1 1.666 # type=obfu #stype SARE_SUB_CHARGE_OB1 obfu #counts SARE_SUB_CHARGE_OB1 8s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_CHARGE_OB1 17s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_CHEAP_OB1 Subject =~ /(?!\bcheap(er)?)\b[c\xC7\xE7\xA2\xA9][\W_]?h[\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?p([e3\*\xC8-\xCB\xE8-\xEB][\W_]?[r\xAE])?/i describe SARE_SUB_CHEAP_OB1 Subject contains obfuscated spammer topic score SARE_SUB_CHEAP_OB1 3.333 # type=obfu #stype SARE_SUB_CHEAP_OB1 obfu #counts SARE_SUB_CHEAP_OB1 174s/0h of 113374 corpus (92402s/20972h) 04/18/04 header __SARE_SUB_CONFID_W Subject =~ /\bconfidential(?:ity|ly)?\b/i header SARE_SUB_CONFID_P Subject =~ /(?:confidential.+(?:assured|brand|business|delivery|discreet|embarrass|medicine|offer|opportunity|orders|prescription|shopping|stock)|(?:assistance|business|mutual|priv(?:at)?e|relationship|strict?ly|urgent).+confiden[tc]ial|\bconfidant\b|can i confide)/i describe SARE_SUB_CONFID_P Subject contains likely spammer phrase or word score SARE_SUB_CONFID_P 0.794 #counts SARE_SUB_CONFID_P 43s/0h of 113305 corpus (92399s/20906h) 04/22/04 #ham SARE_SUB_CONFID_P organization's emails flagged: "- confidential" header SARE_SUB_CONF_INFO Subject =~ /(?:Confidential (?:business|info|med|assist)|Confidentiality assured|Fwd: Confidential)/i describe SARE_SUB_CONF_INFO Subject contains likely spammer phrase or word score 
SARE_SUB_CONF_INFO 0.700 #counts SARE_SUB_CONF_INFO 26s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_EXCITING_NEW Subject =~ /exciting\W*(and\W*prosperous|business\W*opportunity|new)/i describe SARE_SUB_EXCITING_NEW Subject contains likely spammer phrase or word score SARE_SUB_EXCITING_NEW 0.644 #counts SARE_SUB_EXCITING_NEW 16s/0h of 113374 corpus (92402s/20972h) 04/18/04 #hist SARE_SUB_EXCITING_NEW Created by Bob Menschel Apr 05 2004 header SARE_SUB_EBAY_OB1 Subject =~ /(?!e-?bay)\b[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[b8\xDF][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[y\xA5\xDD\xFD]\b/i describe SARE_SUB_EBAY_OB1 Subject contains obfuscated spammer topic score SARE_SUB_EBAY_OB1 2.500 # type=obfu #stype SARE_SUB_EBAY_OB1 obfu #counts SARE_SUB_EBAY_OB1 28s/0h of 113373 corpus (92402s/20971h) 04/20/04 header SARE_SUB_EXCL_OB1 Subject =~ /(?!\bexclusive\b)\b[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[x\xD7][\W_]?[c\xC7\xE7\xA2\xA9][\W_]?[l1I\|\xA3][\W_]?[uv\*\xB5\xD9-\xDC\xF9-\xFC][\W_]?[s5\$\xA7][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[vu][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i describe SARE_SUB_EXCL_OB1 Subject contains obfuscated spammer topic score SARE_SUB_EXCL_OB1 1.666 # type=obfu #stype SARE_SUB_EXCL_OB1 obfu #counts SARE_SUB_EXCL_OB1 9s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_EXCL_OB1 14s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_EXPIRED Subject =~ /(?:(?:skills|update\W*your)\W*expired|expired\W*(?:academic|account))/i describe SARE_SUB_EXPIRED Subject contains likely spammer phrase or word score SARE_SUB_EXPIRED 0.666 #counts SARE_SUB_EXPIRED 20s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_FIND_YOUR Subject =~ /find your/i describe SARE_SUB_FIND_YOUR Subject contains likely spammer phrase or word score SARE_SUB_FIND_YOUR 0.738 #counts SARE_SUB_FIND_YOUR 33s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_FOR_OB1 Subject =~ /(?!\bFor\b)\bf[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[r\xAE]\b/i 
describe SARE_SUB_FOR_OB1 Subject contains obfuscated spammer topic score SARE_SUB_FOR_OB1 2.500 # type=obfu #stype SARE_SUB_FOR_OB1 obfu #counts SARE_SUB_FOR_OB1 57s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_FOR_WOMEN Subject =~ /(?:Women:|for women only)/i describe SARE_SUB_FOR_WOMEN Subject contains likely spammer phrase or word score SARE_SUB_FOR_WOMEN 0.166 #counts SARE_SUB_FOR_WOMEN 3s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_HARD_OB1 Subject =~ /(?!\bhard\b)\bh[\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[r\xAE][\W_]?[d\xD0]\b/i describe SARE_SUB_HARD_OB1 Subject contains obfuscated spammer topic score SARE_SUB_HARD_OB1 2.500 # type=obfu #stype SARE_SUB_HARD_OB1 obfu #counts SARE_SUB_HARD_OB1 30s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_INKJET Subject =~/Inkjet/i describe SARE_SUB_INKJET Subject contains likely spammer phrase or word score SARE_SUB_INKJET 0.850 #counts SARE_SUB_INKJET 53s/0h of 113374 corpus (92402s/20972h) 04/18/04 #max SARE_SUB_INKJET 87s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_INKJET_OB1 Subject 
=~/(?!Inkjet)(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:j|\xC4[\xB4-\xB5]|\xD0\x88|\xD1\x98])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)/i describe SARE_SUB_INKJET_OB1 Subject contains obfuscated spammer topic score SARE_SUB_INKJET_OB1 1.666 # type=obfu #stype SARE_SUB_INKJET_OB1 obfu #counts SARE_SUB_INKJET_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_INKJET_OB1 12s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_INKJET_OB2 Subject =~/(?!Inkjet)i.?n.?k.?j.?e.?t/i describe SARE_SUB_INKJET_OB2 Subject contains obfuscated spammer topic score SARE_SUB_INKJET_OB2 1.666 # type=obfu #stype SARE_SUB_INKJET_OB2 obfu #counts SARE_SUB_INKJET_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_INKJET_OB2 10s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_JOB Subject =~ 
/(?:(?:dead\W*end|does\W*your|dream|find\W*people|get\W*(?:a|the)(?:\W*better)?|(?:keep|quit)\W*(?:your|their)(?:\W*day)?|real|run\W*your|that\W*great|wanna|with\W*a\W*new|(?:yo)?ur\W*(?:current|full\W*time))\W*job|good\W*jobs|global\W*job\W*vacancy|success\W*job\W*story|job\W*(?:confirmation|feel\W*like|journal|opportunity|you\W*want)|joboffer)/i describe SARE_SUB_JOB Subject contains likely spammer phrase or word score SARE_SUB_JOB 1.666 #counts SARE_SUB_JOB 313s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_LIKE_YOU Subject =~ /(?:(?:singles(?: just)?|(?:looking(?: for)?|(?:need|surprise)) someone|who might) like you|like you (?:have )?never seen)/i describe SARE_SUB_LIKE_YOU Subject contains likely spammer phrase or word score SARE_SUB_LIKE_YOU 0.444 #counts SARE_SUB_LIKE_YOU 8s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_LAST_CHANCE2 Subject =~ /Re: ?LAST CHANCE/i describe SARE_SUB_LAST_CHANCE2 Subject contains likely spammer phrase or word score SARE_SUB_LAST_CHANCE2 0.333 #counts SARE_SUB_LAST_CHANCE2 6s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_LAST_CHANCE3 Subject =~ /LAST CHANCE\s{3,}\S/i describe SARE_SUB_LAST_CHANCE3 Subject contains likely spammer phrase or word score SARE_SUB_LAST_CHANCE3 0.333 #counts SARE_SUB_LAST_CHANCE3 6s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_LAST_CHANCE4 Subject =~ /one last chance/i describe SARE_SUB_LAST_CHANCE4 Subject contains likely spammer phrase or word score SARE_SUB_LAST_CHANCE4 0.111 #counts SARE_SUB_LAST_CHANCE4 2s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_LOSE_OB1 Subject =~ /(?!\bLoSE\b)\b[l1I\|\xA3][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[s5\$\xA7][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i describe SARE_SUB_LOSE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_LOSE_OB1 2.500 # type=obfu #stype SARE_SUB_LOSE_OB1 obfu #counts SARE_SUB_LOSE_OB1 50s/0h of 111253 corpus (90483s/20770h) 04/15/04 header 
SARE_SUB_MISC_1 Subject =~ /\b(?:get rid of|sexy)\b/i describe SARE_SUB_MISC_1 Subject contains likely spammer phrase or word score SARE_SUB_MISC_1 1.272 #counts SARE_SUB_MISC_1 129s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_MOON Subject =~ /(?:admiring\W*the\W*moon|moonlight\W*(?:so\W*long|to\W*find)|(?:on|to)\W*the\W*moon)/i describe SARE_SUB_MOON Subject contains likely spammer phrase or word score SARE_SUB_MOON 0.638 #counts SARE_SUB_MOON 15s/0h of 111251 corpus (90481s/20770h) 04/15/04 header SARE_SUB_MOVE_OB1 Subject =~ /(?!\bmove)\b(?:\/\\\/\\|\/V\\|rn|[m])[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[vu][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]/i describe SARE_SUB_MOVE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MOVE_OB1 1.666 # type=obfu #stype SARE_SUB_MOVE_OB1 OBFU #counts SARE_SUB_MOVE_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PASSION_OB1 Subject =~ /(?!Passion)(?:[p\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|
\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)/i describe SARE_SUB_PASSION_OB1 Subject contains obfuscated spammer topic score SARE_SUB_PASSION_OB1 1.666 # type=obfu - 4s/0h of 91714 corpus (74113s/17601h) 01/24/04 #counts SARE_SUB_PASSION_OB1 1s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PASSION_OB2 Subject =~ /(?!Passion)p.?a.?s.?s.?i.?o.?n/i describe SARE_SUB_PASSION_OB2 Subject contains obfuscated spammer topic score SARE_SUB_PASSION_OB2 1.666 # type=obfu - 4s/0h of 91714 corpus (74113s/17601h) 01/24/04 #counts SARE_SUB_PASSION_OB2 1s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PENIS Subject =~ /\bpenis\b/i describe SARE_SUB_PENIS Subject contains likely spammer phrase or word score SARE_SUB_PENIS 1.338 #counts SARE_SUB_PENIS 141s/0h of 113374 corpus (92402s/20972h) 04/18/04 #max SARE_SUB_PENIS 368s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_PENIS_OB1 Subject =~ /(?!\bpenis\b)\bp[\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[n\xD1\xF1][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[s5\$\xA7]\b/i describe SARE_SUB_PENIS_OB1 Subject contains obfuscated spammer topic score SARE_SUB_PENIS_OB1 3.333 # type=obfu #stype SARE_SUB_PENIS_OB1 obfu #counts SARE_SUB_PENIS_OB1 533s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_PERFECTLY Subject =~ /\bperfectly\W*(?:creative|legal|smooth)/i describe SARE_SUB_PERFECTLY Subject contains likely spammer phrase or word score SARE_SUB_PERFECTLY 0.166 #counts SARE_SUB_PERFECTLY 3s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_PHOTOS_OB1 Subject =~ /(?!\bphotos?)\bp[\W_]?h[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?t[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[s5\$\xA7]?/i describe SARE_SUB_PHOTOS_OB1 Subject contains obfuscated spammer topic score SARE_SUB_PHOTOS_OB1 2.5000 # 
type=obfu #stype SARE_SUB_PHOTOS_OB1 OBFU #counts SARE_SUB_PHOTOS_OB1 11s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PLEASE_OB1 Subject =~ /(?!\bPlease\b)\bp[\W_]?[l1I\|\xA3][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[s5\$\xA7][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i describe SARE_SUB_PLEASE_OB1 Subject contains obfuscated spammer topic score SARE_SUB_PLEASE_OB1 1.666 # type=obfu #stype SARE_SUB_PLEASE_OB1 obfu #counts SARE_SUB_PLEASE_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_PRINTER_OB1 Subject =~ /(?!printer)\b(?:[p\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i describe SARE_SUB_PRINTER_OB1 Subject contains obfuscated spammer topic score SARE_SUB_PRINTER_OB1 1.000 # type=obfu #stype SARE_SUB_PRINTER_OB1 obfu #counts SARE_SUB_PRINTER_OB1 1s/0h of 113373 corpus (92402s/20971h) 04/18/04 header SARE_SUB_PRINTER_OB2 Subject =~ /(?!printer)\bp.?r.?i.?n.?t.?e.?r/i describe SARE_SUB_PRINTER_OB2 Subject 
contains obfuscated spammer topic score SARE_SUB_PRINTER_OB2 1.000 # type=obfu #stype SARE_SUB_PRINTER_OB2 obfu #counts SARE_SUB_PRINTER_OB2 2s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_PROFILE Subject =~ /(?:your profile.{1,30}need|(?:cholesterol|company|featured|financial|I saw your|new|saw you|special|stock(?:\W*market)?|Your Personal)\W*profile|profiled?\W*compan(?:y|ies))/i describe SARE_SUB_PROFILE Subject contains likely spammer phrase or word score SARE_SUB_PROFILE 1.138 #counts SARE_SUB_PROFILE 105s/0h of 113393 corpus (92421s/20972h) 04/18/04 #note SARE_SUB_PROFILE "Investment Profile" matched by SARE_SUB_INVESTMENTS #note SARE_SUB_PROFILE "Credit Profile" matched by SARE_SUB_POOR_CREDIT header SARE_SUB_PROVEN Subject =~ /\bproven\b/i describe SARE_SUB_PROVEN Subject contains likely spammer phrase or word score SARE_SUB_PROVEN 1.666 #counts SARE_SUB_PROVEN 266s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_REAL_OB1 Subject =~ /(?!\breal\b)\b[r\xAE][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[l1I\|\xA3]\b/i describe SARE_SUB_REAL_OB1 Subject contains obfuscated spammer topic score SARE_SUB_REAL_OB1 1.666 # type=obfu #stype SARE_SUB_REAL_OB1 obfu #counts SARE_SUB_REAL_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_SPYWARE Subject =~ /\bSPYWARE\b/i describe SARE_SUB_SPYWARE Subject contains likely spammer phrase or word score SARE_SUB_SPYWARE 0.772 #counts SARE_SUB_SPYWARE 39s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_STRONG Subject =~ /\bstrong\b/i describe SARE_SUB_STRONG Subject contains likely spammer phrase or word score SARE_SUB_STRONG 0.761 #counts SARE_SUB_STRONG 37s/0h of 113272 corpus (92366s/20906h) 04/23/04 header SARE_SUB_SURVEY Subject =~ /(?:campaign|Fill\W*out|questions|rated.{1,30}by\W*a|short|simple|tak(e|ing)|womens)\W*survey|survey\W*(?:opportunity|says)/ describe SARE_SUB_SURVEY Subject contains likely spammer phrase or word score 
SARE_SUB_SURVEY 0.677 #counts SARE_SUB_SURVEY 22s/0h of 111217 corpus (90485s/20732h) 04/17/04 header SARE_SUB_TONER Subject =~ /\btoner\b/i describe SARE_SUB_TONER Subject contains likely spammer phrase or word score SARE_SUB_TONER 0.622 #counts SARE_SUB_TONER 12s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_TONER 23s/0h of 91714 corpus (74113s/17601h) 01/24/04 header SARE_SUB_TONER_OB1 Subject =~ /(?!\btoner\b)(?:\bt|\B(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:r\b|(?:[\xAE]|\xC5[\x94-\x99]|\xD1\x93)\B)/i describe SARE_SUB_TONER_OB1 Subject contains obfuscated spammer topic score SARE_SUB_TONER_OB1 1.666 # type=obfu #stype SARE_SUB_TONER_OB1 obfu #counts SARE_SUB_TONER_OB1 1s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_TONER_OB2 Subject =~ /(?!\btoner\b)\bt.?o.?n.?e.?r\b/i describe SARE_SUB_TONER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_TONER_OB2 1.666 # type=obfu #stype SARE_SUB_TONER_OB2 obfu #counts SARE_SUB_TONER_OB2 1s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_VIDEO_OB1 Subject =~ /(?!\bvideo\b)\b[vu][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[d\xD0][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])\b/i describe SARE_SUB_VIDEO_OB1 Subject 
contains obfuscated spammer topic #stype SARE_SUB_VIDEO_OB1 obfu score SARE_SUB_VIDEO_OB1 2.500 # type=obfu #counts SARE_SUB_VIDEO_OB1 10s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_YOUR_WOMAN Subject =~ /Your woman/i describe SARE_SUB_YOUR_WOMAN Subject contains likely spammer phrase or word score SARE_SUB_YOUR_WOMAN 0.800 #counts SARE_SUB_YOUR_WOMAN 44s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_YOUR_WOMAN 56s/0h of 97268 corpus (79437s/17831h) 01/24/04 header __SARE_SUB_WEBMASTER1 Subject =~ /webmaster/i header __SARE_SUB_WEBMASTER2 From =~ /webmaster\@/i meta SARE_SUB_WEBMASTER2 __SARE_SUB_WEBMASTER1 && __SARE_SUB_WEBMASTER2 describe SARE_SUB_WEBMASTER2 Subject contains likely spammer phrase or word score SARE_SUB_WEBMASTER2 0.166 #counts SARE_SUB_WEBMASTER2 3s/0h of 113305 corpus (92399s/20906h) 04/22/04 header SARE_SUB_SION_OB1 Subject =~ /(?!sion)(?!s lon)(?!s: on)[s5\$\xA7][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]/i describe SARE_SUB_SION_OB1 Subject contains obfuscated spammer topic score SARE_SUB_SION_OB1 1.666 # type=obfu #stype SARE_SUB_SION_OB1 obfu #counts SARE_SUB_SION_OB1 9s/0h of 113373 corpus (92402s/20971h) 04/18/04 # type=obfu ham: "looks longer", "as long" # Category: Technical spamsign header SARE_SUB_6CONS_WORD Subject =~ /(?!.m+)(?!xpvpnsrv)\b[bcghjklmnpqrstvwxz]{6,20}\b/ describe SARE_SUB_6CONS_WORD subject contains word consisting of consecutive consonants score SARE_SUB_6CONS_WORD 1.666 #counts SARE_SUB_6CONS_WORD 533s/0h of 113373 corpus (92402s/20971h) 04/20/04 #max SARE_SUB_6CONS_WORD 863s/0h of 97268 corpus (79437s/17831h) 01/24/04 #hist SARE_SUB_6CONS_WORD 04/20/2004 -- Added exclusion for hmmmmm header SARE_SUB_7CONS_WORD Subject =~ /(?!JDBGMGR)(?!.m+)\b[bcdfghjklmnpqrstvwxz]{7}\b/i describe SARE_SUB_7CONS_WORD subject contains word consisting of consecutive consonants score SARE_SUB_7CONS_WORD 1.666 #counts SARE_SUB_7CONS_WORD 485s/0h of 113373 
corpus (92402s/20971h) 04/20/04 #hist SARE_SUB_7CONS_WORD 04/20/2004 -- Added exclusion for hmmmmm header SARE_SUB_ACCENT_CHAR Subject =~ /\w[äëöü]\w/ describe SARE_SUB_ACCENT_CHAR Subject contains foreign character apparently embedded within a word score SARE_SUB_ACCENT_CHAR 1.666 #counts SARE_SUB_ACCENT_CHAR 205s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_CASH_CHAR Subject =~ /[a-zA-Z]\$[a-zA-Z]/ describe SARE_SUB_CASH_CHAR Subject has letter then $ then letter score SARE_SUB_CASH_CHAR 1.638 #counts SARE_SUB_CASH_CHAR 195s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_COMMA_FIRST Subject =~ /^,/ describe SARE_SUB_COMMA_FIRST Subject starts with a Comma. score SARE_SUB_COMMA_FIRST 0.955 #counts SARE_SUB_COMMA_FIRST 72s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_ENC_KS5601 Subject:raw =~ /\=\?ks_c_5601\-1987\?/i describe SARE_SUB_ENC_KS5601 Subject specifies display in Korean?, unnecessary unless spam hides subject score SARE_SUB_ENC_KS5601 0.877 #counts SARE_SUB_ENC_KS5601 58s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_ENC_WIN1251 Subject:raw =~ /windows-1251/i describe SARE_SUB_ENC_WIN1251 Subject specifies display in windows-1251, , unnecessary unless spam hides subject score SARE_SUB_ENC_WIN1251 1.111 # type=spamp #stype SARE_SUB_ENC_WIN1251 spamp #counts SARE_SUB_ENC_WIN1251 56s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_ENC_WIN1255 Subject:raw =~ /windows-1255/i describe SARE_SUB_ENC_WIN1255 Subject specifies display in windows-1255, unnecessary unless spam hides subject score SARE_SUB_ENC_WIN1255 1.111 # type=spamp #stype SARE_SUB_ENC_WIN1255 spamp #counts SARE_SUB_ENC_WIN1255 25s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_LETTERS_NUMS Subject =~ /[a-z]{1,5}[0-9]{1,5}[a-z]{1,5}[0-9]{1,5}[a-z]{1,5}[0-9]{1,5}/ describe SARE_SUB_LETTERS_NUMS Subject contains multiple mixed letters and numbers in one "word" score SARE_SUB_LETTERS_NUMS 0.700 #counts 
SARE_SUB_LETTERS_NUMS 26s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max      SARE_SUB_LETTERS_NUMS 199s/0h of 97268 corpus (79437s/17831h) 01/24/04

# SARE_SUB_LONG_SUBJ_170
# Fires when the Subject contains a run of at least 170 characters.
header    SARE_SUB_LONG_SUBJ_170 Subject =~ /.{170,}/
describe  SARE_SUB_LONG_SUBJ_170 Subject is excessively long -- more than 170 chars
score     SARE_SUB_LONG_SUBJ_170 3.333 # type=spamg
#stype    SARE_SUB_LONG_SUBJ_170 spamg
#counts   SARE_SUB_LONG_SUBJ_170 224s/0h of 113373 corpus (92402s/20971h) 04/20/04
#hist     SARE_SUB_LONG_SUBJ_170 04/17/2004 - Created _140 rule based on large corpus (safety cushion above 130 which hit 0 ham in test corpus).
#hist     SARE_SUB_LONG_SUBJ_170 However, _140 hits 3 auto-generated auto-response ham on alternate corpus. So score that low, and score _170 high.

# SARE_SUB_PAREN_NUM
# Fires when the Subject contains two parenthesised numbers of 1-3 digits each,
# e.g. "(12) some text (34)".  The leading negative lookahead refuses a match
# that would begin at text shaped like "(ddd) ddd-dddd (ddd)", i.e. a pair of
# US-style telephone numbers.  The pattern is case-sensitive (no /i), which is
# irrelevant here because it contains no letters.
header    SARE_SUB_PAREN_NUM Subject =~ /(?!\(\d{3}\)[- ]?\d{3}-\d{4}\s+\(\d{3}\))\(\d{1,3}\).*\(\d{1,3}\)/
describe  SARE_SUB_PAREN_NUM Subject contains (00)Subject(00)
# The type tag after the score must be a '#' comment: the score directive only
# accepts numeric values, so a bare "- type=spamg" makes the whole line invalid
# and the intended score is never applied.
score     SARE_SUB_PAREN_NUM 1.666 # type=spamg
#stype    SARE_SUB_PAREN_NUM spamg
#counts   SARE_SUB_PAREN_NUM 371s/0h of 113393 corpus (92421s/20972h) 04/18/04
#hist     SARE_SUB_PAREN_NUM 04/02/2004 - http://www.rulesemporium.com/rules/99_FVGT_subject.cf
#hist     SARE_SUB_PAREN_NUM 04/20/2004 - added exclusion for USA telephone numbers in subject.
# SARE_SUB_PCT_LETTER
# A percent sign immediately followed by one ASCII letter (either case).
header    SARE_SUB_PCT_LETTER Subject =~ /%[A-Z]{1}/i
describe  SARE_SUB_PCT_LETTER Subject contains random-text spamsign
score     SARE_SUB_PCT_LETTER 1.666
#counts   SARE_SUB_PCT_LETTER 671s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max      SARE_SUB_PCT_LETTER 1282s/0h of 125097 corpus (104914s/20183h) 04/02/04

# SARE_SUB_RAND_LETTRS2
# A stand-alone two-letter word: first letter from [cjnqstuvwxz], second from
# [bgjqu], case-insensitive.  The lookaheads exclude words starting with
# NB, NG, NJ, NU, SU and VB.
header    SARE_SUB_RAND_LETTRS2 Subject =~ /(?!N[BGJU])(?!SU)(?!VB)\b[cjnqstuvwxz][bgjqu]\b/i
describe  SARE_SUB_RAND_LETTRS2 Subject contains random-text spamsign
score     SARE_SUB_RAND_LETTRS2 1.666
#Ham      SARE_SUB_RAND_LETTRS2 exclude: NG = abbr No Good, NJ = New Jersey, SU = subjective universe
#counts   SARE_SUB_RAND_LETTRS2 585s/0h of 113373 corpus (92402s/20971h) 04/20/04

# SARE_SUB_RAND_LETTRS2B
# A stand-alone three-letter word beginning with "x" followed by two letters
# from [bfghjklnpqrstwz], case-insensitive.  "xls" and "xsl" are excluded.
header    SARE_SUB_RAND_LETTRS2B Subject =~ /(?!xls)(?!xsl)\bx[bfghjklnpqrstwz][bfghjklnpqrstwz]\b/i
describe  SARE_SUB_RAND_LETTRS2B Subject contains random-text spamsign
score     SARE_SUB_RAND_LETTRS2B 1.055
#counts   SARE_SUB_RAND_LETTRS2B 90s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max      SARE_SUB_RAND_LETTRS2B 113s/0h of 97268 corpus (79437s/17831h) 01/24/04

# SARE_SUB_RAND_LETTRS4
# A stand-alone four-letter word: one of e/i/o/u followed by three letters
# from [bfghjklnpqrtwz], case-insensitive.  Words starting with "uh" are
# excluded by the lookahead.
header    SARE_SUB_RAND_LETTRS4 Subject =~ /(?!uh+)\b[eiou][bfghjklnpqrtwz]{3}\b/i
describe  SARE_SUB_RAND_LETTRS4 Subject contains random-text spamsign
score     SARE_SUB_RAND_LETTRS4 1.616
#counts   SARE_SUB_RAND_LETTRS4 189s/0h of 113373 corpus (92402s/20971h) 04/20/04
#ham      SARE_SUB_RAND_LETTRS4 lots of ham with leading A

# SARE_SUB_RAND_LETTRS5
# A stand-alone five-letter word made only of letters from
# [bcdfghjklnpqrvwz], case-insensitive.  "LPRng" is excluded.
header    SARE_SUB_RAND_LETTRS5 Subject =~ /(?!LPRng)\b[bcdfghjklnpqrvwz]{5}\b/i
describe  SARE_SUB_RAND_LETTRS5 Subject contains random-text spamsign
score     SARE_SUB_RAND_LETTRS5 1.666
#counts   SARE_SUB_RAND_LETTRS5 387s/0h of 113373 corpus (92402s/20971h) 04/18/04
#max      SARE_SUB_RAND_LETTRS5 473s/0h of 97268 corpus (79437s/17831h) 01/24/04
#hist     SARE_SUB_RAND_LETTRS5 04/20/2004 - Added LPRng exclusion, to avoid single ham hit.
# SARE_SUB_RAND_UC
# Case-sensitive, whole-subject pattern:
#   "Re:" + one whitespace char + two or more uppercase letters + ","
#   + optional "<digits>,"
#   + three or more lowercase words, each preceded by a single whitespace char
#     and optionally followed by one of . , : ; ' ! ? -
#   + optional "[<digits>]" + optional trailing whitespace.
header    SARE_SUB_RAND_UC Subject =~ /^Re:\s[A-Z]{2,},(\d+,)?(\s[a-z]+[.,:;'!?-]?){3,}(\[\d+\])?\s*$/
describe  SARE_SUB_RAND_UC Subject contains random-text spamsign
score     SARE_SUB_RAND_UC 2.222
#counts   SARE_SUB_RAND_UC 7612s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max      SARE_SUB_RAND_UC 8372s/0h of 125097 corpus (104914s/20183h) 04/02/04

# EOF

# SARE "General Subject" Ruleset for SpamAssassin
# Version: 00.03.00
# Created: 04/19/2004
# Modified: 04/21/2004
# Changes: Beta Release
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj1.cf
#
# Related rules files:
# 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham
# 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900
# 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails
# 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use
#
# Unlike 70_sare_genlsubj0.cf, this ...genlsubj1.cf ruleset contains rules which DO or in the
# past have hit ham during SARE mass-check tests. The S/O calculated by SA's hit-frequencies
# scripts are all at or above 0.900.
# Systems which are excessively sensitive to false positives may want to exclude this ruleset,
# pick and choose among its rules, or lower their scores.
#
# SARE_SUB_BETTER needs to have more common phrases added to exclusion list
# Please send copies of ham hit by these rules to genlsubj@rulesemporium.com

# Layout of every rule below:
#   header   NAME Subject =~ /regex/   -- the test, applied to the Subject header
#   describe NAME ...                  -- human-readable text for the rule
#   score    NAME n.nnn                -- score assigned to the rule
# Lines starting with #counts / #max / #total / #ham / #hist / #stype are
# bookkeeping comments recording hit counts (spam "s" / ham "h") and known
# false positives; they are comments only.
# Rule names beginning with "__" are used only as inputs to a "meta" rule here
# and have no describe/score of their own in this file.

# Category: Adult, porn
header SARE_SUB_NEW_CLUB Subject =~ /(?:dat(?:e|ing)|free|havana|jupiter|leads|new)\W*club|club\W*(?:casino|date|dice)/i
describe SARE_SUB_NEW_CLUB Subject contains spammer subject - adult or porn
score SARE_SUB_NEW_CLUB 0.675
#counts SARE_SUB_NEW_CLUB 43s/1h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_NEW_CLUB Beverages & More

header SARE_SUB_PARIS_HILTON Subject =~ /\b(?:(?:P|Paris)\W*h[i|]lt[o0]n|hilton\W*sister)/i
describe SARE_SUB_PARIS_HILTON Subject contains spammer subject - adult or porn
score SARE_SUB_PARIS_HILTON 0.185
#counts SARE_SUB_PARIS_HILTON 98s/0h of 113374 corpus (92402s/20972h) 04/18/04
#counts SARE_SUB_PARIS_HILTON 2s/2h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_PARIS_HILTON 100s/2h of 134581 corpus

# Category: Black market items, services, activities, scams, frauds

# Category: Business
header SARE_SUB_BOSS Subject =~ /(?:y.?[o0].?u.?r ([o0]wn )?|fired my)\W*b[o0]ss/i
describe SARE_SUB_BOSS Subject contains spammer subject - business
score SARE_SUB_BOSS 1.588
#counts SARE_SUB_BOSS 186s/0h of 113393 corpus (92421s/20972h) 04/18/04
#counts SARE_SUB_BOSS 0s/1h of 21207 corpus (6695s/14512h) 04/20/04

header SARE_SUB_HEALTH_CARE Subject =~ /(?:Health\W*(?:Care\W*(?:aff[o0]rdable|f[o0]r\W*(?:all\W*americans|y[o0]u\W*and\W*y[o0]ur\W*family)|insurance\W*leads|savings|sect[o0]r|st[o0]ck|technology|y[o0]u\W*can\W*aff[o0]rd)|risk\W*reducti[o0]n|screening.{1,10}testing|st[o0]re|update)|(?:(?:impr[o0]ve|restore)\W*y[o0]ur|l[o0]se\W*p[o0]unds\W*f[o0]r|pay.{1,30}too\W*much.{0,30}|sexual)\W*health|safe\W*and\W*healthy|with[o0]ut\W*health\W*insurance|(?:affordable|need|new)\W*health\W*(?:care|insurance|plan)|online\W*health.{1,30}store|(?:prevent|reduce|solve)\W*(?:your\W*)?health\W*(?:problems|risks)|american\W*medical\W*directory|the\W*(?:(?:daily|natural)\W*)?health\W*(?:e.?mail|e.?zine|chann?el|magazine|newsletter)|younger\W*and\W*healthier|health\W*is\W*your\W*wealth)/i
describe SARE_SUB_HEALTH_CARE Subject contains spammer subject - business
score SARE_SUB_HEALTH_CARE 1.366
#counts SARE_SUB_HEALTH_CARE 292s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_HEALTH_CARE "Health Screening and Testing" (1) drugstore.com

header SARE_SUB_INCOME Subject =~ /\bincome\b/i
describe SARE_SUB_INCOME Subject contains spammer subject - business
score SARE_SUB_INCOME 0.816
#counts SARE_SUB_INCOME 188s/3h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_INCOME auto-responder income tax forms from CA FTB (2), personal email (1)

# Category: Credit, debt, lending, mortgage, borrowing, investment, financing
header SARE_SUB_MORTGAGE Subject =~ /(?:(?:\%|2nd|best|competitive|easy|EZ|fixed|for\W*your|great|home|instant|loans\W*and|lowest|\bno|online|rate|second)..?mortgage|mortgages?\W*(?:broker|gone|hunt|interest|lead|loan|manager|notif(?:ication|y)|quote|r.?[a\@].?t.?e.?s?|refinanc(?:e|ing)|shopping|too\W*high|verification)|mortgage.{1,30}reduced|(?:\$\d|compete|find|pay(ing|ment)|qualify|search|shopping).{1,30}mortgage)/i
describe SARE_SUB_MORTGAGE Subject contains spammer subject - credit or money
score SARE_SUB_MORTGAGE 1.133
#counts SARE_SUB_MORTGAGE 520s/4h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_MORTGAGE "mortgage manager" from user's bank (4)

# Category: Marketing, Pricing, Selling, Buying
header SARE_SUB_AS_LOW_AS Subject =~ /(?:as\W*low\W*as|(?:at\W*low|smart)\W*prices?|\blow(?:est)?.?(?:cost|point|price[ds]?|rates?))/i
describe SARE_SUB_AS_LOW_AS Subject contains spammer subject - marketing
score SARE_SUB_AS_LOW_AS 1.591
#counts SARE_SUB_AS_LOW_AS 1679s/8h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_AS_LOW_AS (8 verified:) Marriott Rewards, Drugstore.com, Beverages & More, United Airlines, TiVo

header SARE_SUB_GASOLINE Subject =~ /\bgas(oline)?\b/i
describe SARE_SUB_GASOLINE Subject contains spammer subject - marketing
score SARE_SUB_GASOLINE 0.752
#counts SARE_SUB_GASOLINE 248s/6h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_GASOLINE 336s/5h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_GASOLINE gas prices, gas chambers

header SARE_SUB_GIFT_CARD Subject =~ /Gift\W*Card/i
describe SARE_SUB_GIFT_CARD Subject contains spammer subject - marketing
score SARE_SUB_GIFT_CARD 0.758
#stype SARE_SUB_GIFT_CARD spam
#counts SARE_SUB_GIFT_CARD 73s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_GIFT_CARD Clark for President newsletter
#hist SARE_SUB_GIFT_CARD Obfu tested 01/24/04 -- no obfu

header SARE_SUB_PAYING Subject =~ /Paying/i
describe SARE_SUB_PAYING Subject contains spammer subject - marketing
score SARE_SUB_PAYING 0.784
#counts SARE_SUB_PAYING 206s/4h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_PAYING 229s/4h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_PAYING paying for classes, paying your taxes, paying attention

# Category: Medical
header SARE_SUB_MEDICAT Subject =~ /medication/i
describe SARE_SUB_MEDICAT Subject contains spammer subject - medical
score SARE_SUB_MEDICAT 1.666
#counts SARE_SUB_MEDICAT 540s/1h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_MEDICAT 830s/1h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_MEDICAT personal medication info

header SARE_SUB_MEDS Subject =~ /\bmeds\b/i
describe SARE_SUB_MEDS Subject contains spammer subject - medical
score SARE_SUB_MEDS 1.666
#counts SARE_SUB_MEDS 1465s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_MEDS About.com Health

# Category: Generic words and phrases
# SARE_SUB_BETTER fires when the Subject contains "better" (any case) and none
# of the three known-ham phrases (__SARE_SUB_BETTER_H1..H3) is present.
header __SARE_SUB_BETTER Subject =~ /BETTER/i
header __SARE_SUB_BETTER_H1 Subject =~ /better late than never/i
header __SARE_SUB_BETTER_H2 Subject =~ /try to do better/i
header __SARE_SUB_BETTER_H3 Subject =~ /better fit/i
meta SARE_SUB_BETTER __SARE_SUB_BETTER && !__SARE_SUB_BETTER_H1 && !__SARE_SUB_BETTER_H2 && !__SARE_SUB_BETTER_H3
describe SARE_SUB_BETTER Subject contains likely spammer phrase or word
score SARE_SUB_BETTER 1.160
#counts SARE_SUB_BETTER 871s/4h of 113305 corpus (92399s/20906h) 04/22/04
#ham SARE_SUB_BETTER about.com newsletters, mysurvey.com, "better" web page mentioned in mailing list, valid marketing emails, "better late than never"

header SARE_SUB_BIGGER Subject =~ /bigger/i
describe SARE_SUB_BIGGER Subject contains likely spammer phrase or word
score SARE_SUB_BIGGER 1.205
#counts SARE_SUB_BIGGER 234s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_BIGGER About.com Health

header SARE_SUB_FOREVER Subject =~ /for\W*?ever\b/i
describe SARE_SUB_FOREVER Subject contains likely spammer phrase or word
score SARE_SUB_FOREVER 0.655
#counts SARE_SUB_FOREVER 146s/0h of 111251 corpus (90481s/20770h) 04/15/04
#counts SARE_SUB_FOREVER 13s/9h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_FOREVER 159s/9h of 132458 corpus

header SARE_SUB_INCHES Subject =~ /(?:(?:\d.*|add?|enlarge|gain|in.?crease|lose|more|shed)(?:ed|s)?\b.{1,30}\binch(?:es)?\b|inches\W*added)/i
describe SARE_SUB_INCHES Subject contains likely spammer phrase or word
score SARE_SUB_INCHES 1.666
#counts SARE_SUB_INCHES 441s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_INCHES price of a "7 inch saw blade"

# The leading (?!looking for help) prevents a match from starting at the
# literal text "looking for help".
header SARE_SUB_LOOKING_FOR Subject =~ /(?!looking for help)(?:(?:they\W*are|We(?:\W*a|')re|wives|y[o0\@]u(?:(?:\W*a|')[rv]e(?:\W*been)?))\W*l[o0\@][o0\@]king\W*f[o0\@]r|l[o0\@][o0\@]king\W*f[o0\@]r.{0,30}(?:career|c[o0\@]nsultants|cust[o0\@]mers|empl[o0\@]yees|entrepreneur|free\W*lunch|help|h[o0\@]nest|investment|loan|l[o0\@]ve|med(?:s|icati[o0\@]ns)|m[o0\@]rtgage|pe[o0\@]ple|perfect|sex|s[o0\@]me[o0\@]ne|work|y[o0\@]u))\b/i
describe SARE_SUB_LOOKING_FOR Subject contains likely spammer phrase or word
score SARE_SUB_LOOKING_FOR 0.825
#counts SARE_SUB_LOOKING_FOR 92s/1h of 113305 corpus (92399s/20906h) 04/22/04
#ham SARE_SUB_LOOKING_FOR "Looking for artist for CD artwork"

header SARE_SUB_MINUTES Subject =~ /\d.?minutes/i
describe SARE_SUB_MINUTES Subject contains likely spammer phrase or word
score SARE_SUB_MINUTES 0.787
#counts SARE_SUB_MINUTES 123s/0h of 113374 corpus (92402s/20972h) 04/18/04
#counts SARE_SUB_MINUTES 2s/2h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_MINUTES 125s/2h of 134581 corpus

header SARE_SUB_MONEY Subject =~ /(?:(?:save|make)[ -].{0,30}money[ -](?:in|on)|(?:free|grant|saving|with our|worth|(?:claim|keep) your) money|money machine)/i
describe SARE_SUB_MONEY Subject contains likely spammer phrase or word
score SARE_SUB_MONEY 0.772
#counts SARE_SUB_MONEY 78s/1h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_MONEY 102s/1h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_MONEY business email

# Obfuscation rule for "online": one group of look-alike characters per letter
# (o, n, l, i, n, e), each pair separated by at most one character from the
# punctuation/control class. The leading (?!\bONLINE\b) prevents a match from
# starting at the plainly spelled word.
header SARE_SUB_ONLINE_OB1 Subject =~ /(?!\bONLINE\b)(?:\b[o0]|\B(?:[\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
describe SARE_SUB_ONLINE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_ONLINE_OB1 1.666 # type=obfu
#stype SARE_SUB_ONLINE_OB1 obfu
#counts SARE_SUB_ONLINE_OB1 363s/7h of 113272 corpus (92366s/20906h) 04/23/04

header SARE_SUB_SEEN_ON Subject =~ /as\W*seen\W*on\W*(20\/20|abc|cbs|cnn|dateline|google|oprah|television|tv)/i
score SARE_SUB_SEEN_ON 0.797
describe SARE_SUB_SEEN_ON Subject contains likely spammer phrase or word
#counts SARE_SUB_SEEN_ON 87s/1h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_SEEN_ON drugstore.com

# Obfuscated "tion": the four leading negative lookaheads stop a match from
# starting at "tion", "t-ion", "ti o n" (optional spaces) or "t lon".
header SARE_SUB_TION_OB1 Subject =~ /(?!tion)(?!t-ion)(?!ti ?o ?n)(?!t lon)t[\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]/i
describe SARE_SUB_TION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_TION_OB1 1.666 # type=obfu
#stype SARE_SUB_TION_OB1 obfu
#counts SARE_SUB_TION_OB1 824s/1h of 113373 corpus (92402s/20971h) 04/18/04
# ham: t1-on (a t1 on something)

# Category: Technical spamsign
# These rules are case-sensitive (no /i) except SARE_SUB_COMMA_LEAD.
header SARE_SUB_BAD_PUNCT Subject =~ /\w[\;\)\~\"]\w/
describe SARE_SUB_BAD_PUNCT Subject contains parentheses or similar chars apparently embedded within a word
score SARE_SUB_BAD_PUNCT 0.943
#counts SARE_SUB_BAD_PUNCT 2167s/30h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_BAD_PUNCT avoid: _ -- used in URLs which can be valid in subjects
#ham SARE_SUB_BAD_PUNCT avoid: : -- FP with re:subject (no space)
#ham SARE_SUB_BAD_PUNCT avoid: ( -- often used for plural(s)

header SARE_SUB_COMMA Subject =~ /\w,\w/
describe SARE_SUB_COMMA Subject contains comma apparently embedded within a word
score SARE_SUB_COMMA 0.500
#stype SARE_SUB_COMMA max:0.5
#counts SARE_SUB_COMMA 1465s/32h of 113393 corpus (92421s/20972h) 04/18/04

# Matches a single leading word followed by ", " unless that word is one of
# the listed salutations/keywords (greetings, help, please, reminder, Surplus).
header SARE_SUB_COMMA_LEAD Subject =~ /(?!(greetings|help|please|reminder|Surplus), )^\w+, /i
describe SARE_SUB_COMMA_LEAD Subject contains one word, possibly name or salutation, then comma.
score SARE_SUB_COMMA_LEAD 0.923
#counts SARE_SUB_COMMA_LEAD 3182s/47h of 113374 corpus (92402s/20972h) 04/18/04

# The lookahead skips " :o", " :s", " :v", " :d", " :p" (smilies, see #ham notes).
header SARE_SUB_LEAD_PUNCT Subject =~ /(?! :[osvdp]\b) [\;\:\)\,]\w/
describe SARE_SUB_LEAD_PUNCT Subject contains word which begins with non-word character
score SARE_SUB_LEAD_PUNCT 1.036
#counts SARE_SUB_LEAD_PUNCT 433s/4h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_LEAD_PUNCT avoid "(" -- used for "(no subject)"
#ham SARE_SUB_LEAD_PUNCT avoid "." -- Used for things like "... .zip file"
#ham SARE_SUB_LEAD_PUNCT ham: " :RE: ", paypal response (2)
#ham SARE_SUB_LEAD_PUNCT avoid ":o" and other smilies

header SARE_SUB_LONG_SUBJ_140 Subject =~ /.{140,}/
describe SARE_SUB_LONG_SUBJ_140 Subject is excessively long -- more than 139 chars
score SARE_SUB_LONG_SUBJ_140 1.000 # type=max:1.0
#stype SARE_SUB_LONG_SUBJ_140 max:1.0
#counts SARE_SUB_LONG_SUBJ_140 302s/0h of 113393 corpus (92421s/20972h) 04/18/04
#counts SARE_SUB_LONG_SUBJ_140 17s/3h of 16618 corpus (14409s/2209h)
#total SARE_SUB_LONG_SUBJ_140 319s/3h of 130011 corpus

# EOF
# SARE "General Subject" Ruleset for SpamAssassin
# Version: 00.03.00
# Created: 04/19/2004
# Modified: 04/21/2004
# Changes: Beta Release
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj3.cf
#
# Related rules files:
# 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham
# 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900
# 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails
# 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use
#
# This ruleset contains only rules which test for obfuscation within subject headers.
#
# This subset of SARE_SUB_*_OB* rules does not hit any emails during SARE mass-check testing
# against current corpora.
#
# Therefore, systems which are very sensitive to SpamAssassin overhead may want to exclude
# this ruleset to avoid its regex overhead.
# Obfuscation rules in this section share one construction: one group of
# look-alike characters/byte sequences per letter of the target word, with at
# most one character from a punctuation/control class allowed between letters.
# The leading negative lookahead prevents a match from starting at the plainly
# spelled word, so only disguised spellings hit.

# Category: Education, Education-related scams
header SARE_SUB_DOCTORATE_OB1 Subject =~ /(?!Doctorate)(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)/i
describe SARE_SUB_DOCTORATE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_DOCTORATE_OB1 1.666 # type=obfu - 0s/0h of 91714 corpus (74113s/17601h) 01/24/04
#counts SARE_SUB_DOCTORATE_OB1 0s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Simpler variant: the plain letters of "doctorate" with at most one arbitrary
# character between consecutive letters.
header SARE_SUB_DOCTORATE_OB2 Subject =~ /(?!Doctorate)D.?o.?c.?t.?o.?r.?a.?t.?e/i
describe SARE_SUB_DOCTORATE_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_DOCTORATE_OB2 1.666 # type=obfu - 0s/0h of 91714 corpus (74113s/17601h) 01/24/04
#counts SARE_SUB_DOCTORATE_OB2 0s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Category: Marketing, Pricing, Selling, Buying
# Excludes the plain spellings "website", "web site" and "web-site".
header SARE_SUB_WEBSITE_OB1 Subject =~ /(?!website)(?!web site)(?!web-site)(?:\bw|\B(?:\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)s?\b/i
describe SARE_SUB_WEBSITE_OB1 Subject contains spammer subject - marketing
score SARE_SUB_WEBSITE_OB1 1.666 # type=obfu
#stype SARE_SUB_WEBSITE_OB1 obfu
#counts SARE_SUB_WEBSITE_OB1 0s/0h of 113373 corpus (92402s/20971h) 04/18/04

# Category: Medical
# The "h" group previously ended in "\xD5\xB0]" -- the stray "]" is a literal
# in a Perl regex, so that alternative could only match when followed by "]".
# It is now "\xD5\xB0", in line with the other alternatives of the group.
header SARE_SUB_PHYSICIAN_OB1 Subject =~ /(?!physicians?)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[y\xA5\xDD\xFD]|\xC5[\xB6-\xB8]|\xCE\x8E|\xCE\xA5|\xCE\xA8|\xCE\xAB|\xCE\xB3|\xD0\xA3|\xD1\x83|\xD1\x9E|\xD2[\xAE-\xB1])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
describe SARE_SUB_PHYSICIAN_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PHYSICIAN_OB1 1.666 # type=obfu
#stype SARE_SUB_PHYSICIAN_OB1 obfu
#counts SARE_SUB_PHYSICIAN_OB1 0s/0h of 113272 corpus (92366s/20906h) 04/23/04

# Category: Software
# Plain letters of "download" with at most one arbitrary character between
# consecutive letters; the lookahead skips the plainly spelled word.
header SARE_SUB_DOWNLOAD_OB2 Subject =~ /(?!DOWNLOAD)\bd.?o.?w.?n.?l.?o.?a.?d/i
describe SARE_SUB_DOWNLOAD_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_DOWNLOAD_OB2 1.666
#counts SARE_SUB_DOWNLOAD_OB2 0s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Category: Generic words and phrases
header SARE_SUB_CONFID_OB1 Subject =~ /(?!confidential)c.?o.?n.?f.?i.?d.?e.?n.?t.?i.?a.?l/i
describe SARE_SUB_CONFID_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CONFID_OB1 1.666 # type=obfu
#stype SARE_SUB_CONFID_OB1 obfu
#counts SARE_SUB_CONFID_OB1 0s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_CONFID_OB1 3s/0h of 91714 corpus (74113s/17601h) 01/23/04

# Look-alike-character variant of the rule above: one group of substitute
# characters/byte sequences per letter, with at most one punctuation/control
# character between letters.
# The "f" group previously ended in "\xD2[\x92-\x93]]"; the stray "]" is a
# literal in a Perl regex, so that alternative could only match when followed
# by "]". The extra bracket has been removed.
header SARE_SUB_CONFID_OB2 Subject =~ /(?!confidential)(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:f|\xC5\xBF|\xC6\x92|\xD2[\x92-\x93])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))/i
describe SARE_SUB_CONFID_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_CONFID_OB2 1.666 # type=obfu - 2s/0h of 91714 corpus (74113s/17601h) 01/23/04
#counts SARE_SUB_CONFID_OB2 0s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_CONFID_OB2 2s/0h of 91714 corpus (74113s/17601h) 01/23/04

# The "v" group previously ended in "\xCE\xBD]"; the stray literal "]" has been
# removed so the byte sequence matches on its own like the other alternatives.
header SARE_SUB_PROVEN_OB1 Subject =~ /(?!\bproven\b)(?:\bp|\B(?:[\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[vu]|\\\/|\xCE\xBD)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:n\b|(?:[\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)\B)/i
describe SARE_SUB_PROVEN_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PROVEN_OB1 1.666 # type=obfu
#stype SARE_SUB_PROVEN_OB1 obfu
#counts SARE_SUB_PROVEN_OB1 0s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_PROVEN_OB1 17s/0h of 91714 corpus (74113s/17601h) 01/24/04

# This rule had no describe line; one is added using the same text as the
# sibling *_OB1 rules.
header SARE_SUB_STRONG_OB1 Subject =~ /(?!\bstrong\b)(?:\b[s5]|\B(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]\b|(?:\xC4[\x9C-\xA3])\B)/i
describe SARE_SUB_STRONG_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_STRONG_OB1 1.666 # type=obfu
#stype SARE_SUB_STRONG_OB1 obfu
#counts SARE_SUB_STRONG_OB1 0s/0h of 113272 corpus (92366s/20906h) 04/23/04

# EOF
# SARE "General Subject" Ruleset for SpamAssassin
# Version: 00.03.00
# Created: 04/19/2004
# Modified: 04/21/2004
# Changes: Beta Release
# License: Artistic - see http://www.rulesemporium.com/license.txt
# Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com
# Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj3.cf
#
# Related rules files:
# 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham
# 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900
# 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails
# 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use
#
# WARNING - WARNING - WARNING - WARNING
#
# This ruleset contains a subset of SARE_SUB_* rules which hit a
# significant amount of ham during SARE mass-check tests.
#
# Systems which are very sensitive to false positives should probably NOT install
# this ruleset.
#
# hit-frequencies results for these rules are shown below.

# Layout of every rule below:
#   header   NAME <Header> =~ /regex/  -- the test, applied to the named header
#   meta     NAME <boolean expression> -- combines other rules by name
#   describe NAME ...                  -- human-readable text for the rule
#   score    NAME n.nnn                -- score assigned to the rule
# #counts / #max / #total / #ham / #hist / #note(s) / #stype lines are
# bookkeeping comments (spam "s" / ham "h" hit counts, known false positives).

# Category: Black market items, services, activities, scams, frauds
header SARE_SUB_COPYDVD Subject =~ /(?:(?:burn|c[o0]py).{0,30}dvd|dvd.{0,30}c[o0]p(?:y|ier)|dvd magic)/i
describe SARE_SUB_COPYDVD Subject contains spammer subject - black market or scam
score SARE_SUB_COPYDVD 0.350
#counts SARE_SUB_COPYDVD 46s/0h of 111251 corpus (90481s/20770h) 04/15/04
#counts SARE_SUB_COPYDVD 17s/9h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_COPYDVD 63s/9h of 132458 corpus

header SARE_SUB_DVD Subject =~ /\b(?:DVD)\b/i
describe SARE_SUB_DVD Subject contains spammer subject - black market or scam
score SARE_SUB_DVD 0.234
#counts SARE_SUB_DVD 142s/0h of 113374 corpus (92402s/20972h) 04/18/04
#counts SARE_SUB_DVD 129s/4h of 16618 corpus (14409s/2209h) 04/19/04
#counts SARE_SUB_DVD 53s/72h of 21207 corpus (6695s/14512h) 04/20/04
#total SARE_SUB_DVD 324s/76h of 151199 corpus

header SARE_SUB_NEED_REPLY Subject =~ /(?:(?:(?:appreciate|a?waiting(?:\W*for)?)\W*your|request|urgent)\W*(?:answer|assist|reply|response)|(?:answer|reply|response)\W*(?:needed|urgent))/i
describe SARE_SUB_NEED_REPLY Subject contains spammer subject - black market or scam
score SARE_SUB_NEED_REPLY 0.444
#counts SARE_SUB_NEED_REPLY 88s/10h of 113374 corpus (92402s/20972h) 04/18/04

# Category: Business
header SARE_SUB_ENTREPRENEUR Subject =~ /Entrepreneur/i
describe SARE_SUB_ENTREPRENEUR Subject contains spammer subject - business
score SARE_SUB_ENTREPRENEUR 0.166
#counts SARE_SUB_ENTREPRENEUR 9s/2h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_ENTREPRENEUR resume

# Category: Gambling, Lotto, Sweepstakes, Winnings, Losses
header SARE_SUB_CASINO Subject =~ /\bc[a\@]sin[o0]/i
describe SARE_SUB_CASINO Subject contains spammer subject - gambling
score SARE_SUB_CASINO 0.500 # type=max:0.5
#stype SARE_SUB_CASINO max:0.5
#counts SARE_SUB_CASINO 390s/15h of 113393 corpus (92421s/20972h) 04/18/04
#hist SARE_SUB_CASINO score max set to 0.5 to keep in line with other rules with similar hit rates

# The leading lookahead skips "win xp", "win 98", "win NT", "win s/w", etc.
# (see the #hist note about excluding Windows).
header SARE_SUB_WIN Subject =~ /(?!win (?:"?(xp|me|98|95|NT|200\w)"?|s\/?w))\b(?:win\b(?:\W+\w+|.{1,30}\b(?:\$|big|bike|car|cash|CD|cigarettes|computer|convertible|Dell|dollars|dvd|harley|home|hummer|iPod|jaguar|laptop|loan|lott(?:o|ery)|money|mortgage|play\W*station\W*|prize|[SX]UV|tivo|today|trip|TV|up\W*to|vacation))|(?:answer|click|help).{1,20}win)\b/i
describe SARE_SUB_WIN Subject contains spammer subject - gambling
score SARE_SUB_WIN 0.726
#counts SARE_SUB_WIN 277s/8h of 113373 corpus (92402s/20971h) 04/20/04
#ham SARE_SUB_WIN flipsidenewsletter.com, staples-deals.com, clark04.com (Clark 4 Pres), wish-bone.com
#hist SARE_SUB_WIN Apr 21 2004 - Better exclusion of Windows

# Category: Marketing, Pricing, Selling, Buying
header SARE_SUB_DOMAIN Subject =~ /(?:domain\W*(?:records.{1,30}CD|registration\W*contacts|the\W*net)|(?:,000|market to over|million|verify.{1,30}contact|your).{1,10}Domain|discount\W*domain\W*registration)/i
describe SARE_SUB_DOMAIN Subject contains spammer subject - marketing
score SARE_SUB_DOMAIN 0.606
#counts SARE_SUB_DOMAIN 64s/6h of 113305 corpus (92399s/20906h) 04/22/04
#notes SARE_SUB_DOMAIN avoid dupes with DOMAINS_CHEAP
#ham SARE_SUB_DOMAIN valid emails from registrars to registrants (6)

# __SARE_SUB_FREE is case-sensitive (no /i): it matches lowercase "free" only.
# NOTE(review): SUB_FREE_CAP, SUB_FREE_INSTANT and SUB_FREE_OFFER are not
# defined in this part of the file -- confirm they are provided elsewhere.
header __SARE_SUB_FREE Subject =~ /\bfree\b/
#counts __SARE_SUB_FREE 458s/20h of 113374 corpus (92402s/20972h) 04/18/04
meta SARE_SUB_FREE __SARE_SUB_FREE && !SUB_FREE_CAP && !SUB_FREE_INSTANT && !SUB_FREE_OFFER && !SARE_SUB_FREE_BANG
describe SARE_SUB_FREE Subject contains spammer subject - marketing
score SARE_SUB_FREE 0.500 # type=max:0.5
#stype SARE_SUB_FREE max:0.5
#counts SARE_SUB_FREE 450s/20h of 113374 corpus (92402s/20972h) 04/18/04
#hist SARE_SUB_FREE score max set to 0.5 to keep in line with other rules with similar hit rates

header SARE_SUB_FREE_BANG Subject =~ /\bFree\!/i
describe SARE_SUB_FREE_BANG Subject contains spammer subject - marketing
score SARE_SUB_FREE_BANG 0.668
#counts SARE_SUB_FREE_BANG 61s/2h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_FREE_BANG 76s/2h of 97268 corpus (79437s/17831h) 01/24/04
#ham SARE_SUB_FREE_BANG Dell, Visicom Media

# The literal "{" after "y[op]ur\W*" is now written "\{" (same match; an
# unescaped literal brace is deprecated in newer Perl releases).
# NOTE(review): inside "on\W*(?:...)" the group "med(?:s|ication|prescriptions|
# repairs|term|that|...)" looks like it swallowed alternatives that were meant
# to be siblings of "med..." (e.g. "prescriptions"); left as-is pending
# confirmation of the intended phrases.
header SARE_SUB_SAVE_UP_TO Subject =~ /(?:save.?(?:\$(?:\$|,)|\d+(?:\%|.?percent)|as\W*much\W*as|big|fuel|hundred.?s|mnoey|money\W*(?:now|on\W*med)|more\W*now|now\W*with|on\W*(?:all|canadian|cigarettes|gas|generic|life|med(?:s|ication|prescriptions|repairs|term|that|y[op]ur\W*\{?|cable|term))|over|(?:you\W*)?thou[sz]ands|time\W*(?:and|\&)\W*(?:money|dollars)|up\W*(?:to|\d+\%)|your\W*(?:computer|time))|(?:(?:but\W*you'll|last\W*chance|you\W*want)\W*to|(?:early|now|order\W*here)\W*and\W*)\W*save|\b(?:you|u)\W*can\W*save\W*on)/i
describe SARE_SUB_SAVE_UP_TO Subject contains spammer subject - marketing
score SARE_SUB_SAVE_UP_TO 0.746
#stype SARE_SUB_SAVE_UP_TO spam
#counts SARE_SUB_SAVE_UP_TO 655s/18h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_SAVE_UP_TO "save up to" (12), "save big" (1), "save nn%" (2), "last chance to save" (2), "you want to save" (1)

# Category: Generic words and phrases
header SARE_SUB_BETTER_OB1 Subject =~ /(?!BETTER)b.?e.?t.?t.?e.?r/i
describe SARE_SUB_BETTER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_BETTER_OB1 0.100 # max:0.1
#stype SARE_SUB_BETTER_OB1 max:0.1
#counts SARE_SUB_BETTER_OB1 28s/14h of 111251 corpus (90481s/20770h) 04/15/04
#ham SARE_SUB_BETTER_OB1 misspelled: bettter (14)

# SARE_SUB_BRKING_NEWS fires on "breaking news" unless a Received header
# matches the Gannett exclusion. The exclusion pattern was "gannet\.com",
# which cannot match "gannett.com" (the spelling used in the rule name); the
# second "t" is now optional so both spellings are excluded.
header __SARE_RCVD_GANNETT Received =~ /gannett?\.com/
header __SARE_SUB_BRKING_NEWS Subject =~ /breaking news/i
meta SARE_SUB_BRKING_NEWS __SARE_SUB_BRKING_NEWS && !__SARE_RCVD_GANNETT
describe SARE_SUB_BRKING_NEWS Subject contains likely spammer phrase or word
score SARE_SUB_BRKING_NEWS 0.662
#counts SARE_SUB_BRKING_NEWS 135s/6h of 113374 corpus (92402s/20972h) 04/18/04
#note SARE_SUB_BRKING_NEWS avoid hits on valid news agencies/channels

header SARE_SUB_CARTRIDGE Subject =~ /Cartridge/i
describe SARE_SUB_CARTRIDGE Subject contains likely spammer phrase or word
score SARE_SUB_CARTRIDGE 0.500
#counts SARE_SUB_CARTRIDGE 63s/6h of 113374 corpus (92402s/20972h) 04/18/04

# NOTE(review): __SARE_SUB_CONFID_W and SARE_SUB_CONFID_P are not defined in
# this part of the file -- confirm they are provided elsewhere.
meta SARE_SUB_CONFID_W __SARE_SUB_CONFID_W && !SARE_SUB_CONFID_P
describe SARE_SUB_CONFID_W Subject contains likely spammer phrase or word
score SARE_SUB_CONFID_W 0.287
#counts SARE_SUB_CONFID_W 57s/10h of 113393 corpus (92421s/20972h) 04/18/04
#ham SARE_SUB_CONFID_W organization's emails flagged: "- confidential"

header SARE_SUB_ONLINE Subject =~ /\bONLINE\b/i
describe SARE_SUB_ONLINE Subject contains likely spammer phrase or word
score SARE_SUB_ONLINE 0.500
#counts SARE_SUB_ONLINE 2857s/113h of 113272 corpus (92366s/20906h) 04/23/04

header SARE_SUB_PERFECT Subject =~ /\bperfect\W*(?:body|chart|credit|gift|home|loan|match|mate|pharmacy|soft\W*ware|solution|source|summer|time|tool|travel|valentine)/i
describe SARE_SUB_PERFECT Subject contains likely spammer phrase or word
score SARE_SUB_PERFECT 0.688
#counts SARE_SUB_PERFECT 72s/2h of 113374 corpus (92402s/20972h) 04/18/04
#ham SARE_SUB_PERFECT "perfect valentine" and "perfect match"

header SARE_SUB_SPECIAL_BANG Subject =~ /Special\!/i
describe SARE_SUB_SPECIAL_BANG Subject contains likely spammer phrase or word
score SARE_SUB_SPECIAL_BANG 0.608
#counts SARE_SUB_SPECIAL_BANG 19s/1h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_SPECIAL_BANG 39s/1h of 91714 corpus (74113s/17601h) 01/24/04

# The inner group used to read "(?:\$\d|are\W*you||database)"; the empty
# alternative made "webmaster" followed by ANY character match, which made the
# listed phrases irrelevant. The empty alternative has been removed.
# The #counts below were measured with the old, broader pattern.
header SARE_SUB_WEBMASTER Subject =~ /(?:webmaster(?:[,:\@\?]| - |.(?:\$\d|are\W*you|database))|(?:(?:hi|success)\W*|--\W*)webmaster)/i
describe SARE_SUB_WEBMASTER Subject contains likely spammer phrase or word
score SARE_SUB_WEBMASTER 0.100 # type=max:0.1
#stype SARE_SUB_WEBMASTER max:0.1
#counts SARE_SUB_WEBMASTER 52s/13h of 113309 corpus (92403s/20906h) 04/24/04

# EOF