# SARE "General Subject" Ruleset for SpamAssassin # Version: 00.04.00 # Created: 04/19/2004 # Modified: 04/23/2004 # Changes: Beta Release # License: Artistic - see http://www.rulesemporium.com/license.txt # Current Maintainer: Sare Ninja - genlsubj@rulesemporium.com # Current Home: http://www.rulesemporium.com/rules/70_SARE_SUB_genlsubj0.cf # # Related rules files: # 70_sare_genlsubj0.cf -- SARE_SUB_* rules that hit spam and no ham # 70_sare_genlsubj1.cf -- SARE_SUB_* rules that hit ham and S/O > 0.900 # 70_sare_genlsubj2.cf -- SARE_SUB_* obfu rules that hit no emails # 70_sare_genlsubj3.cf -- SARE_SUB_* rules that hit ham, but aggressive sites may want to use # Rules to be wary of: # # Financial and investment companies will want to lower some scores in the Business section. # Credit, mortgage, and similar companies will want to lower some scores in the Credit section. # Schools will want to lower some scores in the Education section. # Insurance companies will want to lower some scores in the Insurance section. # Marketing companies and services will want to lower some scores in the Marketing section. # Medical professionals and companies will want to lower some scores in the Medical section. # Real estate companies may want to lower some scores in the Real Estate section. 
# Software companies may want to lower scores in the Software section

# Layout: one directive per line.  `header` tests the Subject header against
# a Perl regex, `describe` supplies the report text, `score` sets the points.
# Lines starting with #stype / #counts / #max / #hist are plain comments that
# carry rule metadata (the NNs/NNh figures are presumably spam/ham hit counts
# from the maintainers' corpus runs -- confirm against the SARE tooling).

# Category: Adult, porn

header   SARE_SUB_ABOUT_TODAY Subject =~ /about\W*today/i
describe SARE_SUB_ABOUT_TODAY Subject contains spammer subject - adult or porn
score    SARE_SUB_ABOUT_TODAY 0.222
#counts  SARE_SUB_ABOUT_TODAY 1s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_ABOUT_TODAY 4s/0h of 97268 corpus (79437s/17831h) 01/24/04

header   SARE_SUB_ADULT_MOVIE Subject =~ /(?:[a\@]dult|free|h[a\@]rdc[o0]re.{0,30}|h[i|]lton.{0,30}|incest|n[o0]c[o0]st|sex|xxx).(?:dvd|feeds|pic|tv|m[o0]vie|video)/i
describe SARE_SUB_ADULT_MOVIE Subject contains spammer subject - adult or porn
score    SARE_SUB_ADULT_MOVIE 1.666 # type=spamp
#stype   SARE_SUB_ADULT_MOVIE spamp
#counts  SARE_SUB_ADULT_MOVIE 180s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_BEDROOM_SEC Subject =~ /(?:(?:Bedroom|family|seduction|sex.{0,30}).Secret|satisfaction.{1,30}bedroom|secret.{1,10}(?:dating|fantas(?:y|ies)|sex))/i
describe SARE_SUB_BEDROOM_SEC Subject contains spammer subject - adult or porn
score    SARE_SUB_BEDROOM_SEC 0.677
#counts  SARE_SUB_BEDROOM_SEC 22s/0h of 113374 corpus (92402s/20972h) 04/18/04

# The leading (?!...) lookahead keeps the plainly spelled word from matching,
# so only obfuscated spellings hit.
header   SARE_SUB_BOYS_OB1 Subject =~ /(?!\bboys?)\b[b8\xDF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[y\xA5\xDD\xFD][\W_]?[s5\$\xA7]?/i
describe SARE_SUB_BOYS_OB1 Subject contains obfuscated spammer word
score    SARE_SUB_BOYS_OB1 1.666 # type=obfu
#stype   SARE_SUB_BOYS_OB1 OBFU
#counts  SARE_SUB_BOYS_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04

header   SARE_SUB_GIRLS_OB1 Subject =~ /(?!\bgirls?)\b[g6][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[r\xAE][\W_]?[l1I\|\xA3][\W_]?[s5\$\xA7]?/i
describe SARE_SUB_GIRLS_OB1 Subject contains obfuscated spammer word
score    SARE_SUB_GIRLS_OB1 2.500 # type=obfu
#stype   SARE_SUB_GIRLS_OB1 OBFU
#counts  SARE_SUB_GIRLS_OB1 61s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Group made non-capturing to match the (?:...) convention used by the other rules.
header   SARE_SUB_HORNY_WOMEN Subject =~ /Horn(?:y|iest).{1,30}(?:alone|amateur|babe|swinger|wives|Women)/i
describe SARE_SUB_HORNY_WOMEN Subject contains spammer subject - adult or porn
score    SARE_SUB_HORNY_WOMEN 1.111 # type=spamp
#stype   SARE_SUB_HORNY_WOMEN spamp
#counts  SARE_SUB_HORNY_WOMEN 26s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_LONELY Subject =~ /\bl[o0]nely\b/i
describe SARE_SUB_LONELY Subject contains spammer subject - adult or porn
score    SARE_SUB_LONELY 0.683
#counts  SARE_SUB_LONELY 23s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_PERVERT Subject =~ /pervert/i
describe SARE_SUB_PERVERT Subject contains spammer subject - adult or porn
score    SARE_SUB_PERVERT 1.000 # type=spamp
#stype   SARE_SUB_PERVERT spamp
#counts  SARE_SUB_PERVERT 10s/0h of 111253 corpus (90483s/20770h) 04/15/04

# NOTE: a `|` was restored between the dog... and fat alternatives.  The list
# is alphabetical and the fused "dog(?:gy|gies)?fat" could only match "dogfat".
header   SARE_SUB_PORN Subject =~ /(?:(?:big|black|dog(?:gy|gies)?|fat|horse|massive|suck(?:ing)?|\d+\").?(?:bod(?:y|ies)|cock|dick)s?\b|violent.perv|\brap(?:e[sd]?|ing)\b)/i
describe SARE_SUB_PORN Subject contains spammer subject - adult or porn
score    SARE_SUB_PORN 0.688
#counts  SARE_SUB_PORN 24s/0h of 113373 corpus (92402s/20971h) 04/20/04
#hist    SARE_SUB_PORN Apr 21 2003 added \b at end of first regex segment to avoid ham hits

header   SARE_SUB_SEX_OB3 Subject =~ /\bS_?eks\b/i
describe SARE_SUB_SEX_OB3 Subject is intentionally misspelled word
score    SARE_SUB_SEX_OB3 2.500 # type=obfu
#stype   SARE_SUB_SEX_OB3 obfu
#counts  SARE_SUB_SEX_OB3 12s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_SEX_OB3 14s/0h of 97268 corpus (79437s/17831h) 01/24/04

header   SARE_SUB_SINGLES Subject =~ /(?:(?:Christian|find(?:\W\w+)?|local|meet(?:\W\w+)?|millions.of|mingle.with|photos.of|real|view(?:\W\w+)?).singles|singles.(?:date|looking))/i
describe SARE_SUB_SINGLES Subject contains spammer subject - adult or porn
score    SARE_SUB_SINGLES 0.877
#counts  SARE_SUB_SINGLES 58s/0h of 111217 corpus (90485s/20732h) 04/17/04

header   SARE_SUB_SOUL_MATE Subject =~ /soul\W*mate/i
describe SARE_SUB_SOUL_MATE Subject contains spammer subject - adult or porn
score    SARE_SUB_SOUL_MATE 0.611
#counts  SARE_SUB_SOUL_MATE 10s/0h of 111253 corpus (90483s/20770h) 04/15/04

header   SARE_SUB_XRATED Subject =~ /x-?rated/i
describe SARE_SUB_XRATED Subject contains spammer subject - adult or porn
score    SARE_SUB_XRATED 0.444
#counts  SARE_SUB_XRATED 8s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_XRATED 10s/0h of 91714 corpus (74113s/17601h) 01/23/04

# Category: Black market items, services, activities, scams, frauds
# Each rule is a case-insensitive regex test on the Subject header; the
# #stype/#counts/#max lines are comments recording rule metadata.

# ".?" between letters tolerates one filler character per gap.
header   SARE_SUB_BANNED_CD Subject =~ /b.?a.?n.?n.?e.?d.?c.?d/i
describe SARE_SUB_BANNED_CD Subject contains spammer subject - black market or scam
score    SARE_SUB_BANNED_CD 3.333 # type=spamggg
#stype   SARE_SUB_BANNED_CD spamggg
#counts  SARE_SUB_BANNED_CD 4s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_BANNED_CD 55s/0h of 63143 corpus

header   SARE_SUB_CARD_BILLED Subject =~ /(?:account|card).{1,30}(?:billed|charged)/i
describe SARE_SUB_CARD_BILLED Subject contains spammer subject - black market or scam
score    SARE_SUB_CARD_BILLED 1.111 # type=spamp
#stype   SARE_SUB_CARD_BILLED spamp
#counts  SARE_SUB_CARD_BILLED 23s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_FREE_PPV Subject =~ /(?:(?:f.?r.?e.?e+|pay(?:ing)?.for(?:.your)?|unlimited).?(?:PPV|p[a\@]y.?per.?view)|(?:PPV|p[a\@]y.?per.?view).{0,30}free|ppv\'s)/i
describe SARE_SUB_FREE_PPV Subject contains spammer subject - black market or scam
score    SARE_SUB_FREE_PPV 1.400
#counts  SARE_SUB_FREE_PPV 152s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_KICKBACK Subject =~ /kick.{0,2}back/i
describe SARE_SUB_KICKBACK Subject contains spammer subject - black market or scam
score    SARE_SUB_KICKBACK 0.166
#counts  SARE_SUB_KICKBACK 3s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_KICKBACK 4s/0h of 97268 corpus (79437s/17831h) 01/24/04

header   SARE_SUB_NAME_STAR Subject =~ /Name\W*A\W*Star/i
describe SARE_SUB_NAME_STAR Subject contains spammer subject - black market or scam
score    SARE_SUB_NAME_STAR 1.111 # type=spamp
#stype   SARE_SUB_NAME_STAR spamp
#counts  SARE_SUB_NAME_STAR 12s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Layout: one directive per line.  `header` tests the Subject header against
# a Perl regex, `describe` supplies the report text, `score` sets the points;
# #stype/#counts/#max/#hist lines are comments carrying rule metadata.

header   SARE_SUB_TAXES Subject =~ /(?:T[a\@]x(?:es)?.{0,30}(?:assistance|havenn?s|dollars|foreclose|information|legally|money\!|paid.for|problems|quick.and.easy|refund|this\W*year|write\W?off)|(?:avoid|doo?n't.pay|eliminate.(?:back|delinquent)|overpaid.your|paid.(?:for.with.your|too?.much)).tax(?:es)?|cigarette.tax.saving)/i
describe SARE_SUB_TAXES Subject mentions taxes
score    SARE_SUB_TAXES 0.772
#counts  SARE_SUB_TAXES 39s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_WINNING_NOT Subject =~ /(?:(?:Final|WINNING)(?:.award)?\s*NOTIFICATION|^NOTIFICATION\s*$|(?:auction|lucky).winning|notification.of.(?:an.instant|bequest|intent|unclaimed|multi.?item|promotion|winning)|notification.{1,30}final.notice|contrat.{1,30}winning.{1,30}promotion)/i
describe SARE_SUB_WINNING_NOT Subject contains spammer subject - black market or scam
score    SARE_SUB_WINNING_NOT 1.444
#counts  SARE_SUB_WINNING_NOT 106s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Category: Business

header   SARE_SUB_ADVERTISER_DB Subject =~ /(?:(?:Advertisers|comprehensive|webmaster)\W*Database|(?:emails|Database)\W*of.{0,20}(?:Advertisers|Business|bidders|ebay)|email\W*database)/i
describe SARE_SUB_ADVERTISER_DB Subject contains spammer subject - business
score    SARE_SUB_ADVERTISER_DB 0.555 # type=spamp
#stype   SARE_SUB_ADVERTISER_DB spamp
#counts  SARE_SUB_ADVERTISER_DB 9s/0h of 111253 corpus (90483s/20770h) 04/15/04
#hist    SARE_SUB_ADVERTISER_DB 04/01/04 -- added "webmaster database" option
#hist    SARE_SUB_ADVERTISER_DB 03/28/04 -- added "email database" option
#hist    SARE_SUB_ADVERTISER_DB 02/17/04 -- added ebay
#hist    SARE_SUB_ADVERTISER_DB 08/23/03 -- Created

# NOTE: "(:?ized)" (optional colon inside a capturing group) corrected to the
# intended non-capturing "(?:ized)".
header   SARE_SUB_ANIM_LOGO Subject =~ /(?:(?:Animated|unique|impressive|custom|flash|personal(?:ized)?)\W*Logo|Logo\W*(?:Animation|Creation))/i
describe SARE_SUB_ANIM_LOGO Subject contains spammer subject - business
score    SARE_SUB_ANIM_LOGO 1.111 # type=spamp
#stype   SARE_SUB_ANIM_LOGO spamp
#counts  SARE_SUB_ANIM_LOGO 47s/0h of 113305 corpus (92399s/20906h) 04/22/04

header   SARE_SUB_BUS_PROMOTE Subject =~ /(?:Business|daily|lottery|special|website|WhereCanIBet).Promotion/i
describe SARE_SUB_BUS_PROMOTE Subject contains spammer subject - business
score    SARE_SUB_BUS_PROMOTE 0.788
#counts  SARE_SUB_BUS_PROMOTE 42s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_GET_PAID Subject =~ /get\W*p[a\@d]id/i
describe SARE_SUB_GET_PAID Subject contains spammer subject - business
score    SARE_SUB_GET_PAID 1.022
#stype   SARE_SUB_GET_PAID spam
#counts  SARE_SUB_GET_PAID 84s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_GET_PAID 104s/0h of 97268 corpus (79437s/17831h) 01/24/04

# NOTE(review): "(?:.your)" is mandatory as written, so e.g. "extra income"
# does not match -- possibly meant to be "(?:.your)?".  Left unchanged; confirm
# against the corpus before widening.
header   SARE_SUB_INCOME2 Subject =~ /(?:(?:2nd|double|earn|excellent|expand|extra|(?:full|part).time|great|guaranteed|high(?:er)?|home|huge|increase|incredible|insure|monthly|\bnet|\bnew|online|residual|(?:six|6).figure|streams.of|substantial)(?:.your).?.?income|income.(?:opportunity|tip|to.you))/i
describe SARE_SUB_INCOME2 Subject contains spammer subject - business
score    SARE_SUB_INCOME2 0.638
#counts  SARE_SUB_INCOME2 15s/0h of 113393 corpus (92421s/20972h) 04/18/04

# The (?!\bincome\b) lookahead excludes the plainly spelled word, so only
# spellings with filler characters between the letters hit.
header   SARE_SUB_INCOME_OB1 Subject =~ /(?!\bincome\b)\bi.?n.?c.?o.?m.?e\b/i
describe SARE_SUB_INCOME_OB1 Subject contains obfuscated spammer word
score    SARE_SUB_INCOME_OB1 1.666 # type=obfu
#stype   SARE_SUB_INCOME_OB1 obfu
#counts  SARE_SUB_INCOME_OB1 4s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max     SARE_SUB_INCOME_OB1 11s/0h of 91714 corpus (74113s/17601h) 01/23/04

header SARE_SUB_INCOME_OB2 Subject =~
/(?!\bincome\b)(?:\b[il1]|\B(?:[:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i describe SARE_SUB_INCOME_OB2 Subject contains obfuscated spammer word score SARE_SUB_INCOME_OB2 1.666 # type=obfu #stype SARE_SUB_INCOME_OB2 obfu #counts SARE_SUB_INCOME_OB2 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_INCOME_OB2 19s/0h of 91714 corpus (74113s/17601h) 01/23/04 header SARE_SUB_KS_CLIENTS Subject =~ /(?:Thousands\W*of|reach\W*your)\W*Clients/i describe SARE_SUB_KS_CLIENTS Subject contains spammer subject - business score SARE_SUB_KS_CLIENTS 0.388 #counts SARE_SUB_KS_CLIENTS 7s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MILLIONS Subject =~ /(?!million 
dollars)(?:million.{0,30}(?:dollars|e.?mail)|million.?s?.(?:Americans|bottles|dollar.(?:chance|jackpot)|kids|of.(?:customers|singles)|on.the.line|(?:in.)?(?:assets|revenues|sales)|ladies|peoples|regain|times|to.be.made|waiting.for.you)|win.{0,30}million|millionaire.mak(?:er|ing)|(?:make|your).million)/i describe SARE_SUB_MILLIONS Subject contains spammer subject - business score SARE_SUB_MILLIONS 1.066 #counts SARE_SUB_MILLIONS 92s/0h of 113393 corpus (92421s/20972h) 04/18/04 #note SARE_SUB_MILLIONS "million dollars" appears too frequently in normal emails (esp political) header SARE_SUB_PATENTS Subject =~ /(?:Patents\W*Filed|patent\W*it|our\W*new\W*patent)/i describe SARE_SUB_PATENTS Subject contains spammer subject - business score SARE_SUB_PATENTS 0.750 #counts SARE_SUB_PATENTS 35s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_PRICES_NOLAST Subject =~ /prices\W*(?:won\'t|will\W*not)\W*last/i describe SARE_SUB_PRICES_NOLAST Subject contains spammer subject - business score SARE_SUB_PRICES_NOLAST 0.500 #counts SARE_SUB_PRICES_NOLAST 9s/0h of 111253 corpus (90483s/20770h) 04/15/04 #hist SARE_SUB_PRICES_NOLAST Created by Bob Menschel Apr 04 2004 header SARE_SUB_SALARY_ADV Subject =~ /(?:Salary\W*(?:Advancement|increase)|(?:(?:increased?|advance)(?:\W*your)?|better|higher|earns?\W*more)\W*salary)/i describe SARE_SUB_SALARY_ADV Subject contains spammer subject - business score SARE_SUB_SALARY_ADV 0.655 #counts SARE_SUB_SALARY_ADV 18s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_SALARY_ADV 03/28/04 -- expanded to catch more variations from very simple rule # Category: Credit, debt, lending, mortgage, borrowing, investment, financing header SARE_SUB_ACCEPT_CCARDS Subject =~ /(?!processing credit card)(?:(?:Accept(?:ing)?|Process.{0,20})\W*credit\W*c[aâ\@]rds?|credit\W*card\W*(chargebacks?|terminals?|vendor))/i describe SARE_SUB_ACCEPT_CCARDS Subject contains spammer subject - credit or money score SARE_SUB_ACCEPT_CCARDS 0.661 #counts 
SARE_SUB_ACCEPT_CCARDS 19s/0h of 111251 corpus (90481s/20770h) 04/15/04 header SARE_SUB_DEBT Subject =~ /\bdebt\b/i describe SARE_SUB_DEBT Subject contains spammer subject - credit or money score SARE_SUB_DEBT 1.666 #counts SARE_SUB_DEBT 368s/0h of 113393 corpus (92421s/20972h) 04/18/04 #max SARE_SUB_DEBT 609s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_DEBT_OB1 Subject =~ /(?!\bdebt\b)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_DEBT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_DEBT_OB1 2.500 # type=obfu #stype SARE_SUB_DEBT_OB1 obfu #counts SARE_SUB_DEBT_OB1 23s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_DEBT_OB2 Subject =~ /(?!deb[iu]?t)\bd.?e.?b.?t\b/i describe SARE_SUB_DEBT_OB2 Subject contains obfuscated spammer topic score SARE_SUB_DEBT_OB2 2.500 # type=obfu #stype SARE_SUB_DEBT_0B2 obfu #counts SARE_SUB_DEBT_OB2 21s/0h of 113373 corpus (92402s/20971h) 04/20/04 #hist SARE_SUB_DEBT_OB2 Apr 21 2004 - exclude debut as well as debit header SARE_SUB_GRANT Subject =~ /(?:(?:cash|collect\W*your|dollar|free(?:dom)?|get\W*a|government|gov't|qualify\W*for\W*a|taxes\W*paid\W*for\W*these)\W*grants?|grant\W*money\W*for\W*you|grants.{1,30}paid\W*for\W*with\W*your\W*taxes)/i describe SARE_SUB_GRANT Subject contains spammer subject - credit or money score SARE_SUB_GRANT 1.011 #stype SARE_SUB_GRANT spam #counts SARE_SUB_GRANT 82s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_INVESTMENTS Subject =~ 
/(?:(?:invest(?:ing|ments?|or)|promotion|stock\W*market).(?:alert|assistance|bulletin|data|forecast|funds|insight|knowledge|like|member|news|opp|option|profile|program|proposal|rewards|surprise|update|workshop)|(?:\$\d+.{0,10}|better.{0,30}|business|easy|fund.{0,30}|joint|make\W*an|proven|real\W*estate|secrets?.{0,30}|secured|smart|stock|time\W*to|your|zero)\W*invest(?:ing|ments?)|help.{1,10}invest)/i describe SARE_SUB_INVESTMENTS Subject contains spammer subject - credit or money score SARE_SUB_INVESTMENTS 1.666 #counts SARE_SUB_INVESTMENTS 351s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_INVESTORS Subject =~ /investors/i describe SARE_SUB_INVESTORS Subject contains spammer subject - credit or money score SARE_SUB_INVESTORS 1.211 #counts SARE_SUB_INVESTORS 118s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MORTGAGE_OB1 Subject =~ /(?!mortgage)(?:\bm|\B(?:rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|
(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
describe SARE_SUB_MORTGAGE_OB1 Subject contains obfuscated spammer topic
score    SARE_SUB_MORTGAGE_OB1 1.666 # type=obfu
#stype   SARE_SUB_MORTGAGE_OB1 obfu
#counts  SARE_SUB_MORTGAGE_OB1 362s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Layout: one directive per line.  `header` tests the Subject header against
# a Perl regex, `describe` supplies the report text, `score` sets the points;
# #stype/#counts/#max/#hist lines are comments carrying rule metadata.

# Requires exactly one arbitrary character between each letter of "mortgage".
header   SARE_SUB_MORTGAGE_OB2 Subject =~ /(?!mortgage)\bm.o.r.t.g.a.g.e\b/i
describe SARE_SUB_MORTGAGE_OB2 Subject contains obfuscated spammer topic
score    SARE_SUB_MORTGAGE_OB2 1.666 # type=obfu
#stype   SARE_SUB_MORTGAGE_OB2 obfu
#counts  SARE_SUB_MORTGAGE_OB2 2s/0h of 113393 corpus (92421s/20972h) 04/18/04

# NOTE(review): "creditvcard" contains a literal "v" where the neighbouring
# alternatives use \W* between words; it looks like a damaged "credit\W*card".
# Left unchanged because the intended pattern cannot be confirmed from here.
header   SARE_SUB_NEW_CREDIT Subject =~ /(?:(?:all|any)\W*(?:credit.(?:accepted|.{0,30}loan)|loan.{1,30}credit)|\b(?:easy|EZ)\W*(credit|home\W*loan|mortgage)|(?:best|get.{0,30}|right)\W*creditvcard|get\W*cash\W*out|(?:home|m.?[o0].?r.?t.?g.?[a\@].?g.?e)\W*loan.{1,30}credit|lines?\W*of\W*credit|(?:new|your.{0,30})\W*credit\W*line)/i
describe SARE_SUB_NEW_CREDIT Subject contains spammer subject - credit or money
score    SARE_SUB_NEW_CREDIT 1.338
#counts  SARE_SUB_NEW_CREDIT 141s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_POOR_CREDIT Subject =~ /(?!credit card (?:bill|declined))(?:(?:bad|poor|less\W*than\W*perfect|fix\W*your)\W*cr[eé]d[iï]t|cr[eé]d[iï]t.{1,20}declined|declined.{1,20}cr[eé]d[iï]t|cr[eé]d[iï]t\W*(?:bad|can\W*be\W*fix|card\W*(?:balances?|bills?|debt|elimination)|Counseling|profiles?|rating)|no\W*cr[eé]d[iï]t.check)/i
describe SARE_SUB_POOR_CREDIT Subject contains spammer subject - credit or money
score    SARE_SUB_POOR_CREDIT 1.666
#counts  SARE_SUB_POOR_CREDIT 358s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_REFINANCE Subject =~ /refinance/i
describe SARE_SUB_REFINANCE Subject contains spammer subject - credit or money
score    SARE_SUB_REFINANCE 1.666
#counts  SARE_SUB_REFINANCE 562s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_VISA_CARD Subject =~ /Visa\W*(?:card\W*easy|approve\W*all)/i
describe SARE_SUB_VISA_CARD Subject contains spammer subject - credit or money
score    SARE_SUB_VISA_CARD 0.222
#counts  SARE_SUB_VISA_CARD 4s/0h of 113393 corpus (92421s/20972h) 04/18/04
#hist    SARE_SUB_VISA_CARD Created by Bob Menschel Mar 30 2004

# Category: Education, Education-related scams

header   SARE_SUB_COLLEGE_OB1 Subject =~ /(?!\bcollege\b)\b[c\xC7\xE7\xA2\xA9][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[l1I\|\xA3][\W_]?[l1I\|\xA3][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[g6][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i
describe SARE_SUB_COLLEGE_OB1 Subject contains obfuscated spammer topic
score    SARE_SUB_COLLEGE_OB1 1.666 # type=obfu
#stype   SARE_SUB_COLLEGE_OB1 obfu
#counts  SARE_SUB_COLLEGE_OB1 5s/0h of 111253 corpus (90483s/20770h) 04/15/04

# NOTE: "experience{1,30}" and "longer{1,30}" applied the quantifier to the
# final letter; they now read ".{0,30}" (a superset of the old matches), in
# line with the ".{1,30}" fillers used by the sibling alternatives.
# NOTE(review): "bevyours" has a literal "v" where a word gap is expected
# (probably "be\W*yours"); left unchanged pending confirmation.
header   SARE_SUB_DIPLOMA Subject =~ /(?:(?:Bachelor'?s?|buy\W*a|cheap|degree.{1,10}|don't\W*have\W*a|earn.{1,30}with|\bg[e3]t.{1,30}|give\W*you\W*a|got\W*no|great|help\W*you.{1,30}|if\W*you\W*had\W*a|instant|life\W*experience.{0,30}|need\W*a|real|rec(?:ei|ie)ve\W*a|wait\W*no\W*longer.{0,30}|we\W*promise.{1,30}|\b(?:yo)?ur\W*(?:college|own))\W*Diploma|diplomas?\W*(?:can\W*bevyours|for\W*sale)|(?:college|university)\W*diplomas)/i
describe SARE_SUB_DIPLOMA Subject contains spammer subject - education
score    SARE_SUB_DIPLOMA 1.666
#counts  SARE_SUB_DIPLOMA 252s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_DOCTORATE Subject =~ /Doctorate/i
describe SARE_SUB_DOCTORATE Subject contains spammer subject - education
score    SARE_SUB_DOCTORATE 0.638
#counts  SARE_SUB_DOCTORATE 15s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_MBA Subject =~ /\bMBA\b/i
describe SARE_SUB_MBA Subject contains spammer subject - education
score    SARE_SUB_MBA 0.744
#counts  SARE_SUB_MBA 34s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max     SARE_SUB_MBA 38s/0h of 97268 corpus (79437s/17831h) 01/24/04

header   SARE_SUB_NO_CLASSES Subject =~ /(?:No\W*Classes\W*(?:Necessary|needed)|.{0,5}No\W*Books)/i
describe
SARE_SUB_NO_CLASSES Subject contains spammer subject - education score SARE_SUB_NO_CLASSES 1.205 #counts SARE_SUB_NO_CLASSES 117s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Gambling, Lotto, Sweepstakes, Winnings, Losses header SARE_SUB_CASINO_OB1 Subject =~ /(?!\bcasino)(?:\bc|\B(?:[\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)/i describe SARE_SUB_CASINO_OB1 Subject contains obfuscated spammer topic score SARE_SUB_CASINO_OB1 2.500 # type=obfu #stype SARE_SUB_CASINO_OB1 obfu #counts SARE_SUB_CASINO_OB1 12s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_CASINO_OB1 22s/0h of 91714 corpus (74113s/17601h) 01/23/04 header SARE_SUB_CASINO_OB2 Subject =~ /(?!\bcasino)\bc.?a.?s.?i.?n.?o/i describe SARE_SUB_CASINO_OB2 Subject contains obfuscated spammer topic score SARE_SUB_CASINO_OB2 1.666 # type=obfu #stype SARE_SUB_CASINO_OB2 obfu #counts SARE_SUB_CASINO_OB2 
3s/0h of 111253 corpus (90483s/20770h) 04/15/04 # Category: Insurance header SARE_SUB_INSURANCE Subject =~ /(?:(?:aff[o0]rdable|cheap(?:est)?|free|good\W*news|l[o0]w\W*c[o0]st|(?:over)?pay(?:ing)?\W*t[o0][o0]\W*much|reduce|save|sell).{1,30}insurance|insurance.{1,30}(?:available|everyone|f[o0]r\W*less|leads|[o0]ffers|[o0]pti[o0]ns?|qu[o0]tes?)|(?:FYI:?|new|special|sub|update(?:\W*sub)?)\W*construction\W*insurance|new\W*insurnace\W*product)/i describe SARE_SUB_INSURANCE Subject contains spammer subject - insurance score SARE_SUB_INSURANCE 1.666 #counts SARE_SUB_INSURANCE 505s/0h of 113374 corpus (92402s/20972h) 04/18/04 #note SARE_SUB_INSURANCE "insurance coverage" hits too much ham #note SARE_SUB_INSURANCE "term life" covered by SARE_SUB_TERM_LIFE header SARE_SUB_CAR_INSURANCE Subject =~ /(?:car|auto(?:mobile)?) insurance/i describe SARE_SUB_CAR_INSURANCE Subject contains spammer subject - insurance score SARE_SUB_CAR_INSURANCE 0.672 #counts SARE_SUB_CAR_INSURANCE 21s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_LT_CARE Subject =~ /Long Term C[a\@]re/i describe SARE_SUB_LT_CARE Subject contains spammer subject - insurance score SARE_SUB_LT_CARE 0.661 #stype SARE_SUB_LT_CARE spam #counts SARE_SUB_LT_CARE 19s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_REPAIR_BILLS Subject =~ /(?:large\W*repair\W*bills|(?:(?:costly|major)\W*auto|m[o0]ney\W*for|pay(?:ing)?\W*for|save\b.{1,30}\bon)\W*repairs?)/i describe SARE_SUB_REPAIR_BILLS Subject contains spammer subject - insurance score SARE_SUB_REPAIR_BILLS 0.877 #counts SARE_SUB_REPAIR_BILLS 58s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_REPAIR_BILLS Created by Bob Menschel Mar 22 2004 header SARE_SUB_PROTECT_FAM Subject =~ /(?:Protect\W*your\W*famil(?:y|ies)|protect(?:ion)?(?:\W*for)?\W*your\W*(?:vehicle|car)|secure\W*your\W*future|protect.{1,10}from.{1,10}repair\W*bills?|extended\W*warranty\W*protection)/i describe SARE_SUB_PROTECT_FAM Subject contains spammer subject - 
insurance score SARE_SUB_PROTECT_FAM 1.072 #counts SARE_SUB_PROTECT_FAM 93s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_ROADSIDE_AID Subject =~ /(?:Roadside\W*Assistance\W*24\/7|24\W*hour\W*roadside\W*assistance)/i describe SARE_SUB_ROADSIDE_AID Subject contains spammer subject - insurance score SARE_SUB_ROADSIDE_AID 0.627 #counts SARE_SUB_ROADSIDE_AID 13s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_ROADSIDE_AID Created by Bob Menschel Mar 22 2004 header SARE_SUB_TERM_LIFE Subject =~ /Term\W*Life/i describe SARE_SUB_TERM_LIFE Subject contains spammer subject - insurance score SARE_SUB_TERM_LIFE 1.666 #counts SARE_SUB_TERM_LIFE 214s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Marketing, Pricing, Selling, Buying header SARE_SUB_AFFORDABLE Subject =~ /\baffordable\b/i describe SARE_SUB_AFFORDABLE Subject contains spammer subject - marketing score SARE_SUB_AFFORDABLE 1.666 #counts SARE_SUB_AFFORDABLE 261s/0h of 113393 corpus (92421s/20972h) 04/18/04 #max SARE_SUB_AFFORDABLE 305s/0h of 125093 corpus (104905s/20188h) 03/28/04 #hist SARE_SUB_AFFORDABLE 03/28/04 -- generalized to one-word rule from low-scoring "made affordable" rule header SARE_SUB_DISCOUNT_OB1 Subject =~ 
/(?!discount)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_DISCOUNT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_DISCOUNT_OB1 1.666 # type=obfu #stype SARE_SUB_DISCOUNT_OB1 obfu #counts SARE_SUB_DISCOUNT_OB1 200s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_FREE_OB1 Subject =~ 
/(?!free)(?:\bf|\B(?:\xC5\xBF|\xC6\x92|\xD2[\x92-\x93]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
describe SARE_SUB_FREE_OB1 Subject contains obfuscated spammer topic
score    SARE_SUB_FREE_OB1 3.333 # type=obfu
#stype   SARE_SUB_FREE_OB1 obfu
#counts  SARE_SUB_FREE_OB1 334s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Layout: one directive per line.  `header` tests the Subject header against
# a Perl regex, `describe` supplies the report text, `score` sets the points;
# #stype/#counts/#max/#hist lines are comments carrying rule metadata.
# In the *_OB1 rules each letter is an alternation of look-alike bytes and the
# long [\x01-\x2F...] class between letters allows one optional filler byte.

header   SARE_SUB_FREE_OB2 Subject =~ /(?!free(?:ze)?)\bf.?r.?e.?e\b/i
describe SARE_SUB_FREE_OB2 Subject includes word suggesting spammer
score    SARE_SUB_FREE_OB2 3.333 # type=obfu
#counts  SARE_SUB_FREE_OB2 281s/0h of 113373 corpus (92402s/20971h) 04/20/04
#hist    SARE_SUB_FREE_OB2 Apr 21 2004 - Added exclusion for freeze

header   SARE_SUB_TOO_HIGH Subject =~ /(?:(?:all\W*time|too)\W*high|high\W*(costs?|payments?))/i
describe SARE_SUB_TOO_HIGH Subject contains spammer subject - marketing
score    SARE_SUB_TOO_HIGH 1.044
#counts  SARE_SUB_TOO_HIGH 88s/0h of 113393 corpus (92421s/20972h) 04/18/04

# NOTE: the /i modifier was missing here only; every other rule in this
# section is case-insensitive, and without it "More Traffic" did not match.
header   SARE_SUB_MORE_TRAFFIC Subject =~ /(?:(?:more|engine|targeted|web)\W*traffic|traffic\W*(?:online|partner|volume))/i
describe SARE_SUB_MORE_TRAFFIC Subject contains spammer subject - marketing
score    SARE_SUB_MORE_TRAFFIC 0.655
#counts  SARE_SUB_MORE_TRAFFIC 18s/0h of 113393 corpus (92421s/20972h) 04/18/04
#hist    SARE_SUB_MORE_TRAFFIC Created by Bob Menschel Mar 25 2004

# NOTE: the "v" alternation ended in "\xCE\xBD])", so the \xCE\xBD form only
# matched when followed by a literal "]".  The stray bracket is removed.
header   SARE_SUB_SAVE_OB1 Subject =~ /(?!\bsave\b)(?:\b[s5]|\B(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[vu]|\\\/|\xCE\xBD)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3]\b|(?:[\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)\B)/i
describe SARE_SUB_SAVE_OB1 Subject contains spammer subject - marketing
score    SARE_SUB_SAVE_OB1 3.333 # type=obfu
#stype   SARE_SUB_SAVE_OB1 obfu
#counts  SARE_SUB_SAVE_OB1 121s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_SAVE_OB2 Subject =~ /(?!\bs[hlt]?ave\b)\bs.?a.?v.?e\b/i
describe SARE_SUB_SAVE_OB2 Subject contains spammer subject - marketing
score    SARE_SUB_SAVE_OB2 3.333 # type=obfu - 57s/0h of 91714 corpus (74113s/17601h) 01/24/04
#counts  SARE_SUB_SAVE_OB2 105s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_SPENDING2MUCH Subject =~ /(?:Spending\W*(?:TOO|so)\W*MUCH|(?:control\W*your|no\W*limit\W*on)\W*spending)/i
describe SARE_SUB_SPENDING2MUCH Subject contains spammer subject - marketing
score    SARE_SUB_SPENDING2MUCH 0.683
#counts  SARE_SUB_SPENDING2MUCH 23s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_WORTH_CASH Subject =~ /(?!take a look)\b(?:Worth|Win|take|extra|earn|dollars|Short|need|claim|free|get|opinions?|surveys?)\b.{0,30}(?:fast)?(?:C[a\@]sh|M[0o]ney|a\W*(?:look|raise)|of\W*sports?\W*tickets|the\W*price)\b/i
describe SARE_SUB_WORTH_CASH Subject contains spammer subject - marketing
score    SARE_SUB_WORTH_CASH 1.666
#counts  SARE_SUB_WORTH_CASH 546s/0h of 113374 corpus (92402s/20972h) 04/18/04

header   SARE_SUB_YOUR_AFFILIATE Subject =~ /(?:your affiliate|affiliate\W*(?:program|software)|affiliates\W*make\W*\$)/i
describe SARE_SUB_YOUR_AFFILIATE Subject contains spammer subject - marketing
score    SARE_SUB_YOUR_AFFILIATE 0.916
#counts  SARE_SUB_YOUR_AFFILIATE 65s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Category: Medical

header   SARE_SUB_AGING Subject =~ /\bAging\b/i
describe SARE_SUB_AGING Subject contains spammer subject - medical
score    SARE_SUB_AGING 1.516
#counts  SARE_SUB_AGING 173s/0h of 113393 corpus (92421s/20972h) 04/18/04

# NOTE: the first "g" alternation ended in "\xC4[\x9C-\xA3]])", so the
# two-byte form only matched when followed by a literal "]".  The stray
# bracket is removed, matching the correctly written final "g" alternation.
header   SARE_SUB_AGING_OB1 Subject =~ /(?!\bAging\b)(?:\b[a4]|\B(?:[\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]\b|(?:\xC4[\x9C-\xA3])\B)/i
describe SARE_SUB_AGING_OB1 Subject contains obfuscated spammer topic
score    SARE_SUB_AGING_OB1 1.666 # type=obfu
#stype   SARE_SUB_AGING_OB1 obfu
#counts  SARE_SUB_AGING_OB1 1s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_AGING_OB2 Subject =~ /(?!\bAging\b)\bA.?g.?i.?n.?g\b/i
describe SARE_SUB_AGING_OB2 Subject contains obfuscated spammer topic
score    SARE_SUB_AGING_OB2 2.500 # type=obfu
#stype   SARE_SUB_AGING_OB2 obfu
#counts  SARE_SUB_AGING_OB2 37s/0h of 113393 corpus (92421s/20972h) 04/18/04

header   SARE_SUB_AM_MED_DICT Subject =~ /American Medical Directory/i
describe SARE_SUB_AM_MED_DICT
Subject contains spammer subject - medical score SARE_SUB_AM_MED_DICT 0.905 #counts SARE_SUB_AM_MED_DICT 63s/0h of 113373 corpus (92402s/20971h) 04/18/04 header SARE_SUB_BUY_MEDS subject =~ /(?:b[uv]y|p.?[uv].?r.?c.?h.?[a\@].?s.?e|get)\W*(?:[a\@]ll\W*)(?:y[o0\@][uv]r\W*)?(?:c.?h.?e.?[a\@].?p\W*)?(?:[a\@].?[l|].?p.?r.?[a\@].?z.?[o0\@].?[l|]|B.?[o0\@].?n.?t.?r.?i.?[l|]|c.?i.?[a\@].?[l|].?i.?s|C.?[o0\@].?d.?e.?i.?n.?e|D.?i.?d.?r.?e.?x|d.?i.?e.?t|F.?[l|].?e.?x.?e.?r.?i.?[l|]|g.?e.?n.?e.?r.?i.?c|h.?g.?h|H.?y.?d.?r.?[o0\@].?c.?[o0\@].?d.?[o0\@].?n.?e|[l|].?e.?v.?i.?t.?r.?[a\@]|m.?e.?d.?(?:i.?c.?[a\@].?t.?i.?[o0\@].?n.?)?s|M.?[uv].?s.?c.?[l|].?e.?R.?e.?[l|].?[a\@].?x.?[a\@].?n.?t.?s?|p.?[a\@].?i.?n|P.?[a\@].?x.?i.?[l|]|P.?h.?e.?n.?t.?e.?r.?m.?i.?n.?e|P.?r.?e.?s.?c.?r.?i.?p.?t.?i.?[o0\@].?n.?s?|P.?r.?[o0\@].?z.?[a\@].?c|S.?i.?[l|].?d.?e.?n.?[a\@].?f.?i.?[l|]|S.?k.?e.?[l|].?[a\@].?x.?i.?n|s.?[l|].?e.?e.?p.?i.?n.?g|s.?[o0\@].?m.?[a\@]|T.?r.?[a\@].?m.?[a\@].?d.?[o0\@].?[l|]|v.?[a\@].?[l|].?i.?[uv].?m|v.?i.?[a\@].?g.?r.?[a\@]|V.?i.?c.?[o0\@].?d.?i.?n|V.?i.?[o0\@].?x.?x|x.?[a\@].?n.?[a\@].?x|Z.?[o0\@].?[l|].?[o0\@].?f.?t)\b/i describe SARE_SUB_BUY_MEDS Subject contains spammer subject - medical score SARE_SUB_BUY_MEDS 1.261 #counts SARE_SUB_BUY_MEDS 127s/0h of 113267 corpus (92361s/20906h) 04/24/04 #hist SARE_SUB_BUY_MEDS Created by Bob Menschel April 24 2004 header SARE_SUB_COLLAGEN Subject =~ /Collagen/i describe SARE_SUB_COLLAGEN Subject contains spammer subject - medical score SARE_SUB_COLLAGEN 0.672 #counts SARE_SUB_COLLAGEN 21s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_CONSULTATION Subject =~ /\bconsultations?\b/i describe SARE_SUB_CONSULTATION Subject contains spammer subject - medical score SARE_SUB_CONSULTATION 0.705 #counts SARE_SUB_CONSULTATION 27s/0h of 113272 corpus (92366s/20906h) 04/23/04 header SARE_SUB_CONSULTN_OB1 Subject =~ 
/(?!consultations?)(?:\bc|\B(?:[\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-
\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)?\b/i
describe SARE_SUB_CONSULTN_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CONSULTN_OB1 1.666 # type=obfu
#stype SARE_SUB_CONSULTN_OB1 obfu
#counts SARE_SUB_CONSULTN_OB1 9s/0h of 113272 corpus (92366s/20906h) 04/23/04

# "improve" followed within 1-30 characters by one of the listed topic words.
header SARE_SUB_IMPROVE Subject =~ /improve.{1,30}(?:cell\W*phone|cholesterol|credit|desire|hair|health|home|kisser|love\W*life|memory|performance|possibilities|self\W*image|sex(?:\W*life|ual\W*(?:endurance|health))|signal|sleep|stamina|stock\W*market|vision)/i
describe SARE_SUB_IMPROVE Subject contains spammer subject - medical
score SARE_SUB_IMPROVE 1.450
#counts SARE_SUB_IMPROVE 161s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Pharmacy phrases; the leading look-ahead skips the phrase "Pharmacy selection" (see #ham below).
header SARE_SUB_INET_PHARM Subject =~ /(?!Pharmacy selection)(?:(?:American|best|(?:by|from)\W*(?:a\W*_?US|cheap|cyber|discreet|e-|FDA|free|generic|genuine|Internet|low\W*cost|new|off\W*shore|on\W*line(?:.{1,5}USA)?|overnight|perfect|smart|super|US\W*doctors\W*US)|(?:discreet|no\W*doctor).{1,30})\W*Pharmacy|Pharmacy.{1,30}(?:deals|sale|prices?|related\W*drugs|selection|verification)|your\W*pharmacy\W*order)/i
describe SARE_SUB_INET_PHARM Common spammer subject header -- Medical
score SARE_SUB_INET_PHARM 1.666
#counts SARE_SUB_INET_PHARM 340s/0h of 113373 corpus (92402s/20971h) 04/18/04
#hist SARE_SUB_INET_PHARM Created by Bob Menschel Apr 09 2004
#ham SARE_SUB_INET_PHARM "Pharmacy selection" in email discussing employee's health benefits
#note SARE_SUB_INET_PHARM Replaced "\e-" with "e-": in a Perl regex "\e" is the ESC control character, so the "e-Pharmacy" alternative could never match.

header SARE_SUB_MALE_MUSCLE Subject =~ /Male muscle/i
describe SARE_SUB_MALE_MUSCLE Subject contains spammer subject - medical
score SARE_SUB_MALE_MUSCLE 0.222
#stype SARE_SUB_MALE_MUSCLE
spam #counts SARE_SUB_MALE_MUSCLE 4s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_MEDICAT_OB1 Subject =~ /(?!medication)(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|
\xD5\xB2|\xD5\xB8)/i describe SARE_SUB_MEDICAT_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MEDICAT_OB1 3.333 # type=obfu #stype SARE_SUB_MEDICAT_OB1 obfu #counts SARE_SUB_MEDICAT_OB1 172s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDICAT_OB2 Subject =~ /(?!medication)m.?e.?d.?i.?c.?a.?t.?i.?o.?n/i describe SARE_SUB_MEDICAT_OB2 Subject contains obfuscated spammer topic score SARE_SUB_MEDICAT_OB2 3.333 # type=obfu #stype SARE_SUB_MEDICAT_OB2 obfu #counts SARE_SUB_MEDICAT_OB2 117s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDICAL_NEWS Subject =~ /(?:medical\W*(?:announcement|breakthrough|discover|info|innovation|marvel|miracle|news|post|update)|(?:news|notice).{1,3}medical)/i describe SARE_SUB_MEDICAL_NEWS Subject contains spammer subject - medical score SARE_SUB_MEDICAL_NEWS 1.016 #counts SARE_SUB_MEDICAL_NEWS 83s/0h of 113374 corpus (92402s/20972h) 04/18/04 #hist SARE_SUB_MEDICAL_NEWS Created by Bob Menschel Apr 05 2004 header SARE_SUB_MEDS_OB1 Subject =~ /(?!\bmeds\b)(?:\bm|\B(?:rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5]\b|(?:[\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)\B)/i describe SARE_SUB_MEDS_OB1 Subject contains obfuscated spammer topic score SARE_SUB_MEDS_OB1 3.333 # type=obfu #stype SARE_SUB_MEDS_OB1 obfu #counts SARE_SUB_MEDS_OB1 244s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_MEDS_OB2 Subject =~ /(?!\bmeds\b)\bm.?e.?d.?s\b/i describe SARE_SUB_MEDS_OB2 Subject contains obfuscated spammer topic score SARE_SUB_MEDS_OB2 2.500 # type=obfu #stype SARE_SUB_MEDS_OB2 obfu #counts SARE_SUB_MEDS_OB2 
71s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_MENS_HEALTH Subject =~ /Men'?s'? Health/i
describe SARE_SUB_MENS_HEALTH Subject contains spammer subject - medical
score SARE_SUB_MENS_HEALTH 0.500
#counts SARE_SUB_MENS_HEALTH 9s/0h of 111253 corpus (90483s/20770h) 04/15/04

# "online" and "drugs" in either order, 1-30 characters apart, each tolerating one filler character between letters.
header SARE_SUB_ONLINE_DRUGS Subject =~ /(?:[o0].?n.?l.?i.?n.?e.{1,30}d.?r.?u.?g.?s|d.?r.?u.?g.?s.{1,30}[o0].?n.?l.?i.?n.?e)/i
describe SARE_SUB_ONLINE_DRUGS Subject contains spammer subject - medical
score SARE_SUB_ONLINE_DRUGS 1.666
#counts SARE_SUB_ONLINE_DRUGS 268s/0h of 113374 corpus (92402s/20972h) 04/18/04
#hist SARE_SUB_ONLINE_DRUGS Created by Bob Menschel Apr 07 2004

header SARE_SUB_PHYSICIAN Subject =~ /\bphysicians?\b/i
describe SARE_SUB_PHYSICIAN Subject contains spammer subject - medical
score SARE_SUB_PHYSICIAN 0.955
#counts SARE_SUB_PHYSICIAN 72s/0h of 113272 corpus (92366s/20906h) 04/23/04
#hist SARE_SUB_PHYSICIAN 04/23/2004 - Added to testing

header SARE_SUB_SAMPLES subject =~ /\b(?:c[o0]mp[l|]iment[a\@]ry|d[a\@]y|free|tri[a\@][l|])\W*s[a\@]mp[l|]es?\b/i
describe SARE_SUB_SAMPLES Subject contains spammer subject - medical
score SARE_SUB_SAMPLES 0.744
#counts SARE_SUB_SAMPLES 34s/0h of 113267 corpus (92361s/20906h) 04/24/04
#hist SARE_SUB_SAMPLES Created by Bob Menschel April 24 2004

header SARE_SUB_STRETCH_MARK Subject =~ /stretch\W*mark/i
describe SARE_SUB_STRETCH_MARK Subject contains spammer subject - medical
score SARE_SUB_STRETCH_MARK 0.650
#counts SARE_SUB_STRETCH_MARK 17s/0h of 113374 corpus (92402s/20972h) 04/18/04

header SARE_SUB_VALIUM Subject =~ /Valium/i
describe SARE_SUB_VALIUM Subject contains spammer subject - medical
score SARE_SUB_VALIUM 2.222
#counts SARE_SUB_VALIUM 1099s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_WEIGHT_OB1 Subject =~ /(?!weight)(?:\bw|\B(?:\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i
describe SARE_SUB_WEIGHT_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_WEIGHT_OB1 3.333 # type=obfu
#stype SARE_SUB_WEIGHT_OB1 obfu
#counts SARE_SUB_WEIGHT_OB1 148s/0h of 113374 corpus (92402s/20972h) 04/18/04
#note SARE_SUB_WEIGHT_OB1 Removed stray "]" from the "g" group (after \xC4[\x9C-\xA3]) and the "h" group (after \xD5\xB0); each forced a literal "]" to follow.

header SARE_SUB_YOUNGER Subject =~ /\bYOUNGER\b/i
describe SARE_SUB_YOUNGER Subject contains spammer subject - medical
score SARE_SUB_YOUNGER 1.588
#counts SARE_SUB_YOUNGER 186s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_YOUNGER_OB1 Subject =~
/(?!\bYOUNGER\b)(?:\by|\B(?:[\xA5\xDD\xFD]|\xC5[\xB6-\xB8]|\xCE\x8E|\xCE\xA5|\xCE\xA8|\xCE\xAB|\xCE\xB3|\xD0\xA3|\xD1\x83|\xD1\x9E|\xD2[\xAE-\xB1]))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:r\b|(?:[\xAE]|\xC5[\x94-\x99]|\xD1\x93)\B)/i describe SARE_SUB_YOUNGER_OB1 Subject contains obfuscated spammer topic score SARE_SUB_YOUNGER_OB1 2.500 # type=obfu #stype SARE_SUB_YOUNGER_OB1 obfu #counts SARE_SUB_YOUNGER_OB1 20s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_YOUNGER_OB2 Subject =~ /(?!\bYOUNGER\b)\by.?o.?u.?n.?g.?e.?r\b/i describe SARE_SUB_YOUNGER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_YOUNGER_OB2 2.500 # type=obfu #stype SARE_SUB_YOUNGER_OB2 obfu #counts SARE_SUB_YOUNGER_OB2 29s/0h of 113393 corpus (92421s/20972h) 04/18/04 # Category: Real Estate header SARE_SUB_FORECLOSURE Subject =~ /Foreclosure/i describe SARE_SUB_FORECLOSURE Subject contains spammer subject - real estate score SARE_SUB_FORECLOSURE 0.700 #counts SARE_SUB_FORECLOSURE 
26s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_FORECLOSURE 29s/0h of 97268 corpus (79437s/17831h) 01/24/04 header SARE_SUB_HOMEOWNER Subject =~ /homeowner/i describe SARE_SUB_HOMEOWNER Subject contains spammer subject - real estate score SARE_SUB_HOMEOWNER 1.183 #counts SARE_SUB_HOMEOWNER 113s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_HOMEOWNER_OB1 Subject =~ /(?!homeowner)(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:m|rn|\/V\\|\/\\\/\\|\xCE\x9C|\xD0\x9C|\xD0\xBC])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:w|\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5
[\x94-\x99]|\xD1\x93)/i describe SARE_SUB_HOMEOWNER_OB1 Subject contains obfuscated spammer topic score SARE_SUB_HOMEOWNER_OB1 1.666 # type=obfu #stype SARE_SUB_HOMEOWNER_OB1 obfu #counts SARE_SUB_HOMEOWNER_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_HOMEOWNER_OB2 Subject =~ /(?!homeowner)h.?o.?m.?e.?o.?w.?n.?e.?r/i describe SARE_SUB_HOMEOWNER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_HOMEOWNER_OB2 1.666 # type=obfu #stype SARE_SUB_HOMEOWNER_OB2 obfu #counts SARE_SUB_HOMEOWNER_OB2 6s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_TIMESHARE Subject =~ /timeshare/i describe SARE_SUB_TIMESHARE Subject contains spammer subject - real estate score SARE_SUB_TIMESHARE 0.711 #counts SARE_SUB_TIMESHARE 5s/0h of 111253 corpus (90483s/20770h) 04/15/04 #max SARE_SUB_TIMESHARE 28s/0h of 91714 corpus (74113s/17601h) 01/24/04 # Category: Religious, including religious scams header SARE_SUB_CHRISTIAN Subject =~ /\bchristian\b/i describe SARE_SUB_CHRISTIAN Subject contains spammer subject - religion score SARE_SUB_CHRISTIAN 0.766 #counts SARE_SUB_CHRISTIAN 38s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_LEGAL_ORDIN Subject =~ /(?:(?:LEGAL|online)\W*ORDINATION|proceed\W*with.{1,30}ordination)/i describe SARE_SUB_LEGAL_ORDIN Subject contains spammer subject - religion score SARE_SUB_LEGAL_ORDIN 0.333 #counts SARE_SUB_LEGAL_ORDIN 6s/0h of 113374 corpus (92402s/20972h) 04/18/04 # Category: Software header SARE_SUB_CHEAP_SW Subject =~ /(?:(?:bargain|bucks|C.?h.?e.?a.?p|discount|expensive|p.?r.?i.?c.?e|s.?a.?v.?e|special\W*offer|spend).{1,30}software|s.?o.?f.?t.?w.?a.?r.?e.{1,30}(?:\%.off|at\W*only|bargain|bucks|c.?h.?e.?a.?p|deal|loww?.c.?o.?s.?t|price))/i describe SARE_SUB_CHEAP_SW Subject contains spammer subject - software score SARE_SUB_CHEAP_SW 1.666 #counts SARE_SUB_CHEAP_SW 482s/0h of 113393 corpus (92421s/20972h) 04/18/04 #hist SARE_SUB_CHEAP_SW Created by Bob Menschel Apr 09 2004 header SARE_SUB_DOWNLOAD 
Subject =~ /(?:downloadable\W*software|(?:available\W*for|cds\W*(?:and|or)|easy|free\W*to)\W*download|download(?:ing)\W*(?:(?:for\W*)?free|games|movies|music|now|software|under|video))/i
describe SARE_SUB_DOWNLOAD Subject contains spammer subject - software
score SARE_SUB_DOWNLOAD 0.705
#counts SARE_SUB_DOWNLOAD 27s/0h of 113393 corpus (92421s/20972h) 04/18/04
#note SARE_SUB_DOWNLOAD NOTE(review): "(?:ing)" is mandatory, so the last alternative only matches "downloading ..."; confirm whether "(?:ing)?" was intended before changing it.

header SARE_SUB_DOWNLOAD_OB1 Subject =~ /(?!\bdownload)(?:\bd|\B(?:[\xD0]|\xC4[\x8E-\x91]))(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)(?:w|\\\/\\\/|VV|\xC5[\xB4-\xB5]|\xCF[\x88-\x89]|\xCF\x8E)(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)(?:[l1I\|\xA3]|(?:\xC5[\x80-\x82]|\xC4[\xB9-\xBF]))(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)(?:[d\xD0]|\xC4[\x8E-\x91])/i
describe SARE_SUB_DOWNLOAD_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_DOWNLOAD_OB1 1.666 # type=obfu
#stype SARE_SUB_DOWNLOAD_OB1 obfu
#counts SARE_SUB_DOWNLOAD_OB1 6s/0h of 113393 corpus (92421s/20972h) 04/18/04
#note SARE_SUB_DOWNLOAD_OB1 Removed a stray "]" after \xCF\x8E in the "w" group; it forced a literal "]" to follow that byte pair.

header SARE_SUB_SW_ON_CD Subject =~ /software\W*(?:on\W*)CD/i
describe SARE_SUB_SW_ON_CD Subject contains spammer subject - software
score SARE_SUB_SW_ON_CD 0.222
#stype SARE_SUB_SW_ON_CD spam
#hist SARE_SUB_SW_ON_CD Created by Bob Menschel Apr 09 2004
#counts SARE_SUB_SW_ON_CD 4s/0h of 111253 corpus (90483s/20770h) 04/15/04

header SARE_SUB_SWTYPES Subject =~ /(?:hate\W*typing|it\W*types|never\W*type|no\W*typing\W*required|Talk\W*It\W*Type\W*It|voice\W*recognition)/i
describe SARE_SUB_SWTYPES Subject contains a spammer subject - Software
score SARE_SUB_SWTYPES 0.622
#counts SARE_SUB_SWTYPES 12s/0h of 111217 corpus (90485s/20732h) 04/17/04
#note SARE_SUB_SWTYPES beware: "attachment type" in virus bounce subject headings.

header SARE_SUB_SYSTEMWORKS Subject =~ /(?:get|sav(?:e|ing)).{1,30}system\W*works/i
describe SARE_SUB_SYSTEMWORKS Subject contains a spammer subject - Software
score SARE_SUB_SYSTEMWORKS 0.622
#counts SARE_SUB_SYSTEMWORKS 12s/0h of 113393 corpus (92421s/20972h) 04/18/04

header SARE_SUB_WP_OFFICE Subject =~ /(?:\%|Sav(?:e|ing)).{1,30}(?:Corel|WordPerfect).{1,30}Office/i
describe SARE_SUB_WP_OFFICE Subject contains spammer subject - software
score SARE_SUB_WP_OFFICE 0.388
#counts SARE_SUB_WP_OFFICE 7s/0h of 113393 corpus (92421s/20972h) 04/18/04
#max SARE_SUB_WP_OFFICE 8s/0h of 58857 corpus

# Category: Spamming

header SARE_SUB_ADV_SEARCH Subject =~ /emails?.{1,30}(?:7Search|ebay|google|goClick|yahoo)/i
describe SARE_SUB_ADV_SEARCH Subject contains spammer subject - spamming
score SARE_SUB_ADV_SEARCH 0.555 # type=spamp
#stype SARE_SUB_ADV_SEARCH spamp
#counts SARE_SUB_ADV_SEARCH 2s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_ADV_SEARCH 4s/0h of 81383 corpus ??/??/03
#hist SARE_SUB_ADV_SEARCH 03/28/04 -- added "ebay" option

header SARE_SUB_BULK_EMAIL Subject =~ /(?:(?:junk|bulk)\W*(?:e\W*mail|fax\W*numbers)|fax\W*bulk\W*numbers)/i
describe SARE_SUB_BULK_EMAIL Subject contains spammer subject - spamming
score SARE_SUB_BULK_EMAIL 0.772
#counts SARE_SUB_BULK_EMAIL 39s/0h of 113373 corpus (92402s/20971h) 04/20/04

header SARE_SUB_COMM_MAILERS Subject =~ /(?:commissions?.{1,30}(?:mailers?|web\W*site)|(?:bulk|google).{1,30}commission)/i
describe SARE_SUB_COMM_MAILERS Subject contains spammer subject - spamming
score SARE_SUB_COMM_MAILERS 0.627
#counts SARE_SUB_COMM_MAILERS 13s/0h of 113374 corpus (92402s/20972h) 04/18/04

header SARE_SUB_INET_CONN Subject =~ /(?:internet\W*connection\W*problem|(?:frequent|slow)\W*internet\W*connection)/i
describe
SARE_SUB_INET_CONN Subject contains spammer subject - spamming
score SARE_SUB_INET_CONN 0.677
#counts SARE_SUB_INET_CONN 22s/0h of 113374 corpus (92402s/20972h) 04/18/04

header SARE_SUB_WSEAS Subject =~ /\bWSEAS\b/i
describe SARE_SUB_WSEAS Subject contains spammer subject - spamming
score SARE_SUB_WSEAS 1.666 # type=spamg
#stype SARE_SUB_WSEAS spamg
#counts SARE_SUB_WSEAS 4s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Category: Generic words and phrases

# Obfuscated "action": look-alike characters per letter with an optional non-word/underscore filler between them;
# the look-ahead skips the plain word.
header SARE_SUB_ACTION_OB1 Subject =~ /(?!\baction\b)\b[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[c\xC7\xE7\xA2\xA9][\W_]?t[\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]\b/i
describe SARE_SUB_ACTION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_ACTION_OB1 1.666 # type=obfu
#stype SARE_SUB_ACTION_OB1 obfu
#counts SARE_SUB_ACTION_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04

header SARE_SUB_BE_HERE Subject =~ /be here/i
describe SARE_SUB_BE_HERE Subject contains likely spammer phrase or word
score SARE_SUB_BE_HERE 0.500
#counts SARE_SUB_BE_HERE 9s/0h of 111253 corpus (90483s/20770h) 04/15/04

header SARE_SUB_BIGGER_OB1 Subject =~ /(?!bigger)b.?i.?g.?g.?e.?r/i
describe SARE_SUB_BIGGER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_BIGGER_OB1 1.666 # type=obfu - 23s/0h of 91714 corpus (74113s/17601h) 01/23/04
#counts SARE_SUB_BIGGER_OB1 9s/0h of 111251 corpus (90481s/20770h) 04/15/04

header SARE_SUB_BIGGER_OB2 Subject =~ /(?!bigger)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i
describe SARE_SUB_BIGGER_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_BIGGER_OB2 2.500 # type=obfu - 30s/0h of 91714 corpus (74113s/17601h) 01/23/04
#counts SARE_SUB_BIGGER_OB2 10s/0h of 111251 corpus (90481s/20770h) 04/15/04
#note SARE_SUB_BIGGER_OB2 Removed the stray "]" from both "g" groups (after \xC4[\x9C-\xA3]); each forced a literal "]" to follow.

header SARE_SUB_BETTER_OB2 Subject =~
/(?!BETTER)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i describe SARE_SUB_BETTER_OB2 Subject contains obfuscated spammer topic score SARE_SUB_BETTER_OB2 2.500 # type=obfu - 30s/0h of 91714 corpus (74113s/17601h) 01/23/04 #counts SARE_SUB_BETTER_OB2 55s/0h of 111251 corpus (90481s/20770h) 04/15/04 header SARE_SUB_BOOST Subject =~ /(?:boost.{1,20}(?:(?:cable|PC).{1,10}speed|confidence|in\W*bed|(?:love|se.?x)\W*life|mileage|size|stamina)|(?:manhood|muscle|sex|super).{0,30}boost)/i describe SARE_SUB_BOOST Subject contains likely spammer phrase or word score SARE_SUB_BOOST 1.666 #counts SARE_SUB_BOOST 243s/0h of 113393 corpus (92421s/20972h) 04/18/04 header SARE_SUB_BOOST_OB1 Subject =~ 
/(?!\bboost\b)(?:\b[b8]|\B(?:[\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:t\b|(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)\B)/i describe SARE_SUB_BOOST_OB1 Subject contains obfuscated spammer topic score SARE_SUB_BOOST_OB1 1.666 # type=obfu - 20s/0h of 97268 corpus (79437s/17831h) 01/24/04 #stype SARE_SUB_BOOST_OB1 obfu #counts SARE_SUB_BOOST_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_BOOST_OB2 Subject =~ /(?!\bboost\b)\bb.?o.?o.?s.?t\b/i describe SARE_SUB_BOOST_OB2 Subject contains obfuscated spammer topic score SARE_SUB_BOOST_OB2 1.666 # type=obfu #stype SARE_SUB_BOOST_OB2 obfu #counts SARE_SUB_BOOST_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04 header SARE_SUB_BREAKTHRU Subject =~ /Breakthrough/i describe SARE_SUB_BREAKTHRU Subject contains likely spammer phrase or word score SARE_SUB_BREAKTHRU 1.200 #counts SARE_SUB_BREAKTHRU 116s/0h of 113374 corpus (92402s/20972h) 04/18/04 header SARE_SUB_BREAKTHRU_OB1 Subject =~ 
/(?!Breakthrough)(?:[b8\xDF]|\xCE\x92|\xCE\xB2|\xD0\x92|\xD0\xB2)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\xB0])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[uv\*\xB5\xD9-\xDC\xF9-\xFC]|\xC5[\xA8-\xB3]|\xC6[\xAF-\xB0]|\xC7[\x93-\x9C]|\xCE\xB0|\xCE\xBC|\xCF\x8B|\xCF\x8D|\xD4\xB1|\xD5\x84|\xD5\x8D)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3]])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:h|\xC4[\xA4-\xA7]|\xCE\x89|\xCE\x97|\xD0\x9D|\xD0\xBD|\xD1\x92|\xD2[\xA2-\xA3]|\xD2[\xBA-\xBB]|\xD5\x
B0])/i
describe SARE_SUB_BREAKTHRU_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_BREAKTHRU_OB1 1.666 # type=obfu
#stype SARE_SUB_BREAKTHRU_OB1 obfu
#counts SARE_SUB_BREAKTHRU_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_BREAKTHRU_OB1 5s/0h of 91714 corpus (74113s/17601h) 01/24/04

# "breakthrough" with at most one arbitrary character allowed between
# consecutive letters; the negative lookahead rejects a match that starts
# on the plain, unbroken spelling (case-insensitive).
header SARE_SUB_BREAKTHRU_OB2 Subject =~ /(?!Breakthrough)B.?r.?e.?a.?k.?t.?h.?r.?o.?u.?g.?h/i
describe SARE_SUB_BREAKTHRU_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_BREAKTHRU_OB2 1.666 # type=obfu
#stype SARE_SUB_BREAKTHRU_OB2 obfu
#counts SARE_SUB_BREAKTHRU_OB2 4s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_BREAKTHRU_OB2 6s/0h of 91714 corpus (74113s/17601h) 01/24/04

# The three letters "bvy" standing alone as a word, any case.
header SARE_SUB_BUY_OB1 Subject =~ /\bbvy\b/i
describe SARE_SUB_BUY_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_BUY_OB1 1.666 # type=obfu
#stype SARE_SUB_BUY_OB1 obfu
#counts SARE_SUB_BUY_OB1 3s/0h of 113272 corpus (92366s/20906h) 04/23/04

header SARE_SUB_CARTRIDGE_OB1 Subject
=~ /(?!Cartridge)(?:[c\*\xC7\xE7\xA2\xA9]|\xC4[\x86-\x8D]|\xD0\xA1|\xD1\x81)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[d\xD0]|\xC4[\x8E-\x91])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[g6]|\xC4[\x9C-\xA3])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)/i
# The pattern above spells c-a-r-t-r-i-d-g-e with a set of look-alike bytes
# or byte pairs for every letter and an optional separator byte between
# letters. The "g" group previously read (?:[g6]|\xC4[\x9C-\xA3]]) -- the
# extra "]" sat outside the class and was therefore a required literal, so
# the two-byte "g" variants could only match when followed by "]". It has
# been removed.
describe SARE_SUB_CARTRIDGE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CARTRIDGE_OB1 1.666 # type=obfu
#stype SARE_SUB_CARTRIDGE_OB1 obfu
#counts SARE_SUB_CARTRIDGE_OB1 3s/0h of 111253 corpus (90483s/20770h) 04/15/04

# "cartridge" with at most one arbitrary character between consecutive
# letters; the lookahead rejects a match starting on the plain spelling.
header SARE_SUB_CARTRIDGE_OB2 Subject =~ /(?!Cartridge)C.?a.?r.?t.?r.?i.?d.?g.?e/i
describe SARE_SUB_CARTRIDGE_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_CARTRIDGE_OB2 1.666 # type=obfu
#stype SARE_SUB_CARTRIDGE_OB2 obfu
#counts SARE_SUB_CARTRIDGE_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Header name capitalised as "Subject" to agree with every other rule here.
header SARE_SUB_CHANGE_LIFE Subject =~ /changed? 
(?:my|your) (?:love\W*)life/i
describe SARE_SUB_CHANGE_LIFE Subject contains likely spammer phrase or word
score SARE_SUB_CHANGE_LIFE 0.900
#hist SARE_SUB_CHANGE_LIFE Created by Bob Menschel Mar 31 2004
#counts SARE_SUB_CHANGE_LIFE 62s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Whole-word "charge" written with look-alike characters and/or one
# non-word separator between letters; plain "charge" is excluded.
header SARE_SUB_CHARGE_OB1 Subject =~ /(?!\bcharge\b)\b[c\xC7\xE7\xA2\xA9][\W_]?h[\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[r\xAE][\W_]?[g6][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i
describe SARE_SUB_CHARGE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CHARGE_OB1 1.666 # type=obfu
#stype SARE_SUB_CHARGE_OB1 obfu
#counts SARE_SUB_CHARGE_OB1 8s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_CHARGE_OB1 17s/0h of 97268 corpus (79437s/17831h) 01/24/04

# Same technique for "cheap" with an optional "er" suffix.
header SARE_SUB_CHEAP_OB1 Subject =~ /(?!\bcheap(er)?)\b[c\xC7\xE7\xA2\xA9][\W_]?h[\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?p([e3\*\xC8-\xCB\xE8-\xEB][\W_]?[r\xAE])?/i
describe SARE_SUB_CHEAP_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_CHEAP_OB1 3.333 # type=obfu
#stype SARE_SUB_CHEAP_OB1 obfu
#counts SARE_SUB_CHEAP_OB1 174s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Sub-rule (double-underscore name): confidential / confidentiality /
# confidentially as a whole word. No describe or score is given for it here.
header __SARE_SUB_CONFID_W Subject =~ /\bconfidential(?:ity|ly)?\b/i

# "confidential" combined with one of the listed sales/relationship words on
# either side, or "confidant", or "can i confide".
header SARE_SUB_CONFID_P Subject =~ /(?:confidential.+(?:assured|brand|business|delivery|discreet|embarrass|medicine|offer|opportunity|orders|prescription|shopping|stock)|(?:assistance|business|mutual|priv(?:at)?e|relationship|strict?ly|urgent).+confiden[tc]ial|\bconfidant\b|can i confide)/i
describe SARE_SUB_CONFID_P Subject contains likely spammer phrase or word
score SARE_SUB_CONFID_P 0.794
#counts SARE_SUB_CONFID_P 43s/0h of 113305 corpus (92399s/20906h) 04/22/04
#ham SARE_SUB_CONFID_P organization's emails flagged: "- confidential"

# Fixed phrases beginning with or containing "Confidential".
header SARE_SUB_CONF_INFO Subject =~ /(?:Confidential (?:business|info|med|assist)|Confidentiality assured|Fwd: Confidential)/i
describe SARE_SUB_CONF_INFO Subject contains likely spammer phrase or word
score 
SARE_SUB_CONF_INFO 0.700
#counts SARE_SUB_CONF_INFO 26s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "exciting" followed by "and prosperous", "business opportunity" or "new",
# with any run of non-word characters between the words.
header SARE_SUB_EXCITING_NEW Subject =~ /exciting\W*(and\W*prosperous|business\W*opportunity|new)/i
describe SARE_SUB_EXCITING_NEW Subject contains likely spammer phrase or word
score SARE_SUB_EXCITING_NEW 0.644
#counts SARE_SUB_EXCITING_NEW 16s/0h of 113374 corpus (92402s/20972h) 04/18/04
#hist SARE_SUB_EXCITING_NEW Created by Bob Menschel Apr 05 2004

# Whole-word "ebay" built from look-alike characters / single separators;
# the lookahead excludes "ebay" and "e-bay".
header SARE_SUB_EBAY_OB1 Subject =~ /(?!e-?bay)\b[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[b8\xDF][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[y\xA5\xDD\xFD]\b/i
describe SARE_SUB_EBAY_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_EBAY_OB1 2.500 # type=obfu
#stype SARE_SUB_EBAY_OB1 obfu
#counts SARE_SUB_EBAY_OB1 28s/0h of 113373 corpus (92402s/20971h) 04/20/04

# Whole-word "exclusive", same obfuscation scheme; plain spelling excluded.
header SARE_SUB_EXCL_OB1 Subject =~ /(?!\bexclusive\b)\b[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[x\xD7][\W_]?[c\xC7\xE7\xA2\xA9][\W_]?[l1I\|\xA3][\W_]?[uv\*\xB5\xD9-\xDC\xF9-\xFC][\W_]?[s5\$\xA7][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[vu][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i
describe SARE_SUB_EXCL_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_EXCL_OB1 1.666 # type=obfu
#stype SARE_SUB_EXCL_OB1 obfu
#counts SARE_SUB_EXCL_OB1 9s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_EXCL_OB1 14s/0h of 97268 corpus (79437s/17831h) 01/24/04

# "skills expired", "update your expired", "expired academic|account".
header SARE_SUB_EXPIRED Subject =~ /(?:(?:skills|update\W*your)\W*expired|expired\W*(?:academic|account))/i
describe SARE_SUB_EXPIRED Subject contains likely spammer phrase or word
score SARE_SUB_EXPIRED 0.666
#counts SARE_SUB_EXPIRED 20s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Literal phrase "find your" anywhere in the subject, any case.
header SARE_SUB_FIND_YOUR Subject =~ /find your/i
describe SARE_SUB_FIND_YOUR Subject contains likely spammer phrase or word
score SARE_SUB_FIND_YOUR 0.738
#counts SARE_SUB_FIND_YOUR 33s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Whole-word "for" with a substituted "o"/"r" or a separator between letters.
header SARE_SUB_FOR_OB1 Subject =~ /(?!\bFor\b)\bf[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[r\xAE]\b/i
describe SARE_SUB_FOR_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_FOR_OB1 2.500 # type=obfu
#stype SARE_SUB_FOR_OB1 obfu
#counts SARE_SUB_FOR_OB1 57s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Either "Women:" or the phrase "for women only", any case.
header SARE_SUB_FOR_WOMEN Subject =~ /(?:Women:|for women only)/i
describe SARE_SUB_FOR_WOMEN Subject contains likely spammer phrase or word
score SARE_SUB_FOR_WOMEN 0.166
#counts SARE_SUB_FOR_WOMEN 3s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Whole-word "hard" with look-alike characters or single separators; the
# plain spelling is excluded by the lookahead.
header SARE_SUB_HARD_OB1 Subject =~ /(?!\bhard\b)\bh[\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[r\xAE][\W_]?[d\xD0]\b/i
describe SARE_SUB_HARD_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_HARD_OB1 2.500 # type=obfu
#stype SARE_SUB_HARD_OB1 obfu
#counts SARE_SUB_HARD_OB1 30s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Plain "inkjet" anywhere in the subject (no word boundaries), any case.
header SARE_SUB_INKJET Subject =~ /Inkjet/i
describe SARE_SUB_INKJET Subject contains likely spammer phrase or word
score SARE_SUB_INKJET 0.850
#counts SARE_SUB_INKJET 53s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_INKJET 87s/0h of 91714 corpus (74113s/17601h) 01/24/04

header SARE_SUB_INKJET_OB1 Subject
=~ /(?!Inkjet)(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:k|\xC4[\xB6-\xB8]|\xCE\x9A|\xCE\xBA|\xD0\x8C|\xD0\x9A|\xD0\xBA|\xD1\x9C|\xD2[\x9A-\x9D])[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:j|\xC4[\xB4-\xB5]|\xD0\x88|\xD1\x98)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)/i
# The pattern above spells i-n-k-j-e-t with look-alike bytes or byte pairs
# for every letter and an optional separator byte between letters. Two
# groups previously ended in a doubled bracket -- "\xD2[\x9A-\x9D]])" in the
# "k" group and "\xD1\x98])" in the "j" group. The extra "]" was outside any
# character class, hence a required literal, so the last alternative of each
# group could only match when followed by "]". Both have been removed.
describe SARE_SUB_INKJET_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_INKJET_OB1 1.666 # type=obfu
#stype SARE_SUB_INKJET_OB1 obfu
#counts SARE_SUB_INKJET_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_INKJET_OB1 12s/0h of 91714 corpus (74113s/17601h) 01/24/04

# "inkjet" with at most one arbitrary character between consecutive
# letters; the lookahead rejects a match starting on the plain spelling.
header SARE_SUB_INKJET_OB2 Subject =~ /(?!Inkjet)i.?n.?k.?j.?e.?t/i
describe SARE_SUB_INKJET_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_INKJET_OB2 1.666 # type=obfu
#stype SARE_SUB_INKJET_OB2 obfu
#counts SARE_SUB_INKJET_OB2 3s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_INKJET_OB2 10s/0h of 91714 corpus (74113s/17601h) 01/24/04

header SARE_SUB_JOB Subject =~ 
/(?:(?:dead\W*end|does\W*your|dream|find\W*people|get\W*(?:a|the)(?:\W*better)?|(?:keep|quit)\W*(?:your|their)(?:\W*day)?|real|run\W*your|that\W*great|wanna|with\W*a\W*new|(?:yo)?ur\W*(?:current|full\W*time))\W*job|good\W*jobs|global\W*job\W*vacancy|success\W*job\W*story|job\W*(?:confirmation|feel\W*like|journal|opportunity|you\W*want)|joboffer)/i
describe SARE_SUB_JOB Subject contains likely spammer phrase or word
score SARE_SUB_JOB 1.666
#counts SARE_SUB_JOB 313s/0h of 113393 corpus (92421s/20972h) 04/18/04

# "... like you" after one of the listed lead-ins, or "like you (have)
# never seen".
header SARE_SUB_LIKE_YOU Subject =~ /(?:(?:singles(?: just)?|(?:looking(?: for)?|(?:need|surprise)) someone|who might) like you|like you (?:have )?never seen)/i
describe SARE_SUB_LIKE_YOU Subject contains likely spammer phrase or word
score SARE_SUB_LIKE_YOU 0.444
#counts SARE_SUB_LIKE_YOU 8s/0h of 113305 corpus (92399s/20906h) 04/22/04

# "Re:" (optional single space) directly followed by "LAST CHANCE".
header SARE_SUB_LAST_CHANCE2 Subject =~ /Re: ?LAST CHANCE/i
describe SARE_SUB_LAST_CHANCE2 Subject contains likely spammer phrase or word
score SARE_SUB_LAST_CHANCE2 0.333
#counts SARE_SUB_LAST_CHANCE2 6s/0h of 113305 corpus (92399s/20906h) 04/22/04

# "LAST CHANCE", then three or more whitespace characters, then more text.
header SARE_SUB_LAST_CHANCE3 Subject =~ /LAST CHANCE\s{3,}\S/i
describe SARE_SUB_LAST_CHANCE3 Subject contains likely spammer phrase or word
score SARE_SUB_LAST_CHANCE3 0.333
#counts SARE_SUB_LAST_CHANCE3 6s/0h of 113305 corpus (92399s/20906h) 04/22/04

# Literal phrase "one last chance", any case.
header SARE_SUB_LAST_CHANCE4 Subject =~ /one last chance/i
describe SARE_SUB_LAST_CHANCE4 Subject contains likely spammer phrase or word
score SARE_SUB_LAST_CHANCE4 0.111
#counts SARE_SUB_LAST_CHANCE4 2s/0h of 113305 corpus (92399s/20906h) 04/22/04

# Whole-word "lose" with look-alike characters or single separators; the
# plain spelling is excluded by the lookahead.
header SARE_SUB_LOSE_OB1 Subject =~ /(?!\bLoSE\b)\b[l1I\|\xA3][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[s5\$\xA7][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i
describe SARE_SUB_LOSE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_LOSE_OB1 2.500 # type=obfu
#stype SARE_SUB_LOSE_OB1 obfu
#counts SARE_SUB_LOSE_OB1 50s/0h of 111253 corpus (90483s/20770h) 04/15/04

header 
SARE_SUB_MISC_1 Subject =~ /\b(?:get rid of|sexy)\b/i
describe SARE_SUB_MISC_1 Subject contains likely spammer phrase or word
score SARE_SUB_MISC_1 1.272
#counts SARE_SUB_MISC_1 129s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Moon-themed phrases: "admiring the moon", "moonlight so long / to find",
# "on / to the moon".
header SARE_SUB_MOON Subject =~ /(?:admiring\W*the\W*moon|moonlight\W*(?:so\W*long|to\W*find)|(?:on|to)\W*the\W*moon)/i
describe SARE_SUB_MOON Subject contains likely spammer phrase or word
score SARE_SUB_MOON 0.638
#counts SARE_SUB_MOON 15s/0h of 111251 corpus (90481s/20770h) 04/15/04

# "move" where the "m" may be drawn as /\/\, /V\ or "rn" and the remaining
# letters may be look-alikes; text starting with plain "move" is excluded.
header SARE_SUB_MOVE_OB1 Subject =~ /(?!\bmove)\b(?:\/\\\/\\|\/V\\|rn|[m])[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[vu][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]/i
describe SARE_SUB_MOVE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_MOVE_OB1 1.666 # type=obfu
#stype SARE_SUB_MOVE_OB1 OBFU
#counts SARE_SUB_MOVE_OB1 2s/0h of 111253 corpus (90483s/20770h) 04/15/04

# p-a-s-s-i-o-n with look-alike bytes or byte pairs per letter and an
# optional separator byte between letters (pattern continues past this line).
header SARE_SUB_PASSION_OB1 Subject =~ /(?!Passion)(?:[p\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[a4\*\@\xC0-\xC5\xAA\xE0-\xE5]|\/\\|\xC4[\x80-\x85]|\xC7[\x8D-\x8E]|\xC7[\xBA-\xBB]|\xCE\x86|\xCE\x91|\xCE\x94|\xCE\x9B|\xCE\xAC|\xCE\xB1|\xD0\x90|\xD0\xB0)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[s5\$\xA7]|\xC5[\x9A-\xA1]|\xD0\x85|\xD1\x95|\xD5\x8F)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|
\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)/i
describe SARE_SUB_PASSION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PASSION_OB1 1.666 # type=obfu - 4s/0h of 91714 corpus (74113s/17601h) 01/24/04
#counts SARE_SUB_PASSION_OB1 1s/0h of 111253 corpus (90483s/20770h) 04/15/04

# "passion" with at most one arbitrary character between consecutive
# letters; the lookahead rejects a match starting on the plain spelling.
header SARE_SUB_PASSION_OB2 Subject =~ /(?!Passion)p.?a.?s.?s.?i.?o.?n/i
describe SARE_SUB_PASSION_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_PASSION_OB2 1.666 # type=obfu - 4s/0h of 91714 corpus (74113s/17601h) 01/24/04
#counts SARE_SUB_PASSION_OB2 1s/0h of 111253 corpus (90483s/20770h) 04/15/04

# The plain word, bounded on both sides, any case.
header SARE_SUB_PENIS Subject =~ /\bpenis\b/i
describe SARE_SUB_PENIS Subject contains likely spammer phrase or word
score SARE_SUB_PENIS 1.338
#counts SARE_SUB_PENIS 141s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_PENIS 368s/0h of 97268 corpus (79437s/17831h) 01/24/04

# The same word written with look-alike characters or single separators;
# the plain spelling is excluded by the lookahead.
header SARE_SUB_PENIS_OB1 Subject =~ /(?!\bpenis\b)\bp[\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[n\xD1\xF1][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[s5\$\xA7]\b/i
describe SARE_SUB_PENIS_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PENIS_OB1 3.333 # type=obfu
#stype SARE_SUB_PENIS_OB1 obfu
#counts SARE_SUB_PENIS_OB1 533s/0h of 113374 corpus (92402s/20972h) 04/18/04

# "perfectly" followed by creative, legal or smooth.
header SARE_SUB_PERFECTLY Subject =~ /\bperfectly\W*(?:creative|legal|smooth)/i
describe SARE_SUB_PERFECTLY Subject contains likely spammer phrase or word
score SARE_SUB_PERFECTLY 0.166
#counts SARE_SUB_PERFECTLY 3s/0h of 113374 corpus (92402s/20972h) 04/18/04

# "photo" / "photos" with substituted "o"/"s" or single separators; text
# starting with the plain spelling is excluded.
header SARE_SUB_PHOTOS_OB1 Subject =~ /(?!\bphotos?)\bp[\W_]?h[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?t[\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[s5\$\xA7]?/i
describe SARE_SUB_PHOTOS_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PHOTOS_OB1 2.5000 # 
type=obfu
#stype SARE_SUB_PHOTOS_OB1 OBFU
#counts SARE_SUB_PHOTOS_OB1 11s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Whole-word "please" with look-alike characters or single separators; the
# plain spelling is excluded by the lookahead.
header SARE_SUB_PLEASE_OB1 Subject =~ /(?!\bPlease\b)\bp[\W_]?[l1I\|\xA3][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[s5\$\xA7][\W_]?[e3\*\xC8-\xCB\xE8-\xEB]\b/i
describe SARE_SUB_PLEASE_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PLEASE_OB1 1.666 # type=obfu
#stype SARE_SUB_PLEASE_OB1 obfu
#counts SARE_SUB_PLEASE_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04

# p-r-i-n-t-e-r with look-alike bytes or byte pairs per letter and an
# optional separator byte between letters.
header SARE_SUB_PRINTER_OB1 Subject =~ /(?!printer)\b(?:[p\xDE]|\xCE\xA1|\xCF\x81|\xD0\xA0|\xD1\x80)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[il1:\|\*\xCC-\xCF\xEC-\xEF\xA6]|\xC4[\xA8-\xB0]|\xC4\xBA|\xC4\xBC|\xC4\xBE|\xC5\x80|\xC5\x82|\xC7[\x8F-\x90]|\xD0[\x86-\x87]|\xD1[\x96-\x97]|\xCE\x8A|\xCE\x90|\xCE\x99|\xCE\xAA|\xCE\xAF|\xCE\xB9|\xCF\x8A)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[t\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[r\xAE]|\xC5[\x94-\x99]|\xD1\x93)/i
describe SARE_SUB_PRINTER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_PRINTER_OB1 1.000 # type=obfu
#stype SARE_SUB_PRINTER_OB1 obfu
#counts SARE_SUB_PRINTER_OB1 1s/0h of 113373 corpus (92402s/20971h) 04/18/04

# "printer" at a word start with at most one arbitrary character between
# consecutive letters; the lookahead rejects the plain spelling.
header SARE_SUB_PRINTER_OB2 Subject =~ /(?!printer)\bp.?r.?i.?n.?t.?e.?r/i
describe SARE_SUB_PRINTER_OB2 Subject
contains obfuscated spammer topic
score SARE_SUB_PRINTER_OB2 1.000 # type=obfu
#stype SARE_SUB_PRINTER_OB2 obfu
#counts SARE_SUB_PRINTER_OB2 2s/0h of 113374 corpus (92402s/20972h) 04/18/04

# "your profile ... need", one of the listed qualifiers before "profile",
# or "profile(d) company/companies".
header SARE_SUB_PROFILE Subject =~ /(?:your profile.{1,30}need|(?:cholesterol|company|featured|financial|I saw your|new|saw you|special|stock(?:\W*market)?|Your Personal)\W*profile|profiled?\W*compan(?:y|ies))/i
describe SARE_SUB_PROFILE Subject contains likely spammer phrase or word
score SARE_SUB_PROFILE 1.138
#counts SARE_SUB_PROFILE 105s/0h of 113393 corpus (92421s/20972h) 04/18/04
#note SARE_SUB_PROFILE "Investment Profile" matched by SARE_SUB_INVESTMENTS
#note SARE_SUB_PROFILE "Credit Profile" matched by SARE_SUB_POOR_CREDIT

# Whole word "proven", any case.
header SARE_SUB_PROVEN Subject =~ /\bproven\b/i
describe SARE_SUB_PROVEN Subject contains likely spammer phrase or word
score SARE_SUB_PROVEN 1.666
#counts SARE_SUB_PROVEN 266s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Whole-word "real" with look-alike characters or single separators; the
# plain spelling is excluded by the lookahead.
header SARE_SUB_REAL_OB1 Subject =~ /(?!\breal\b)\b[r\xAE][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?[a4\*\@\xC0-\xC5\xAA\xE0-\xE5][\W_]?[l1I\|\xA3]\b/i
describe SARE_SUB_REAL_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_REAL_OB1 1.666 # type=obfu
#stype SARE_SUB_REAL_OB1 obfu
#counts SARE_SUB_REAL_OB1 6s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Whole word "spyware", any case.
header SARE_SUB_SPYWARE Subject =~ /\bSPYWARE\b/i
describe SARE_SUB_SPYWARE Subject contains likely spammer phrase or word
score SARE_SUB_SPYWARE 0.772
#counts SARE_SUB_SPYWARE 39s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Whole word "strong", any case.
header SARE_SUB_STRONG Subject =~ /\bstrong\b/i
describe SARE_SUB_STRONG Subject contains likely spammer phrase or word
score SARE_SUB_STRONG 0.761
#counts SARE_SUB_STRONG 37s/0h of 113272 corpus (92366s/20906h) 04/23/04

# One of the listed lead-ins before "survey", or "survey opportunity|says".
# NOTE(review): unlike the neighbouring phrase rules this pattern has no /i
# modifier although it mixes case ("Fill" vs. lowercase words) -- confirm
# that case-sensitive matching is intended before relying on it.
header SARE_SUB_SURVEY Subject =~ /(?:campaign|Fill\W*out|questions|rated.{1,30}by\W*a|short|simple|tak(e|ing)|womens)\W*survey|survey\W*(?:opportunity|says)/
describe SARE_SUB_SURVEY Subject contains likely spammer phrase or word
score 
SARE_SUB_SURVEY 0.677
#counts SARE_SUB_SURVEY 22s/0h of 111217 corpus (90485s/20732h) 04/17/04

# Whole word "toner", any case.
header SARE_SUB_TONER Subject =~ /\btoner\b/i
describe SARE_SUB_TONER Subject contains likely spammer phrase or word
score SARE_SUB_TONER 0.622
#counts SARE_SUB_TONER 12s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_TONER 23s/0h of 91714 corpus (74113s/17601h) 01/24/04

# t-o-n-e-r with look-alike bytes or byte pairs per letter and an optional
# separator byte between letters. A plain "t"/"r" at either end is anchored
# with \b, a substituted one with \B; plain "toner" is excluded.
header SARE_SUB_TONER_OB1 Subject =~ /(?!\btoner\b)(?:\bt|\B(?:[\+]|\xC5[\xA2-\xA7]|\xCE\xA4|\xCF\x84|\xD0\xA2|\xD1\x82))[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[o0\*\xB0\xBA\xD8\xF8\xD2-\xD6\xF2-\xF6]|\(\)|\[\]|\xC5[\x8C-\x91]|\xC6[\xA0-\xA1]|\xC7[\x91-\x92]|\xC7[\xBE-\xBF]|\xCE\x8C|\xCE\x98|\xCE\x9F|\xCE\xB8|\xCE\xBF|\xCF\x8C|\xD0\x9E|\xD0\xBE|\xD5\x95)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[n\xD1\xF1]|\|\\\||\xC5[\x83-\x8B]|\xCE\x9D|\xCE\xA0|\xCE\xAE|\xCE\xB7|\xD5\xB2|\xD5\xB8)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:[e3\*\xC8-\xCB\xE8-\xEB]|\xC4[\x92-\x9B]|\xCE\x88|\xCE\x95|\xCE\xA3|\xCE\xAD|\xCE\xB5|\xD0\x81|\xD0\x95|\xD0\xB5|\xD1\x91)[\x01-\x2F\\\^_`\|\x7F-\xA1\xA4-\xA8\xAB-\xAD\xAF-\xB1\xB4\xB7-\xBB\xBF\xF7]?(?:r\b|(?:[\xAE]|\xC5[\x94-\x99]|\xD1\x93)\B)/i
describe SARE_SUB_TONER_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_TONER_OB1 1.666 # type=obfu
#stype SARE_SUB_TONER_OB1 obfu
#counts SARE_SUB_TONER_OB1 1s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Whole-word "toner" with at most one arbitrary character between
# consecutive letters; plain "toner" is excluded.
header SARE_SUB_TONER_OB2 Subject =~ /(?!\btoner\b)\bt.?o.?n.?e.?r\b/i
describe SARE_SUB_TONER_OB2 Subject contains obfuscated spammer topic
score SARE_SUB_TONER_OB2 1.666 # type=obfu
#stype SARE_SUB_TONER_OB2 obfu
#counts SARE_SUB_TONER_OB2 1s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Whole-word "video" with look-alike characters or single separators; the
# plain spelling is excluded by the lookahead.
header SARE_SUB_VIDEO_OB1 Subject =~ /(?!\bvideo\b)\b[vu][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?[d\xD0][\W_]?[e3\*\xC8-\xCB\xE8-\xEB][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])\b/i
describe SARE_SUB_VIDEO_OB1 Subject
contains obfuscated spammer topic
#stype SARE_SUB_VIDEO_OB1 obfu
score SARE_SUB_VIDEO_OB1 2.500 # type=obfu
#counts SARE_SUB_VIDEO_OB1 10s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Literal phrase "Your woman", any case.
header SARE_SUB_YOUR_WOMAN Subject =~ /Your woman/i
describe SARE_SUB_YOUR_WOMAN Subject contains likely spammer phrase or word
score SARE_SUB_YOUR_WOMAN 0.800
#counts SARE_SUB_YOUR_WOMAN 44s/0h of 111253 corpus (90483s/20770h) 04/15/04
#max SARE_SUB_YOUR_WOMAN 56s/0h of 97268 corpus (79437s/17831h) 01/24/04

# Meta rule: fires only when the Subject contains "webmaster" AND the From
# header contains "webmaster@".
header __SARE_SUB_WEBMASTER1 Subject =~ /webmaster/i
header __SARE_SUB_WEBMASTER2 From =~ /webmaster\@/i
meta SARE_SUB_WEBMASTER2 __SARE_SUB_WEBMASTER1 && __SARE_SUB_WEBMASTER2
describe SARE_SUB_WEBMASTER2 Subject contains likely spammer phrase or word
score SARE_SUB_WEBMASTER2 0.166
#counts SARE_SUB_WEBMASTER2 3s/0h of 113305 corpus (92399s/20906h) 04/22/04

# "sion" with look-alike characters or single separators; the lookaheads
# skip plain "sion" plus the "s lon" and "s: on" sequences.
header SARE_SUB_SION_OB1 Subject =~ /(?!sion)(?!s lon)(?!s: on)[s5\$\xA7][\W_]?[il1:\|\*\xCC-\xCF\xEC-\xEF][\W_]?(?:\[\]|\(\)|[o0\*\xD2-\xD6\xF2-\xF6])[\W_]?[n\xD1\xF1]/i
describe SARE_SUB_SION_OB1 Subject contains obfuscated spammer topic
score SARE_SUB_SION_OB1 1.666 # type=obfu
#stype SARE_SUB_SION_OB1 obfu
#counts SARE_SUB_SION_OB1 9s/0h of 113373 corpus (92402s/20971h) 04/18/04
# type=obfu ham: "looks longer", "as long"

# Category: Technical spamsign

# A word of 6 to 20 letters drawn only from the listed lowercase consonants
# (case-sensitive: no /i); two lookahead exclusions precede the word.
header SARE_SUB_6CONS_WORD Subject =~ /(?!.m+)(?!xpvpnsrv)\b[bcghjklmnpqrstvwxz]{6,20}\b/
describe SARE_SUB_6CONS_WORD subject contains word consisting of consecutive consonants
score SARE_SUB_6CONS_WORD 1.666
#counts SARE_SUB_6CONS_WORD 533s/0h of 113373 corpus (92402s/20971h) 04/20/04
#max SARE_SUB_6CONS_WORD 863s/0h of 97268 corpus (79437s/17831h) 01/24/04
#hist SARE_SUB_6CONS_WORD 04/20/2004 -- Added exclusion for hmmmmm

# A word of exactly 7 consonants, any case; "JDBGMGR" is excluded.
header SARE_SUB_7CONS_WORD Subject =~ /(?!JDBGMGR)(?!.m+)\b[bcdfghjklmnpqrstvwxz]{7}\b/i
describe SARE_SUB_7CONS_WORD subject contains word consisting of consecutive consonants
score SARE_SUB_7CONS_WORD 1.666
#counts SARE_SUB_7CONS_WORD 485s/0h of 113373 
corpus (92402s/20971h) 04/20/04
#hist SARE_SUB_7CONS_WORD 04/20/2004 -- Added exclusion for hmmmmm

# An a/e/o/u-umlaut byte sandwiched between two word characters. The class
# is written with \x escapes (E4, EB, F6, FC = the ISO-8859-1 codes of the
# four characters) instead of raw non-ASCII literals, so the rule no longer
# depends on the encoding this file is saved in: as raw literals in a UTF-8
# copy, the class would hold the individual UTF-8 bytes instead.
header SARE_SUB_ACCENT_CHAR Subject =~ /\w[\xE4\xEB\xF6\xFC]\w/
describe SARE_SUB_ACCENT_CHAR Subject contains foreign character apparently embedded within a word
score SARE_SUB_ACCENT_CHAR 1.666
#counts SARE_SUB_ACCENT_CHAR 205s/0h of 113374 corpus (92402s/20972h) 04/18/04

# A "$" with an ASCII letter directly on each side.
header SARE_SUB_CASH_CHAR Subject =~ /[a-zA-Z]\$[a-zA-Z]/
describe SARE_SUB_CASH_CHAR Subject has letter then $ then letter
score SARE_SUB_CASH_CHAR 1.638
#counts SARE_SUB_CASH_CHAR 195s/0h of 113393 corpus (92421s/20972h) 04/18/04

# Subject whose very first character is a comma.
header SARE_SUB_COMMA_FIRST Subject =~ /^,/
describe SARE_SUB_COMMA_FIRST Subject starts with a Comma.
score SARE_SUB_COMMA_FIRST 0.955
#counts SARE_SUB_COMMA_FIRST 72s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Raw (undecoded) Subject containing the "=?ks_c_5601-1987?" charset tag.
header SARE_SUB_ENC_KS5601 Subject:raw =~ /\=\?ks_c_5601\-1987\?/i
describe SARE_SUB_ENC_KS5601 Subject specifies display in Korean?, unnecessary unless spam hides subject
score SARE_SUB_ENC_KS5601 0.877
#counts SARE_SUB_ENC_KS5601 58s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Raw Subject mentioning "windows-1251". The describe text previously had a
# doubled ", ," which has been collapsed to a single comma.
header SARE_SUB_ENC_WIN1251 Subject:raw =~ /windows-1251/i
describe SARE_SUB_ENC_WIN1251 Subject specifies display in windows-1251, unnecessary unless spam hides subject
score SARE_SUB_ENC_WIN1251 1.111 # type=spamp
#stype SARE_SUB_ENC_WIN1251 spamp
#counts SARE_SUB_ENC_WIN1251 56s/0h of 111253 corpus (90483s/20770h) 04/15/04

# Raw Subject mentioning "windows-1255".
header SARE_SUB_ENC_WIN1255 Subject:raw =~ /windows-1255/i
describe SARE_SUB_ENC_WIN1255 Subject specifies display in windows-1255, unnecessary unless spam hides subject
score SARE_SUB_ENC_WIN1255 1.111 # type=spamp
#stype SARE_SUB_ENC_WIN1255 spamp
#counts SARE_SUB_ENC_WIN1255 25s/0h of 113374 corpus (92402s/20972h) 04/18/04

# Three back-to-back groups of 1-5 lowercase letters followed by 1-5 digits
# (case-sensitive).
header SARE_SUB_LETTERS_NUMS Subject =~ /[a-z]{1,5}[0-9]{1,5}[a-z]{1,5}[0-9]{1,5}[a-z]{1,5}[0-9]{1,5}/
describe SARE_SUB_LETTERS_NUMS Subject contains multiple mixed letters and numbers in one "word"
score SARE_SUB_LETTERS_NUMS 0.700
#counts 
SARE_SUB_LETTERS_NUMS 26s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_LETTERS_NUMS 199s/0h of 97268 corpus (79437s/17831h) 01/24/04

# Subject with a run of at least 170 characters.
header SARE_SUB_LONG_SUBJ_170 Subject =~ /.{170,}/
describe SARE_SUB_LONG_SUBJ_170 Subject is excessively long -- more than 170 chars
score SARE_SUB_LONG_SUBJ_170 3.333 # type=spamg
#stype SARE_SUB_LONG_SUBJ_170 spamg
#counts SARE_SUB_LONG_SUBJ_170 224s/0h of 113373 corpus (92402s/20971h) 04/20/04
#hist SARE_SUB_LONG_SUBJ_170 04/17/2004 - Created _140 rule based on large corpus (safety cushion above 130 which hit 0 ham in test corpus).
#hist SARE_SUB_LONG_SUBJ_170 However, _140 hits 3 auto-generated auto-response ham on alternate corpus. So score that low, and score _170 high.

# Two parenthesised 1-3 digit numbers in the same subject, e.g. "(12) ... (34)".
# The leading lookahead skips text shaped like "(ddd) ddd-dddd (ddd)".
header SARE_SUB_PAREN_NUM Subject =~ /(?!\(\d{3}\)[- ]?\d{3}-\d{4}\s+\(\d{3}\))\(\d{1,3}\).*\(\d{1,3}\)/
describe SARE_SUB_PAREN_NUM Subject contains (00)Subject(00)
# The trailing note on the score line was introduced with "-" instead of
# "#", which put extra tokens after the score value; it is now a comment
# like on every other score line in this file.
score SARE_SUB_PAREN_NUM 1.666 # type=spamg
#stype SARE_SUB_PAREN_NUM spamg
#counts SARE_SUB_PAREN_NUM 371s/0h of 113393 corpus (92421s/20972h) 04/18/04
#hist SARE_SUB_PAREN_NUM 04/02/2004 - http://www.rulesemporium.com/rules/99_FVGT_subject.cf
#hist SARE_SUB_PAREN_NUM 04/20/2004 - added exclusion for USA telephone numbers in subject.
# A percent sign immediately followed by a letter, any case.
header SARE_SUB_PCT_LETTER Subject =~ /%[A-Z]{1}/i
describe SARE_SUB_PCT_LETTER Subject contains random-text spamsign
score SARE_SUB_PCT_LETTER 1.666
#counts SARE_SUB_PCT_LETTER 671s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_PCT_LETTER 1282s/0h of 125097 corpus (104914s/20183h) 04/02/04

# Stand-alone two-letter "word": first letter from [cjnqstuvwxz], second
# from [bgjqu]; NB/NG/NJ/NU, SU and VB are excluded by the lookaheads.
header SARE_SUB_RAND_LETTRS2 Subject =~ /(?!N[BGJU])(?!SU)(?!VB)\b[cjnqstuvwxz][bgjqu]\b/i
describe SARE_SUB_RAND_LETTRS2 Subject contains random-text spamsign
score SARE_SUB_RAND_LETTRS2 1.666
#Ham SARE_SUB_RAND_LETTRS2 exclude: NG = abbr No Good, NJ = New Jersey, SU = subjective universe
#counts SARE_SUB_RAND_LETTRS2 585s/0h of 113373 corpus (92402s/20971h) 04/20/04

# Stand-alone three-letter "word": "x" plus two of the listed consonants;
# "xls" and "xsl" are excluded.
header SARE_SUB_RAND_LETTRS2B Subject =~ /(?!xls)(?!xsl)\bx[bfghjklnpqrstwz][bfghjklnpqrstwz]\b/i
describe SARE_SUB_RAND_LETTRS2B Subject contains random-text spamsign
score SARE_SUB_RAND_LETTRS2B 1.055
#counts SARE_SUB_RAND_LETTRS2B 90s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_RAND_LETTRS2B 113s/0h of 97268 corpus (79437s/17831h) 01/24/04

# Stand-alone four-letter "word": one of e/i/o/u plus three of the listed
# consonants; text starting with "uh" is excluded.
header SARE_SUB_RAND_LETTRS4 Subject =~ /(?!uh+)\b[eiou][bfghjklnpqrtwz]{3}\b/i
describe SARE_SUB_RAND_LETTRS4 Subject contains random-text spamsign
score SARE_SUB_RAND_LETTRS4 1.616
#counts SARE_SUB_RAND_LETTRS4 189s/0h of 113373 corpus (92402s/20971h) 04/20/04
#ham SARE_SUB_RAND_LETTRS4 lots of ham with leading A

# Stand-alone five-letter "word" made only of the listed consonants;
# "LPRng" is excluded.
header SARE_SUB_RAND_LETTRS5 Subject =~ /(?!LPRng)\b[bcdfghjklnpqrvwz]{5}\b/i
describe SARE_SUB_RAND_LETTRS5 Subject contains random-text spamsign
score SARE_SUB_RAND_LETTRS5 1.666
#counts SARE_SUB_RAND_LETTRS5 387s/0h of 113373 corpus (92402s/20971h) 04/18/04
#max SARE_SUB_RAND_LETTRS5 473s/0h of 97268 corpus (79437s/17831h) 01/24/04
#hist SARE_SUB_RAND_LETTRS5 04/20/2004 - Added LPRng exclusion, to avoid single ham hit.
# Whole-subject shape (case-sensitive, anchored at both ends):
#   "Re: " + 2 or more capitals + "," + optional "digits," +
#   3 or more lowercase words (each optionally ending in one punctuation
#   mark) + optional "[digits]" + optional trailing whitespace.
header SARE_SUB_RAND_UC Subject =~ /^Re:\s[A-Z]{2,},(\d+,)?(\s[a-z]+[.,:;'!?-]?){3,}(\[\d+\])?\s*$/
describe SARE_SUB_RAND_UC Subject contains random-text spamsign
score SARE_SUB_RAND_UC 2.222
#counts SARE_SUB_RAND_UC 7612s/0h of 113374 corpus (92402s/20972h) 04/18/04
#max SARE_SUB_RAND_UC 8372s/0h of 125097 corpus (104914s/20183h) 04/02/04

# EOF