Initial commit.
This commit is contained in:
25
mail/spamassassin/abc/99_struction_IXHASH.cf
Normal file
25
mail/spamassassin/abc/99_struction_IXHASH.cf
Normal file
@@ -0,0 +1,25 @@
|
||||
loadplugin Mail::SpamAssassin::Plugin::iXhash /etc/mail/spamassassin/iXhash.pm
|
||||
# This makes DNS queries time out after 10 seconds (2x default)
|
||||
ixhash_timeout 10
|
||||
|
||||
# This list uses iX Magazine's spam as datasource.
|
||||
body IXHASH1 eval:ixhashtest('ix.dnsbl.manitu.net')
|
||||
describe IXHASH1 This mail has been classified as spam @ iX Magazine, Germany
|
||||
tflags IXHASH1 net
|
||||
score IXHASH1 2.5
|
||||
|
||||
# This list is fed by spamtraps run by the former LogIn & Solutions AG, Germany
|
||||
body IXHASH2 eval:ixhashtest('generic.ixhash.net')
|
||||
describe IXHASH2 mail has been classified as spam @ former LogIn&Solutions AG, Germany
|
||||
tflags IXHASH2 net
|
||||
score IXHASH2 1.5
|
||||
|
||||
body IXHASH3 eval:ixhashtest('ctyme.ixhash.net')
|
||||
describe IXHASH3 mail has been classified as spam @ JunkEmailFilter, Germany
|
||||
tflags IXHASH3 net
|
||||
score IXHASH3 1.0
|
||||
|
||||
body IXHASH4 eval:ixhashtest('hosteurope.ixhash.net')
|
||||
describe IXHASH4 mail has been classified as spam @ HostEurope, Germany
|
||||
tflags IXHASH4 net
|
||||
score IXHASH4 1.0
|
||||
67
mail/spamassassin/abc/BayesOCR_PLG.cf
Normal file
67
mail/spamassassin/abc/BayesOCR_PLG.cf
Normal file
@@ -0,0 +1,67 @@
|
||||
#*************************************************************************
|
||||
# Bayes OCR Plugin, version 0.1
|
||||
#*************************************************************************
|
||||
# Copyright 2007 P.R.A. Group - D.I.E.E. - University of Cagliari (ITA)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#*************************************************************************
|
||||
|
||||
loadplugin BayesOCR_PLG BayesOCR_PLG.pm
|
||||
# Cerberus guarded the gate to Hades and ensured
|
||||
# that spirits of the dead could enter...
|
||||
# BayesOCR Plugin guards the inboxes and ensures
|
||||
# that only legitimate images can enter,
|
||||
# spam images are detected and eaten.
|
||||
|
||||
# Rule: BayesOCR_check(low, high) - hits when low < classifier output <= high
|
||||
# Categorisation of text embedded in images with TextCategorisation techniques.
|
||||
# Requires gocr and the ImageMagick tools convert and identify
|
||||
|
||||
|
||||
body BayesOCR_PLG40 eval:BayesOCR_check(0.40, 0.50)
|
||||
body BayesOCR_PLG50 eval:BayesOCR_check(0.50, 0.60)
|
||||
body BayesOCR_PLG60 eval:BayesOCR_check(0.60, 0.70)
|
||||
body BayesOCR_PLG70 eval:BayesOCR_check(0.70, 0.80)
|
||||
body BayesOCR_PLG80 eval:BayesOCR_check(0.80, 0.90)
|
||||
body BayesOCR_PLG90 eval:BayesOCR_check(0.90, 0.95)
|
||||
body BayesOCR_PLG95 eval:BayesOCR_check(0.95, 0.99)
|
||||
body BayesOCR_PLG99 eval:BayesOCR_check(0.99, 1.00)
|
||||
|
||||
describe BayesOCR_PLG40 Bayesian ImageSpam probability is 40% to 50%
|
||||
describe BayesOCR_PLG50 Bayesian ImageSpam probability is 50% to 60%
|
||||
describe BayesOCR_PLG60 Bayesian ImageSpam probability is 60% to 70%
|
||||
describe BayesOCR_PLG70 Bayesian ImageSpam probability is 70% to 80%
|
||||
describe BayesOCR_PLG80 Bayesian ImageSpam probability is 80% to 90%
|
||||
describe BayesOCR_PLG90 Bayesian ImageSpam probability is 90% to 95%
|
||||
describe BayesOCR_PLG95 Bayesian ImageSpam probability is 95% to 99%
|
||||
describe BayesOCR_PLG99 Bayesian ImageSpam probability is 99% to 100%
|
||||
|
||||
add_header all BayesOCR-OUT _PLGBAYESOCROUT_
|
||||
|
||||
priority BayesOCR_PLG40 1000
|
||||
priority BayesOCR_PLG50 1000
|
||||
priority BayesOCR_PLG60 1000
|
||||
priority BayesOCR_PLG70 1000
|
||||
priority BayesOCR_PLG80 1000
|
||||
priority BayesOCR_PLG90 1000
|
||||
priority BayesOCR_PLG95 1000
|
||||
priority BayesOCR_PLG99 1000
|
||||
|
||||
score BayesOCR_PLG40 0 0 0.5 0.5
|
||||
score BayesOCR_PLG50 0 0 1.0 1.0
|
||||
score BayesOCR_PLG60 0 0 1.5 1.5
|
||||
score BayesOCR_PLG70 0 0 2.0 2.0
|
||||
score BayesOCR_PLG80 0 0 2.7 2.7
|
||||
score BayesOCR_PLG90 0 0 3.5 3.5
|
||||
score BayesOCR_PLG95 0 0 4.0 4.0
|
||||
score BayesOCR_PLG99 0 0 4.5 4.5
|
||||
400
mail/spamassassin/abc/BayesOCR_PLG.pm
Normal file
400
mail/spamassassin/abc/BayesOCR_PLG.pm
Normal file
@@ -0,0 +1,400 @@
|
||||
#*************************************************************************
|
||||
# Bayes OCR Plugin, version 0.1
|
||||
#*************************************************************************
|
||||
# Copyright 2007 P.R.A. Group - D.I.E.E. - University of Cagliari (ITA)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#*************************************************************************
|
||||
|
||||
package BayesOCR_PLG;
|
||||
|
||||
use strict;
|
||||
use Mail::SpamAssassin;
|
||||
use Mail::SpamAssassin::Util;
|
||||
use Mail::SpamAssassin::Plugin;
|
||||
use Mail::SpamAssassin::Logger;
|
||||
|
||||
our @ISA = qw (Mail::SpamAssassin::Plugin);
|
||||
|
||||
# constructor: register the eval rule
|
||||
# Constructor: builds the plugin object on top of the base plugin class,
# registers the BayesOCR_check eval rule and initialises the cached state.
#   $class  - class name, or an instance whose class should be reused
#   $mailsa - object handed through to the base-class constructor
# Returns the blessed plugin object.
sub new {
    my $class  = shift;
    my $mailsa = shift;

    $class = ref($class) || $class;
    my $self = bless( $class->SUPER::new($mailsa), $class );

    dbg("PLG-BayesOCR:: new:: register_eval_rule");
    $self->register_eval_rule("BayesOCR_check");

    # A negative classifier output means "not computed yet": BayesOCR_check
    # only runs the OCR pipeline while this value is below zero.
    $self->{'imgTxt_classifierOut'} = -1;

    # Text that BayesOCR_check publishes through the PLGBAYESOCROUT tag.
    $self->{'imgTxt_tagmsg'} = "";

    return $self;
}
|
||||
|
||||
#===========================================================================
|
||||
#===========================================================================
|
||||
|
||||
# Hook called with ($self, $pms) before the eval rules run (per the original
# author's note). Resets the cached classifier output and tag text so that
# the next BayesOCR_check call recomputes them.
sub check_start {
    my $self = shift;
    my $pms  = shift;    # received but not used here

    dbg("PLG-BayesOCR:: check_start:: init score");

    # Back to the "not computed yet" state set up by the constructor.
    $self->{'imgTxt_classifierOut'} = -1;
    $self->{'imgTxt_tagmsg'}        = "";
}
|
||||
|
||||
# Placeholder per-user filter: logs the username found on the main object
# reachable from $pms and accepts every user.
#   $pms - per-message status object (only $pms->{main}->{username} is read)
# Always returns 1.
sub isValidUser {
    my $pms = shift;

    my $main = $pms->{main};
    dbg( "PLG-BayesOCR:: isValidUser:: Username: " . $main->{username} );

    return 1;
}
|
||||
|
||||
|
||||
# Eval rule BayesOCR_check(low, high).
#
# Hits when the cached classifier output lies in the half-open interval
# (low, high].  The OCR + classification pipeline is run at most once per
# message: only while the cached output is still negative (the value set by
# new/check_start).  On that first run the PLGBAYESOCROUT tag is also set.
#
#   $self    - plugin object holding the cached output and tag text
#   $pms     - per-message status object
#   $ignored - third positional argument, not used here (presumably the
#              body text SpamAssassin passes to body eval rules - confirm)
#   $low     - exclusive lower bound of the bucket
#   $high    - inclusive upper bound of the bucket
#
# Returns a true value on a hit, a false value otherwise.
sub BayesOCR_check {
    my ( $self, $pms, $ignored, $low, $high ) = @_;

    my $rule = $pms->get_current_eval_rule_name();

    # Per-user filtering is currently disabled:
    #if( isValidUser($pms) == 0) { return 0; }

    dbg("PLG-BayesOCR:: BayesOCR_check :: Rule: $rule");
    dbg("PLG-BayesOCR:: BayesOCR_check :: thr: ($high, $low)");

    unless ( $self->{'imgTxt_classifierOut'} >= 0 ) {

        # On success the tag carries the numeric output; on failure the
        # pipeline has already stored an explanatory message in the tag text.
        my $processed = $self->imageSpam_OCRTextProcessing($pms);
        if ($processed) {
            $self->{'imgTxt_tagmsg'} = $self->{'imgTxt_classifierOut'};
        }

        dbg("PLG-BayesOCR:: BayesOCR_check:: Write Mail Header\n\n");
        $pms->set_tag( "PLGBAYESOCROUT", $self->{'imgTxt_tagmsg'} );
    }

    my $score = $self->{'imgTxt_classifierOut'};
    my $hit   = ( $score > $low ) && ( $score <= $high );

    return $hit;
}
|
||||
|
||||
1;
|
||||
|
||||
#===========================================================================
|
||||
|
||||
# boolean $self->imageSpam_OCRTextProcessing($pms)
#
# Image-spam processing: extracts the text embedded in the attached images,
# wraps it in a synthetic plain-text message and classifies that message
# with the Bayes scanner found at $pms->{main}->{bayes_scanner}.
#
#   $self - plugin object; 'imgTxt_classifierOut' and 'imgTxt_tagmsg' are
#           updated as a side effect
#   $pms  - per-message status object; $pms->{msg} is the message scanned
#
# Returns 1 when text was extracted and classified ('imgTxt_classifierOut'
# then holds the score formatted with three decimals).
# Returns 0 when the scanner is unavailable, no images were found, or too
# little text was recognised ('imgTxt_tagmsg' then explains why and
# 'imgTxt_classifierOut' is 0).
sub imageSpam_OCRTextProcessing {
    my ( $self, $pms ) = @_;

    # 0 (instead of the initial -1) marks this message as processed, so the
    # remaining BayesOCR_check rules do not repeat the OCR work.
    $self->{'imgTxt_classifierOut'} = 0;

    #================================
    # Check classifier
    #================================
    my $nbSA = $pms->{main}->{bayes_scanner};

    # Guard against a missing scanner object as well as an unavailable scan;
    # calling a method on undef would abort the whole rule evaluation.
    if ( !defined $nbSA || $nbSA->is_scan_available() == 0 ) {
        dbg("PLG-BayesOCR:: imageTextClassifierOutEstimation: NB scan not available");
        $self->{'imgTxt_tagmsg'} = "0.0 (NaiveBayes not available)";
        return 0;
    }

    #================================
    # Image extraction
    #================================
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: Check for Attached Images");
    my ( $imgTextOcr, $numImages ) = imageTextExtractionFromMSG( $pms->{msg} );

    if ( $numImages == 0 ) {
        $self->{'imgTxt_tagmsg'} = "0.0 (No images found)";
        return 0;
    }

    # Count the runs of three or more ASCII letters in the recognised text.
    my $numWord = () = $imgTextOcr =~ /[a-z]{3,}/gi;
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: $numWord words (3+ chars) recognised");

    if ( $numWord <= 3 ) {
        $self->{'imgTxt_tagmsg'} = "0.0 (No usefull text found)";
        return 0;
    }

    #================================
    # Classifier's output estimation
    #================================

    # Temporary message whose body is the OCR text.
    my $mailraw = createMSGFromText( $pms, $imgTextOcr );
    my $msgTmp  = $pms->{main}->parse( $mailraw, 1 );

    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: Compute score with trained NaiveBayes");
    my $pmsTMP = Mail::SpamAssassin::PerMsgStatus->new( $pms->{main}, $msgTmp );

    # Classification.  The temporary objects are released with finish()
    # whether or not the scan dies, then any scan error is re-thrown.
    my $outNB;
    my $scanned = eval { $outNB = $nbSA->scan( $pmsTMP, $msgTmp ); 1 };
    my $scanErr = $@;

    $pmsTMP->finish();
    $msgTmp->finish();

    die $scanErr unless $scanned;

    $self->{'imgTxt_classifierOut'} = sprintf( "%0.3f", $outNB );

    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: classifier's out = $self->{'imgTxt_classifierOut'}");

    return 1;    # All OK
}
|
||||
|
||||
|
||||
#===========================================================================
|
||||
|
||||
# ($imgTextOcr, $numImages) = imageTextExtractionFromMSG($msg)
#
# Runs OCR on every message part returned by $msg->find_parts() for the
# image type selectors below and concatenates the recognised text, one
# image per chunk, each chunk terminated by a newline.
#
#   $msg - message object providing find_parts(); each returned part must
#          provide decode()
#
# Returns the accumulated text and the number of image parts processed.
sub imageTextExtractionFromMSG {
    my $msg = $_[0];

    dbg("PLG-BayesOCR:: imageTextExtractionFromMSG:: Extract & Convert Images");

    # NOTE(review): these selectors are passed to find_parts() unchanged;
    # how it interprets them (glob vs. regex) is not visible here - confirm.
    my @mimeStr = ( "image/*", "img/*" );

    my $num        = 0;
    my $imgTextOcr = "";

    foreach my $mime (@mimeStr) {

        # All attachments matching the current selector.
        foreach my $part ( $msg->find_parts($mime) ) {
            my $imagestream = $part->decode(1048000);    # ~ 1 MB

            # Append the text of this image.  (The previous code passed the
            # accumulator to join() as the separator, which prepended each
            # new text to the old one with nothing between them.)
            $imgTextOcr .= imageTextExtractionByOCR($imagestream) . "\n";

            $num++;
        }
    }

    dbg("PLG-BayesOCR:: imageTextExtractionFromMSG:: $num images extracted");
    return ( $imgTextOcr, $num );
}
|
||||
|
||||
#===========================================================================
|
||||
|
||||
# $textOut = imageTextExtractionByOCR($imagestream)
#
# Extracts the text contained in one image, given as a string of raw image
# bytes, by writing it to a temporary file, converting it to PNM with
# ImageMagick "convert" and feeding the result to "gocr".
#
# Zooming small images could improve OCR accuracy, so:
#
#   Byte check
#       > 1000K  => no OCR
#       < 20K    => OCR + zoom 4X
#       else     => check resolution (via "identify")
#
#   Resolution check
#       larger than 1400x1050 in area and 1280x1024 per side => no OCR
#       area < 800x600                                        => OCR + zoom 4X
#       area < 1024x768                                       => OCR + zoom 2X
#       otherwise                                             => OCR (no zoom)
#
# gocr is given 10 seconds; on timeout it is killed, the event is logged via
# saveLogMsg() and no text is returned.
#
# Returns the recognised text, or "" when the image is skipped or any step
# fails.  Unexpected errors raised while running gocr are re-thrown.
sub imageTextExtractionByOCR {
    my $imagestream = $_[0];

    # Nothing to recognise in a missing or empty stream.
    return "" unless defined $imagestream && length $imagestream;

    my $imagelen = length($imagestream) / 1024;              # size in KB
    my $tmpDir   = "/tmp";
    my $tmpFile  = "$tmpDir/sa_bayesOCR_tmpImg.$$";
    my $imgFile  = "$tmpFile.tmp";                           # raw image
    my $pnmFile  = "$tmpFile.pnm";                           # convert output

    if ( $imagelen > 1000 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Skip, image size = $imagelen");
        return "";
    }

    # Write the image bytes unmodified.  The name is predictable, so remove
    # any leftover (or planted symlink) first and insist on creating the file
    # ourselves: O_EXCL refuses to follow or reuse an existing entry.
    require Fcntl;
    unlink $imgFile, $pnmFile;
    sysopen( my $imgFh, $imgFile,
        Fcntl::O_WRONLY() | Fcntl::O_CREAT() | Fcntl::O_EXCL(), 0600 )
      or return "";
    binmode $imgFh;
    my $stored = print {$imgFh} $imagestream;
    $stored = close($imgFh) && $stored;
    unless ($stored) {
        unlink $imgFile;
        return "";
    }

    # Extra "convert" options implementing the zoom, as separate arguments.
    my @convertOPT;

    if ( $imagelen < 20 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 4X");
        @convertOPT = ( '-sample', '400%', '-density', '280' );
    }
    else {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Check image dim");

        # Ask identify for the geometry (WxH); an unreadable answer simply
        # leaves the zoom disabled.
        my $identifyTxt = "";
        if ( open( my $idFh, '-|', 'identify', '-quiet', '-ping', $imgFile ) ) {
            $identifyTxt = join "", <$idFh>;
            close $idFh;
        }

        if ( $identifyTxt =~ /\s(\d+)x(\d+)\s/i ) {
            my ( $size1, $size2 ) = ( $1, $2 );
            my $area = $size1 * $size2;

            if ( $area > 1400 * 1050 && $size1 > 1280 && $size2 > 1024 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Skip, image dim = $size1 x $size2");
                unlink $imgFile;
                return "";
            }

            if ( $area < 800 * 600 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 4X");
                @convertOPT = ( '-sample', '400%', '-density', '280' );
            }
            elsif ( $area < 1024 * 768 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 2X");
                @convertOPT = ( '-sample', '200%', '-density', '280' );
            }
        }
    }

    dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Convert & OCR");

    # -append  :: concatenate image layers
    # -flatten :: fuse layers
    # -density :: set dpi
    # List form: no shell is involved in running convert.
    my $exstatus = system( 'convert', $imgFile, '-append', '-flatten',
        @convertOPT, $pnmFile );

    if ( $exstatus != 0 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Convert ERROR!!");

        # Capture stdout and stderr of identify for the log.  The shell is
        # needed for the redirection; the path is built only from constants
        # and the process id.
        my $identifyTxt = "";
        if ( open( my $idFh, '-|', "identify -verbose -strip $imgFile 2>&1" ) ) {
            $identifyTxt = join "", <$idFh>;
            close $idFh;
        }

        my $msg = "Stream size (kb): $imagelen\nIdentify output: \n$identifyTxt\n";
        saveLogMsg( $tmpDir, "Convert Error", $msg );

        # convert may have left a partial output file behind.
        unlink $imgFile, $pnmFile;
        return "";
    }

    # GOCR call with timeout (thanks to B. Austin for the useful suggestions).
    # The handle and pid live outside the eval so that a timeout does not
    # trigger an implicit close (which would wait for gocr to finish) and so
    # that the child can be killed directly.
    my $textOut = "";
    my ( $gocrFh, $gocrPid );

    eval {
        local $SIG{ALRM} = sub { die "GOCR_TIMEOUT\n" };
        alarm 10;

        $gocrPid = open( $gocrFh, '-|', 'gocr', $pnmFile );
        if ($gocrPid) {
            $textOut = join "", <$gocrFh>;
            close $gocrFh;
        }
        else {
            dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: cannot run gocr: $!");
        }

        alarm 0;
    };
    my $ocrErr = $@;
    alarm 0;    # never leave the alarm pending, whatever ended the eval

    if ($ocrErr) {
        if ( $ocrErr ne "GOCR_TIMEOUT\n" ) {

            # Unexpected error: clean up, then propagate it.
            unlink $imgFile, $pnmFile;
            die $ocrErr;
        }

        # timed out
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: OCR timeout!!");

        # List of this process's children, kept for the log only.
        my $psOut = "";
        if ( open( my $psFh, '-|', 'ps', '-o', 'pid,cmd', '--ppid', $$ ) ) {
            $psOut = join "", <$psFh>;
            close $psFh;
        }

        # Kill the gocr child and reap it.
        if ($gocrPid) {
            kill 'KILL', $gocrPid;
            close $gocrFh;
        }

        my $msg = "Stream size (kb): $imagelen\nPS out:\n $psOut\n";
        saveLogMsg( $tmpDir, "OCR timeout", $msg );

        $textOut = "";
    }

    unlink $imgFile, $pnmFile;

    return $textOut;
}
|
||||
|
||||
#===========================================================================
|
||||
|
||||
# $mailraw = createMSGFromText($pms, $ocrText)
#
# Builds the raw source of a minimal text/plain e-mail whose body is the OCR
# text.  From, To and Subject are deliberately left empty; the Date header is
# copied from the message being scanned.
#
#   $pms     - per-message status object; $pms->{msg} must provide
#              get_pristine_header()
#   $ocrText - text recognised in the attached images
#
# Returns the raw message as a single string.
sub createMSGFromText {
    my ( $pms, $ocrText ) = @_;
    dbg("PLG-BayesOCR: createMSGFromText:: Make temp email with OCR's text");

    my $subject = "";
    my $from    = "";    #$pms->{msg}->get_pristine_header("From");
    my $to      = "";    #$pms->{msg}->get_pristine_header("To");

    # The Date value is interpolated into the middle of the header block:
    # a missing header must become an empty string, and any trailing line
    # terminator must go, otherwise a blank line would end the headers early
    # and push Content-Type/Content-Disposition into the body.
    # NOTE(review): whether get_pristine_header() keeps the trailing newline
    # is not visible here - stripping is harmless either way.
    my $date = $pms->{msg}->get_pristine_header("Date");
    $date = "" unless defined $date;
    $date =~ s/\s+\z//;

    my $mailraw = "From: $from\nTo: $to\nSubject: $subject\nDate: $date\nContent-Type: text/plain;\n charset=\"us-ascii\"\nContent-Disposition: inline\n\n$ocrText\n";

    return $mailraw;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#===========================================================================
|
||||
#===========================================================================
|
||||
# saveLogMsg($tmpDir, $title, $msg)
#
# Appends a timestamped entry to "$tmpDir/sa_bayesOCR.log".
#
#   $tmpDir - directory holding the log file
#   $title  - short heading for the entry
#   $msg    - free-form details
#
# Logging is best effort: when the log file cannot be opened the failure is
# reported through dbg() and nothing is written.  Returns the result of
# closing the log file, or nothing when it could not be opened.
#
# (The former empty prototype "()" was dropped: the sub takes three
# arguments.)
sub saveLogMsg {
    my ( $tmpDir, $title, $msg ) = @_;
    my $timenow = localtime time;
    my $logFile = "$tmpDir/sa_bayesOCR.log";

    my $log;
    unless ( open( $log, '>>', $logFile ) ) {
        dbg("PLG-BayesOCR:: saveLogMsg:: cannot append to $logFile: $!");
        return;
    }

    print {$log} "#--------------------------------\n";
    print {$log} " $timenow\n";
    print {$log} " $title\n";
    print {$log} "#--------------------------------\n";
    print {$log} "$msg\n";

    return close $log;
}
|
||||
#===========================================================================
|
||||
|
||||
|
||||
273
mail/spamassassin/abc/DNSWLh.pm
Normal file
273
mail/spamassassin/abc/DNSWLh.pm
Normal file
@@ -0,0 +1,273 @@
|
||||
# Adds DNSWL.org to recipients of spamassassin --report.
|
||||
#
|
||||
# In a SpamAssassin config file, add the lines:
|
||||
#
|
||||
# loadplugin Mail::SpamAssassin::Plugin::DNSWLh
|
||||
# dnswl_address user@example.com
|
||||
# dnswl_password yourpassword
|
||||
#
|
||||
# The last two must be from an account created via
|
||||
# http://www.dnswl.org/registerreporter.pl
|
||||
#
|
||||
#
|
||||
# 2010-02-26-23 Initial release.
|
||||
# 2010-02-27-11 Also call report successful on unlisted IPs.
|
||||
# 2010-02-28-20 State when reported email has trust level "Unlisted".
|
||||
# 2010-03-02-10 Report the IP DNSWL thought was interesting.
|
||||
|
||||
# <@LICENSE>
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to you under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# </@LICENSE>
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Mail::SpamAssassin::Plugin::DNSWLh - perform DNSWL reporting of messages
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
  loadplugin Mail::SpamAssassin::Plugin::DNSWLh
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
DNSWL is a service which lists known legitimate mail servers.
|
||||
This module enables automatic reporting of spam to DNSWL, to improve
|
||||
the accuracy of their database.
|
||||
|
||||
Note that spam reports sent by this plugin to DNSWL each include the
|
||||
entire spam message.
|
||||
|
||||
See http://www.dnswl.org/ for more information about DNSWL.
|
||||
|
||||
=cut
|
||||
|
||||
package Mail::SpamAssassin::Plugin::DNSWLh;
|
||||
|
||||
use Mail::SpamAssassin::Plugin;
|
||||
use Mail::SpamAssassin::Logger;
|
||||
use IO::Socket;
|
||||
use strict;
|
||||
use warnings;
|
||||
use bytes;
|
||||
use re 'taint';
|
||||
|
||||
use constant HAS_LWP_USERAGENT => eval { require LWP::UserAgent; };
|
||||
|
||||
use vars qw(@ISA);
|
||||
@ISA = qw(Mail::SpamAssassin::Plugin);
|
||||
|
||||
# Constructor: creates the plugin object, decides whether reporting can work
# at all and registers the plugin's configuration settings.
#   $class        - class name, or an instance whose class should be reused
#   $mailsaobject - main object; {local_tests_only} and {conf} are read
# Returns the blessed plugin object.
sub new {
  my ($class, $mailsaobject) = @_;

  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  # Reporting needs network tests to be enabled and LWP::UserAgent to have
  # loaded (HAS_LWP_USERAGENT is computed once when this file is compiled).
  my $can_report = !$mailsaobject->{local_tests_only} && HAS_LWP_USERAGENT;

  $self->{dnswl_available} = $can_report ? 1 : 0;
  dbg($can_report
      ? "DNSWL: network tests on, attempting DNSWL"
      : "DNSWL: local tests only, disabling DNSWL");

  $self->set_config($mailsaobject->{conf});

  return $self;
}
|
||||
|
||||
# Registers the plugin's settings (dnswl_address, dnswl_password and
# dnswl_max_report_size) with the configuration parser found at
# $conf->{parser}.
#   $self - plugin object
#   $conf - configuration object providing {parser}
sub set_config {
  my ($self, $conf) = @_;

=head1 USER OPTIONS

=over 4

=item dnswl_address user@example.com

Address of the reporting account. Must belong to an account created via
http://www.dnswl.org/registerreporter.pl

=item dnswl_password yourpassword

Password of the reporting account.

=cut

  # Handler for dnswl_address: accepts something@something without
  # whitespace and stores it on the object the parser passes in.
  my $address_handler = sub {
    my ($self, $key, $value, $line) = @_;
    if ($value =~ /^([^<\s]+\@[^>\s]+)$/) {
      return $self->{dnswl_address} = $1;
    }
    return $value =~ /^$/
      ? $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
      : $Mail::SpamAssassin::Conf::INVALID_VALUE;
  };

  # Handler for dnswl_password: accepts any run of non-whitespace.
  my $password_handler = sub {
    my ($self, $key, $value, $line) = @_;
    if ($value =~ /^(\S+)$/) {
      return $self->{dnswl_password} = $1;
    }
    return $value =~ /^$/
      ? $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
      : $Mail::SpamAssassin::Conf::INVALID_VALUE;
  };

  my @cmds;

  push @cmds, {
    setting => 'dnswl_address',
    default => 'spamassassin-submit@spam.dnswl.chaosreigns.com',
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code    => $address_handler,
  };

  push @cmds, {
    setting => 'dnswl_password',
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code    => $password_handler,
  };

=item dnswl_max_report_size (default: 50)

Messages larger than this size (in kilobytes) will be truncated in
report messages sent to DNSWL.  The default setting is the maximum
size that DNSWL will accept at the time of release.

=cut

  push @cmds, {
    setting => 'dnswl_max_report_size',
    default => 50,
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  };

  $conf->{parser}->register_commands(\@cmds);
}
|
||||
|
||||
# Report hook: submits the message to DNSWL unless reporting is unavailable,
# switched off for this report, or the account settings are missing.
#   $self    - plugin object ({dnswl_available} is read)
#   $options - hash ref; $options->{report} carries {options}, {conf} and
#              receives {report_available} / {report_return} on success
sub plugin_report {
  my ($self, $options) = @_;

  return unless $self->{dnswl_available};

  my $report = $options->{report};

  #dbg("DNSWL: address/pass: " . $report->{conf}->{dnswl_address}
  #  .' '. $report->{conf}->{dnswl_password} );

  return if $report->{options}->{dont_report_to_dnswl};

  my $conf = $report->{conf};

  if (!($conf->{dnswl_address} and $conf->{dnswl_password})) {
    dbg("DNSWL: dnswl_address and/or dnswl_password not defined.");
  }
  elsif ($self->dnswl_report($options)) {
    $report->{report_available} = 1;
    info("DNSWL: spam reported to DNSWL");
    $report->{report_return} = 1;
  }
  else {
    info("DNSWL: could not report spam to DNSWL");
  }
}
|
||||
|
||||
# $ok = $self->dnswl_report($options)
#
# Posts one spam report to the DNSWL abuse-reporting form.
#
#   $self    - plugin object; $self->{main}->{conf}->{dnswl_max_report_size}
#              gives the size limit in kilobytes
#   $options - hash ref with
#                {text}   reference to the full message text
#                {msg}    message object ({metadata} relay strings are read)
#                {report} report object; its {conf} supplies dnswl_address
#                         and dnswl_password
#
# Messages whose receive date is more than two days old are not reported.
# Messages above the size limit are truncated before sending.
#
# Returns 1 when DNSWL acknowledged the report (listed or unlisted IP),
# 0 otherwise.  Progress is also printed to STDOUT.
sub dnswl_report {
  my ($self, $options) = @_;

  # original text
  my $original = ${$options->{text}};

  # check date, using the header part only
  my $header = $original;
  $header =~ s/\r?\n\r?\n.*//s;
  my $date = Mail::SpamAssassin::Util::receive_date($header);
  if ($date && $date < time - 2*86400) {
    warn("DNSWL: Message older than 2 days, not reporting\n");
    return 0;
  }

  # message variables; the relay strings may be absent, so default them to
  # empty strings rather than interpolating undef
  my $description = "spam report via " . Mail::SpamAssassin::Version();
  my $metadata    = $options->{msg}->{metadata};
  my $trusted     = defined $metadata->{relays_trusted_str}
                      ? $metadata->{relays_trusted_str} : '';
  my $untrusted   = defined $metadata->{relays_untrusted_str}
                      ? $metadata->{relays_untrusted_str} : '';

  # truncate message
  my $max_bytes = $self->{main}->{conf}->{dnswl_max_report_size} * 1024;
  if (length($original) > $max_bytes) {
    substr($original, $max_bytes) = "\n[truncated by SpamAssassin]\n";
  }

  # report = three extra header lines placed directly in front of the
  # (possibly truncated) original message
  my $message = join('',
    "Content-Description: $description\n",
    "X-Spam-Relays-Trusted: $trusted\n",
    "X-Spam-Relays-Untrusted: $untrusted\n",
    "$original\n");

  # send message
  my %form = (
    'action',      'save',
    'abuseReport', $message,
  );

  my $conf = $options->{report}->{conf};
  my $ua   = LWP::UserAgent->new;

  my $netloc = 'www.dnswl.org:80';
  my $realm  = 'dnswl.org Abuse Reporting';
  $ua->credentials( $netloc, $realm, $conf->{dnswl_address}, $conf->{dnswl_password} );

  my $response = $ua->post('http://www.dnswl.org/abuse/report.pl', \%form);

  unless ($response->is_success) {
    dbg("DNSWL: Failed to report: ". $response->status_line);
    print "Failed to report to DNSWL, HTTP error: ". $response->status_line ."\n";
    return 0;
  }

  # The acknowledgement page names the IP address DNSWL found interesting.
  my $content = $response->content;

  if ( $content =~ m#IP ([\d\.]+) matches with DNSWL# ) {
    my $reportedip = $1;
    dbg("DNSWL: Successfully reported $reportedip.");
    print "Successfully reported to DNSWL $reportedip.\n";
    return 1;
  }

  if ( $content =~ m#No matching entry found for IP ([\d\.]+)# ) {
    my $reportedip = $1;
    dbg("DNSWL: Successfully reported $reportedip. Current trust level is: Unlisted.");
    print "Successfully reported to DNSWL $reportedip. Current trust level is: Unlisted.\n";
    return 1;
  }

  dbg("DNSWL: Failed to report, acknowledgement not received.");
  print "Failed to report to DNSWL, acknowledgement not received.\n";
  return 0;
}
|
||||
|
||||
1;
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
604
mail/spamassassin/abc/DecodeShortURLs.cf
Normal file
604
mail/spamassassin/abc/DecodeShortURLs.cf
Normal file
@@ -0,0 +1,604 @@
|
||||
loadplugin Mail::SpamAssassin::Plugin::DecodeShortURLs /etc/mail/spamassassin/DecodeShortURLs.pm
|
||||
|
||||
body HAS_SHORT_URL eval:short_url_tests()
|
||||
describe HAS_SHORT_URL Message contains one or more shortened URLs
|
||||
score HAS_SHORT_URL 0.01
|
||||
|
||||
body SHORT_URL_CHAINED eval:short_url_tests()
|
||||
describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
|
||||
score SHORT_URL_CHAINED 3.0
|
||||
|
||||
body SHORT_URL_MAXCHAIN eval:short_url_tests()
|
||||
describe SHORT_URL_MAXCHAIN Message has shortened URL that causes more than 10 redirections
|
||||
score SHORT_URL_MAXCHAIN 5.0
|
||||
|
||||
body SHORT_URL_LOOP eval:short_url_tests()
|
||||
describe SHORT_URL_LOOP Message has short URL that loops back to itself
|
||||
score SHORT_URL_LOOP 0.01
|
||||
|
||||
body SHORT_URL_404 eval:short_url_tests()
|
||||
describe SHORT_URL_404 Message has short URL that returns 404
|
||||
score SHORT_URL_404 1.0
|
||||
|
||||
uri URI_BITLY_BLOCKED /^http:\/\/bit\.ly\/a\/warning/i
|
||||
describe URI_BITLY_BLOCKED Message contains a bit.ly URL that has been disabled due to abuse
|
||||
score URI_BITLY_BLOCKED 10.0
|
||||
|
||||
uri URI_SIMURL_BLOCKED /^http:\/\/simurl\.com\/redirect_black\.php/i
|
||||
describe URI_SIMURL_BLOCKED Message contains a simurl URL that has been disabled due to abuse
|
||||
score URI_SIMURL_BLOCKED 10.0
|
||||
|
||||
uri URI_MIGRE_BLOCKED /^http:\/\/migre\.me\/bloqueado/i
|
||||
describe URI_MIGRE_BLOCKED Message contains a migre.me URL that has been disabled due to abuse
|
||||
score URI_MIGRE_BLOCKED 10.0
|
||||
|
||||
meta SHORT_URIBL HAS_SHORT_URL && (URIBL_BLACK || URIBL_AB_SURBL || URIBL_WS_SURBL || URIBL_JP_SURBL || URIBL_SC_SURBL || URIBL_RHS_DOB || URIBL_DBL_SPAM || URIBL_SBL)
|
||||
describe SHORT_URIBL Message contains shortened URL(s) and also hits a URIDNSBL
|
||||
score SHORT_URIBL 0.01
|
||||
|
||||
url_shortener_log /tmp/DecodeShortURLs.txt
|
||||
url_shortener_cache /tmp/DecodeShortURLs.sq3
|
||||
#url_shortener_syslog 1
|
||||
|
||||
url_shortener 0rz.tw
|
||||
url_shortener 1l2.us
|
||||
url_shortener 1u.ro
|
||||
url_shortener 1url.com
|
||||
url_shortener 2.gp
|
||||
url_shortener 2.ly
|
||||
url_shortener 2chap.it
|
||||
url_shortener 2pl.us
|
||||
url_shortener 2su.de
|
||||
url_shortener 2tu.us
|
||||
url_shortener 2ze.us
|
||||
url_shortener 3.ly
|
||||
url_shortener 301.to
|
||||
url_shortener 301url.com
|
||||
url_shortener 307.to
|
||||
# url_shortener 4sq.com
|
||||
url_shortener 6url.com
|
||||
url_shortener 7.ly
|
||||
url_shortener 9mp.com
|
||||
url_shortener a.gd
|
||||
url_shortener a.gg
|
||||
url_shortener a.nf
|
||||
url_shortener a2a.me
|
||||
url_shortener a2n.eu
|
||||
url_shortener abbr.com
|
||||
url_shortener abe5.com
|
||||
url_shortener access.im
|
||||
url_shortener ad.vu
|
||||
url_shortener adf.ly
|
||||
url_shortener adjix.com
|
||||
url_shortener alturl.com
|
||||
url_shortener amzn.com
|
||||
url_shortener amzn.to
|
||||
url_shortener arm.in
|
||||
url_shortener asso.in
|
||||
url_shortener atu.ca
|
||||
url_shortener aurls.info
|
||||
url_shortener awe.sm
|
||||
url_shortener ayl.lv
|
||||
url_shortener azqq.com
|
||||
url_shortener b23.ru
|
||||
url_shortener b65.com
|
||||
url_shortener b65.us
|
||||
url_shortener bacn.me
|
||||
url_shortener beam.to
|
||||
url_shortener bgl.me
|
||||
url_shortener bit.ly
|
||||
url_shortener bkite.com
|
||||
url_shortener blippr.com
|
||||
url_shortener bloat.me
|
||||
url_shortener blu.cc
|
||||
url_shortener bon.no
|
||||
url_shortener bt.io
|
||||
url_shortener budurl.com
|
||||
url_shortener buk.me
|
||||
url_shortener burnurl.com
|
||||
url_shortener c-o.in
|
||||
url_shortener c.shamekh.ws
|
||||
url_shortener canurl.com
|
||||
url_shortener cd4.me
|
||||
url_shortener chilp.it
|
||||
url_shortener chopd.it
|
||||
url_shortener chpt.me
|
||||
url_shortener chs.mx
|
||||
url_shortener chzb.gr
|
||||
url_shortener clck.ru
|
||||
url_shortener cli.gs
|
||||
url_shortener cliccami.info
|
||||
url_shortener clickthru.ca
|
||||
url_shortener clipurl.us
|
||||
url_shortener clk.my
|
||||
url_shortener clop.in
|
||||
url_shortener clp.ly
|
||||
url_shortener coge.la
|
||||
url_shortener cokeurl.com
|
||||
url_shortener cort.as
|
||||
url_shortener cot.ag
|
||||
url_shortener crum.pl
|
||||
url_shortener curio.us
|
||||
url_shortener cuthut.com
|
||||
url_shortener cuturl.com
|
||||
url_shortener cuturls.com
|
||||
url_shortener dealspl.us
|
||||
url_shortener decenturl.com
|
||||
url_shortener df9.net
|
||||
url_shortener digbig.com
|
||||
url_shortener digg.com
|
||||
url_shortener digipills.com
|
||||
url_shortener digs.by
|
||||
url_shortener dld.bz
|
||||
url_shortener dlvr.it
|
||||
url_shortener dn.vc
|
||||
url_shortener doi.org
|
||||
url_shortener doiop.com
|
||||
url_shortener dr.tl
|
||||
url_shortener durl.me
|
||||
url_shortener durl.us
|
||||
url_shortener dvlr.it
|
||||
url_shortener dwarfurl.com
|
||||
url_shortener easyurl.net
|
||||
url_shortener eca.sh
|
||||
url_shortener eclurl.com
|
||||
url_shortener eepurl.com
|
||||
url_shortener eezurl.com
|
||||
url_shortener ewerl.com
|
||||
url_shortener ezurl.eu
|
||||
url_shortener fa.by
|
||||
url_shortener faceto.us
|
||||
url_shortener fav.me
|
||||
url_shortener fb.me
|
||||
url_shortener ff.im
|
||||
url_shortener fff.to
|
||||
url_shortener fhurl.com
|
||||
url_shortener flic.kr
|
||||
url_shortener flingk.com
|
||||
url_shortener flq.us
|
||||
url_shortener fly2.ws
|
||||
url_shortener fon.gs
|
||||
url_shortener foxyurl.com
|
||||
url_shortener fuseurl.com
|
||||
url_shortener fwd4.me
|
||||
url_shortener fwdurl.net
|
||||
url_shortener fwib.net
|
||||
url_shortener g8l.us
|
||||
url_shortener get-shorty.com
|
||||
url_shortener get-url.com
|
||||
url_shortener get.sh
|
||||
url_shortener gi.vc
|
||||
url_shortener gkurl.us
|
||||
url_shortener gl.am
|
||||
url_shortener go.9nl.com
|
||||
url_shortener go.to
|
||||
url_shortener go2.me
|
||||
url_shortener golmao.com
|
||||
url_shortener goo.gl
|
||||
url_shortener good.ly
|
||||
url_shortener goshrink.com
|
||||
url_shortener gri.ms
|
||||
url_shortener gurl.es
|
||||
url_shortener hao.jp
|
||||
url_shortener hellotxt.com
|
||||
url_shortener hex.io
|
||||
url_shortener hiderefer.com
|
||||
url_shortener hop.im
|
||||
url_shortener hotredirect.com
|
||||
url_shortener hotshorturl.com
|
||||
url_shortener href.in
|
||||
url_shortener ht.ly
|
||||
url_shortener htxt.it
|
||||
url_shortener hugeurl.com
|
||||
url_shortener hurl.it
|
||||
url_shortener hurl.no
|
||||
url_shortener hurl.ws
|
||||
url_shortener icanhaz.com
|
||||
url_shortener icio.us
|
||||
url_shortener idek.net
|
||||
url_shortener ikr.me
|
||||
url_shortener ir.pe
|
||||
url_shortener irt.me
|
||||
url_shortener is.gd
|
||||
url_shortener iscool.net
|
||||
url_shortener it2.in
|
||||
url_shortener ito.mx
|
||||
url_shortener j.mp
|
||||
url_shortener j2j.de
|
||||
url_shortener jdem.cz
|
||||
url_shortener jijr.com
|
||||
url_shortener just.as
|
||||
url_shortener k.vu
|
||||
url_shortener ketkp.in
|
||||
url_shortener kisa.ch
|
||||
url_shortener kissa.be
|
||||
url_shortener kl.am
|
||||
url_shortener klck.me
|
||||
url_shortener kore.us
|
||||
url_shortener korta.nu
|
||||
url_shortener kots.nu
|
||||
url_shortener krz.ch
|
||||
url_shortener ktzr.us
|
||||
url_shortener kxk.me
|
||||
url_shortener l.pr
|
||||
url_shortener l9k.net
|
||||
url_shortener liip.to
|
||||
url_shortener liltext.com
|
||||
url_shortener lin.cr
|
||||
url_shortener lin.io
|
||||
url_shortener linkbee.com
|
||||
url_shortener linkee.com
|
||||
url_shortener linkgap.com
|
||||
url_shortener linkslice.com
|
||||
url_shortener linxfix.de
|
||||
url_shortener liteurl.net
|
||||
url_shortener liurl.cn
|
||||
url_shortener livesi.de
|
||||
url_shortener lix.in
|
||||
url_shortener lk.ht
|
||||
url_shortener ln-s.net
|
||||
url_shortener ln-s.ru
|
||||
url_shortener lnk.by
|
||||
url_shortener lnk.in
|
||||
url_shortener lnk.ly
|
||||
url_shortener lnk.ms
|
||||
url_shortener lnk.sk
|
||||
url_shortener lnkurl.com
|
||||
url_shortener loopt.us
|
||||
url_shortener lost.in
|
||||
url_shortener lru.jp
|
||||
url_shortener lt.tl
|
||||
url_shortener lu.to
|
||||
url_shortener lurl.no
|
||||
url_shortener mavrev.com
|
||||
url_shortener memurl.com
|
||||
url_shortener merky.de
|
||||
url_shortener metamark.net
|
||||
url_shortener migre.me
|
||||
url_shortener min2.me
|
||||
url_shortener minilien.com
|
||||
url_shortener minilink.org
|
||||
url_shortener miniurl.com
|
||||
url_shortener minurl.fr
|
||||
url_shortener moby.to
|
||||
url_shortener moourl.com
|
||||
url_shortener msg.sg
|
||||
url_shortener murl.kz
|
||||
url_shortener mv2.me
|
||||
url_shortener mysp.in
|
||||
url_shortener myurl.in
|
||||
url_shortener myurl.si
|
||||
url_shortener nanoref.com
|
||||
url_shortener nanourl.se
|
||||
url_shortener nbx.ch
|
||||
url_shortener ncane.com
|
||||
url_shortener ndurl.com
|
||||
url_shortener ne1.net
|
||||
url_shortener netnet.me
|
||||
url_shortener netshortcut.com
|
||||
url_shortener ni.to
|
||||
url_shortener nig.gr
|
||||
url_shortener nm.ly
|
||||
url_shortener nn.nf
|
||||
url_shortener notlong.com
|
||||
url_shortener nutshellurl.com
|
||||
url_shortener nyti.ms
|
||||
url_shortener o-x.fr
|
||||
url_shortener o.ly
|
||||
url_shortener oboeyasui.com
|
||||
url_shortener offur.com
|
||||
url_shortener ofl.me
|
||||
url_shortener om.ly
|
||||
url_shortener omf.gd
|
||||
url_shortener onecent.us
|
||||
url_shortener onion.com
|
||||
url_shortener onsaas.info
|
||||
url_shortener ooqx.com
|
||||
url_shortener oreil.ly
|
||||
url_shortener ow.ly
|
||||
url_shortener oxyz.info
|
||||
url_shortener p.ly
|
||||
url_shortener p8g.tw
|
||||
url_shortener parv.us
|
||||
url_shortener paulding.net
|
||||
url_shortener pduda.mobi
|
||||
url_shortener peaurl.com
|
||||
url_shortener pendek.in
|
||||
url_shortener pep.si
|
||||
url_shortener pic.gd
|
||||
url_shortener piko.me
|
||||
url_shortener ping.fm
|
||||
url_shortener piurl.com
|
||||
url_shortener plumurl.com
|
||||
url_shortener plurl.me
|
||||
url_shortener pnt.me
|
||||
url_shortener poll.fm
|
||||
url_shortener pop.ly
|
||||
url_shortener poprl.com
|
||||
url_shortener post.ly
|
||||
url_shortener posted.at
|
||||
url_shortener pt2.me
|
||||
url_shortener ptiturl.com
|
||||
url_shortener puke.it
|
||||
url_shortener pysper.com
|
||||
url_shortener qik.li
|
||||
url_shortener qlnk.net
|
||||
url_shortener qoiob.com
|
||||
url_shortener qr.cx
|
||||
url_shortener quickurl.co.uk
|
||||
url_shortener qurl.com
|
||||
url_shortener qurlyq.com
|
||||
url_shortener quu.nu
|
||||
url_shortener qux.in
|
||||
url_shortener r.im
|
||||
url_shortener rb6.me
|
||||
url_shortener rde.me
|
||||
url_shortener readthis.ca
|
||||
url_shortener reallytinyurl.com
|
||||
url_shortener redir.ec
|
||||
url_shortener redirects.ca
|
||||
url_shortener redirx.com
|
||||
url_shortener relyt.us
|
||||
url_shortener retwt.me
|
||||
url_shortener ri.ms
|
||||
url_shortener rickroll.it
|
||||
url_shortener rivva.de
|
||||
url_shortener rly.cc
|
||||
url_shortener rnk.me
|
||||
url_shortener rsmonkey.com
|
||||
url_shortener rt.nu
|
||||
url_shortener rubyurl.com
|
||||
url_shortener rurl.org
|
||||
url_shortener s.gnoss.us
|
||||
url_shortener s3nt.com
|
||||
url_shortener s4c.in
|
||||
url_shortener s7y.us
|
||||
url_shortener safe.mn
|
||||
url_shortener safelinks.ru
|
||||
url_shortener sai.ly
|
||||
url_shortener SameURL.com
|
||||
url_shortener sfu.ca
|
||||
url_shortener shadyurl.com
|
||||
url_shortener shar.es
|
||||
url_shortener shim.net
|
||||
url_shortener shink.de
|
||||
url_shortener shorl.com
|
||||
url_shortener short.ie
|
||||
url_shortener short.to
|
||||
url_shortener shorten.ws
|
||||
url_shortener shortenurl.com
|
||||
url_shortener shorterlink.com
|
||||
url_shortener shortio.com
|
||||
url_shortener shortlinks.co.uk
|
||||
url_shortener shortn.me
|
||||
url_shortener shortna.me
|
||||
url_shortener shortr.me
|
||||
url_shortener shorturl.com
|
||||
url_shortener shortz.me
|
||||
url_shortener shoturl.us
|
||||
url_shortener shredu
|
||||
url_shortener shredurl.com
|
||||
url_shortener shrinkify.com
|
||||
url_shortener shrinkr.com
|
||||
url_shortener shrinkster.com
|
||||
url_shortener shrinkurl.us
|
||||
url_shortener shrt.fr
|
||||
url_shortener shrt.ws
|
||||
url_shortener shrtl.com
|
||||
url_shortener shrtn.com
|
||||
url_shortener shrtnd.com
|
||||
url_shortener shurl.net
|
||||
url_shortener shw.me
|
||||
url_shortener simurl.com
|
||||
url_shortener simurl.net
|
||||
url_shortener simurl.org
|
||||
url_shortener simurl.us
|
||||
url_shortener sitelutions.com
|
||||
url_shortener siteo.us
|
||||
url_shortener sl.ly
|
||||
url_shortener slidesha.re
|
||||
url_shortener slki.ru
|
||||
url_shortener smallr.com
|
||||
url_shortener smallr.net
|
||||
url_shortener smfu.in
|
||||
url_shortener smsh.me
|
||||
url_shortener smurl.com
|
||||
url_shortener sn.im
|
||||
url_shortener sn.vc
|
||||
url_shortener snadr.it
|
||||
url_shortener snipie.com
|
||||
url_shortener snipr.com
|
||||
url_shortener snipurl.com
|
||||
url_shortener snkr.me
|
||||
url_shortener snurl.com
|
||||
url_shortener song.ly
|
||||
url_shortener sp2.ro
|
||||
url_shortener spedr.com
|
||||
url_shortener sqze.it
|
||||
url_shortener srnk.net
|
||||
url_shortener srs.li
|
||||
url_shortener starturl.com
|
||||
url_shortener stickurl.com
|
||||
url_shortener stpmvt.com
|
||||
url_shortener sturly.com
|
||||
url_shortener su.pr
|
||||
url_shortener surl.co.uk
|
||||
url_shortener surl.it
|
||||
url_shortener t.co
|
||||
url_shortener t.lh.com
|
||||
url_shortener ta.gd
|
||||
url_shortener takemyfile.com
|
||||
url_shortener tcrn.ch
|
||||
url_shortener tgr.me
|
||||
url_shortener th8.us
|
||||
url_shortener thecow.me
|
||||
url_shortener thrdl.es
|
||||
url_shortener tighturl.com
|
||||
url_shortener timesurl.at
|
||||
url_shortener tini.us
|
||||
url_shortener tiniuri.com
|
||||
url_shortener tiny.cc
|
||||
url_shortener tiny.pl
|
||||
url_shortener tinyarro.ws
|
||||
url_shortener tinylink.com
|
||||
url_shortener tinypl.us
|
||||
url_shortener tinysong.com
|
||||
url_shortener tinytw.it
|
||||
url_shortener tinyurl.com
|
||||
url_shortener tl.gd
|
||||
url_shortener tllg.net
|
||||
url_shortener tncr.ws
|
||||
url_shortener tnw.to
|
||||
url_shortener to.je
|
||||
url_shortener to.ly
|
||||
url_shortener to.vg
|
||||
url_shortener togoto.us
|
||||
url_shortener tr.im
|
||||
url_shortener tr.my
|
||||
url_shortener tra.kz
|
||||
url_shortener traceurl.com
|
||||
url_shortener trcb.me
|
||||
url_shortener trg.li
|
||||
url_shortener trick.ly
|
||||
url_shortener trii.us
|
||||
url_shortener trim.li
|
||||
url_shortener trumpink.lt
|
||||
url_shortener trunc.it
|
||||
url_shortener truncurl.com
|
||||
url_shortener tsort.us
|
||||
url_shortener tubeurl.com
|
||||
# url_shortener tumblr.com
|
||||
url_shortener turo.us
|
||||
url_shortener tw0.us
|
||||
url_shortener tw1.us
|
||||
url_shortener tw2.us
|
||||
url_shortener tw5.us
|
||||
url_shortener tw6.us
|
||||
url_shortener tw8.us
|
||||
url_shortener tw9.us
|
||||
url_shortener twa.lk
|
||||
url_shortener tweet.me
|
||||
url_shortener tweetburner.com
|
||||
url_shortener tweetl.com
|
||||
url_shortener twi.gy
|
||||
url_shortener twip.us
|
||||
url_shortener twirl.at
|
||||
url_shortener twit.ac
|
||||
url_shortener twitclicks.com
|
||||
url_shortener twitterurl.net
|
||||
url_shortener twitthis.com
|
||||
url_shortener twittu.ms
|
||||
url_shortener twiturl.de
|
||||
url_shortener twitzap.com
|
||||
url_shortener twlv.net
|
||||
url_shortener twtr.us
|
||||
url_shortener twurl.cc
|
||||
url_shortener twurl.nl
|
||||
url_shortener u.mavrev.com
|
||||
url_shortener u.nu
|
||||
url_shortener u76.org
|
||||
url_shortener ub0.cc
|
||||
url_shortener uiop.me
|
||||
url_shortener ulimit.com
|
||||
url_shortener ulu.lu
|
||||
url_shortener unfaker.it
|
||||
url_shortener updating.me
|
||||
url_shortener ur.ly
|
||||
url_shortener ur1.ca
|
||||
url_shortener urizy.com
|
||||
url_shortener url.ag
|
||||
url_shortener url.az
|
||||
url_shortener url.co.uk
|
||||
url_shortener url.go.it
|
||||
url_shortener url.ie
|
||||
url_shortener url.inc-x.eu
|
||||
url_shortener url.lotpatrol.com
|
||||
# url_shortener url4.eu
|
||||
url_shortener urlao.com
|
||||
url_shortener urlbee.com
|
||||
url_shortener urlborg.com
|
||||
url_shortener urlbrief.com
|
||||
url_shortener urlcorta.es
|
||||
url_shortener urlcut.com
|
||||
url_shortener urlcutter.com
|
||||
url_shortener urlg.info
|
||||
url_shortener urlhawk.com
|
||||
url_shortener urli.nl
|
||||
url_shortener urlkiss.com
|
||||
url_shortener urloo.com
|
||||
url_shortener urlpire.com
|
||||
url_shortener urltea.com
|
||||
url_shortener urlu.ms
|
||||
url_shortener urlvi.b
|
||||
url_shortener urlvi.be
|
||||
url_shortener urlx.ie
|
||||
url_shortener urlz.at
|
||||
url_shortener urlzen.com
|
||||
url_shortener usat.ly
|
||||
url_shortener uservoice.com
|
||||
url_shortener ustre.am
|
||||
url_shortener vado.it
|
||||
url_shortener vb.ly
|
||||
url_shortener vdirect.com
|
||||
url_shortener vi.ly
|
||||
url_shortener viigo.im
|
||||
url_shortener virl.com
|
||||
url_shortener vl.am
|
||||
url_shortener voizle.com
|
||||
url_shortener vtc.es
|
||||
url_shortener w0r.me
|
||||
url_shortener w33.us
|
||||
url_shortener w34.us
|
||||
url_shortener w3t.org
|
||||
url_shortener wa9.la
|
||||
url_shortener wapurl.co.uk
|
||||
url_shortener webalias.com
|
||||
url_shortener welcome.to
|
||||
url_shortener wh.gov
|
||||
url_shortener wipi.es
|
||||
url_shortener wkrg.com
|
||||
url_shortener woo.ly
|
||||
url_shortener wp.me
|
||||
url_shortener x.hypem.com
|
||||
url_shortener x.se
|
||||
url_shortener x.vu
|
||||
url_shortener xeeurl.com
|
||||
url_shortener xil.in
|
||||
url_shortener xlurl.de
|
||||
url_shortener xr.com
|
||||
url_shortener xrl.in
|
||||
url_shortener xrl.us
|
||||
url_shortener xrt.me
|
||||
url_shortener xurl.jp
|
||||
url_shortener xxsurl.de
|
||||
url_shortener xzb.cc
|
||||
url_shortener yatuc.com
|
||||
url_shortener ye-s.com
|
||||
url_shortener yep.it
|
||||
# url_shortener youtu.be
|
||||
url_shortener z.pe
|
||||
url_shortener zapt.in
|
||||
url_shortener zi.ma
|
||||
url_shortener zi.me
|
||||
url_shortener zi.pe
|
||||
url_shortener zip.li
|
||||
url_shortener zipmyurl.com
|
||||
url_shortener zootit.com
|
||||
url_shortener zud.me
|
||||
url_shortener zurl.ws
|
||||
url_shortener zz.gd
|
||||
url_shortener zzang.kr
|
||||
url_shortener xn--cwg.ws
|
||||
url_shortener xn--fwg.ws
|
||||
url_shortener xn--bih.ws
|
||||
url_shortener xn--l3h.ws
|
||||
url_shortener xn--1ci.ws
|
||||
url_shortener xn--odi.ws
|
||||
url_shortener xn--rei.ws
|
||||
url_shortener xn--3fi.ws
|
||||
url_shortener xn--egi.ws
|
||||
url_shortener xn--hgi.ws
|
||||
url_shortener xn--ogi.ws
|
||||
url_shortener xn--vgi.ws
|
||||
url_shortener xn--5gi.ws
|
||||
url_shortener xn--9gi.ws
|
||||
564
mail/spamassassin/abc/DecodeShortURLs.pm
Normal file
564
mail/spamassassin/abc/DecodeShortURLs.pm
Normal file
@@ -0,0 +1,564 @@
|
||||
# <@LICENSE>
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to you under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# </@LICENSE>
|
||||
|
||||
# Author: Steve Freegard <steve.freegard@fsl.com>
|
||||
|
||||
=head1 NAME
|
||||
|
||||
DecodeShortURLs - Expand shortened URLs
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
loadplugin Mail::SpamAssassin::Plugin::DecodeShortURLs
|
||||
|
||||
url_shortener bit.ly
|
||||
url_shortener go.to
|
||||
...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This plugin looks for URLs shortened by a list of URL shortening services and
|
||||
upon finding a matching URL will connect to the shortening service and
|
||||
do an HTTP HEAD lookup and retrieve the location header which points to the
|
||||
actual shortened URL, it then adds this URL to the list of URIs extracted by
|
||||
SpamAssassin which can then be accessed by other plug-ins, such as URIDNSBL.
|
||||
|
||||
This plugin also sets the rule HAS_SHORT_URL if any matching short URLs are
|
||||
found.
|
||||
|
||||
Regular 'uri' rules can be used to detect and score links disabled by the
|
||||
shortening service for abuse and URI_BITLY_BLOCKED is supplied as an example.
|
||||
It should be safe to score this rule highly on a match as experience shows
|
||||
that bit.ly only blocks access to a URL if it has seen consistent abuse and
|
||||
problem reports.
|
||||
|
||||
As of version 0.3 this plug-in will follow 'chained' shorteners e.g.
|
||||
|
||||
|
||||
short URL -> short URL -> short URL -> real URL
|
||||
|
||||
|
||||
If this form of chaining is found, then the rule 'SHORT_URL_CHAINED' will be
|
||||
fired. If a loop is detected then 'SHORT_URL_LOOP' will be fired.
|
||||
This plug-in limits the number of chained shorteners to a maximum of 10 at
|
||||
which point it will fire the rule 'SHORT_URL_MAXCHAIN' and go no further.
|
||||
|
||||
If a shortener returns a '404 Not Found' result for the short URL then the
|
||||
rule 'SHORT_URL_404' will be fired.
|
||||
|
||||
=head1 NOTES
|
||||
|
||||
This plugin runs the parsed_metadata hook with a priority of -1 so that
|
||||
it may modify the parsed URI list prior to the URIDNSBL plugin which
|
||||
runs as priority 0.
|
||||
|
||||
Currently the plugin queries a maximum of 10 distinct shortened URLs with
|
||||
a maximum timeout of 5 seconds per lookup. Chained shorteners are followed
|
||||
recursively, up to the limit of 10 redirections described above.
|
||||
|
||||
=head1 ACKNOWLEDGEMENTS
|
||||
|
||||
A lot of this plugin has been hacked together by using other plugins as
|
||||
examples. The author would particularly like to tip his hat to Karsten
|
||||
Bräckelmann for the _add_uri_detail_list() function that he stole from
|
||||
GUDO.pm, without which this plugin would not be possible due to the SpamAssassin
|
||||
API making no provision for adding to the base list of extracted URIs and
|
||||
the author not knowing enough about Perl to be able to achieve this without
|
||||
a good example from someone that does ;-)
|
||||
|
||||
=cut
|
||||
|
||||
package Mail::SpamAssassin::Plugin::DecodeShortURLs;
|
||||
|
||||
my $VERSION = 0.6;
|
||||
|
||||
use Mail::SpamAssassin::Plugin;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use vars qw(@ISA);
|
||||
@ISA = qw(Mail::SpamAssassin::Plugin);
|
||||
|
||||
use constant HAS_LWP_USERAGENT => eval { local $SIG{'__DIE__'}; require LWP::UserAgent; };
|
||||
use constant HAS_SQLITE => eval { local $SIG{'__DIE__'}; require DBD::SQLite; };
|
||||
use Fcntl qw(:flock SEEK_END);
|
||||
use Sys::Syslog qw(:DEFAULT setlogsock);
|
||||
|
||||
|
||||
# Forward a debug message to SpamAssassin's logger, prefixed with this
# plugin's name so its output can be told apart from other plugins'.
# Returns whatever Mail::SpamAssassin::Logger::dbg() returns.
sub dbg {
    my ($text) = @_;
    my $tagged = "DecodeShortURLs: $text";
    return Mail::SpamAssassin::Logger::dbg($tagged);
}
|
||||
|
||||
# Constructor, invoked when the plugin is loaded.
#
# Arguments:
#   $class        - class name (or an existing instance to clone the class of)
#   $mailsaobject - the main SpamAssassin object; its {local_tests_only} flag
#                   and {conf} are read here
#
# The plugin disables itself when SpamAssassin runs local tests only or when
# LWP::UserAgent could not be loaded.  Otherwise a user agent is created that
# never follows redirects on its own (the Location header is what we are
# after) and gives up after 5 seconds per request.
#
# Returns the blessed plugin object.
sub new {
    my ($class, $mailsaobject) = @_;

    $class = ref($class) || $class;
    my $self = $class->SUPER::new($mailsaobject);
    bless($self, $class);

    $self->{disabled} =
        ($mailsaobject->{local_tests_only} || !HAS_LWP_USERAGENT) ? 1 : 0;

    # Per-message state flags; always defined so later boolean checks never
    # depend on whether the plugin happened to be enabled at load time.
    $self->{logging} = 0;
    $self->{caching} = 0;
    $self->{syslog}  = 0;

    unless ($self->{disabled}) {
        # Direct method call instead of indirect-object syntax, and the
        # documented constructor options instead of poking object internals.
        $self->{ua} = LWP::UserAgent->new(
            max_redirect => 0,
            timeout      => 5,
        );
        $self->{ua}->env_proxy;
    }

    $self->set_config($mailsaobject->{conf});
    # Priority -1: run before plugins that hook parsed_metadata at priority 0.
    $self->register_method_priority('parsed_metadata', -1);
    $self->register_eval_rule('short_url_tests');

    return $self;
}
|
||||
|
||||
# Register this plugin's configuration directives with the config parser.
#
# Arguments:
#   $self - the plugin object (not used directly here)
#   $conf - configuration object; $conf->{parser}->register_commands() is
#           called with the list of directive definitions built below
#
# Directives registered: url_shortener, url_shortener_log,
# url_shortener_cache, url_shortener_cache_ttl, url_shortener_syslog.
sub set_config {
    my ($self, $conf) = @_;
    my @cmds;

    # url_shortener <domain> [<domain> ...]
    # Each listed domain is lower-cased and recorded under {url_shorteners}
    # of the object the parser hands to the callback (parsed_metadata later
    # reads the same key from $pms->{main}->{conf}).
    push @cmds, {
        setting => 'url_shortener',
        default => {},
        code    => sub {
            my ($self, $key, $value, $line) = @_;
            if (!defined $value || $value eq '') {
                return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
            }
            # split(' ', ...) ignores leading whitespace, so no empty-string
            # "domain" can end up in the shortener table.
            foreach my $domain (split(' ', $value)) {
                $self->{url_shorteners}->{lc $domain} = 1;
            }
            return;
        },
    };

=head1 PRIVILEGED SETTINGS

=over 4

=item url_shortener_log (default: none)

A path to a log file to be written to.  The file will be created if it does
not already exist and must be writable by the user running spamassassin.

For each short URL found the following will be written to the log file:

  [unix_epoch_time] <short url> => <decoded url>

=cut

    push @cmds, {
        setting => 'url_shortener_log',
        default => '',
        is_priv => 1,
        type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    };

=item url_shortener_cache (default: none)

The full path to a database file to write cache entries to.  The database will
be created automatically if it does not already exist but the supplied path
and file must be read/writable by the user running spamassassin or spamd.

NOTE: you will need the DBD::SQLite module installed to use this feature.

Example:

  url_shortener_cache /tmp/DecodeShortURLs.sq3

=cut

    push @cmds, {
        setting => 'url_shortener_cache',
        default => '',
        is_priv => 1,
        type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    };

=item url_shortener_cache_ttl (default: 86400)

The length of time a cache entry will be valid for in seconds.
Default is 86400 (1 day).

NOTE: you will also need to run the following via cron to actually remove the
records from the database:

  echo "DELETE FROM short_url_cache WHERE modified < strftime('%s','now') - <ttl>;" | sqlite3 /path/to/database

NOTE: replace <ttl> above with the same value you use for this option

=cut

    push @cmds, {
        setting  => 'url_shortener_cache_ttl',
        is_admin => 1,
        default  => 86400,
        type     => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
    };

=item url_shortener_syslog (default: 0 (off))

If this option is enabled (set to 1), then short URLs and the decoded URLs will be logged to syslog (mail.info).

=back

=cut

    push @cmds, {
        setting  => 'url_shortener_syslog',
        is_admin => 1,
        default  => 0,
        type     => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
    };

    $conf->{parser}->register_commands(\@cmds);
}
|
||||
|
||||
# Plugin hook: find shortened URLs in the message, resolve each one and add
# the decoded targets to the message's URI list.
#
# Arguments:
#   $self - the plugin object
#   $opts - hook arguments; $opts->{permsgstatus} is the per-message status
#           object whose URI list is read and extended
#
# At most 10 distinct short URLs are looked up per message.  The optional
# log file, syslog and SQLite cache are only opened once there is work to
# do.  Returns nothing useful.
sub parsed_metadata {
    my ($self, $opts) = @_;
    my $pms = $opts->{permsgstatus};

    return if $self->{disabled};

    dbg('warn: get_uri_detail_list() has been called already')
        if exists $pms->{uri_detail_list};

    # Copy the settings once so we don't keep dereferencing them.  The
    # pattern captures on the two paths are carried over from the original
    # code (presumably to untaint them before they are used to open files).
    my $conf = $pms->{main}->{conf};
    $self->{url_shorteners} = $conf->{url_shorteners};
    ($self->{url_shortener_log})   = ($conf->{url_shortener_log}   =~ /^(.*)$/g);
    ($self->{url_shortener_cache}) = ($conf->{url_shortener_cache} =~ /^(.*)$/g);
    $self->{url_shortener_cache_ttl} = $conf->{url_shortener_cache_ttl};
    $self->{url_shortener_syslog}    = $conf->{url_shortener_syslog};

    # Collect short URLs as hash keys to de-duplicate them.
    my %short_urls;
    my $uris = $pms->get_uri_detail_list();
    while (my ($uri, $info) = each %{$uris}) {
        next unless $info->{domains};
        foreach my $domain (keys %{ $info->{domains} }) {
            next unless exists $self->{url_shorteners}->{lc $domain};

            # NOTE: $info->{domains} appears to contain all the domains parsed
            # from the single input URI with no way to work out what the base
            # domain is.  So to prevent someone from stuffing the URI with a
            # shortener to force this plug-in to follow a link that *isn't* on
            # the list of shorteners, we enforce that the shortener must be
            # the base URI and that a path must be present.  \Q..\E keeps the
            # dots in the domain from matching arbitrary characters.
            if ($uri !~ /^http:\/\/(?:www\.)?\Q$domain\E\/.+$/) {
                dbg("Discarding URI: $uri");
                next;
            }
            $short_urls{$uri} = 1;
        }
    }

    # Make sure we have some work to do before we open any log files etc.
    return unless %short_urls;

    # These flags describe the current message only; clear whatever a
    # previous message left behind.
    $self->{logging} = 0;
    $self->{syslog}  = 0;
    $self->{caching} = 0;

    # Initialise logging if enabled (three-argument open: the configured
    # path can never be interpreted as an open mode).
    if ($self->{url_shortener_log}) {
        if (open(my $fh, '>>', $self->{url_shortener_log})) {
            $self->{logfh}   = $fh;
            $self->{logging} = 1;
        }
        else {
            dbg("warn: cannot open $self->{url_shortener_log}: $!");
        }
    }

    # Initialise syslog if enabled
    if ($self->{url_shortener_syslog}) {
        my $opened = eval {
            local $SIG{'__DIE__'};
            openlog('DecodeShortURLs', 'ndelay,pid', 'mail');
            1;
        };
        if ($opened) {
            $self->{syslog} = 1;
        }
        else {
            dbg("warn: $@");
        }
    }

    # Initialise cache if enabled
    if ($self->{url_shortener_cache} && HAS_SQLITE) {
        my $connected = eval {
            local $SIG{'__DIE__'};
            $self->{dbh} = DBI->connect_cached(
                "dbi:SQLite:dbname=" . $self->{url_shortener_cache}, "", "",
                { RaiseError => 1, PrintError => 0, InactiveDestroy => 1 }
            ) or die $DBI::errstr;
            1;
        };
        if (!$connected) {
            dbg("warn: $@");
        }
        else {
            # Create the table and indexes if needed
            my $schema_ok = eval {
                local $SIG{'__DIE__'};
                $self->{dbh}->do("
                  CREATE TABLE IF NOT EXISTS short_url_cache (
                    short_url   TEXT PRIMARY KEY NOT NULL,
                    decoded_url TEXT NOT NULL,
                    hits        INTEGER NOT NULL DEFAULT 1,
                    created     INTEGER NOT NULL DEFAULT (strftime('%s','now')),
                    modified    INTEGER NOT NULL DEFAULT (strftime('%s','now'))
                  )
                ");
                $self->{dbh}->do("
                  CREATE INDEX IF NOT EXISTS short_url_by_modified
                    ON short_url_cache(short_url, modified)
                ");
                $self->{dbh}->do("
                  CREATE INDEX IF NOT EXISTS short_url_modified
                    ON short_url_cache(modified)
                ");
                1;
            };
            if ($schema_ok) {
                $self->{caching} = 1;
            }
            else {
                dbg("warn: $@");
            }
        }
    }

    # Resolve at most 10 short URLs per message.
    my $max_short_urls = 10;
    foreach my $short_url (keys %short_urls) {
        last if $max_short_urls <= 0;
        $self->recursive_lookup($short_url, $pms);
        $max_short_urls--;
    }

    # Close log; buffered write errors surface here, so report them.
    if ($self->{logging}) {
        close($self->{logfh})
            or dbg("warn: cannot close $self->{url_shortener_log}: $!");
        delete $self->{logfh};
        $self->{logging} = 0;
    }

    # Close syslog
    if ($self->{syslog}) {
        eval {
            local $SIG{'__DIE__'};
            closelog();
        };
        $self->{syslog} = 0;
    }

    # The cached database handle is deliberately left connected.
    return;
}
|
||||
|
||||
# Resolve one short URL, following chains of shorteners.
#
# Arguments:
#   $self       - the plugin object (uses {ua}, {url_shorteners} and the
#                 caching/logging/syslog flags)
#   $short_url  - the URL to resolve
#   $pms        - per-message status object; rules are fired on it with
#                 got_hit() and decoded URLs are added to its URI list
#   %been_here  - locations already visited in this chain (keys are URLs)
#
# Fires HAS_SHORT_URL for every resolved URL, SHORT_URL_CHAINED when the
# result was reached through another shortener, SHORT_URL_LOOP when a
# location repeats, SHORT_URL_MAXCHAIN after 10 chained redirections and
# SHORT_URL_404 when the shortener answers 404.
#
# Returns the final location, or undef when nothing could be resolved.
sub recursive_lookup {
    my ($self, $short_url, $pms, %been_here) = @_;

    # Numeric comparisons: with the string operator 'ge', "2" ge "10" is
    # true and the chain limit would trigger after two redirections.
    my $count = scalar keys %been_here;
    dbg("Redirection count $count") if $count > 0;
    if ($count >= 10) {
        dbg("Error: more than 10 shortener redirections");
        $pms->got_hit('SHORT_URL_MAXCHAIN');
        return undef;
    }

    my $location;

    if ($self->{caching} && ($location = $self->cache_get($short_url))) {
        dbg("Found cached $short_url => $location");
        eval {
            local $SIG{'__DIE__'};
            $self->log_to_file("$short_url => $location");
        } if $self->{logging};
        # The URLs come from the message, so never use them as the format.
        syslog('info', '%s', "Found cached $short_url => $location")
            if $self->{syslog};
    }
    else {
        # Not cached; do lookup
        my $response = $self->{ua}->head($short_url);
        if (!$response->is_redirect) {
            dbg("Skipping URL as not redirect: $short_url = " . $response->status_line);
            $pms->got_hit('SHORT_URL_404') if $response->code == 404;
            return undef;
        }

        $location = $response->header('Location');
        if (!defined $location || $location eq '') {
            dbg("Skipping URL as redirect has no Location header: $short_url");
            return undef;
        }

        # Bail out if $short_url redirects to itself
        return undef if $short_url eq $location;

        $self->cache_add($short_url, $location) if $self->{caching};
        dbg("Found $short_url => $location");
        eval {
            local $SIG{'__DIE__'};
            $self->log_to_file("$short_url => $location");
        } if $self->{logging};
        syslog('info', '%s', "Found $short_url => $location")
            if $self->{syslog};
    }

    # At this point we have a new URL in $location
    $pms->got_hit('HAS_SHORT_URL');
    _add_uri_detail_list($pms, $location);

    # Set chained here otherwise we might mark a disabled page or
    # redirect back to the same host as chaining incorrectly.
    $pms->got_hit('SHORT_URL_CHAINED') if $count > 0;

    # Check if we are being redirected to a local page.
    # Don't recurse in this case...
    if ($location !~ /^https?:/) {
        # scheme://host of the short URL, and the short URL minus its last
        # path segment (the base for relative-path references).
        my ($origin)   = ($short_url =~ m{^(https?://[^/\s]+)/});
        my ($base_dir) = ($short_url =~ m{^(https?://\S+)/});
        if (!defined $origin) {
            dbg("Cannot resolve local redirection: $short_url => $location");
            return $location;
        }
        $location = ($location =~ m{^/})
            ? $origin . $location          # absolute path: keep only the host
            : "$base_dir/$location";       # relative path: same directory
        dbg("Looks like a local redirection: $short_url => $location");
        _add_uri_detail_list($pms, $location);
        return $location;
    }

    # Check for recursion.  The host stops at the first '/', so a location
    # with several path segments is still recognised as a shortener.
    if (my ($host) = ($location =~ m{^https?://([^/\s]+)/})) {
        if (exists $been_here{$location}) {
            # Loop detected
            dbg("Error: loop detected");
            $pms->got_hit('SHORT_URL_LOOP');
            return $location;
        }

        # The shortener table is keyed by lower-case domain, and
        # parsed_metadata accepts an optional "www." prefix, so do the same.
        (my $domain = lc $host) =~ s/^www\.//;
        if (   exists $self->{url_shorteners}->{ lc $host }
            || exists $self->{url_shorteners}->{$domain})
        {
            $been_here{$location} = 1;
            # Recurse...
            return $self->recursive_lookup($location, $pms, %been_here);
        }
    }

    # No recursion; just return the final location...
    return $location;
}
|
||||
|
||||
# Eval-rule entry point registered in new().  It never reports a hit
# itself: the rules that share this eval are fired with got_hit() while
# parsed_metadata resolves the short URLs.
sub short_url_tests {
    my $no_hit = 0;
    return $no_hit;
}
|
||||
|
||||
# Beware: this mirrors code from PerMsgStatus get_uri_detail_list() and
# get_uri_list() (taken from GUDO.pm).
#
# Adds $uri to the URI bookkeeping kept on the per-message status object
# $pms: the parsed URI list, the detail list (canonified forms plus the
# domains found in them) and, if it already exists, the flat URI list.
sub _add_uri_detail_list {
    my ($pms, $uri) = @_;

    # Cache of text parsed URIs, as previously used by get_uri_detail_list().
    push @{ $pms->{parsed_uri_list} }, $uri;

    my @canonical = Mail::SpamAssassin::Util::uri_list_canonify(undef, $uri);
    my $entry = {
        types   => { parsed => 1 },
        cleaned => \@canonical,
    };

    # Record each distinct domain once; {domains} is only created when at
    # least one domain was found.
    for my $clean_uri (@canonical) {
        my $dom = Mail::SpamAssassin::Util::uri_to_domain($clean_uri);
        next unless $dom;
        next if $entry->{domains}->{$dom};
        $entry->{domains}->{$dom} = 1;
        $pms->{uri_domain_count}++;
    }

    $pms->{uri_detail_list}->{$uri} = $entry;

    # And of course, copied code from PerMsgStatus get_uri_list(). *sigh*
    if (exists $pms->{uri_list}) {
        dbg('warn: PMS::get_uri_list() appears to have been harvested');
        push @{ $pms->{uri_list} }, @canonical;
    }
}
|
||||
|
||||
# Append one line of the form "[<epoch seconds>] <message>" to the open log
# handle in $self->{logfh}, holding an exclusive lock while writing.
# Does nothing (returns undef) unless $self->{logging} is set.  Failures
# inside the locked section are trapped by the eval and not reported.
sub log_to_file {
    my ($self, $msg) = @_;
    unless ($self->{logging}) {
        return undef;
    }

    my $handle = $self->{logfh};
    eval {
        flock($handle, LOCK_EX) or die $!;
        # Another process may have appended since we opened the file.
        seek($handle, 0, SEEK_END) or die $!;
        print {$handle} '[' . time . '] ' . $msg . "\n";
        flock($handle, LOCK_UN) or die $!;
    };
}
|
||||
|
||||
# Store a short URL and its decoded target in the SQLite cache.
#
# Arguments:
#   $self        - the plugin object (uses {dbh}; no-op unless {caching})
#   $short_url   - cache key
#   $decoded_url - the location the short URL redirects to
#
# INSERT OR REPLACE is used because cache_get() ignores rows older than the
# TTL without deleting them: a plain INSERT would then violate the primary
# key and, with RaiseError set on the handle, die in the middle of message
# processing, while the stale row would never be refreshed.
#
# Database errors are reported through dbg() and otherwise ignored.
# Always returns undef.
sub cache_add {
    my ($self, $short_url, $decoded_url) = @_;
    return undef if not $self->{caching};

    my $stored = eval {
        local $SIG{'__DIE__'};
        $self->{sth_insert} = $self->{dbh}->prepare_cached("
          INSERT OR REPLACE INTO short_url_cache (short_url, decoded_url)
          VALUES (?,?)
        ");
        $self->{sth_insert}->execute($short_url, $decoded_url);
        1;
    };
    dbg("warn: $@") if !$stored;

    return undef;
}
|
||||
|
||||
# Look up a short URL in the SQLite cache.
#
# Arguments:
#   $self - the plugin object (uses {dbh} and {url_shortener_cache_ttl};
#           no-op unless {caching})
#   $key  - the short URL
#
# Only rows modified within the last {url_shortener_cache_ttl} seconds
# count.  A hit is "touched" (modified time refreshed, hit counter
# incremented) to prevent expiry.
#
# Returns the decoded URL on a hit, undef on a miss or database error.
# Database errors are reported through dbg() instead of propagating, since
# the handle is opened with RaiseError set.
sub cache_get {
    my ($self, $key) = @_;
    return undef if not $self->{caching};

    my $decoded;
    my $queried = eval {
        local $SIG{'__DIE__'};
        $self->{sth_select} = $self->{dbh}->prepare_cached("
          SELECT decoded_url FROM short_url_cache
          WHERE short_url = ? AND modified > (strftime('%s','now') - ?)
        ");
        $self->{sth_update} = $self->{dbh}->prepare_cached("
          UPDATE short_url_cache
          SET modified=strftime('%s','now'), hits=hits+1
          WHERE short_url = ?
        ");

        $self->{sth_select}->execute($key, $self->{url_shortener_cache_ttl});
        ($decoded) = $self->{sth_select}->fetchrow_array();
        $self->{sth_select}->finish();
        1;
    };
    if (!$queried) {
        dbg("warn: $@");
        return undef;
    }

    return undef unless $decoded;

    # Found cache entry; touch it to prevent expiry.  A failed touch must
    # not throw away a perfectly good hit.
    my $touched = eval {
        local $SIG{'__DIE__'};
        $self->{sth_update}->execute($key);
        1;
    };
    dbg("warn: $@") if !$touched;

    return $decoded;
}
|
||||
|
||||
1;
|
||||
3
mail/spamassassin/abc/dnswlh.cf
Normal file
3
mail/spamassassin/abc/dnswlh.cf
Normal file
@@ -0,0 +1,3 @@
|
||||
loadplugin Mail::SpamAssassin::Plugin::DNSWLh
|
||||
dnswl_address bogdan@vrem.ro
|
||||
dnswl_password 7llfxe
|
||||
404
mail/spamassassin/abc/iXhash.pm
Normal file
404
mail/spamassassin/abc/iXhash.pm
Normal file
@@ -0,0 +1,404 @@
|
||||
=head1 NAME
|
||||
Mail::SpamAssassin::Plugin::iXhash - compute fuzzy checksums from mail bodies and compare to known spam ones via DNS
|
||||
=head1 SYNOPSIS
|
||||
loadplugin Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
|
||||
# Timeout in seconds - default is 10 seconds
|
||||
ixhash_timeout 10
|
||||
|
||||
# Should we add the hashes to the messages' metadata for later re-use
|
||||
# Default is not to cache hashes (i.e. re-compute them for every check)
|
||||
use_ixhash_cache 0
|
||||
|
||||
# Whether to use only Perl (ixhash_pureperl = 1) or the system's 'tr' and 'md5sum'
|
||||
# Default is to use Perl only
|
||||
ixhash_pureperl 1
|
||||
|
||||
# If you should have 'tr' and/or 'md5sum' in some weird place (e.g on a Windows server)
|
||||
# or you want to specify which version to use, you can specify the exact paths here
|
||||
# Default is to have SpamAssassin find the executables
|
||||
ixhash_tr_path "/usr/bin/tr"
|
||||
ixhash_md5sum_path "/usr/bin/md5sum"
|
||||
|
||||
# The actual rule
|
||||
body IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
|
||||
describe IXHASH This mail has been classified as spam @ iX Magazine, Germany
|
||||
tflags IXHASH net
|
||||
score IXHASH 1.5
|
||||
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, strips parts from it and then computes a hash value
|
||||
from the rest. These values will then be looked up via DNS to see if the hashes have already been categorized as spam by others.
|
||||
This plugin is based on parts of the procmail-based project 'NiX Spam', developed by Bert Ungerer.(un@ix.de)
|
||||
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
|
||||
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums
|
||||
|
||||
To see which DNS zones are currently available see http://www.ixhash.net
|
||||
|
||||
|
||||
=cut
|
||||
|
||||
package Mail::SpamAssassin::Plugin::iXhash;
|
||||
|
||||
use strict;
|
||||
use Mail::SpamAssassin::Plugin;
|
||||
use Mail::SpamAssassin::Logger;
|
||||
use Mail::SpamAssassin::Timeout;
|
||||
|
||||
use Digest::MD5 qw(md5 md5_hex md5_base64);
|
||||
use Net::DNS;
|
||||
|
||||
use vars qw(@ISA);
|
||||
|
||||
|
||||
@ISA = qw(Mail::SpamAssassin::Plugin);
|
||||
|
||||
my $VERSION = "1.5.5";
|
||||
|
||||
# Constructor for the iXhash plugin.
#
# Builds the object through the parent class constructor, records in
# {iXhash_available} whether network tests are allowed (0 when the
# SpamAssassin object has local_tests_only set, 1 otherwise), registers the
# plugin's configuration settings and the 'ixhashtest' eval rule, and
# returns the object.
sub new {
    my ($proto, $mailsa) = @_;
    my $class = ref($proto) || $proto;
    my $self  = bless($class->SUPER::new($mailsa), $class);

    # Are network tests enabled?
    my $network_allowed = $mailsa->{local_tests_only} ? 0 : 1;
    if ($network_allowed) {
        dbg("IXHASH: Using iXhash plugin $VERSION");
    }
    else {
        dbg("IXHASH: local tests only, not using iXhash plugin");
    }
    $self->{iXhash_available} = $network_allowed;

    $self->set_config($mailsa->{conf});
    $self->register_eval_rule("ixhashtest");

    return $self;
}
|
||||
|
||||
# Register the plugin's configuration settings with the config parser.
#
# Parameters:
#   $self - plugin object (not used here)
#   $conf - configuration object; its {parser} receives the settings via
#           register_commands()
#
# Settings registered, in this order:
#   ixhash_timeout     numeric, default 10
#   use_ixhash_cache   numeric, default 0
#   ixhash_pureperl    numeric, default 1
#   ixhash_tr_path     string, no default
#   ixhash_md5sum_path string, no default
sub set_config {
    my ($self, $conf) = @_;

    my $numeric = $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC;
    my $string  = $Mail::SpamAssassin::Conf::CONF_TYPE_STRING;

    my @settings = (
        # ixhash_timeout config option - by dallase@uribl.com
        { setting => 'ixhash_timeout',     default => 10, type => $numeric },
        { setting => 'use_ixhash_cache',   default => 0,  type => $numeric },
        { setting => 'ixhash_pureperl',    default => 1,  type => $numeric },
        { setting => 'ixhash_tr_path',     type => $string },
        { setting => 'ixhash_md5sum_path', type => $string },
    );

    $conf->{parser}->register_commands(\@settings);
}
|
||||
|
||||
# Eval rule: compute the iXhash digests of the message body and look each
# one up in a DNS zone.
#
# Parameters:
#   $self         - plugin object
#   $permsgstatus - per-message status object; reads {main}{conf} and {msg}
#   $full         - passed by the rule engine, not used
#   $dnszone      - DNS zone to query, e.g. 'ix.dnsbl.manitu.net'
#
# Returns: 1 if any digest is listed in $dnszone (an A record in 127/8),
# 0 otherwise, including on timeout, lookup failure, missing external
# tools, or when new() disabled the plugin for local-only operation.
#
# The hashes are tried in order (#1, #2, #3) and the search stops at the
# first hit. The whole sequence runs under a Mail::SpamAssassin::Timeout of
# 'ixhash_timeout' seconds.
sub ixhashtest {
    my ($self, $permsgstatus, $full, $dnszone) = @_;
    my $conf = $permsgstatus->{main}->{conf};

    dbg("IXHASH: IxHash querying $dnszone");

    # new() sets this flag to 0 when only local tests are allowed.
    return 0 if exists $self->{iXhash_available} && !$self->{iXhash_available};

    my $use_tools = ($conf->{'ixhash_pureperl'} == 0);
    if ($use_tools) {
        # Return at once if the external tools we need cannot be found
        return 0 unless $self->is_md5sum_available();
        return 0 unless $self->is_tr_available();
    }

    # get_pristine_body returns the body as a scalar
    my $msg      = $permsgstatus->{msg};
    my $body     = $msg->get_pristine_body();
    my $resolver = Net::DNS::Resolver->new;
    my $tmpfile  = '';
    my $hits     = 0;
    # Declared outside the closure (and not re-declared inside it) so the
    # timeout message below reports the digest that was being checked.
    my $digest   = '';

    # Proper alarm handling, a la Pyzor and Razor2 plugins - dallase@uribl.com
    # see http://issues.apache.org/SpamAssassin/show_bug.cgi?id=3828#c123
    my $timer = Mail::SpamAssassin::Timeout->new({ secs => $conf->{'ixhash_timeout'} });

    my $time_err = $timer->run_and_catch(sub {
        # The system 'tr' and 'md5sum' utilities read the body from a
        # temporary file.
        if ($use_tools) {
            # The file can only be skipped when every hash will be taken
            # from metadata; if just some are cached, the remaining ones
            # still have to be computed from the file.
            my $all_cached = $conf->{'use_ixhash_cache'} == 1
                && $msg->get_metadata('X-iXhash-hash-1')
                && $msg->get_metadata('X-iXhash-hash-2')
                && $msg->get_metadata('X-iXhash-hash-3');
            if ($all_cached) {
                dbg ("IXHASH: Not writing body to temporary file - reusing stored hashes");
            }
            else {
                my $tmpfh;
                ($tmpfile, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();
                unless (defined $tmpfile && defined $tmpfh) {
                    $tmpfile = '';
                    die "could not create temporary file\n";
                }
                (my $body_copy = $body) =~ s/\r\n/\n/g;
                print {$tmpfh} $body_copy or die "could not write $tmpfile: $!\n";
                close $tmpfh or die "could not close $tmpfile: $!\n";
                dbg ("IXHASH: Writing body to temporary file $tmpfile");
            }
        }

        $digest = compute1sthash($permsgstatus, $body, $tmpfile);
        if ($digest) {
            $hits = _ixhash_digest_listed($resolver, $digest, $dnszone);
        }

        # Only go ahead if hash #1 didn't score a hit
        if ($hits == 0) {
            $digest = compute2ndhash($permsgstatus, $body, $tmpfile);
            if ($digest) {
                $hits = _ixhash_digest_listed($resolver, $digest, $dnszone);
            }
        }

        # Only go ahead if neither hash #1 nor hash #2 scored a hit
        if ($hits == 0) {
            $digest = compute3rdhash($permsgstatus, $body, $tmpfile);
            if (length($digest) == 32) {
                $hits = _ixhash_digest_listed($resolver, $digest, $dnszone);
            }
        }
    });

    if ($timer->timed_out()) {
        dbg("IXHASH: ".$conf->{'ixhash_timeout'}." second timeout exceeded while checking ".$digest.".".$dnszone."!");
    }
    elsif ($time_err) {
        chomp $time_err;
        dbg("IXHASH: iXhash lookup failed: $time_err");
    }

    # Only remove a file we actually created.
    unlink $tmpfile if length $tmpfile;
    return $hits;
}

# Query "$digest.$dnszone" for A records; return 1 if any returned address
# starts with 127., otherwise 0.
sub _ixhash_digest_listed {
    my ($resolver, $digest, $dnszone) = @_;

    dbg ("IXHASH: Now checking $digest.$dnszone");
    my $answer = $resolver->search($digest.'.'.$dnszone, "A", "IN");
    return 0 unless $answer;

    my $listed = 0;
    foreach my $rr ($answer->answer) {
        next unless $rr->type eq "A";
        dbg ("IXHASH: Received reply from $dnszone:". $rr->address);
        $listed = 1 if $rr->address =~ /^127\.\d{1,3}\.\d{1,3}\.\d{1,3}/;
    }
    return $listed;
}
|
||||
|
||||
|
||||
|
||||
# Compute (or fetch from message metadata) iXhash digest #1.
#
# Parameters:
#   $permsgstatus - per-message status object; reads {main}{conf} settings
#                   use_ixhash_cache, ixhash_pureperl, ixhash_tr_path and
#                   ixhash_md5sum_path, and reads/writes {msg} metadata
#                   'X-iXhash-hash-1'
#   $body         - message body as a scalar
#   $tmpfile      - path of a file holding the body; only used when
#                   ixhash_pureperl is not 1
#
# Returns: the digest string, or '' when the body does not qualify.
#
# The hash is computed when the body contains 20 or more whitespace
# characters each followed by other text, OR at least two line breaks with
# a line in between. (Change suggested by Karsten Braeckelmann.)
# According to Bert Ungerer this hash generates the most hits (about 70%).
# If short mails cause false positives, this is the place to raise the
# barrier.
sub compute1sthash {
    my ($permsgstatus, $body, $tmpfile) = @_;
    my $conf      = $permsgstatus->{main}->{conf};
    my $msg       = $permsgstatus->{msg};
    my $use_cache = ($conf->{'use_ixhash_cache'} == 1);
    my $digest    = '';

    # Try the message's metadata first; the hash may already be there if
    # another zone has been queried before.
    if ($use_cache && $msg->get_metadata('X-iXhash-hash-1')) {
        dbg ("IXHASH: Hash value for method #1 found in metadata, re-using that one");
        return $msg->get_metadata('X-iXhash-hash-1');
    }

    unless (($body =~ /(?>\s.+?){20}/) || ($body =~ /\n.*\n/)) {
        dbg ("IXHASH: Hash value #1 not computed, requirements not met");
        return $digest;
    }

    if ($conf->{'ixhash_pureperl'} == 1) {
        my $body_copy = $body;
        $body_copy =~ s/\r\n/\n/g;
        # Squeeze runs of identical whitespace characters to one. Done in
        # two steps to avoid Perl segfaults when very many identical
        # characters have to be replaced. Thanks to Martin Blapp for finding
        # that out and suggesting this workaround for spaces, and to Karsten
        # Braeckelmann for pointing out it applies to any character.
        # Step One
        $body_copy =~ s/([[:space:]]{100})(?:\1+)/$1/g;
        # Step Two
        $body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
        # remove graph class chars
        $body_copy =~ s/[[:graph:]]+//g;
        # Create actual digest
        $digest = md5_hex($body_copy);
        dbg ("IXHASH: Computed hash-value ".$digest." via method 1, using perl exclusively");
    }
    else {
        my $tr     = $conf->{ixhash_tr_path};
        my $md5sum = $conf->{ixhash_md5sum_path};
        $digest = `cat $tmpfile | $tr -s '[:space:]' | $tr -d '[:graph:]' | $md5sum | $tr -d ' -'`;
        # Backticks yield undef if the command could not be run.
        $digest = '' unless defined $digest;
        # chomp, not chop: only strip a trailing newline, never a hex digit.
        chomp($digest);
        dbg ("IXHASH: Computed hash-value ".$digest." via method 1, using system utilities");
    }

    $msg->put_metadata('X-iXhash-hash-1', $digest) if $use_cache;
    return $digest;
}
|
||||
|
||||
# Compute (or fetch from message metadata) iXhash digest #2.
#
# Parameters:
#   $permsgstatus - per-message status object; reads {main}{conf} settings
#                   use_ixhash_cache, ixhash_pureperl, ixhash_tr_path and
#                   ixhash_md5sum_path, and reads/writes {msg} metadata
#                   'X-iXhash-hash-2'
#   $body         - message body as a scalar
#   $tmpfile      - path of a file holding the body; only used when
#                   ixhash_pureperl is not 1
#
# Returns: the digest string, or '' when the body does not qualify.
#
# The hash is computed when the body has at least 3 occurrences of one of
# the characters <>()|@*'!?, or of the combination ':/' with no line break
# between them (to match something like
# "Already seen? http:/host.domain.tld/").
sub compute2ndhash{
    my ($permsgstatus, $body, $tmpfile) = @_;
    my $conf      = $permsgstatus->{main}->{conf};
    my $msg       = $permsgstatus->{msg};
    my $use_cache = ($conf->{'use_ixhash_cache'} == 1);
    my $digest    = '';

    # See if this hash has been computed already
    if ($use_cache && $msg->get_metadata('X-iXhash-hash-2')) {
        dbg ("IXHASH: Hash value for method #2 found in metadata, re-using that one");
        return $msg->get_metadata('X-iXhash-hash-2');
    }

    unless ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){3,}/m) {
        dbg ("IXHASH: Hash value #2 not computed, requirements not met");
        return $digest;
    }

    if ($conf->{'ixhash_pureperl'} == 1) {
        my $body_copy = $body;
        # remove redundant stuff
        $body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
        # replace '_' with '.'
        $body_copy =~ tr/_/./;
        # Squeeze repeated characters. Done in two steps to work around a
        # Perl problem with very long runs of identical characters.
        # Step One
        $body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
        # Step Two
        $body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
        # Computing hash...
        $digest = md5_hex($body_copy);
        dbg ("IXHASH: Computed hash-value $digest via method 2, using perl exclusively");
    }
    else {
        my $tr     = $conf->{ixhash_tr_path};
        my $md5sum = $conf->{ixhash_md5sum_path};
        $digest = `cat $tmpfile | $tr -d '[:cntrl:][:alnum:]%&#;=' | $tr '_' '.' | $tr -s '[:print:]' | $md5sum | $tr -d ' -'`;
        # Backticks yield undef if the command could not be run.
        $digest = '' unless defined $digest;
        # chomp, not chop: only strip a trailing newline, never a hex digit.
        chomp($digest);
        dbg ("IXHASH: Computed hash-value ".$digest." via method 2, using system utilities");
    }

    $msg->put_metadata('X-iXhash-hash-2', $digest) if $use_cache;
    return $digest;
}
|
||||
|
||||
# Compute (or fetch from message metadata) iXhash digest #3.
#
# Parameters:
#   $permsgstatus - per-message status object; reads {main}{conf} settings
#                   use_ixhash_cache, ixhash_pureperl, ixhash_tr_path and
#                   ixhash_md5sum_path, and reads/writes {msg} metadata
#                   'X-iXhash-hash-3'
#   $body         - message body as a scalar
#   $tmpfile      - path of a file holding the body; only used when
#                   ixhash_pureperl is not 1
#
# Returns: the digest string, or '' when the body does not qualify.
#
# The hash is computed when the body contains a run of at least 8
# non-space characters. This function does not itself check whether
# hash #1 or #2 were computed.
sub compute3rdhash{
    my ($permsgstatus, $body, $tmpfile ) = @_;
    my $conf      = $permsgstatus->{main}->{conf};
    my $msg       = $permsgstatus->{msg};
    my $use_cache = ($conf->{'use_ixhash_cache'} == 1);
    my $digest    = '';

    # See if this hash has been computed already
    if ($use_cache && $msg->get_metadata('X-iXhash-hash-3')) {
        dbg ("IXHASH: Hash value for method #3 found in metadata, re-using that one");
        return $msg->get_metadata('X-iXhash-hash-3');
    }

    unless ($body =~ /[\S]{8}/) {
        dbg ("IXHASH: Hash value #3 not computed, requirements not met");
        return $digest;
    }

    if ($conf->{'ixhash_pureperl'} == 1) {
        my $body_copy = $body;
        $body_copy =~ s/[[:cntrl:][:space:]=]+//g;
        # Squeeze repeated characters. Done in two steps to work around a
        # Perl problem with very long runs of identical characters.
        # Step One
        $body_copy =~ s/([[:print:]]{100})(?:\1+)/$1/g;
        # Step Two
        $body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
        # Computing actual hash
        $digest = md5_hex($body_copy);
        dbg ("IXHASH: Computed hash-value $digest via method 3");
    }
    else {
        my $tr     = $conf->{ixhash_tr_path};
        my $md5sum = $conf->{ixhash_md5sum_path};
        $digest = `cat $tmpfile | $tr -d '[:cntrl:][:space:]=' | $tr -s '[:graph:]' | $md5sum | $tr -d ' -'`;
        # Backticks yield undef if the command could not be run.
        $digest = '' unless defined $digest;
        # chomp, not chop: only strip a trailing newline, never a hex digit.
        chomp($digest);
        dbg ("IXHASH: Computed hash-value ".$digest." via method 3, using system utilities");
    }

    $msg->put_metadata('X-iXhash-hash-3', $digest) if $use_cache;
    return $digest;
}
|
||||
|
||||
# Determine whether a usable 'tr' executable exists.
# (Approach borrowed from the Pyzor plugin code.)
#
# Uses the configured ixhash_tr_path when it is set; otherwise asks
# Mail::SpamAssassin::Util::find_executable_in_env_path('tr').
# When the resulting path is executable it is stored back into
# {main}{conf}{ixhash_tr_path} and 1 is returned; otherwise 0 is returned.
sub is_tr_available {
    my ($self) = @_;
    my $conf = $self->{main}->{conf};

    my $path = $conf->{ixhash_tr_path}
        ? $conf->{ixhash_tr_path}
        : Mail::SpamAssassin::Util::find_executable_in_env_path('tr');

    if ($path && -x $path) {
        # remember any found tr
        $conf->{ixhash_tr_path} = $path;
        dbg("IXHASH: tr is available: " . $conf->{ixhash_tr_path});
        return 1;
    }

    dbg("IXHASH: tr is not available: no tr executable found");
    return 0;
}
|
||||
|
||||
# Determine whether a usable 'md5sum' executable exists.
# (Approach borrowed from the Pyzor plugin code.)
#
# Uses the configured ixhash_md5sum_path when it is set; otherwise asks
# Mail::SpamAssassin::Util::find_executable_in_env_path('md5sum').
# When the resulting path is executable it is stored back into
# {main}{conf}{ixhash_md5sum_path} and 1 is returned; otherwise 0 is
# returned.
sub is_md5sum_available {
    my ($self) = @_;
    my $conf = $self->{main}->{conf};

    my $path = $conf->{ixhash_md5sum_path}
        ? $conf->{ixhash_md5sum_path}
        : Mail::SpamAssassin::Util::find_executable_in_env_path('md5sum');

    if ($path && -x $path) {
        # remember any found md5sum
        $conf->{ixhash_md5sum_path} = $path;
        dbg("IXHASH: md5sum is available: " . $conf->{ixhash_md5sum_path});
        return 1;
    }

    dbg("IXHASH: md5sum is not available: no md5sum executable found");
    return 0;
}
|
||||
|
||||
1;
|
||||
Reference in New Issue
Block a user