Initial commit.

This commit is contained in:
2021-05-24 22:18:33 +03:00
commit e2954d55f4
3701 changed files with 330017 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# iXhash rule set: loads the iXhash plugin and defines one rule per
# hash list that is queried through the plugin's ixhashtest() eval.
loadplugin Mail::SpamAssassin::Plugin::iXhash /etc/mail/spamassassin/iXhash.pm
# This makes DNS queries time out after 10 seconds (2x default)
ixhash_timeout 10
# This list uses iX Magazine's spam as its data source.
body IXHASH1 eval:ixhashtest('ix.dnsbl.manitu.net')
describe IXHASH1 This mail has been classified as spam @ iX Magazine, Germany
tflags IXHASH1 net
score IXHASH1 2.5
# This list is fed by spamtraps run by the former LogIn & Solutions AG, Germany.
body IXHASH2 eval:ixhashtest('generic.ixhash.net')
describe IXHASH2 mail has been classified as spam @ former LogIn&Solutions AG, Germany
tflags IXHASH2 net
score IXHASH2 1.5
# List queried at ctyme.ixhash.net (described below as JunkEmailFilter).
body IXHASH3 eval:ixhashtest('ctyme.ixhash.net')
describe IXHASH3 mail has been classified as spam @ JunkEmailFilter, Germany
tflags IXHASH3 net
score IXHASH3 1.0
# List queried at hosteurope.ixhash.net (described below as HostEurope).
body IXHASH4 eval:ixhashtest('hosteurope.ixhash.net')
describe IXHASH4 mail has been classified as spam @ HostEurope, Germany
tflags IXHASH4 net
score IXHASH4 1.0

View File

@@ -0,0 +1,67 @@
#*************************************************************************
# Bayes OCR Plugin, version 0.1
#*************************************************************************
# Copyright 2007 P.R.A. Group - D.I.E.E. - University of Cagliari (ITA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#*************************************************************************
loadplugin BayesOCR_PLG BayesOCR_PLG.pm
# Cerberus guarded the gate to Hades and ensured
# that spirits of the dead could enter...
# The BayesOCR plugin guards the inboxes and ensures
# that only legitimate images can enter;
# spam images are detected and eaten.
#
# Rule: BayesOCR_check(thrL, thrH)
# Categorisation of text embedded in images with text-categorisation techniques.
# Every rule below passes a lower and an upper threshold to the eval
# function, so the eight rules cover the bands from 0.40 up to 1.00.
# Requires gocr and convert (ImageMagick).
body BayesOCR_PLG40 eval:BayesOCR_check(0.40, 0.50)
body BayesOCR_PLG50 eval:BayesOCR_check(0.50, 0.60)
body BayesOCR_PLG60 eval:BayesOCR_check(0.60, 0.70)
body BayesOCR_PLG70 eval:BayesOCR_check(0.70, 0.80)
body BayesOCR_PLG80 eval:BayesOCR_check(0.80, 0.90)
body BayesOCR_PLG90 eval:BayesOCR_check(0.90, 0.95)
body BayesOCR_PLG95 eval:BayesOCR_check(0.95, 0.99)
body BayesOCR_PLG99 eval:BayesOCR_check(0.99, 1.00)
# Human-readable description of each probability band.
describe BayesOCR_PLG40 Bayesian ImageSpam probability is 40% to 50%
describe BayesOCR_PLG50 Bayesian ImageSpam probability is 50% to 60%
describe BayesOCR_PLG60 Bayesian ImageSpam probability is 60% to 70%
describe BayesOCR_PLG70 Bayesian ImageSpam probability is 70% to 80%
describe BayesOCR_PLG80 Bayesian ImageSpam probability is 80% to 90%
describe BayesOCR_PLG90 Bayesian ImageSpam probability is 90% to 95%
describe BayesOCR_PLG95 Bayesian ImageSpam probability is 95% to 99%
describe BayesOCR_PLG99 Bayesian ImageSpam probability is 99% to 100%
# Expose the PLGBAYESOCROUT tag as a BayesOCR-OUT header.
add_header all BayesOCR-OUT _PLGBAYESOCROUT_
# All bands share the same priority value.
priority BayesOCR_PLG40 1000
priority BayesOCR_PLG50 1000
priority BayesOCR_PLG60 1000
priority BayesOCR_PLG70 1000
priority BayesOCR_PLG80 1000
priority BayesOCR_PLG90 1000
priority BayesOCR_PLG95 1000
priority BayesOCR_PLG99 1000
# Four-value scores: only the last two values are non-zero.
# NOTE(review): presumably these are the Bayes-enabled score sets --
# confirm against the SpamAssassin "score" documentation.
score BayesOCR_PLG40 0 0 0.5 0.5
score BayesOCR_PLG50 0 0 1.0 1.0
score BayesOCR_PLG60 0 0 1.5 1.5
score BayesOCR_PLG70 0 0 2.0 2.0
score BayesOCR_PLG80 0 0 2.7 2.7
score BayesOCR_PLG90 0 0 3.5 3.5
score BayesOCR_PLG95 0 0 4.0 4.0
score BayesOCR_PLG99 0 0 4.5 4.5

View File

@@ -0,0 +1,400 @@
#*************************************************************************
# Bayes OCR Plugin, version 0.1
#*************************************************************************
# Copyright 2007 P.R.A. Group - D.I.E.E. - University of Cagliari (ITA)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#*************************************************************************
package BayesOCR_PLG;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
our @ISA = qw (Mail::SpamAssassin::Plugin);
# Constructor: builds the plugin object on top of the base plugin class,
# registers the BayesOCR_check eval rule and initialises the classifier
# state kept on the object.
sub new {
    my ( $class, $mailsa ) = @_;
    $class = ref($class) || $class;

    my $self = $class->SUPER::new($mailsa);
    bless( $self, $class );

    dbg("PLG-BayesOCR:: new:: register_eval_rule");
    $self->register_eval_rule("BayesOCR_check");

    # A negative classifier output means "not computed yet" (BayesOCR_check
    # runs the OCR pipeline only while the value is below zero).  The tag
    # message is the text stored in the mail tag when no score is available.
    @{$self}{ 'imgTxt_classifierOut', 'imgTxt_tagmsg' } = ( -1, "" );

    return $self;
}
#===========================================================================
#===========================================================================
# Resets the classifier state stored on the plugin object so that the next
# BayesOCR_check call recomputes it.  Per the original note, this is called
# before the eval rules.
#   $self : plugin object
#   $pms  : per-message status object (not used here)
sub check_start {
    my ( $self, $pms ) = @_;

    dbg("PLG-BayesOCR:: check_start:: init score");

    # -1 marks the classifier output as "not computed yet".
    @{$self}{ 'imgTxt_classifierOut', 'imgTxt_tagmsg' } = ( -1, "" );
}
# Logs the user name found under $pms->{main}->{username} and accepts every
# user: the function always returns 1.
# Called as a plain function with the per-message status object.
sub isValidUser {
    my $pms      = shift;
    my $username = $pms->{main}->{username};

    dbg("PLG-BayesOCR:: isValidUser:: Username: $username");

    return 1;
}
# Eval rule: BayesOCR_check(thrL, thrH)
#
# Hits when the classifier output for the images of the current message
# lies in the half-open band (thrL, thrH].
#
# The OCR/Bayes pipeline is executed only while the stored classifier output
# is negative (i.e. not yet computed); later rules reuse the stored value.
# After the pipeline ran, the PLGBAYESOCROUT tag is set to either the
# classifier output (on success) or the explanatory message left by the
# pipeline (on failure).
#
#   $self   : plugin object
#   $pms    : per-message status object
#   $unused : third positional argument, ignored
#   $thrL   : lower threshold (exclusive)
#   $thrH   : upper threshold (inclusive)
sub BayesOCR_check {
    my ( $self, $pms, $unused, $thrL, $thrH ) = @_;

    my $plgRuleName = $pms->get_current_eval_rule_name();

    # The user filter is currently disabled:
    #   if( isValidUser($pms) == 0) { return 0; }
    dbg("PLG-BayesOCR:: BayesOCR_check :: Rule: $plgRuleName");
    dbg("PLG-BayesOCR:: BayesOCR_check :: thr: ($thrH, $thrL)");

    if ( $self->{'imgTxt_classifierOut'} < 0 ) {

        # On success the tag carries the numeric classifier output.
        $self->{'imgTxt_tagmsg'} = $self->{'imgTxt_classifierOut'}
            if $self->imageSpam_OCRTextProcessing($pms);

        dbg("PLG-BayesOCR:: BayesOCR_check:: Write Mail Header\n\n");
        $pms->set_tag( "PLGBAYESOCROUT", $self->{'imgTxt_tagmsg'} );
    }

    my $classifierOut = $self->{'imgTxt_classifierOut'};
    my $resHit        = ( $classifierOut > $thrL ) && ( $classifierOut <= $thrH );
    return $resHit;
}
1;
#===========================================================================
# boolean $self->imageSpam_OCRTextProcessing($pms)
#
# Image-spam processing: the text of the attached images is extracted by OCR
# and classified with SpamAssassin's Bayes scanner.
#
#   $self : plugin object
#   $pms  : per-message status object; $pms->{msg} is the scanned message
#
# Returns 1 when image text was extracted and classified; the result is
#           stored in $self->{'imgTxt_classifierOut'} (formatted "%0.3f").
# Returns 0 when there is no usable Bayes scanner, no image, or too little
#           text; $self->{'imgTxt_classifierOut'} is then 0 and
#           $self->{'imgTxt_tagmsg'} explains why.
sub imageSpam_OCRTextProcessing {
    my ( $self, $pms ) = @_;

    #================================
    # Init result
    #================================
    $self->{'imgTxt_classifierOut'} = 0;

    #================================
    # Check classifier
    #================================
    my $nbSA = $pms->{main}->{bayes_scanner};

    # Guard against a missing scanner object as well as an unavailable scan,
    # so that a method call on undef cannot abort the whole rule.
    if ( !defined $nbSA || !$nbSA->is_scan_available() ) {
        dbg("PLG-BayesOCR:: imageTextClassifierOutEstimation: NB scan not available");
        $self->{'imgTxt_tagmsg'} = "0.0 (NaiveBayes not available)";
        return 0;
    }

    #================================
    # Image extraction
    #================================
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: Check for Attached Images");
    my ( $imgTextOcr, $numImages ) = imageTextExtractionFromMSG( $pms->{msg} );
    if ( $numImages == 0 ) {
        $self->{'imgTxt_tagmsg'} = "0.0 (No images found)";
        return 0;
    }

    # Count the recognised words of three or more letters.
    my $numWord = () = $imgTextOcr =~ /[a-z]{3,}/gi;
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: $numWord words (3+ chars) recognised");
    if ( $numWord <= 3 ) {
        $self->{'imgTxt_tagmsg'} = "0.0 (No usefull text found)";
        return 0;
    }

    #================================
    # Classifier's output estimation
    #================================
    # Build a temporary plain-text message holding the OCR text.
    my $mailraw = createMSGFromText( $pms, $imgTextOcr );
    my $msgTmp  = $pms->{main}->parse( $mailraw, 1 );
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: Compute score with trained NaiveBayes");
    my $pmsTMP = Mail::SpamAssassin::PerMsgStatus->new( $pms->{main}, $msgTmp );

    # Classification.  The temporary objects are released with finish() on
    # every path (including a die inside scan) so they do not accumulate in
    # a long-running process.
    my $outNB;
    my $scanned = eval { $outNB = $nbSA->scan( $pmsTMP, $msgTmp ); 1 };
    my $scanErr = $@;
    $pmsTMP->finish();
    $msgTmp->finish();
    die $scanErr unless $scanned;

    $self->{'imgTxt_classifierOut'} = sprintf( "%0.3f", $outNB );
    dbg("PLG-BayesOCR:: imageSpam_OCRTextProcessing:: classifier's out = $self->{'imgTxt_classifierOut'}" );
    return 1;    # All OK
}
#===========================================================================
# ($imgTextOcr, $numImages) = imageTextExtractionFromMSG($msg)
#
# Runs OCR on every message part whose type matches one of the image
# patterns and concatenates the recognised text, one image per line group,
# in the order the parts were found.
#
#   $msg : message object providing find_parts()
#
# Returns the concatenated text and the number of image parts processed.
sub imageTextExtractionFromMSG {
    my ($msg) = @_;

    dbg("PLG-BayesOCR:: imageTextExtractionFromMSG:: Extract & Convert Images");

    # NOTE(review): find_parts() presumably treats these as regular
    # expressions rather than globs ("image/*" = "image" plus any number of
    # slashes) -- confirm before tightening them.
    my @mimeStr    = ( "image/*", "img/*" );
    my $num        = 0;
    my $imgTextOcr = "";

    foreach my $mimePattern (@mimeStr) {

        # Search all attachments with the current MIME pattern.
        foreach my $imgPart ( $msg->find_parts($mimePattern) ) {
            my $imagestream = $imgPart->decode(1048000);    # ~ 1 MB

            # Append (the previous code used the accumulated text as the
            # join separator, which reversed the order of the images).
            $imgTextOcr .= imageTextExtractionByOCR($imagestream) . "\n";
            $num++;
        }
    }

    dbg("PLG-BayesOCR:: imageTextExtractionFromMSG:: $num images extracted");
    return ( $imgTextOcr, $num );
}
#===========================================================================
# $textOut = imageTextExtractionByOCR( $imagestream )
#
# Extracts the text of one image with the external tools identify/convert
# (ImageMagick) and gocr.
#
#   $imagestream : raw bytes of the decoded image part
#
# Returns the text printed by gocr, or "" when the image is skipped, the
# working files cannot be created, the conversion fails or gocr times out.
# Unexpected errors raised while running gocr are re-thrown.
#
# Zooming small images could improve OCR accuracy.
#   Byte check
#     > 1000 KB  => no OCR
#     < 20 KB    => OCR + zoom 4X
#     else       => check resolution
#   Resolution check
#     area > 1400x1050 and width > 1280 and height > 1024 => no OCR
#     area < 800x600                                      => OCR + zoom 4X
#     area < 1024x768                                     => OCR + zoom 2X
#     otherwise                                           => OCR (no zoom)
#
# NOTE(review): alarm() is used for the gocr timeout; if the caller already
# has an alarm pending it is cancelled here -- confirm this is acceptable.
sub imageTextExtractionByOCR {
    my ($imagestream) = @_;

    # Nothing to OCR in an empty or missing stream.
    return "" unless defined $imagestream && length $imagestream;

    my $imagelen = length($imagestream) / 1024;    # size in KB
    my $tmpDir   = "/tmp";                         # base tmp dir

    if ( $imagelen > 1000 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Skip, image size = $imagelen");
        return "";
    }

    # Work inside a freshly created private directory instead of predictable
    # file names directly under /tmp (avoids symlink/pre-creation races).
    require File::Temp;
    my $workDir = eval { File::Temp::tempdir( "sa_bayesOCR_XXXXXXXX", DIR => $tmpDir ) };
    return "" unless defined $workDir;

    my $tmpImg = "$workDir/image.tmp";
    my $tmpPnm = "$workDir/image.pnm";    # the extension selects convert's output format

    # Removes the working files and directory; used on every exit path.
    my $cleanup = sub {
        unlink $tmpImg, $tmpPnm;
        rmdir $workDir;
    };

    # Write the image bytes unchanged (binary mode, nothing appended).
    my $imgFH;
    unless ( open( $imgFH, '>', $tmpImg ) ) {
        $cleanup->();
        return "";
    }
    binmode $imgFH;
    print {$imgFH} $imagestream;
    unless ( close $imgFH ) {
        $cleanup->();
        return "";
    }

    my @convertOPT;
    my $imageIdentifyTxt = "";
    if ( $imagelen < 20 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 4X");
        @convertOPT = ( '-sample', '400%', '-density', '280' );
    }
    else {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Check image dim");

        # Check WxH (list-form pipe open: no shell involved).
        if ( open( my $idFH, '-|', 'identify', '-quiet', '-ping', $tmpImg ) ) {
            $imageIdentifyTxt = join "", <$idFH>;
            close $idFH;
        }

        # Require at least one digit on each side so that an empty capture
        # is never treated as a zero-sized image.
        if ( $imageIdentifyTxt =~ /\s(\d+)x(\d+)\s/i ) {
            my ( $size1, $size2 ) = ( $1, $2 );
            my $area = $size1 * $size2;

            if ( $area > 1400 * 1050 && $size1 > 1280 && $size2 > 1024 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Skip, image dim = $size1 x $size2");
                $cleanup->();
                return "";
            }
            if ( $area < 800 * 600 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 4X");
                @convertOPT = ( '-sample', '400%', '-density', '280' );
            }
            elsif ( $area < 1024 * 768 ) {
                dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Enable zoom 2X");
                @convertOPT = ( '-sample', '200%', '-density', '280' );
            }
        }
    }

    dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Convert & OCR");

    # -append  :: concatenate image layers
    # -flatten :: fuse layers
    # -density :: set dpi
    my $exstatus = system( 'convert', $tmpImg, '-append', '-flatten', @convertOPT, $tmpPnm );
    if ( $exstatus != 0 ) {
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: Convert ERROR!!");

        # Capture STDOUT and STDERR of identify for the log.  The shell is
        # needed for the redirection; the path is generated above and holds
        # no shell metacharacters.
        $imageIdentifyTxt = "";
        if ( open( my $idFH, '-|', "identify -verbose -strip $tmpImg 2>&1" ) ) {
            $imageIdentifyTxt = join "", <$idFH>;
            close $idFH;
        }
        my $msg = "Stream size (kb): $imagelen\nIdentify output: \n$imageIdentifyTxt\n";
        saveLogMsg( $tmpDir, "Convert Error", $msg );
        $cleanup->();
        return "";
    }

    # GOCR call with timeout (thanks to B. Austin for the useful suggestions)
    my $textOut = "";
    my ( $gocrPid, $gocrFH );
    my $ocrDone = eval {
        local $SIG{ALRM} = sub { die "GOCR_TIMEOUT\n" };
        alarm 10;

        # The list-form pipe open returns the PID of the gocr child, so it
        # can be killed directly on timeout.
        $gocrPid = open( $gocrFH, '-|', 'gocr', $tmpPnm );
        if ($gocrPid) {
            $textOut = join "", <$gocrFH>;
            close $gocrFH;
        }
        alarm 0;
        1;
    };
    my $ocrErr = $@;
    alarm 0;    # make sure no alarm stays pending, whatever ended the eval

    if ( !$ocrDone ) {
        if ( $ocrErr ne "GOCR_TIMEOUT\n" ) {
            $cleanup->();
            die $ocrErr;    # propagate unexpected errors
        }

        # timed out
        dbg("PLG-BayesOCR:: imageTextExtractionByOCR:: OCR timeout!!");
        if ($gocrPid) {
            kill 'KILL', $gocrPid;
            close $gocrFH;    # reap the killed child
        }
        my $killed = $gocrPid ? $gocrPid : "(none)";
        my $msg    = "Stream size (kb): $imagelen\nKilled gocr PID: $killed\n";
        saveLogMsg( $tmpDir, "OCR timeout", $msg );
        $textOut = "";
    }

    $cleanup->();
    return $textOut;
}
#===========================================================================
# $mailraw = createMSGFromText($pms, $ocrText)
#
# Builds the raw text of a minimal plain-text e-mail whose body is the OCR
# text, so that it can be parsed and classified like a normal message.
#
#   $pms     : per-message status object; only the Date header of
#              $pms->{msg} is reused
#   $ocrText : text recognised in the images
#
# Returns the raw message as a single string.
sub createMSGFromText {
    my ( $pms, $ocrText ) = @_;

    dbg("PLG-BayesOCR: createMSGFromText:: Make temp email with OCR's text");

    my $subject = "";
    my $from    = "";    # deliberately empty, the original sender is not reused
    my $to      = "";    # deliberately empty, the original recipient is not reused

    # The header value may be missing, folded, or end with a newline.  An
    # embedded newline would terminate the header block right after "Date:"
    # and push the Content-* lines into the body, so normalise it first.
    my $date = $pms->{msg}->get_pristine_header("Date");
    $date = "" unless defined $date;
    $date =~ s/\s+\z//;
    $date =~ s/\r?\n\s*/ /g;

    my $mailraw =
          "From: $from\n"
        . "To: $to\n"
        . "Subject: $subject\n"
        . "Date: $date\n"
        . "Content-Type: text/plain;\n charset=\"us-ascii\"\n"
        . "Content-Disposition: inline\n"
        . "\n"
        . "$ocrText\n";

    return $mailraw;
}
#===========================================================================
#===========================================================================
# saveLogMsg($tmpDir, $title, $msg)
#
# Appends one entry (timestamp, title, message) to "$tmpDir/sa_bayesOCR.log".
#
#   $tmpDir : directory holding the log file
#   $title  : short title of the entry
#   $msg    : message text
#
# Returns 1 when the entry was written, 0 when the log file could not be
# opened or closed (logging is best effort and never dies).
#
# The sub is declared without a prototype: the former "()" prototype stated
# that it takes no arguments although three are always passed.
#
# NOTE(review): the log lives at a fixed name inside $tmpDir; if that is a
# world-writable directory, consider moving it to a private location.
sub saveLogMsg {
    my ( $tmpDir, $title, $msg ) = @_;

    my $timenow = localtime time;

    open( my $logFH, '>>', "$tmpDir/sa_bayesOCR.log" ) or return 0;

    print {$logFH} "#--------------------------------\n",
                   " $timenow\n",
                   " $title\n",
                   "#--------------------------------\n",
                   "$msg\n";

    return close($logFH) ? 1 : 0;
}
#===========================================================================

View File

@@ -0,0 +1,273 @@
# Adds DNSWL.org to recipients of spamassassin --report.
#
# In a SpamAssassin config file, add the lines:
#
# loadplugin Mail::SpamAssassin::Plugin::DNSWLh
# dnswl_address user@example.com
# dnswl_password yourpassword
#
# The last two must be from an account created via
# http://www.dnswl.org/registerreporter.pl
#
#
# 2010-02-26-23 Initial release.
# 2010-02-27-11 Also call report successful on unlisted IPs.
# 2010-02-28-20 State when reported email has trust level "Unlisted".
# 2010-03-02-10 Report the IP DNSWL thought was interesting.
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
=head1 NAME
Mail::SpamAssassin::Plugin::DNSWLh - perform DNSWL reporting of messages
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::DNSWLh
=head1 DESCRIPTION
DNSWL is a service which lists known legitimate mail servers.
This module enables automatic reporting of spam to DNSWL, to improve
the accuracy of their database.
Note that spam reports sent by this plugin to DNSWL each include the
entire spam message.
See http://www.dnswl.org/ for more information about DNSWL.
=cut
package Mail::SpamAssassin::Plugin::DNSWLh;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use IO::Socket;
use strict;
use warnings;
use bytes;
use re 'taint';
use constant HAS_LWP_USERAGENT => eval { require LWP::UserAgent; };
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
# Constructor: creates the plugin object, records whether DNSWL reporting
# can be used and registers the plugin's configuration settings.
#
# Reporting is marked available only when network tests are enabled
# (local_tests_only is false) and LWP::UserAgent could be loaded.
sub new {
    my ( $class, $mailsaobject ) = @_;
    $class = ref($class) || $class;

    my $self = $class->SUPER::new($mailsaobject);
    bless( $self, $class );

    # are network tests enabled?
    my $can_report = !$mailsaobject->{local_tests_only} && HAS_LWP_USERAGENT;
    $self->{dnswl_available} = $can_report ? 1 : 0;
    dbg(
        $can_report
        ? "DNSWL: network tests on, attempting DNSWL"
        : "DNSWL: local tests only, disabling DNSWL"
    );

    $self->set_config( $mailsaobject->{conf} );
    return $self;
}
# Registers the plugin's configuration settings (dnswl_address,
# dnswl_password, dnswl_max_report_size) with the configuration parser.
#   $self : plugin object
#   $conf : configuration object whose parser receives the commands
sub set_config {
    my ( $self, $conf ) = @_;
    my @cmds;

=head1 USER OPTIONS

=over 4

=item dnswl_address

Address of the DNSWL reporter account.  The value must look like
C<user@domain> without whitespace.

=cut

    my $address_cmd = {
        setting => 'dnswl_address',
        default => 'spamassassin-submit@spam.dnswl.chaosreigns.com',
        type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
        code    => sub {
            my ( $self, $key, $value, $line ) = @_;
            return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
                if $value =~ /^$/;
            return $Mail::SpamAssassin::Conf::INVALID_VALUE
                unless $value =~ /^([^<\s]+\@[^>\s]+)$/;
            $self->{dnswl_address} = $1;
        },
    };
    push( @cmds, $address_cmd );

=item dnswl_password

Password of the DNSWL reporter account.  The value must not contain
whitespace.

=cut

    my $password_cmd = {
        setting => 'dnswl_password',
        type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
        code    => sub {
            my ( $self, $key, $value, $line ) = @_;
            return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
                if $value =~ /^$/;
            return $Mail::SpamAssassin::Conf::INVALID_VALUE
                unless $value =~ /^(\S+)$/;
            $self->{dnswl_password} = $1;
        },
    };
    push( @cmds, $password_cmd );

=item dnswl_max_report_size (default: 50)

Messages larger than this size (in kilobytes) will be truncated in
report messages sent to DNSWL. The default setting is the maximum
size that DNSWL will accept at the time of release.

=cut

    my $max_size_cmd = {
        setting => 'dnswl_max_report_size',
        default => 50,
        type    => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
    };
    push( @cmds, $max_size_cmd );

    $conf->{parser}->register_commands( \@cmds );
}
# Report hook: submits the message to DNSWL when reporting is available,
# not disabled through the dont_report_to_dnswl option, and both
# dnswl_address and dnswl_password are configured.
#
# On a successful submission the report object is flagged with
# report_available and report_return.
#   $self    : plugin object
#   $options : hash reference with the report object under {report}
sub plugin_report {
    my ( $self, $options ) = @_;

    return unless $self->{dnswl_available};

    my $report = $options->{report};

    #dbg("DNSWL: address/pass: " . $options->{report}->{conf}->{dnswl_address}
    # .' '. $options->{report}->{conf}->{dnswl_password} );
    return if $report->{options}->{dont_report_to_dnswl};

    my $conf = $report->{conf};
    if ( !( $conf->{dnswl_address} and $conf->{dnswl_password} ) ) {
        dbg("DNSWL: dnswl_address and/or dnswl_password not defined.");
    }
    elsif ( $self->dnswl_report($options) ) {
        $report->{report_available} = 1;
        info("DNSWL: spam reported to DNSWL");
        $report->{report_return} = 1;
    }
    else {
        info("DNSWL: could not report spam to DNSWL");
    }
}
# Submits one message to the DNSWL abuse-report form over HTTP.
#
#   $self    : plugin object
#   $options : hash reference; uses {text} (reference to the raw message),
#              {msg}{metadata} (relay strings) and {report}{conf}
#              (account address and password)
#
# Returns 1 when DNSWL acknowledged the report (listed or unlisted IP) and
# 0 otherwise: message older than two days, HTTP error, or no recognised
# acknowledgement in the response.  Progress is written with dbg() and
# printed to standard output.
sub dnswl_report {
    my ( $self, $options ) = @_;

    # original text
    my $original = ${ $options->{text} };

    # check date: only the header part (everything before the first empty
    # line) is handed to receive_date()
    ( my $header = $original ) =~ s/\r?\n\r?\n.*//s;
    my $date = Mail::SpamAssassin::Util::receive_date($header);
    if ( $date && $date < time - 2 * 86400 ) {
        warn("DNSWL: Message older than 2 days, not reporting\n");
        return 0;
    }

    # message variables
    my $description = "spam report via " . Mail::SpamAssassin::Version();
    my $metadata    = $options->{msg}->{metadata};
    my $trusted     = $metadata->{relays_trusted_str};
    my $untrusted   = $metadata->{relays_untrusted_str};

    # truncate the message to the configured maximum (kilobytes)
    my $max_length = $self->{main}->{conf}->{dnswl_max_report_size} * 1024;
    if ( length($original) > $max_length ) {
        substr( $original, $max_length ) = "\n[truncated by SpamAssassin]\n";
    }

    # message data
    my $body = <<"EOM";
Content-Description: $description
X-Spam-Relays-Trusted: $trusted
X-Spam-Relays-Untrusted: $untrusted
$original
EOM

    # compose and send message
    my %form = (
        'action'      => 'save',
        'abuseReport' => $body,
    );
    my $conf   = $options->{report}->{conf};
    my $ua     = LWP::UserAgent->new;
    my $netloc = 'www.dnswl.org:80';
    my $realm  = 'dnswl.org Abuse Reporting';
    $ua->credentials( $netloc, $realm, $conf->{dnswl_address}, $conf->{dnswl_password} );
    my $response = $ua->post( 'http://www.dnswl.org/abuse/report.pl', \%form );

    # transport-level failure
    unless ( $response->is_success ) {
        dbg( "DNSWL: Failed to report: " . $response->status_line );
        print "Failed to report to DNSWL, HTTP error: " . $response->status_line . "\n";
        return 0;
    }

    my $content = $response->content;

    # IP is listed by DNSWL
    if ( $content =~ m#IP ([\d\.]+) matches with DNSWL# ) {
        my $reportedip = $1;
        dbg("DNSWL: Successfully reported $reportedip.");
        print "Successfully reported to DNSWL $reportedip.\n";
        return 1;
    }

    # IP is not listed: still counts as a successful report
    if ( $content =~ m#No matching entry found for IP ([\d\.]+)# ) {
        my $reportedip = $1;
        dbg("DNSWL: Successfully reported $reportedip. Current trust level is: Unlisted.");
        print "Successfully reported to DNSWL $reportedip. Current trust level is: Unlisted.\n";
        return 1;
    }

    # no acknowledgement recognised in the response
    dbg("DNSWL: Failed to report, acknowledgement not received.");
    print "Failed to report to DNSWL, acknowledgement not received.\n";
    return 0;
}
1;
=back
=cut

View File

@@ -0,0 +1,604 @@
# DecodeShortURLs rule set: loads the plugin, defines the rules backed by
# its short_url_tests() eval, and configures logging, caching and the list
# of known URL shorteners.
loadplugin Mail::SpamAssassin::Plugin::DecodeShortURLs /etc/mail/spamassassin/DecodeShortURLs.pm
# The following five rules all call the same eval function, short_url_tests().
body HAS_SHORT_URL eval:short_url_tests()
describe HAS_SHORT_URL Message contains one or more shortened URLs
score HAS_SHORT_URL 0.01
body SHORT_URL_CHAINED eval:short_url_tests()
describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
score SHORT_URL_CHAINED 3.0
body SHORT_URL_MAXCHAIN eval:short_url_tests()
describe SHORT_URL_MAXCHAIN Message has shortened URL that causes more than 10 redirections
score SHORT_URL_MAXCHAIN 5.0
body SHORT_URL_LOOP eval:short_url_tests()
describe SHORT_URL_LOOP Message has short URL that loops back to itself
score SHORT_URL_LOOP 0.01
body SHORT_URL_404 eval:short_url_tests()
describe SHORT_URL_404 Message has short URL that returns 404
score SHORT_URL_404 1.0
# URI rules matching the "blocked" landing pages of individual shorteners.
# NOTE(review): the patterns are anchored to http:// only -- confirm whether
# https:// variants should match as well.
uri URI_BITLY_BLOCKED /^http:\/\/bit\.ly\/a\/warning/i
describe URI_BITLY_BLOCKED Message contains a bit.ly URL that has been disabled due to abuse
score URI_BITLY_BLOCKED 10.0
uri URI_SIMURL_BLOCKED /^http:\/\/simurl\.com\/redirect_black\.php/i
describe URI_SIMURL_BLOCKED Message contains a simurl URL that has been disabled due to abuse
score URI_SIMURL_BLOCKED 10.0
uri URI_MIGRE_BLOCKED /^http:\/\/migre\.me\/bloqueado/i
describe URI_MIGRE_BLOCKED Message contains a migre.me URL that has been disabled due to abuse
score URI_MIGRE_BLOCKED 10.0
# Meta rule: a shortened URL together with a hit on any of the listed URIBL rules.
meta SHORT_URIBL HAS_SHORT_URL && (URIBL_BLACK || URIBL_AB_SURBL || URIBL_WS_SURBL || URIBL_JP_SURBL || URIBL_SC_SURBL || URIBL_RHS_DOB || URIBL_DBL_SPAM || URIBL_SBL)
describe SHORT_URIBL Message contains shortened URL(s) and also hits a URIDNSBL
score SHORT_URIBL 0.01
# Log file and cache file used by the plugin.
# NOTE(review): both paths are fixed names under /tmp; consider a private
# directory if /tmp is shared with untrusted local users.
url_shortener_log /tmp/DecodeShortURLs.txt
url_shortener_cache /tmp/DecodeShortURLs.sq3
# Syslog output is disabled.
#url_shortener_syslog 1
url_shortener 0rz.tw
url_shortener 1l2.us
url_shortener 1u.ro
url_shortener 1url.com
url_shortener 2.gp
url_shortener 2.ly
url_shortener 2chap.it
url_shortener 2pl.us
url_shortener 2su.de
url_shortener 2tu.us
url_shortener 2ze.us
url_shortener 3.ly
url_shortener 301.to
url_shortener 301url.com
url_shortener 307.to
# url_shortener 4sq.com
url_shortener 6url.com
url_shortener 7.ly
url_shortener 9mp.com
url_shortener a.gd
url_shortener a.gg
url_shortener a.nf
url_shortener a2a.me
url_shortener a2n.eu
url_shortener abbr.com
url_shortener abe5.com
url_shortener access.im
url_shortener ad.vu
url_shortener adf.ly
url_shortener adjix.com
url_shortener alturl.com
url_shortener amzn.com
url_shortener amzn.to
url_shortener arm.in
url_shortener asso.in
url_shortener atu.ca
url_shortener aurls.info
url_shortener awe.sm
url_shortener ayl.lv
url_shortener azqq.com
url_shortener b23.ru
url_shortener b65.com
url_shortener b65.us
url_shortener bacn.me
url_shortener beam.to
url_shortener bgl.me
url_shortener bit.ly
url_shortener bkite.com
url_shortener blippr.com
url_shortener bloat.me
url_shortener blu.cc
url_shortener bon.no
url_shortener bt.io
url_shortener budurl.com
url_shortener buk.me
url_shortener burnurl.com
url_shortener c-o.in
url_shortener c.shamekh.ws
url_shortener canurl.com
url_shortener cd4.me
url_shortener chilp.it
url_shortener chopd.it
url_shortener chpt.me
url_shortener chs.mx
url_shortener chzb.gr
url_shortener clck.ru
url_shortener cli.gs
url_shortener cliccami.info
url_shortener clickthru.ca
url_shortener clipurl.us
url_shortener clk.my
url_shortener clop.in
url_shortener clp.ly
url_shortener coge.la
url_shortener cokeurl.com
url_shortener cort.as
url_shortener cot.ag
url_shortener crum.pl
url_shortener curio.us
url_shortener cuthut.com
url_shortener cuturl.com
url_shortener cuturls.com
url_shortener dealspl.us
url_shortener decenturl.com
url_shortener df9.net
url_shortener digbig.com
url_shortener digg.com
url_shortener digipills.com
url_shortener digs.by
url_shortener dld.bz
url_shortener dlvr.it
url_shortener dn.vc
url_shortener doi.org
url_shortener doiop.com
url_shortener dr.tl
url_shortener durl.me
url_shortener durl.us
url_shortener dvlr.it
url_shortener dwarfurl.com
url_shortener easyurl.net
url_shortener eca.sh
url_shortener eclurl.com
url_shortener eepurl.com
url_shortener eezurl.com
url_shortener ewerl.com
url_shortener ezurl.eu
url_shortener fa.by
url_shortener faceto.us
url_shortener fav.me
url_shortener fb.me
url_shortener ff.im
url_shortener fff.to
url_shortener fhurl.com
url_shortener flic.kr
url_shortener flingk.com
url_shortener flq.us
url_shortener fly2.ws
url_shortener fon.gs
url_shortener foxyurl.com
url_shortener fuseurl.com
url_shortener fwd4.me
url_shortener fwdurl.net
url_shortener fwib.net
url_shortener g8l.us
url_shortener get-shorty.com
url_shortener get-url.com
url_shortener get.sh
url_shortener gi.vc
url_shortener gkurl.us
url_shortener gl.am
url_shortener go.9nl.com
url_shortener go.to
url_shortener go2.me
url_shortener golmao.com
url_shortener goo.gl
url_shortener good.ly
url_shortener goshrink.com
url_shortener gri.ms
url_shortener gurl.es
url_shortener hao.jp
url_shortener hellotxt.com
url_shortener hex.io
url_shortener hiderefer.com
url_shortener hop.im
url_shortener hotredirect.com
url_shortener hotshorturl.com
url_shortener href.in
url_shortener ht.ly
url_shortener htxt.it
url_shortener hugeurl.com
url_shortener hurl.it
url_shortener hurl.no
url_shortener hurl.ws
url_shortener icanhaz.com
url_shortener icio.us
url_shortener idek.net
url_shortener ikr.me
url_shortener ir.pe
url_shortener irt.me
url_shortener is.gd
url_shortener iscool.net
url_shortener it2.in
url_shortener ito.mx
url_shortener j.mp
url_shortener j2j.de
url_shortener jdem.cz
url_shortener jijr.com
url_shortener just.as
url_shortener k.vu
url_shortener ketkp.in
url_shortener kisa.ch
url_shortener kissa.be
url_shortener kl.am
url_shortener klck.me
url_shortener kore.us
url_shortener korta.nu
url_shortener kots.nu
url_shortener krz.ch
url_shortener ktzr.us
url_shortener kxk.me
url_shortener l.pr
url_shortener l9k.net
url_shortener liip.to
url_shortener liltext.com
url_shortener lin.cr
url_shortener lin.io
url_shortener linkbee.com
url_shortener linkee.com
url_shortener linkgap.com
url_shortener linkslice.com
url_shortener linxfix.de
url_shortener liteurl.net
url_shortener liurl.cn
url_shortener livesi.de
url_shortener lix.in
url_shortener lk.ht
url_shortener ln-s.net
url_shortener ln-s.ru
url_shortener lnk.by
url_shortener lnk.in
url_shortener lnk.ly
url_shortener lnk.ms
url_shortener lnk.sk
url_shortener lnkurl.com
url_shortener loopt.us
url_shortener lost.in
url_shortener lru.jp
url_shortener lt.tl
url_shortener lu.to
url_shortener lurl.no
url_shortener mavrev.com
url_shortener memurl.com
url_shortener merky.de
url_shortener metamark.net
url_shortener migre.me
url_shortener min2.me
url_shortener minilien.com
url_shortener minilink.org
url_shortener miniurl.com
url_shortener minurl.fr
url_shortener moby.to
url_shortener moourl.com
url_shortener msg.sg
url_shortener murl.kz
url_shortener mv2.me
url_shortener mysp.in
url_shortener myurl.in
url_shortener myurl.si
url_shortener nanoref.com
url_shortener nanourl.se
url_shortener nbx.ch
url_shortener ncane.com
url_shortener ndurl.com
url_shortener ne1.net
url_shortener netnet.me
url_shortener netshortcut.com
url_shortener ni.to
url_shortener nig.gr
url_shortener nm.ly
url_shortener nn.nf
url_shortener notlong.com
url_shortener nutshellurl.com
url_shortener nyti.ms
url_shortener o-x.fr
url_shortener o.ly
url_shortener oboeyasui.com
url_shortener offur.com
url_shortener ofl.me
url_shortener om.ly
url_shortener omf.gd
url_shortener onecent.us
url_shortener onion.com
url_shortener onsaas.info
url_shortener ooqx.com
url_shortener oreil.ly
url_shortener ow.ly
url_shortener oxyz.info
url_shortener p.ly
url_shortener p8g.tw
url_shortener parv.us
url_shortener paulding.net
url_shortener pduda.mobi
url_shortener peaurl.com
url_shortener pendek.in
url_shortener pep.si
url_shortener pic.gd
url_shortener piko.me
url_shortener ping.fm
url_shortener piurl.com
url_shortener plumurl.com
url_shortener plurl.me
url_shortener pnt.me
url_shortener poll.fm
url_shortener pop.ly
url_shortener poprl.com
url_shortener post.ly
url_shortener posted.at
url_shortener pt2.me
url_shortener ptiturl.com
url_shortener puke.it
url_shortener pysper.com
url_shortener qik.li
url_shortener qlnk.net
url_shortener qoiob.com
url_shortener qr.cx
url_shortener quickurl.co.uk
url_shortener qurl.com
url_shortener qurlyq.com
url_shortener quu.nu
url_shortener qux.in
url_shortener r.im
url_shortener rb6.me
url_shortener rde.me
url_shortener readthis.ca
url_shortener reallytinyurl.com
url_shortener redir.ec
url_shortener redirects.ca
url_shortener redirx.com
url_shortener relyt.us
url_shortener retwt.me
url_shortener ri.ms
url_shortener rickroll.it
url_shortener rivva.de
url_shortener rly.cc
url_shortener rnk.me
url_shortener rsmonkey.com
url_shortener rt.nu
url_shortener rubyurl.com
url_shortener rurl.org
url_shortener s.gnoss.us
url_shortener s3nt.com
url_shortener s4c.in
url_shortener s7y.us
url_shortener safe.mn
url_shortener safelinks.ru
url_shortener sai.ly
url_shortener SameURL.com
url_shortener sfu.ca
url_shortener shadyurl.com
url_shortener shar.es
url_shortener shim.net
url_shortener shink.de
url_shortener shorl.com
url_shortener short.ie
url_shortener short.to
url_shortener shorten.ws
url_shortener shortenurl.com
url_shortener shorterlink.com
url_shortener shortio.com
url_shortener shortlinks.co.uk
url_shortener shortn.me
url_shortener shortna.me
url_shortener shortr.me
url_shortener shorturl.com
url_shortener shortz.me
url_shortener shoturl.us
url_shortener shredu
url_shortener shredurl.com
url_shortener shrinkify.com
url_shortener shrinkr.com
url_shortener shrinkster.com
url_shortener shrinkurl.us
url_shortener shrt.fr
url_shortener shrt.ws
url_shortener shrtl.com
url_shortener shrtn.com
url_shortener shrtnd.com
url_shortener shurl.net
url_shortener shw.me
url_shortener simurl.com
url_shortener simurl.net
url_shortener simurl.org
url_shortener simurl.us
url_shortener sitelutions.com
url_shortener siteo.us
url_shortener sl.ly
url_shortener slidesha.re
url_shortener slki.ru
url_shortener smallr.com
url_shortener smallr.net
url_shortener smfu.in
url_shortener smsh.me
url_shortener smurl.com
url_shortener sn.im
url_shortener sn.vc
url_shortener snadr.it
url_shortener snipie.com
url_shortener snipr.com
url_shortener snipurl.com
url_shortener snkr.me
url_shortener snurl.com
url_shortener song.ly
url_shortener sp2.ro
url_shortener spedr.com
url_shortener sqze.it
url_shortener srnk.net
url_shortener srs.li
url_shortener starturl.com
url_shortener stickurl.com
url_shortener stpmvt.com
url_shortener sturly.com
url_shortener su.pr
url_shortener surl.co.uk
url_shortener surl.it
url_shortener t.co
url_shortener t.lh.com
url_shortener ta.gd
url_shortener takemyfile.com
url_shortener tcrn.ch
url_shortener tgr.me
url_shortener th8.us
url_shortener thecow.me
url_shortener thrdl.es
url_shortener tighturl.com
url_shortener timesurl.at
url_shortener tini.us
url_shortener tiniuri.com
url_shortener tiny.cc
url_shortener tiny.pl
url_shortener tinyarro.ws
url_shortener tinylink.com
url_shortener tinypl.us
url_shortener tinysong.com
url_shortener tinytw.it
url_shortener tinyurl.com
url_shortener tl.gd
url_shortener tllg.net
url_shortener tncr.ws
url_shortener tnw.to
url_shortener to.je
url_shortener to.ly
url_shortener to.vg
url_shortener togoto.us
url_shortener tr.im
url_shortener tr.my
url_shortener tra.kz
url_shortener traceurl.com
url_shortener trcb.me
url_shortener trg.li
url_shortener trick.ly
url_shortener trii.us
url_shortener trim.li
url_shortener trumpink.lt
url_shortener trunc.it
url_shortener truncurl.com
url_shortener tsort.us
url_shortener tubeurl.com
# url_shortener tumblr.com
url_shortener turo.us
url_shortener tw0.us
url_shortener tw1.us
url_shortener tw2.us
url_shortener tw5.us
url_shortener tw6.us
url_shortener tw8.us
url_shortener tw9.us
url_shortener twa.lk
url_shortener tweet.me
url_shortener tweetburner.com
url_shortener tweetl.com
url_shortener twi.gy
url_shortener twip.us
url_shortener twirl.at
url_shortener twit.ac
url_shortener twitclicks.com
url_shortener twitterurl.net
url_shortener twitthis.com
url_shortener twittu.ms
url_shortener twiturl.de
url_shortener twitzap.com
url_shortener twlv.net
url_shortener twtr.us
url_shortener twurl.cc
url_shortener twurl.nl
url_shortener u.mavrev.com
url_shortener u.nu
url_shortener u76.org
url_shortener ub0.cc
url_shortener uiop.me
url_shortener ulimit.com
url_shortener ulu.lu
url_shortener unfaker.it
url_shortener updating.me
url_shortener ur.ly
url_shortener ur1.ca
url_shortener urizy.com
url_shortener url.ag
url_shortener url.az
url_shortener url.co.uk
url_shortener url.go.it
url_shortener url.ie
url_shortener url.inc-x.eu
url_shortener url.lotpatrol.com
# url_shortener url4.eu
url_shortener urlao.com
url_shortener urlbee.com
url_shortener urlborg.com
url_shortener urlbrief.com
url_shortener urlcorta.es
url_shortener urlcut.com
url_shortener urlcutter.com
url_shortener urlg.info
url_shortener urlhawk.com
url_shortener urli.nl
url_shortener urlkiss.com
url_shortener urloo.com
url_shortener urlpire.com
url_shortener urltea.com
url_shortener urlu.ms
url_shortener urlvi.b
url_shortener urlvi.be
url_shortener urlx.ie
url_shortener urlz.at
url_shortener urlzen.com
url_shortener usat.ly
url_shortener uservoice.com
url_shortener ustre.am
url_shortener vado.it
url_shortener vb.ly
url_shortener vdirect.com
url_shortener vi.ly
url_shortener viigo.im
url_shortener virl.com
url_shortener vl.am
url_shortener voizle.com
url_shortener vtc.es
url_shortener w0r.me
url_shortener w33.us
url_shortener w34.us
url_shortener w3t.org
url_shortener wa9.la
url_shortener wapurl.co.uk
url_shortener webalias.com
url_shortener welcome.to
url_shortener wh.gov
url_shortener wipi.es
url_shortener wkrg.com
url_shortener woo.ly
url_shortener wp.me
url_shortener x.hypem.com
url_shortener x.se
url_shortener x.vu
url_shortener xeeurl.com
url_shortener xil.in
url_shortener xlurl.de
url_shortener xr.com
url_shortener xrl.in
url_shortener xrl.us
url_shortener xrt.me
url_shortener xurl.jp
url_shortener xxsurl.de
url_shortener xzb.cc
url_shortener yatuc.com
url_shortener ye-s.com
url_shortener yep.it
# url_shortener youtu.be
url_shortener z.pe
url_shortener zapt.in
url_shortener zi.ma
url_shortener zi.me
url_shortener zi.pe
url_shortener zip.li
url_shortener zipmyurl.com
url_shortener zootit.com
url_shortener zud.me
url_shortener zurl.ws
url_shortener zz.gd
url_shortener zzang.kr
url_shortener xn--cwg.ws
url_shortener xn--fwg.ws
url_shortener xn--bih.ws
url_shortener xn--l3h.ws
url_shortener xn--1ci.ws
url_shortener xn--odi.ws
url_shortener xn--rei.ws
url_shortener xn--3fi.ws
url_shortener xn--egi.ws
url_shortener xn--hgi.ws
url_shortener xn--ogi.ws
url_shortener xn--vgi.ws
url_shortener xn--5gi.ws
url_shortener xn--9gi.ws

View File

@@ -0,0 +1,564 @@
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
# Author: Steve Freegard <steve.freegard@fsl.com>
=head1 NAME
DecodeShortURLs - Expand shortened URLs
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::DecodeShortURLs
url_shortener bit.ly
url_shortener go.to
...
=head1 DESCRIPTION
This plugin looks for URLs shortened by a list of URL shortening services and
upon finding a matching URL will connect to the shortening service and
do an HTTP HEAD lookup and retrieve the location header which points to the
actual shortened URL, it then adds this URL to the list of URIs extracted by
SpamAssassin which can then be accessed by other plug-ins, such as URIDNSBL.
This plugin also sets the rule HAS_SHORT_URL if any matching short URLs are
found.
Regular 'uri' rules can be used to detect and score links disabled by the
shortening service for abuse and URL_BITLY_BLOCKED is supplied as an example.
It should be safe to score this rule highly on a match as experience shows
that bit.ly only blocks access to a URL if it has seen consistent abuse and
problem reports.
As of version 0.3 this plug-in will follow 'chained' shorteners e.g.
short URL -> short URL -> short URL -> real URL
If this form of chaining is found, then the rule 'SHORT_URL_CHAINED' will be
fired. If a loop is detected then 'SHORT_URL_LOOP' will be fired.
This plug-in limits the number of chained shorteners to a maximum of 10 at
which point it will fire the rule 'SHORT_URL_MAXCHAIN' and go no further.
If a shortener returns a '404 Not Found' result for the short URL then the
rule 'SHORT_URL_404' will be fired.
=head1 NOTES
This plugin runs the parsed_metadata hook with a priority of -1 so that
it may modify the parsed URI list prior to the URIDNSBL plugin which
runs as priority 0.
Currently the plugin queries a maximum of 10 distinct shortened URLs with
a maximum timeout of 5 seconds per lookup. 'Chained' shorteners are followed
as described in the DESCRIPTION section above.
=head1 ACKNOWLEDGEMENTS
A lot of this plugin has been hacked together by using other plugins as
examples. The author would particularly like to tip his hat to Karsten
Bräckelmann for the _add_uri_detail_list() function that he stole from
GUDO.pm for which this plugin would not be possible due to the SpamAssassin
API making no provision for adding to the base list of extracted URIs and
the author not knowing enough about Perl to be able to achieve this without
a good example from someone that does ;-)
=cut
package Mail::SpamAssassin::Plugin::DecodeShortURLs;
my $VERSION = 0.6;
use Mail::SpamAssassin::Plugin;
use strict;
use warnings;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
use constant HAS_LWP_USERAGENT => eval { local $SIG{'__DIE__'}; require LWP::UserAgent; };
use constant HAS_SQLITE => eval { local $SIG{'__DIE__'}; require DBD::SQLite; };
use Fcntl qw(:flock SEEK_END);
use Sys::Syslog qw(:DEFAULT setlogsock);
# Send a debug line to the SpamAssassin logger, tagged with this plugin's
# name so its output can be told apart from other plugins.
#
#   $text - the message to log
#
# Returns whatever Mail::SpamAssassin::Logger::dbg() returns.
sub dbg {
  my ($text) = @_;
  return Mail::SpamAssassin::Logger::dbg('DecodeShortURLs: ' . $text);
}
# Constructor.
#
#   $class        - class name or an existing instance
#   $mailsaobject - the main SpamAssassin object; its {local_tests_only}
#                   flag and {conf} are read here
#
# The plugin disables itself when network tests are switched off or when
# LWP::UserAgent could not be loaded.  Otherwise a user agent is prepared
# that never follows redirects (the Location header is what we are after)
# and gives up after 5 seconds.
#
# Returns the blessed plugin object.
sub new {
  my $class = shift;
  my $mailsaobject = shift;

  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  $self->{disabled} =
    ($mailsaobject->{local_tests_only} || !HAS_LWP_USERAGENT) ? 1 : 0;

  unless ($self->{disabled}) {
    # Use the direct method-call form and LWP's public accessors instead of
    # indirect-object syntax and writes into the object's internal hash.
    my $ua = LWP::UserAgent->new;
    $ua->max_redirect(0);
    $ua->timeout(5);
    $ua->env_proxy;
    $self->{ua} = $ua;

    # Per-message facilities; switched on in parsed_metadata() once the
    # corresponding log file / database / syslog has been opened.
    $self->{logging} = 0;
    $self->{caching} = 0;
    $self->{syslog} = 0;
  }

  $self->set_config($mailsaobject->{conf});
  # Priority -1 is registered here so this hook is ordered ahead of
  # priority-0 plugins (see the NOTES section of the POD).
  $self->register_method_priority ('parsed_metadata', -1);
  $self->register_eval_rule('short_url_tests');

  return $self;
}
# Register this plugin's configuration directives with the SpamAssassin
# configuration parser found in $conf->{parser}.
#
#   $self - the plugin object
#   $conf - configuration object; only $conf->{parser} is used here
#
# Returns whatever register_commands() returns.
sub set_config {
  my ($self, $conf) = @_;
  my @cmds;

  # url_shortener: one or more whitespace-separated domains per directive.
  # Every domain is stored lower-cased as a key of {url_shorteners} on the
  # object handed to the callback.
  push @cmds, {
    setting => 'url_shortener',
    default => {},
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
        if $value =~ /^$/;
      foreach my $shortener (split(/\s+/, $value)) {
        $self->{url_shorteners}->{lc $shortener} = 1;
      }
    },
  };

=head1 PRIVILEGED SETTINGS

=over 4

=item url_shortener_log (default: none)

A path to a log file to be written to.  The file will be created if it does
not already exist and must be writable by the user running spamassassin.

For each short URL found the following will be written to the log file:

  [unix_epoch_time] <short url> => <decoded url>

=cut

  push @cmds, {
    setting => 'url_shortener_log',
    is_priv => 1,
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default => '',
  };

=item url_shortener_cache (default: none)

The full path to a database file to write cache entries to.  The database
will be created automatically if it does not already exist, but the supplied
path and file must be readable and writable by the user running spamassassin
or spamd.

NOTE: you will need the DBD::SQLite module installed to use this feature.

Example:

  url_shortener_cache /tmp/DecodeShortURLs.sq3

=cut

  push @cmds, {
    setting => 'url_shortener_cache',
    is_priv => 1,
    type    => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default => '',
  };

=item url_shortener_cache_ttl (default: 86400)

The length of time a cache entry will be valid for in seconds.
Default is 86400 (1 day).

NOTE: you will also need to run the following via cron to actually remove the
records from the database:

  echo "DELETE FROM short_url_cache WHERE modified < strftime('%s','now') - <ttl>;" | sqlite3 /path/to/database

NOTE: replace <ttl> above with the same value you use for this option.

=cut

  push @cmds, {
    setting  => 'url_shortener_cache_ttl',
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
    default  => 86400,
  };

=item url_shortener_syslog (default: 0 (off))

If this option is enabled (set to 1), then short URLs and the decoded URLs
will be logged to syslog (mail.info).

=back

=cut

  push @cmds, {
    setting  => 'url_shortener_syslog',
    is_admin => 1,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
    default  => 0,
  };

  $conf->{parser}->register_commands(\@cmds);
}
# SpamAssassin 'parsed_metadata' hook.
#
#   $self - the plugin object
#   $opts - hook arguments; $opts->{permsgstatus} is the per-message status
#
# Collects every URI of the message whose base host is a configured
# shortener, opens the optional log file / syslog / SQLite cache, and hands
# up to 10 distinct short URLs to recursive_lookup(), which raises the rule
# hits and adds the decoded URIs to the message.
#
# Returns nothing useful; returns early when the plugin is disabled or the
# message contains no short URLs.
sub parsed_metadata {
  my ($self, $opts) = @_;
  my $pms = $opts->{permsgstatus};

  return if $self->{disabled};

  dbg ('warn: get_uri_detail_list() has been called already')
    if exists $pms->{uri_detail_list};

  # Copy the settings once instead of dereferencing the conf repeatedly.
  # The two path settings are passed through a capturing regex before use.
  my $conf = $pms->{main}->{conf};
  $self->{url_shorteners} = $conf->{url_shorteners};
  ($self->{url_shortener_log}) = ($conf->{url_shortener_log} =~ /^(.*)$/g);
  ($self->{url_shortener_cache}) = ($conf->{url_shortener_cache} =~ /^(.*)$/g);
  $self->{url_shortener_cache_ttl} = $conf->{url_shortener_cache_ttl};
  $self->{url_shortener_syslog} = $conf->{url_shortener_syslog};

  # Sort short URLs into a hash to de-dup them
  my %short_urls;
  my $uris = $pms->get_uri_detail_list();
  while (my ($uri, $info) = each %{$uris}) {
    next unless ($info->{domains});
    foreach my $domain (keys %{ $info->{domains} }) {
      next unless exists $self->{url_shorteners}->{lc $domain};
      # NOTE: $info->{domains} appears to contain all the domains parsed
      # from the single input URI with no way to work out what the base
      # domain is.  So to prevent someone from stuffing the URI with a
      # shortener to force this plug-in to follow a link that *isn't* on
      # the list of shorteners; we enforce that the shortener must be the
      # base URI and that a path must be present.
      # The domain is quoted with \Q..\E so that its dots only match
      # literal dots.
      if ($uri !~ /^http:\/\/(?:www\.)?\Q$domain\E\/.+$/) {
        dbg("Discarding URI: $uri");
        next;
      }
      $short_urls{$uri} = 1;
    }
  }

  # Make sure we have some work to do
  # before we open any log files etc.
  my $count = scalar keys %short_urls;
  return undef unless $count > 0;

  # Initialise logging if enabled
  if ($self->{url_shortener_log}) {
    eval {
      local $SIG{'__DIE__'};
      # Three-argument open: the configured path can never be taken for
      # part of the open mode.
      open(my $logfh, '>>', $self->{url_shortener_log}) or die $!;
      $self->{logfh} = $logfh;
    };
    if ($@) {
      dbg("warn: $@");
    } else {
      $self->{logging} = 1;
    }
  }

  # Initialise syslog if enabled
  if ($self->{url_shortener_syslog}) {
    eval {
      local $SIG{'__DIE__'};
      openlog('DecodeShortURLs','ndelay,pid','mail');
    };
    if ($@) {
      dbg("warn: $@");
    } else {
      $self->{syslog} = 1;
    }
  }

  # Initialise cache if enabled
  if ($self->{url_shortener_cache} && HAS_SQLITE) {
    eval {
      local $SIG{'__DIE__'};
      $self->{dbh} = DBI->connect_cached("dbi:SQLite:dbname=".$self->{url_shortener_cache},"","", {RaiseError => 1, PrintError => 0, InactiveDestroy => 1}) or die $!;
    };
    if ($@) {
      dbg("warn: $@");
    } else {
      $self->{caching} = 1;

      # Create database if needed
      eval {
        local $SIG{'__DIE__'};
        $self->{dbh}->do("
          CREATE TABLE IF NOT EXISTS short_url_cache (
            short_url   TEXT PRIMARY KEY NOT NULL,
            decoded_url TEXT NOT NULL,
            hits        INTEGER NOT NULL DEFAULT 1,
            created     INTEGER NOT NULL DEFAULT (strftime('%s','now')),
            modified    INTEGER NOT NULL DEFAULT (strftime('%s','now'))
          )
        ");
        $self->{dbh}->do("
          CREATE INDEX IF NOT EXISTS short_url_by_modified
            ON short_url_cache(short_url, modified)
        ");
        $self->{dbh}->do("
          CREATE INDEX IF NOT EXISTS short_url_modified
            ON short_url_cache(modified)
        ");
      };
      if ($@) {
        dbg("warn: $@");
        $self->{caching} = 0;
      }
    }
  }

  # Look up at most 10 distinct short URLs per message.  The counter is
  # compared numerically (the string operator 'le' was used before).
  my $max_short_urls = 10;
  foreach my $short_url (keys %short_urls) {
    last if $max_short_urls <= 0;
    $self->recursive_lookup($short_url, $pms);
    $max_short_urls--;
  }

  # Close log
  eval {
    local $SIG{'__DIE__'};
    close($self->{logfh}) or die $!;
  } if $self->{logging};

  # Close syslog
  eval {
    local $SIG{'__DIE__'};
    closelog() or die $!;
  } if $self->{syslog};

  # Don't disconnect cached database handle
  # eval { $self->{dbh}->disconnect() or die $!; } if $self->{caching};
}
# Resolve one short URL and follow chains of shorteners.
#
#   $self      - the plugin object ({ua}, {url_shorteners} and the
#                logging/caching/syslog flags are read)
#   $short_url - the URL to resolve
#   $pms       - per-message status; rule hits are raised on it and the
#                decoded URI is added to it
#   %been_here - locations already visited in this chain (loop detection
#                and chain-length accounting)
#
# Rules raised: HAS_SHORT_URL, SHORT_URL_CHAINED, SHORT_URL_LOOP,
# SHORT_URL_MAXCHAIN and SHORT_URL_404.
#
# Returns the final location, or undef when nothing could be decoded.
sub recursive_lookup {
  my ($self, $short_url, $pms, %been_here) = @_;

  # These comparisons must be numeric: with the string operator the test
  # "2" ge "10" is true, which raised SHORT_URL_MAXCHAIN after only two
  # chained redirections.
  my $count = scalar keys %been_here;
  dbg("Redirection count $count") if $count > 0;
  if ($count >= 10) {
    dbg("Error: more than 10 shortener redirections");
    # Fire test
    $pms->got_hit('SHORT_URL_MAXCHAIN');
    return undef;
  }

  my $location;
  if ($self->{caching} && ($location = $self->cache_get($short_url))) {
    dbg("Found cached $short_url => $location");
    eval {
      local $SIG{'__DIE__'};
      $self->log_to_file("$short_url => $location")
    } if $self->{logging};
    syslog('info',"Found cached $short_url => $location") if $self->{syslog};
  } else {
    # Not cached; do lookup
    my $response = $self->{ua}->head($short_url);
    if (!$response->is_redirect) {
      dbg("Skipping URL as not redirect: $short_url = ".$response->status_line);
      $pms->got_hit('SHORT_URL_404') if $response->code == 404;
      return undef;
    }
    $location = $response->headers->{location};
    # A redirect status without a Location header gives us nothing to decode
    return undef unless defined $location;
    # Bail out if $short_url redirects to itself
    return undef if ($short_url eq $location);
    $self->cache_add($short_url, $location) if $self->{caching};
    dbg("Found $short_url => $location");
    eval {
      local $SIG{'__DIE__'};
      $self->log_to_file("$short_url => $location")
    } if $self->{logging};
    syslog('info',"Found $short_url => $location") if $self->{syslog};
  }

  # At this point we have a new URL in $location
  $pms->got_hit('HAS_SHORT_URL');
  _add_uri_detail_list($pms, $location);

  # Set chained here otherwise we might mark a disabled page or
  # redirect back to the same host as chaining incorrectly.
  $pms->got_hit('SHORT_URL_CHAINED') if ($count > 0);

  # Check if we are being redirected to a local page
  # Don't recurse in this case...
  if ($location !~ /^https?:/) {
    my ($host) = ($short_url =~ /^(https?:\/\/\S+)\//);
    $location = "$host/$location";
    dbg("Looks like a local redirection: $short_url => $location");
    _add_uri_detail_list($pms, $location);
    return $location;
  }

  # Check for recursion.  Only the host part is captured: the previous
  # greedy (\S+) also swallowed leading path segments, so a location such
  # as http://short.example/a/b was never recognised as a shortener.
  if ((my ($domain) = ($location =~ /^https?:\/\/([^\/\s]+)\//))) {
    if (exists $been_here{$location}) {
      # Loop detected
      dbg("Error: loop detected");
      $pms->got_hit('SHORT_URL_LOOP');
      return $location;
    }
    # Shortener domains are stored lower-cased by the url_shortener setting
    if (exists $self->{url_shorteners}->{lc $domain}) {
      $been_here{$location} = 1;
      # Recurse...
      return $self->recursive_lookup($location, $pms, %been_here);
    }
  }

  # No recursion; just return the final location...
  return $location;
}
# Eval rule registered in new().  It never matches by itself: the rule hits
# of this plugin are raised through got_hit() while parsed_metadata() runs.
#
# Always returns 0.
sub short_url_tests {
  my $no_match = 0;
  return $no_match;
}
# Beware. Code copied from PerMsgStatus get_uri_detail_list().
# Stolen from GUDO.pm
# Add a decoded URI to the per-message URI bookkeeping so that later
# consumers of these lists see it.  (Per the original author's note this
# mirrors code from PerMsgStatus and was taken from GUDO.pm.)
#
#   $pms - per-message status object; the following hash slots are written:
#            {parsed_uri_list}, {uri_detail_list}, {uri_domain_count}
#          and, only if it already exists, {uri_list}
#   $uri - the URI to add
sub _add_uri_detail_list {
  my ($pms, $uri) = @_;

  # Cache of text parsed URIs, as previously used by get_uri_detail_list().
  push @{ $pms->{parsed_uri_list} }, $uri;

  my $info = { types => { parsed => 1 } };
  $info->{cleaned} =
    [ Mail::SpamAssassin::Util::uri_list_canonify(undef, $uri) ];

  # Record each distinct domain once; {domains} only comes into existence
  # when at least one domain was found.
  for my $cleaned_uri (@{ $info->{cleaned} }) {
    my $dom = Mail::SpamAssassin::Util::uri_to_domain($cleaned_uri);
    next unless $dom;
    next if $info->{domains}->{$dom};
    $info->{domains}->{$dom} = 1;
    $pms->{uri_domain_count}++;
  }

  $pms->{uri_detail_list}->{$uri} = $info;

  # If the flat URI list has been built already, extend it as well.
  if (exists $pms->{uri_list}) {
    dbg ('warn: PMS::get_uri_list() appears to have been harvested');
    push @{ $pms->{uri_list} }, @{ $info->{cleaned} };
  }
}
# Append one "[epoch] message" line to the already opened log file.
#
#   $self - the plugin object; {logging} must be true and {logfh} must hold
#           the open file handle
#   $msg  - text to write after the timestamp
#
# The handle is locked exclusively, positioned at the end of the file,
# written to and unlocked again.  Any failure is trapped by the eval and
# not reported.
#
# Returns undef when logging is off, otherwise the result of the eval.
sub log_to_file {
  my ($self, $msg) = @_;
  return undef unless $self->{logging};

  my $out = $self->{logfh};
  eval {
    flock($out, LOCK_EX)    or die $!;
    seek($out, 0, SEEK_END) or die $!;
    print {$out} sprintf("[%s] %s\n", time, $msg);
    flock($out, LOCK_UN)    or die $!;
  };
}
# Store a short URL => decoded URL pair in the SQLite cache.
#
#   $self        - the plugin object ({caching} and {dbh} are used)
#   $short_url   - cache key
#   $decoded_url - the location the short URL redirects to
#
# cache_get() ignores rows older than the TTL but does not delete them, so
# an expired row for $short_url may still exist when we get here.  A plain
# INSERT would then violate the primary key and, with RaiseError enabled on
# the handle, die in the middle of message processing.  INSERT OR REPLACE
# refreshes such a row instead, and any remaining database error is logged
# rather than propagated.
#
# Always returns undef.
sub cache_add {
  my ($self, $short_url, $decoded_url) = @_;
  return undef if not $self->{caching};

  eval {
    $self->{sth_insert} = $self->{dbh}->prepare_cached("
      INSERT OR REPLACE INTO short_url_cache (short_url, decoded_url)
      VALUES (?,?)
    ");
    $self->{sth_insert}->execute($short_url, $decoded_url);
  };
  if ($@) {
    dbg("warn: $@");
  }
  return undef;
}
# Look up a short URL in the SQLite cache.
#
#   $self - the plugin object ({caching}, {dbh} and
#           {url_shortener_cache_ttl} are used)
#   $key  - the short URL
#
# Only rows modified within the TTL are considered.  A hit bumps the row's
# hit counter and modification time so that it does not expire while in
# use.  The handle is opened with RaiseError, so every database call is
# wrapped in eval: a locked or damaged cache is logged and treated as a
# miss instead of aborting message processing.
#
# Returns the decoded URL, or undef on a miss or database error.
sub cache_get {
  my ($self, $key) = @_;
  return undef if not $self->{caching};

  my $decoded_url;
  eval {
    $self->{sth_select} = $self->{dbh}->prepare_cached("
      SELECT decoded_url FROM short_url_cache
      WHERE short_url = ? AND modified > (strftime('%s','now') - ?)
    ");
    $self->{sth_update} = $self->{dbh}->prepare_cached("
      UPDATE short_url_cache
      SET modified=strftime('%s','now'), hits=hits+1
      WHERE short_url = ?
    ");
    $self->{sth_select}->execute($key, $self->{url_shortener_cache_ttl});
    ($decoded_url) = $self->{sth_select}->fetchrow_array();
    $self->{sth_select}->finish();
  };
  if ($@) {
    dbg("warn: $@");
    return undef;
  }
  return undef unless $decoded_url;

  # Found cache entry; touch it to prevent expiry.  A failure to touch the
  # row does not invalidate the value we have just read.
  eval {
    $self->{sth_update}->execute($key);
    $self->{sth_update}->finish();
  };
  dbg("warn: $@") if $@;

  return $decoded_url;
}
1;

View File

@@ -0,0 +1,3 @@
loadplugin Mail::SpamAssassin::Plugin::DNSWLh
dnswl_address bogdan@vrem.ro
dnswl_password 7llfxe

View File

@@ -0,0 +1,404 @@
=head1 NAME
Mail::SpamAssassin::Plugin::iXhash - compute fuzzy checksums from mail bodies and compare to known spam ones via DNS
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
# Timeout in seconds - default is 10 seconds
ixhash_timeout 10
# Should we add the hashes to the messages' metadata for later re-use
# Default is not to cache hashes (i.e. re-compute them for every check)
use_ixhash_cache 0
# whether to only use perl (ixhash_pureperl = 1) or the system's 'tr' and 'md5sum'
# Default is to use Perl only
ixhash_pureperl 1
# If you should have 'tr' and/or 'md5sum' in some weird place (e.g on a Windows server)
# or you want to specify which version to use you can specify the exact paths here
# Default is to have SpamAssassin find the executables
ixhash_tr_path "/usr/bin/tr"
ixhash_md5sum_path "/usr/bin/md5sum"
# The actual rule
body IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
describe IXHASH This mail has been classified as spam @ iX Magazine, Germany
tflags IXHASH net
score IXHASH 1.5
=head1 DESCRIPTION
iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, strips parts from it and then computes a hash value
from the rest. These values will then be looked up via DNS to see if the hashes have already been categorized as spam by others.
This plugin is based on parts of the procmail-based project 'NiX Spam', developed by Bert Ungerer.(un@ix.de)
For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here:
ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums
To see which DNS zones are currently available see http://www.ixhash.net
=cut
package Mail::SpamAssassin::Plugin::iXhash;
use strict;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Timeout;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
my $VERSION = "1.5.5";
# Constructor.
#
#   $class  - class name or an existing instance
#   $mailsa - the main SpamAssassin object; {local_tests_only} and {conf}
#             are read
#   $server - accepted but not used
#
# Sets {iXhash_available} to 0 when only local tests are allowed and to 1
# otherwise, registers the configuration settings and the 'ixhashtest'
# eval rule.
#
# Returns the blessed plugin object.
sub new {
  my ($class, $mailsa, $server) = @_;
  $class = ref($class) || $class;
  my $self = bless($class->SUPER::new($mailsa), $class);

  # Are network tests enabled?
  my $network_allowed = $mailsa->{local_tests_only} ? 0 : 1;
  if ($network_allowed) {
    dbg("IXHASH: Using iXhash plugin $VERSION");
  }
  else {
    dbg("IXHASH: local tests only, not using iXhash plugin");
  }
  $self->{iXhash_available} = $network_allowed;

  $self->set_config($mailsa->{conf});
  $self->register_eval_rule ("ixhashtest");
  return $self;
}
# Register the plugin's configuration settings with the parser found in
# $conf->{parser}:
#
#   ixhash_timeout      numeric, default 10 (by dallase@uribl.com)
#   use_ixhash_cache    numeric, default 0
#   ixhash_pureperl     numeric, default 1
#   ixhash_tr_path      string, no default
#   ixhash_md5sum_path  string, no default
#
# Returns whatever register_commands() returns.
sub set_config {
  my ($self, $conf) = @_;

  my $numeric = $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC;
  my $string  = $Mail::SpamAssassin::Conf::CONF_TYPE_STRING;

  my @cmds = (
    { setting => 'ixhash_timeout',     default => 10, type => $numeric },
    { setting => 'use_ixhash_cache',   default => 0,  type => $numeric },
    { setting => 'ixhash_pureperl',    default => 1,  type => $numeric },
    { setting => 'ixhash_tr_path',     type => $string },
    { setting => 'ixhash_md5sum_path', type => $string },
  );

  $conf->{parser}->register_commands(\@cmds);
}
# Eval rule: compute the iXhash digests of the message body and look them
# up in a DNS zone.
#
#   $self         - the plugin object
#   $permsgstatus - per-message status; {main}{conf} and {msg} are read
#   $full         - not used
#   $dnszone      - the zone to query, e.g. 'ix.dnsbl.manitu.net'
#
# The three hashes are tried in order and the search stops at the first one
# that is listed.  All the work runs under a timer of 'ixhash_timeout'
# seconds.
#
# Returns 1 if a digest is listed in $dnszone, otherwise 0.
sub ixhashtest {
  my ($self, $permsgstatus, $full, $dnszone) = @_;
  dbg("IXHASH: IxHash querying $dnszone");

  my $conf = $permsgstatus->{main}->{conf};
  if ($conf->{'ixhash_pureperl'} == 0) {
    # Return at once if the external tools we need cannot be found.
    return 0 unless $self->is_md5sum_available();
    return 0 unless $self->is_tr_available();
  }

  # get_pristine_body returns the body as a scalar
  my $body = $permsgstatus->{msg}->get_pristine_body();
  my $resolver = Net::DNS::Resolver->new;
  my $tmpfile = '';
  my $hits = 0;
  # The digest currently being checked.  It is deliberately shared with the
  # closure below (which used to declare its own copy), so that the timeout
  # message can name the digest that was in flight.
  my $digest = '';

  # alarm the dns query - dallase@uribl.com
  # here we implement proper alarms, ala Pyzor, Razor2 plugins.
  # see http://issues.apache.org/SpamAssassin/show_bug.cgi?id=3828#c123
  my $timer = Mail::SpamAssassin::Timeout->new({ secs => $conf->{'ixhash_timeout'} });

  my $time_err = $timer->run_and_catch(sub {
    # The system's 'tr' and 'md5sum' utilities read the body from a
    # temporary file.  It is not written when a hash is already stored in
    # the message metadata.
    # NOTE(review): if only some of the three hashes are stored, a later
    # shell pipeline would run with an empty file name - confirm whether
    # that can happen in practice.
    if ($conf->{'ixhash_pureperl'} == 0) {
      my $msg = $permsgstatus->{msg};
      if ($msg->get_metadata('X-iXhash-hash-1') or $msg->get_metadata('X-iXhash-hash-2') or $msg->get_metadata('X-iXhash-hash-3')) {
        dbg ("IXHASH: Not writing body to temporary file - reusing stored hashes");
      }
      else {
        my $tmpfh;
        ($tmpfile, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();
        my $body_copy = $body;
        $body_copy =~ s/\r\n/\n/g;
        print $tmpfh $body_copy;
        close $tmpfh;
        dbg ("IXHASH: Writing body to temporary file $tmpfile");
      }
    }

    $digest = compute1sthash($permsgstatus, $body, $tmpfile);
    $hits = _ixhash_dns_listed($resolver, $digest, $dnszone) if $digest;

    # Only go ahead if hash #1 didn't score a hit
    if ($hits == 0) {
      $digest = compute2ndhash($permsgstatus, $body, $tmpfile);
      $hits = _ixhash_dns_listed($resolver, $digest, $dnszone) if $digest;
    }

    # Hash #3 is only looked up when it is a full 32-character digest
    if ($hits == 0) {
      $digest = compute3rdhash($permsgstatus, $body, $tmpfile);
      $hits = _ixhash_dns_listed($resolver, $digest, $dnszone)
        if length($digest) == 32;
    }
  });

  if ($timer->timed_out()) {
    dbg("IXHASH: ".$conf->{'ixhash_timeout'}." second timeout exceeded while checking ".$digest.".".$dnszone."!");
  }
  elsif ($time_err) {
    chomp $time_err;
    dbg("IXHASH: iXhash lookup failed: $time_err");
  }

  # Nothing to remove when no temporary file was created
  unlink $tmpfile if defined $tmpfile && length $tmpfile;
  return $hits;
}

# Query "<digest>.<zone>" for A records.
#
#   $resolver - resolver object providing search()
#   $digest   - the hash to look up
#   $dnszone  - the DNS zone to query
#
# Returns 1 if any A record in the answer has a 127.x.x.x address, else 0.
sub _ixhash_dns_listed {
  my ($resolver, $digest, $dnszone) = @_;

  dbg ("IXHASH: Now checking $digest.$dnszone");
  my $answer = $resolver->search($digest.'.'.$dnszone, "A", "IN");
  return 0 unless $answer;

  my $listed = 0;
  foreach my $rr ($answer->answer) {
    next unless $rr->type eq "A";
    dbg ("IXHASH: Received reply from $dnszone:". $rr->address);
    $listed = 1 if $rr->address =~ /^127\.\d{1,3}\.\d{1,3}\.\d{1,3}/;
  }
  return $listed;
}
# Compute iXhash digest #1 of a message body.
#
#   $permsgstatus - per-message status; {main}{conf} settings and the {msg}
#                   metadata accessors are used
#   $body         - the message body as one string
#   $tmpfile      - file holding the body; only used by the shell pipeline
#
# The hash is computed if the body matches either precondition below (a
# repeated whitespace pattern, or two newlines on consecutive-line
# boundaries).  The 20-whitespace rule follows a suggestion by Karsten
# Bräckelmann.  According to Bert Ungerer this hash generates the most hits
# (about 70%).  If short mails cause false positives, this precondition is
# the place to raise the barrier.
#
# With 'use_ixhash_cache' set to 1 the digest is read from / stored in the
# message metadata under 'X-iXhash-hash-1', so that a second zone does not
# recompute it.
#
# Returns the digest, or '' if the preconditions are not met.
sub compute1sthash {
  my ($permsgstatus, $body, $tmpfile) = @_;

  my $conf = $permsgstatus->{main}->{conf};
  my $msg = $permsgstatus->{msg};
  my $cache_on = ($conf->{'use_ixhash_cache'} == 1);

  # Another zone may have been queried already: reuse its result.
  if ($cache_on && $msg->get_metadata('X-iXhash-hash-1')) {
    dbg ("IXHASH: Hash value for method #1 found in metadata, re-using that one");
    my $stored = $msg->get_metadata('X-iXhash-hash-1');
    return $stored;
  }

  unless (($body =~ /(?>\s.+?){20}/g) || ($body =~ /\n.*\n/)) {
    dbg ("IXHASH: Hash value #1 not computed, requirements not met");
    return '';
  }

  my $digest;
  if ($conf->{'ixhash_pureperl'} == 1) {
    my $squeezed = $body;
    $squeezed =~ s/\r\n/\n/g;
    # Squeeze runs of identical whitespace to a single character.  This is
    # done in two steps to avoid Perl segfaults on very long runs (workaround
    # suggested by Martin Blapp; Karsten Bräckelmann pointed out that it
    # applies to any repeated character, not only spaces).
    $squeezed =~ s/([[:space:]]{100})(?:\1+)/$1/g;
    $squeezed =~ s/([[:space:]])(?:\1+)/$1/g;
    # Drop all graph class characters
    $squeezed =~ s/[[:graph:]]+//go;
    $digest = md5_hex($squeezed);
    dbg ("IXHASH: Computed hash-value $digest via method 1, using perl exclusively");
  }
  else {
    my $tr = $conf->{ixhash_tr_path};
    my $md5sum = $conf->{ixhash_md5sum_path};
    $digest = `cat $tmpfile | $tr -s '[:space:]' | $tr -d '[:graph:]' | $md5sum | $tr -d ' -'`;
    chop($digest);
    dbg ("IXHASH: Computed hash-value $digest via method 1, using system utilities");
  }

  $msg->put_metadata('X-iXhash-hash-1', $digest) if $cache_on;
  return $digest;
}
# Compute iXhash digest #2 of a message body.
#
#   $permsgstatus - per-message status; {main}{conf} settings and the {msg}
#                   metadata accessors are used
#   $body         - the message body as one string
#   $tmpfile      - file holding the body; only used by the shell pipeline
#
# The hash is computed if the body contains at least 3 of the characters
# '[<>()|@*'!?,]' or the combination ':/' (to match something like
# "Already seen? http:/host.domain.tld/").
#
# With 'use_ixhash_cache' set to 1 the digest is read from / stored in the
# message metadata under 'X-iXhash-hash-2'.
#
# Returns the digest, or '' if the precondition is not met.
sub compute2ndhash{
  my ($permsgstatus, $body, $tmpfile) = @_;

  my $conf = $permsgstatus->{main}->{conf};
  my $msg = $permsgstatus->{msg};
  my $cache_on = ($conf->{'use_ixhash_cache'} == 1);

  # See if this hash has been computed already
  if ($cache_on && $msg->get_metadata('X-iXhash-hash-2')) {
    dbg ("IXHASH: Hash value for method #2 found in metadata, re-using that one");
    my $stored = $msg->get_metadata('X-iXhash-hash-2');
    return $stored;
  }

  unless ($body =~ /((([<>\(\)\|@\*'!?,])|(:\/)).*?){3,}/m) {
    dbg ("IXHASH: Hash value #2 not computed, requirements not met");
    return '';
  }

  my $digest;
  if ($conf->{'ixhash_pureperl'} == 1) {
    my $reduced = $body;
    # remove redundant stuff
    $reduced =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
    # replace '_' with '.'
    $reduced =~ tr/_/./;
    # Squeeze repeated characters; done in two steps to work around a perl
    # problem with very long runs.
    $reduced =~ s/([[:print:]]{100})(?:\1+)/$1/g;
    $reduced =~ s/([[:print:]])(?:\1+)/$1/g;
    $digest = md5_hex($reduced);
    dbg ("IXHASH: Computed hash-value $digest via method 2, using perl exclusively");
  }
  else {
    my $tr = $conf->{ixhash_tr_path};
    my $md5sum = $conf->{ixhash_md5sum_path};
    $digest = `cat $tmpfile | $tr -d '[:cntrl:][:alnum:]%&#;=' | $tr '_' '.' | $tr -s '[:print:]' | $md5sum | $tr -d ' -'`;
    chop($digest);
    dbg ("IXHASH: Computed hash-value $digest via method 2, using system utilities");
  }

  $msg->put_metadata('X-iXhash-hash-2', $digest) if $cache_on;
  return $digest;
}
# Compute iXhash digest #3 of a message body.
#
#   $permsgstatus - per-message status; {main}{conf} settings and the {msg}
#                   metadata accessors are used
#   $body         - the message body as one string
#   $tmpfile      - file holding the body; only used by the shell pipeline
#
# The hash is computed if the body contains at least 8 consecutive
# non-space characters.  (The original code additionally tested that its
# own, still empty, digest was shorter than 32 characters; that test could
# never fail, so this hash does not depend on whether hashes #1 and #2
# were computed.)
#
# With 'use_ixhash_cache' set to 1 the digest is read from / stored in the
# message metadata under 'X-iXhash-hash-3'.
#
# Returns the digest, or '' if the precondition is not met.
sub compute3rdhash{
  my ($permsgstatus, $body, $tmpfile ) = @_;

  my $conf = $permsgstatus->{main}->{conf};
  my $msg = $permsgstatus->{msg};
  my $cache_on = ($conf->{'use_ixhash_cache'} == 1);

  # See if this hash has been computed already
  if ($cache_on && $msg->get_metadata('X-iXhash-hash-3')) {
    dbg ("IXHASH: Hash value for method #3 found in metadata, re-using that one");
    my $stored = $msg->get_metadata('X-iXhash-hash-3');
    return $stored;
  }

  unless ($body =~ /[\S]{8}/) {
    dbg ("IXHASH: Hash value #3 not computed, requirements not met");
    return '';
  }

  my $digest;
  if ($conf->{'ixhash_pureperl'} == 1) {
    my $packed = $body;
    $packed =~ s/[[:cntrl:][:space:]=]+//g;
    # Squeeze repeated characters; done in two steps to work around a perl
    # problem with very long runs.
    $packed =~ s/([[:print:]]{100})(?:\1+)/$1/g;
    $packed =~ s/([[:graph:]])(?:\1+)/$1/g;
    $digest = md5_hex($packed);
    dbg ("IXHASH: Computed hash-value $digest via method 3");
  }
  else {
    # shell pipeline
    my $tr = $conf->{ixhash_tr_path};
    my $md5sum = $conf->{ixhash_md5sum_path};
    $digest = `cat $tmpfile | $tr -d '[:cntrl:][:space:]=' | $tr -s '[:graph:]' | $md5sum | $tr -d ' -'`;
    chop($digest);
    dbg ("IXHASH: Computed hash-value $digest via method 3, using system utilities");
  }

  $msg->put_metadata('X-iXhash-hash-3', $digest) if $cache_on;
  return $digest;
}
# Locate the 'tr' executable (approach borrowed from the Pyzor plugin).
#
#   $self - the plugin object; {main}{conf}{ixhash_tr_path} is read and,
#           on success, updated
#
# A configured 'ixhash_tr_path' takes precedence; otherwise the executable
# is searched for via find_executable_in_env_path().  The path that was
# found is remembered in the configuration.
#
# Returns 1 if an executable 'tr' was found, otherwise 0.
sub is_tr_available {
  my ($self) = @_;

  my $candidate = $self->{main}->{conf}->{ixhash_tr_path} || '';
  if (!$candidate) {
    $candidate = Mail::SpamAssassin::Util::find_executable_in_env_path('tr');
  }

  if (!$candidate || !-x $candidate) {
    dbg("IXHASH: tr is not available: no tr executable found");
    return 0;
  }

  # remember any found tr
  $self->{main}->{conf}->{ixhash_tr_path} = $candidate;
  dbg("IXHASH: tr is available: " . $self->{main}->{conf}->{ixhash_tr_path});
  return 1;
}
# Locate the 'md5sum' executable (approach borrowed from the Pyzor plugin).
#
#   $self - the plugin object; {main}{conf}{ixhash_md5sum_path} is read
#           and, on success, updated
#
# A configured 'ixhash_md5sum_path' takes precedence; otherwise the
# executable is searched for via find_executable_in_env_path().  The path
# that was found is remembered in the configuration.
#
# Returns 1 if an executable 'md5sum' was found, otherwise 0.
sub is_md5sum_available {
  my ($self) = @_;

  my $candidate = $self->{main}->{conf}->{ixhash_md5sum_path} || '';
  if (!$candidate) {
    $candidate = Mail::SpamAssassin::Util::find_executable_in_env_path('md5sum');
  }

  if (!$candidate || !-x $candidate) {
    dbg("IXHASH: md5sum is not available: no md5sum executable found");
    return 0;
  }

  # remember any found md5sum
  $self->{main}->{conf}->{ixhash_md5sum_path} = $candidate;
  dbg("IXHASH: md5sum is available: " . $self->{main}->{conf}->{ixhash_md5sum_path});
  return 1;
}
1;