root / exilog_parse.pm @ 226ad0a3c764c0606048acf7371b02765eee60d2
View | Annotate | Download (7.4 KB)
| 1 | #!/usr/bin/perl -w |
|---|---|
| 2 | # |
| 3 | # This file is part of the exilog suite. |
| 4 | # |
| 5 | # http://duncanthrax.net/exilog/ |
| 6 | # |
| 7 | # (c) Tom Kistner 2004 |
| 8 | # |
| 9 | # See LICENSE for licensing information. |
| 10 | # |
| 11 | |
| 12 | package exilog_parse; |
| 13 | use strict; |
| 14 | use exilog_util; |
| 15 | use Digest::MD5 qw( md5_base64 ); |
| 16 | |
| 17 | use Data::Dumper; |
| 18 | |
| 19 | BEGIN {
|
| 20 | use Exporter; |
| 21 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
| 22 | |
| 23 | $VERSION = 0.1; |
| 24 | @ISA = qw(Exporter); |
| 25 | @EXPORT = qw( |
| 26 | &parse_message_line |
| 27 | &parse_reject_line |
| 28 | &date_to_stamp |
| 29 | &stamp_to_date |
| 30 | ); |
| 31 | |
| 32 | %EXPORT_TAGS = (); |
| 33 | @EXPORT_OK = qw(); |
| 34 | } |
| 35 | |
| 36 | sub _parse_error {
|
| 37 | my $subj = shift || ""; |
| 38 | my $h = shift || {};
|
| 39 | |
| 40 | $subj = _parse_delivery($subj,$h); |
| 41 | |
| 42 | m/()()/; |
| 43 | if ($subj =~ / host ([^ ]+?) \[([0-9.]+?)\]\:/) {
|
| 44 | $h->{host_addr} = $2;
|
| 45 | $h->{host_dns} = $1;
|
| 46 | }; |
| 47 | $subj =~ s/^[ :]+//; |
| 48 | $subj =~ s/ +$//; |
| 49 | $h->{errmsg} = $subj if ($subj);
|
| 50 | |
| 51 | return $subj; |
| 52 | }; |
| 53 | |
| 54 | |
| 55 | sub _parse_deferral {
|
| 56 | my $subj = shift || ""; |
| 57 | my $h = shift || {};
|
| 58 | |
| 59 | $subj = _parse_delivery($subj,$h); |
| 60 | |
| 61 | if ($subj =~ / host ([^ ]+?) \[([0-9.]+?)\]\:/) {
|
| 62 | $h->{host_addr} = $2;
|
| 63 | $h->{host_dns} = $1;
|
| 64 | }; |
| 65 | $subj =~ s/^[ :]+//; |
| 66 | $subj =~ s/ +$//; |
| 67 | $h->{errmsg} = $subj if ($subj);
|
| 68 | |
| 69 | return $subj; |
| 70 | }; |
| 71 | |
| 72 | |
| 73 | sub _parse_delivery {
|
| 74 | my $subj = shift || ""; |
| 75 | my $h = shift || {};
|
| 76 | |
| 77 | |
| 78 | # When +sender_on_delivery is set, cut away the F=<> part |
| 79 | $subj =~ s/[PF]\=[^ ]+ //; |
| 80 | |
| 81 | m/()/; |
| 82 | |
| 83 | $subj =~ s/^.+?[\=\-\*][\>\=\*] (.+?)((\: )|( R\=)|( \<)|( \())/$2/; |
| 84 | $h->{rcpt_final} = $1 if ($1);
|
| 85 | $subj =~ s/^\: //; |
| 86 | $subj =~ s/^ +//; |
| 87 | |
| 88 | m/()/; |
| 89 | $subj =~ s/^\((.+?)\) //; |
| 90 | $h->{rcpt_intermediate} = $1 if ($1);
|
| 91 | |
| 92 | m/()/; |
| 93 | $subj =~ s/^\<(.+?)\> //; |
| 94 | if ($1) {
|
| 95 | $h->{rcpt} = $1;
|
| 96 | } |
| 97 | else {
|
| 98 | $h->{rcpt} = $h->{rcpt_final};
|
| 99 | }; |
| 100 | |
| 101 | m/()/; |
| 102 | $subj =~ s/R\=([^ \:]+)//; |
| 103 | $h->{router} = $1 if ($1);
|
| 104 | |
| 105 | m/()/; |
| 106 | $subj =~ s/ST\=([^ \:]+)//; |
| 107 | $h->{shadow_transport} = $1 if ($1);
|
| 108 | |
| 109 | m/()/; |
| 110 | $subj =~ s/T\=([^ \:]+)//; |
| 111 | $h->{transport} = $1 if ($1);
|
| 112 | |
| 113 | m/()/; |
| 114 | $subj =~ s/X\=([^ ]+)//; |
| 115 | $h->{tls_cipher} = $1 if ($1);
|
| 116 | |
| 117 | m/()()/; |
| 118 | $subj =~ s/H\=([^ ]+) \[(.+?)\]//; |
| 119 | $h->{host_dns} = $1 if ($1);
|
| 120 | $h->{host_addr} = $2 if ($2);
|
| 121 | |
| 122 | return $subj; |
| 123 | }; |
| 124 | |
| 125 | |
| 126 | |
| 127 | |
| 128 | sub _parse_arrival {
|
| 129 | my $subj = shift || ""; |
| 130 | my $h = shift || {};
|
| 131 | |
| 132 | m/()/; |
| 133 | $subj =~ s/^.+?\<\= (.+?) //; |
| 134 | $h->{mailfrom} = $1 if ($1);
|
| 135 | |
| 136 | m/()()/; |
| 137 | $subj =~ s/H\=(.+?) ([A-Za-z]\=)/$2/; |
| 138 | if ($1) {
|
| 139 | my $hstr = $1; |
| 140 | m/()/; |
| 141 | $hstr =~ s/\[([0-9.]+)\]$//; |
| 142 | $h->{host_addr} = $1 if ($1);
|
| 143 | |
| 144 | $hstr =~ s/^ +//; |
| 145 | $hstr =~ s/ +$//; |
| 146 | |
| 147 | m/()/; |
| 148 | $hstr =~ s/\((.+?)\)$//; |
| 149 | $h->{host_helo} = $1 if ($1);
|
| 150 | |
| 151 | $hstr =~ s/^ +//; |
| 152 | $hstr =~ s/ +$//; |
| 153 | |
| 154 | # if we have something left over now, it must |
| 155 | # be a confirmed rdns host name |
| 156 | $h->{host_rdns} = $hstr if ($hstr);
|
| 157 | } |
| 158 | |
| 159 | m/()/; |
| 160 | $subj =~ s/P\=([^ ]+)//; |
| 161 | $h->{proto} = $1 if ($1);
|
| 162 | if ($1 =~ /^local/) {
|
| 163 | # U= contains local user account |
| 164 | m/()/; |
| 165 | $subj =~ s/U\=([^ ]+)//; |
| 166 | $h->{user} = $1 if ($1);
|
| 167 | } |
| 168 | elsif ( ($1 eq 'asmtp') || ($1 eq 'esmtpa') || ($1 eq 'esmtpsa') ) {
|
| 169 | # fill in both auth user and ident |
| 170 | m/()/; |
| 171 | $subj =~ s/A\=([^ ]+)//; |
| 172 | $h->{user} = $1 if ($1);
|
| 173 | |
| 174 | m/()/; |
| 175 | $subj =~ s/U\=([^ ]+)//; |
| 176 | $h->{host_ident} = $1 if ($1);
|
| 177 | } |
| 178 | else {
|
| 179 | # U= contains remote ident |
| 180 | m/()/; |
| 181 | $subj =~ s/U\=([^ ]+)//; |
| 182 | $h->{host_ident} = $1 if ($1);
|
| 183 | }; |
| 184 | |
| 185 | m/()/; |
| 186 | $subj =~ s/S\=([^ ]+)//; |
| 187 | $h->{size} = $1 if ($1);
|
| 188 | |
| 189 | m/()/; |
| 190 | $subj =~ s/id\=([^ ]+)//; |
| 191 | $h->{msgid} = $1 if ($1);
|
| 192 | |
| 193 | m/()/; |
| 194 | $subj =~ s/X\=([^ ]+)//; |
| 195 | $h->{tls_cipher} = $1 if ($1);
|
| 196 | |
| 197 | m/()/; |
| 198 | $subj =~ s/R\=([^ ]+)//; |
| 199 | $h->{bounce_parent} = $1 if ($1);
|
| 200 | |
| 201 | return $subj; |
| 202 | }; |
| 203 | |
| 204 | sub _parse_reject {
|
| 205 | my $subj = shift; |
| 206 | my $h = shift; |
| 207 | |
| 208 | m/()()/; |
| 209 | $subj =~ s/H\=(.+?) \[(.+?)\] //; |
| 210 | if ($1 && $2) {
|
| 211 | $h->{host_addr} = $2;
|
| 212 | my $hstr = $1; |
| 213 | |
| 214 | $hstr =~ s/^ +//; |
| 215 | $hstr =~ s/ +$//; |
| 216 | |
| 217 | m/()/; |
| 218 | $hstr =~ s/\((.+?)\)$//; |
| 219 | $h->{host_helo} = $1 if ($1);
|
| 220 | |
| 221 | $hstr =~ s/^ +//; |
| 222 | $hstr =~ s/ +$//; |
| 223 | |
| 224 | # if we have something left over now, it must |
| 225 | # be a confirmed rdns host name |
| 226 | $h->{host_rdns} = $hstr if ($hstr);
|
| 227 | }; |
| 228 | |
| 229 | m/()/; |
| 230 | $subj =~ s/U\=(.+?) //; |
| 231 | $h->{host_ident} = $1 if ($1);
|
| 232 | |
| 233 | m/()()/; |
| 234 | $subj =~ s/F\=(\<.*?\>) //; |
| 235 | $h->{mailfrom} = $1 if ($1);
|
| 236 | if (exists($h->{mailfrom})) {
|
| 237 | unless ($h->{mailfrom} eq '<>') {
|
| 238 | $h->{mailfrom} =~ s/[<>]//g;
|
| 239 | } |
| 240 | }; |
| 241 | |
| 242 | m/()()/; |
| 243 | $subj =~ m/\<(.+?)\>/; |
| 244 | if ($1) {
|
| 245 | $h->{rcpt} = $1;
|
| 246 | }; |
| 247 | |
| 248 | return $subj; |
| 249 | }; |
| 250 | |
| 251 | |
| 252 | # Parse a reject line |
| 253 | sub parse_reject_line {
|
| 254 | my $subj = shift || ""; |
| 255 | chomp($subj); |
| 256 | |
| 257 | my $h = { 'table' => 'rejects' };
|
| 258 | |
| 259 | # There are 2 types of rejects: one without a message ID (pre-DATA) |
| 260 | # and one with message ID (post-DATA). Try the latter first. |
| 261 | |
| 262 | m/()()()()/; |
| 263 | $subj =~ m/(\d{4}-\d\d-\d\d) (\d\d:\d\d:\d\d( [-+]\d{4})?) ([A-Za-z0-9]{6}-[A-Za-z0-9]{6}-[A-Za-z0-9]{2}) (H=.*)$/;
|
| 264 | my ($date,$tod,$msgid,$line) = ($1,$2,$4,$5); |
| 265 | if ($date && $tod && $msgid && $line) {
|
| 266 | # line with message id |
| 267 | $h->{data}->{message_id} = $msgid;
|
| 268 | } |
| 269 | else {
|
| 270 | # try format without message id |
| 271 | m/()()()()/; |
| 272 | $subj =~ m/(\d{4}-\d\d-\d\d) (\d\d:\d\d:\d\d( [-+]\d{4})?) (H=.*)$/;
|
| 273 | ($date,$tod,$line) = ($1,$2,$4); |
| 274 | unless ($date && $tod && $line) {
|
| 275 | # unparsable |
| 276 | return 0; |
| 277 | }; |
| 278 | # Add custom "Message ID" hash |
| 279 | $h->{data}->{message_id} = substr(md5_base64($date,$tod,$line),0,16);
|
| 280 | }; |
| 281 | |
| 282 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 283 | $h->{data}->{errmsg} = substr(_parse_reject($line,$h->{data}),0,255);
|
| 284 | |
| 285 | return $h; |
| 286 | }; |
| 287 | |
| 288 | |
| 289 | # Parse line that relates to an actual message. |
| 290 | sub parse_message_line {
|
| 291 | my $subj = shift || ""; |
| 292 | chomp($subj); |
| 293 | |
| 294 | # Exception: do not use "retry time not reached [for any host]". |
| 295 | # It's just too spammy and gets logged by default. |
| 296 | return 0 if ($subj =~ /retry time not reached$/); |
| 297 | return 0 if ($subj =~ /retry time not reached for any host$/); |
| 298 | |
| 299 | # Grab date, time and message id |
| 300 | $subj =~ m/(\d{4}-\d\d-\d\d) (\d\d:\d\d:\d\d( [-+]\d{4})?) ([A-Za-z0-9]{6}-[A-Za-z0-9]{6}-[A-Za-z0-9]{2}) (([^ ]+).*)$/;
|
| 301 | my ($date,$tod,$msgid,$line,$type) = ($1,$2,$4,$5,$6); |
| 302 | $line =~ s/^ +// if (defined($line)); |
| 303 | unless ($date && $tod && $msgid && $line && $type) {
|
| 304 | # non-message based line |
| 305 | return 0; |
| 306 | }; |
| 307 | |
| 308 | # removed fttb, too much overhead |
| 309 | #my $h = { 'data' => { 'line' => $line, 'message_id' => $msgid } };
|
| 310 | my $h = { 'data' => { 'message_id' => $msgid } };
|
| 311 | |
| 312 | |
| 313 | if ($type eq '<=') {
|
| 314 | $h->{table} = 'messages';
|
| 315 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 316 | _parse_arrival($subj,$h->{data});
|
| 317 | } |
| 318 | elsif (($type eq '=>') || ($type eq '->') || ($type eq '*>')) {
|
| 319 | $h->{table} = 'deliveries';
|
| 320 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 321 | _parse_delivery($subj,$h->{data});
|
| 322 | } |
| 323 | elsif ($type eq '**') {
|
| 324 | $h->{table} = 'errors';
|
| 325 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 326 | _parse_error($subj,$h->{data});
|
| 327 | } |
| 328 | elsif ($type eq '==') {
|
| 329 | $h->{table} = 'deferrals';
|
| 330 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 331 | _parse_deferral($subj,$h->{data});
|
| 332 | } |
| 333 | elsif ($type eq 'Completed') {
|
| 334 | $h->{table} = 'messages';
|
| 335 | $h->{data}->{completed} = date_to_stamp($date,$tod);
|
| 336 | } |
| 337 | else {
|
| 338 | if ($line =~ /^H\=.*rejected/) {
|
| 339 | # looks like a reject line after DATA, pass on |
| 340 | return 0; |
| 341 | }; |
| 342 | |
| 343 | $h->{table} = 'unknown';
|
| 344 | $h->{data}->{timestamp} = date_to_stamp($date,$tod);
|
| 345 | $h->{data}->{line} = substr($line,0,255);
|
| 346 | }; |
| 347 | |
| 348 | return $h; |
| 349 | }; |
| 350 | |
| 351 | 1; |