#! /usr/bin/perl -w
# MD5: multiple
# TEST: ../rwfilter/rwfilter --stime=2009/02/13:20:00-2009/02/13:20 --sensor=S2 --proto=6 --aport=80,8080,443 --pass=stdout ../../tests/data.rwf | ./rwallformats --no-invocation --basename=/tmp/sk-teststmp && md5 /tmp/sk-teststmp*

use strict;
use SiLKTests;
use File::Find;

# Basename of this script; used as the prefix on every diagnostic
(my $NAME = $0) =~ s{.*/}{};

# Applications exercised by this test
my $rwallformats = check_silk_app('rwallformats');
my $rwcut        = check_silk_app('rwcut');
my $rwtuc        = check_silk_app('rwtuc');

# Fields given to rwtuc on input; rwcut prints the same fields back
my $tuc_fields = join ',', qw(packets bytes);
my $cut_fields = $tuc_fields;

# Prefix of every file name that rwallformats is asked to create
my $base_name = 'sk-teststmp';

# Temporary directory holding everything this test writes
my $tmpdir = make_tempdir();

# One subdirectory receives the files written by rwallformats, the
# other receives the textual output of rwcut for each of those files
my $write_dir = join '/', $tmpdir, 'write';
my $rwcut_dir = join '/', $tmpdir, 'rwcut';
foreach my $subdir ($write_dir, $rwcut_dir) {
    unless (mkdir $subdir) {
        die "$NAME: Cannot mkdir '$subdir': $!";
    }
}

# Textual input for rwtuc.  Each row is a packets/bytes pair; the
# third column is named 'ignore' and carries a note on why the row is
# interesting.  The expected MD5 values at the end of this file depend
# on this text, so it must not be altered.
my $tmpfile = make_tempname('rwtuc-input.txt');
my $tmpsilk = make_tempname('rwtuc-output.rw');
my $rwtuc_text = <<'RWTUC_INPUT';
      packets|         bytes|ignore
            0|            12|# rwtuc rejects: pkts < 1
           16|             0|# rwtuc rejects: bytes < 1
           16|            12|# bytes/packet < 1
           16|          2048|
           16|       2097152|# BPP=131072 overflows ratio in hourly files
       524288|    2147483648|
      1048576|     134217728|# MAX_PKTS in hourly file formats
      1048576|    2147483648|
      1048576| 1099511627776|# MAX_PKTS and overflow BPP=1048576
     33554432|    2147483648|# Exceeds 1<<24 allowed in FT_FLOWCAP
     67108863| 1099444502529|# Just below DBL_MAX_PKTS and BPP max
     67108863| 1099511627776|# DBL_MAX_PKTS and overflow BPP
     67108864|    2147483648|# DBL_MAX_PKTS in hourly file formats
   1073741824|    2147483648|
   4294967295|    4294967295|
1099511627776|70368744177664|
RWTUC_INPUT
open my $input_fh, '>', $tmpfile
    or die "$NAME: Cannot open '$tmpfile': $!";
print {$input_fh} $rwtuc_text;
close $input_fh
    or die "$NAME: Unable to write '$tmpfile': $!";

# Convert the textual input into a SiLK flow file with rwtuc.  The
# 'ignore' column is listed after the real fields so that the notes in
# the third column of the input are skipped.
#
# Every path placed on a command line is wrapped in single quotes,
# matching the quoting already applied to --basename below, so that a
# temporary directory whose name contains spaces or shell
# metacharacters does not break the command.
my $cmd = ("$rwtuc --fields=$tuc_fields,ignore"
           ." --protocol=6 --dport=8080 --application=80"
           ." --output-path='$tmpsilk' --verbose -- '$tmpfile'");
# check_exit_status() returns false when the command fails
exit 1
    unless check_exit_status($cmd);

# Have rwallformats write the flow file using '$write_dir/$base_name'
# as the basename of its output files.
$cmd = ("$rwallformats --no-invocation --basename='$write_dir/$base_name'"
        ." '$tmpsilk'");
exit 1
    unless check_exit_status($cmd);

# Load the expected MD5 values from the __DATA__ section at the end of
# this file.  Each data line holds an MD5 followed by a name; the MD5
# is that of the rwcut output for files having that name.  A name may
# appear on several lines, in which case any of its MD5s is accepted.
my %readback;
DATA_LINE:
while (defined(my $line = <DATA>)) {
    # ignore lines with no word characters and comment lines
    next DATA_LINE if $line !~ /\w/ || $line =~ /^\#/;

    my ($md5, $format_version) = split " ", $line;
    push @{$readback{$format_version}}, $md5;
}

# Files that had a problem.  The key is the file name relative to
# $write_dir; the value is an array reference of issue names pushed by
# check_file(), such as 'read-mismatch' and 'unknown-readback'.
#
my %mismatch;

# Visit every entry under $write_dir; check_file() examines each file
# and records any problem in %mismatch.
File::Find::find({wanted => \&check_file, no_chdir => 1}, $write_dir);

# Print results: files are sorted by the reverse of the name so all
# differences across a wide range of file formats and versions are
# seen first.
#
my @badfiles = sort { scalar(reverse $a) cmp scalar(reverse $b) }
    keys %mismatch;
if (@badfiles) {
    my $count = scalar @badfiles;

    my $msg = sprintf("%s: Found %u file%s with problems:",
                      $NAME, $count, (($count == 1) ? "" : "s"));
    print STDERR $msg, "\n";

    # Tally how often each issue occurred while listing the files
    my %issues;
    for my $file (@badfiles) {
        my @problems = @{$mismatch{$file}};
        ++$issues{$_} for @problems;
        print STDERR join(" ", "$NAME:", "$file -", @problems), "\n";
    }

    # Sort the issue names so the summary is identical from run to run
    # instead of following the hash's arbitrary key order.
    my $issuemsg = join ", ", map { "$_: $issues{$_}" } sort keys %issues;

    die($msg, " ", $issuemsg, "\n");
}

# successful!
exit 0;


# This function is called by File::Find::find.  Since find() is
# invoked with no_chdir, the full path to the file is in the $_
# variable.
#
# The function strips the directory, the base name, and everything
# after the version from the file's name to get a "FORMAT-vN" name,
# and looks for that name in the %readback hash.  If not present, an
# entry is added to %mismatch with 'unknown-readback' and the function
# returns.
#
# Otherwise the function runs rwcut on the file and computes the MD5
# of the result.  If rwcut fails, the function puts the file name into
# the %mismatch hash with 'read-failed'.  If the MD5 does not match
# any of the expected values, the function puts the file name into the
# %mismatch hash with 'read-mismatch'.
#
sub check_file
{
    # skip anything that is not a file
    return unless -f $_;

    my $path = $_;

    # file name without $write_dir
    my $file = $path;
    $file =~ s,^\Q$write_dir/\E,,;

    # set $tail_name to be the varying part of the filename; that is,
    # remove the directory and base_name.  The prefix is quoted so any
    # regex metacharacters in the temporary directory's name are
    # matched literally.
    my $tail_name = $path;
    $tail_name =~ s,^\Q$write_dir/${base_name}-\E,,;

    # set $read_name to be $tail_name truncated at the version (that
    # is, minus the compression and byte-order)
    my $read_name = $tail_name;
    $read_name =~ s,^(\w+-v\d+)-.+,$1,;

    unless (exists $readback{$read_name}) {
        push @{$mismatch{$file}}, 'unknown-readback';
        return;
    }

    # check reading; paths are single-quoted on the command line so
    # unusual characters in the temporary directory's name are safe
    my $cut_output = "$rwcut_dir/$file";
    my $read_cmd = ("$rwcut --fields=$cut_fields --delimited=,"
                    ." --output-path='$cut_output' '$path'");

    # The second argument to check_exit_status() is passed exactly as
    # before; its meaning is defined by SiLKTests.  A false return
    # means the command failed, in which case the output may be
    # missing or incomplete, so do not attempt to checksum it.
    unless (check_exit_status($read_cmd, 1)) {
        print STDERR "$NAME: $read_name: rwcut failed reading '$path'\n";
        push @{$mismatch{$file}}, 'read-failed';
        return;
    }

    my $read_md5;
    compute_md5_file(\$read_md5, $cut_output);
    if (! grep {$_ eq $read_md5} @{$readback{$read_name}}) {
        print STDERR "$NAME: $read_name: read checksum mismatch [$read_md5]",
            " (expected ", join(",", @{$readback{$read_name}}), ")\n";
        push @{$mismatch{$file}}, 'read-mismatch';
    }
    return;
}



__DATA__
#########################################################################
#
#   These are the expected MD5 values when reading the data with rwcut
#
#   Values should not depend on compression or byte-order
#

##  FT_FLOWCAP stores packet-count in 24-bits and uses max value for
##  packet counts between 2^24 and 2^32-1.  Records with packet or
##  byte counts larger than 2^32-1 are rejected.
#
#|packets,bytes
#|16,12
#|16,2048
#|16,2097152
#|524288,2147483648
#|1048576,134217728
#|1048576,2147483648
#|16777215,2147483648
#|16777215,2147483648
#|16777215,2147483648
#|16777215,4294967295
#|
3c65b56d8f227907cc4c504fe3acb10d FT_FLOWCAP-v2
3c65b56d8f227907cc4c504fe3acb10d FT_FLOWCAP-v3
3c65b56d8f227907cc4c504fe3acb10d FT_FLOWCAP-v4
3c65b56d8f227907cc4c504fe3acb10d FT_FLOWCAP-v5
3c65b56d8f227907cc4c504fe3acb10d FT_FLOWCAP-v6


##  These file formats (used primarily as hourly files) use a 20-bit
##  packets field and a 20-bit bytes-per-packet ratio field, with
##  14-bits for the quotient and 6-bits for the remainder.  If packets
##  does not fit in 20-bits, it is divided by 64 and a bit is set to
##  denote this.  (If the resulting packets is still too large
##  (>= 2^26), the record is rejected.)  If the BPP ratio is less than
##  1 or larger than 2^14, the record is rejected.
#
#|packets,bytes
#|16,2048
#|524288,2147483648
#|1048576,134217728
#|1048576,2147483648
#|33554432,2147483648
#|67108800,1099443470400
#
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGMENTED-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGMENTED-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGMENTED-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGMENTED-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGROUTING-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGROUTING-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGROUTING-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGROUTING-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGSNMPOUT-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGSNMPOUT-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGSNMPOUT-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGSNMPOUT-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGWEB-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGWEB-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGWEB-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWAUGWEB-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWFILTER-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWFILTER-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWFILTER-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWFILTER-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWFILTER-v5
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWNOTROUTED-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWNOTROUTED-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWNOTROUTED-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWNOTROUTED-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWNOTROUTED-v5
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWROUTED-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWROUTED-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWROUTED-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWROUTED-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWROUTED-v5
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWSPLIT-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWSPLIT-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWSPLIT-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWSPLIT-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWSPLIT-v5
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWWWW-v1
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWWWW-v2
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWWWW-v3
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWWWW-v4
1ffa0f8726c93eaed5cd54ee44c0e536 FT_RWWWW-v5

##  These store packet and byte counts in 32-bit values and reject
##  anything that exceeds the maximum.
#
#|packets,bytes
#|16,12
#|16,2048
#|16,2097152
#|524288,2147483648
#|1048576,134217728
#|1048576,2147483648
#|33554432,2147483648
#|67108864,2147483648
#|1073741824,2147483648
#|4294967295,4294967295
#
95beabdd7518338675e6f304cdb5895f FT_RWAUGMENTED-v5
95beabdd7518338675e6f304cdb5895f FT_RWAUGMENTED-v6
95beabdd7518338675e6f304cdb5895f FT_RWAUGROUTING-v5
95beabdd7518338675e6f304cdb5895f FT_RWAUGROUTING-v6
95beabdd7518338675e6f304cdb5895f FT_RWAUGSNMPOUT-v5
95beabdd7518338675e6f304cdb5895f FT_RWAUGWEB-v5
95beabdd7518338675e6f304cdb5895f FT_RWAUGWEB-v6
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v0
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v1
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v2
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v3
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v4
95beabdd7518338675e6f304cdb5895f FT_RWGENERIC-v5
95beabdd7518338675e6f304cdb5895f FT_RWIPV6-v1
95beabdd7518338675e6f304cdb5895f FT_RWIPV6-v2
95beabdd7518338675e6f304cdb5895f FT_RWIPV6-v3
95beabdd7518338675e6f304cdb5895f FT_RWIPV6ROUTING-v1
95beabdd7518338675e6f304cdb5895f FT_RWIPV6ROUTING-v2


##  These store packet and byte counts in 64-bit values.
#
#|packets,bytes
#|16,12
#|16,2048
#|16,2097152
#|524288,2147483648
#|1048576,134217728
#|1048576,2147483648
#|1048576,1099511627776
#|33554432,2147483648
#|67108863,1099444502529
#|67108863,1099511627776
#|67108864,2147483648
#|1073741824,2147483648
#|4294967295,4294967295
#|1099511627776,70368744177664
#
ae2db8c19de1ba4dfb6715d71697263a FT_RWGENERIC-v6
ae2db8c19de1ba4dfb6715d71697263a FT_RWIPV6ROUTING-v3
ae2db8c19de1ba4dfb6715d71697263a FT_RWIPV6ROUTING-v4
