#! /usr/bin/perl -w
# MD5: multiple
# TEST: ../rwfilter/rwfilter --stime=2009/02/13:20:00-2009/02/13:20 --sensor=S2 --proto=6 --aport=80,8080,443 --pass=stdout ../../tests/data.rwf | ./rwallformats --no-invocation --basename=/tmp/sk-teststmp && md5 /tmp/sk-teststmp*

use strict;
use SiLKTests;
use File::Find;

# Name of this script with any leading directories removed; it is
# used as the prefix on diagnostic messages.
my $NAME = $0;
$NAME =~ s,.*/,,;

# The applications used by this test, as returned by check_silk_app()
my ($rwallformats, $rwcut, $rwtuc)
    = map { scalar check_silk_app($_) } qw(rwallformats rwcut rwtuc);

# Fields to test: rwtuc reads the two timestamps; rwcut prints those
# and the duration
my $tuc_fields = join ',', qw(sTime eTime);
my $cut_fields = join ',', $tuc_fields, 'duration';

# Common prefix of the names of the files written by rwallformats
my $base_name = 'sk-teststmp';

# Temporary directory that holds everything this test writes
my $tmpdir = make_tempdir();

# Subdirectories: one that rwallformats writes into and one that
# holds the output of rwcut for each of those files
my ($write_dir, $rwcut_dir) = map { "$tmpdir/$_" } qw(write rwcut);
foreach my $subdir ($write_dir, $rwcut_dir) {
    unless (mkdir $subdir) {
        die "$NAME: Cannot mkdir '$subdir': $!";
    }
}

# Write the textual records that rwtuc converts to a SiLK flow file.
# All records share one start time; the end times differ.
my $tmpfile = make_tempname('rwtuc-input.txt');
my $tmpsilk = make_tempname('rwtuc-output.rw');
open(my $input_fh, '>', $tmpfile)
    or die "$NAME: Cannot open '$tmpfile': $!";
print {$input_fh} <<'RWTUC_INPUT';
               sTime|               eTime|
1616161616.249934881|1616162516.634751297|
1616161616.249934881|1616164316.650488478|
1616161616.249934881|1616166116.040771022|
1616161616.249934881|1620049616.408011213|
1616161616.249934881|1621345616.572620512|
RWTUC_INPUT
close($input_fh)
    or die "$NAME: Unable to write '$tmpfile': $!";

# Convert the text to SiLK flow records; give up if rwtuc fails
my $cmd = join(' ',
               "$rwtuc --fields=$tuc_fields,ignore",
               '--protocol=6 --dport=8080 --application=80',
               '--bytes=2560 --packets=20',
               "--output-path=$tmpsilk --verbose -- $tmpfile");
exit 1
    unless check_exit_status($cmd);

# Have rwallformats write the records into files in $write_dir; give
# up if it fails
$cmd = join(' ',
            "$rwallformats --no-invocation --basename='$write_dir/$base_name'",
            $tmpsilk);
exit 1
    unless check_exit_status($cmd);

# Build the table of expected MD5 values from the DATA section at the
# end of this file; these are the MD5s of the output of running rwcut
# on each file.  Every line that is not skipped holds an MD5 followed
# by a name; each name maps to the list of MD5 values given for it.
my %readback;
DATA_LINE:
while (defined(my $line = <DATA>)) {
    # ignore comment lines and lines that hold no word characters
    next DATA_LINE
        if $line =~ /^\#/ || $line !~ /\w/;

    my ($expect, $tail_name) = split " ", $line;
    push @{$readback{$tail_name}}, $expect;
}

# Hash to store the names of files that had an issue.  The key is the
# file name relative to $write_dir; the value is an array of the
# issue tags that check_file() recorded for that file (for example,
# 'read-mismatch' or 'unknown-readback').
#
my %mismatch;

# Visit everything below $write_dir, calling check_file() on each
# entry.  With no_chdir set, check_file() sees the full path in $_.
File::Find::find({wanted => \&check_file, no_chdir => 1}, $write_dir);

# Print results: Files are sorted by the reverse of the name so all
# differences across a wide range of file formats and versions are
# seen first.
#
my @badfiles = sort { scalar reverse($a) cmp scalar reverse($b) }
    keys %mismatch;
if (@badfiles) {

    # number of files that reported each issue tag
    my %issues;

    my $msg = sprintf("%s: Found %u file%s with problems:",
                      $NAME, scalar(@badfiles),
                      ((scalar(@badfiles) == 1) ? "" : "s"));
    print STDERR $msg, "\n";

    for my $file (@badfiles) {
        for my $i (@{$mismatch{$file}}) {
            ++$issues{$i};
        }
        print STDERR join(" ", "$NAME:", "$file -", @{$mismatch{$file}}), "\n";
    }

    # Sort the tags so the summary is the same on every run; the
    # order returned by keys() on a hash is not stable.
    my $issuemsg = join ", ", map { "$_: ".$issues{$_} } sort keys %issues;

    die($msg, " ", $issuemsg, "\n");
}

# successful!
exit 0;


# This function is called by File::Find::find.  Because find() is
# invoked with no_chdir set, the full path to the file is in the $_
# variable.
#
# The function strips the directory and $base_name from the file
# name and truncates what remains after the version to get a name
# such as FT_RWGENERIC-v5.  If that name is not a key in the
# %readback hash, the function adds an 'unknown-readback' entry for
# the file to %mismatch and returns.
#
# Otherwise, the function runs rwcut on the file, writing the output
# to a file of the same name in $rwcut_dir.  If rwcut fails, the
# function adds a 'read-error' entry for the file to %mismatch and
# returns.
#
# Finally, the function computes the MD5 of the rwcut output.  If
# that does not match any of the expected values in %readback, the
# function prints a message to STDERR and adds a 'read-mismatch'
# entry for the file to %mismatch.
#
sub check_file
{
    # skip anything that is not a file
    return unless -f $_;

    my $path = $_;

    # file name without $write_dir
    my $file = $path;
    $file =~ s,^\Q$write_dir/\E,,;

    # set $tail_name to be the varying part of the filename; that is,
    # remove the directory and base_name.  The prefix is quoted so
    # that any regex metacharacters in the temporary directory's name
    # are matched literally.
    my $tail_name = $path;
    $tail_name =~ s,^\Q$write_dir/$base_name-\E,,;

    # set $read_name to be $tail_name truncated at the version (that
    # is, minus the compression and byte-order)
    my $read_name = $tail_name;
    $read_name =~ s,^(\w+-v\d+)-.+,$1,;

    unless (exists $readback{$read_name}) {
        push @{$mismatch{$file}}, 'unknown-readback';
        return;
    }

    # check reading
    my $read_md5;
    my $read_cmd = ("$rwcut --fields=$cut_fields --delimited=,"
                    ." --timestamp-format=epoch,nano"
                    ." --output-path=$rwcut_dir/$file $path");

    # check_exit_status() is treated as returning false when the
    # command fails, matching how the rest of this script uses it.
    # When rwcut fails there is no trustworthy output to checksum, so
    # record the failure and move on to the next file.
    unless (check_exit_status($read_cmd, 1)) {
        print STDERR "$NAME: $read_name: rwcut failed reading '$path'\n";
        push @{$mismatch{$file}}, 'read-error';
        return;
    }

    compute_md5_file(\$read_md5, "$rwcut_dir/$file");
    if (! grep {$_ eq $read_md5} @{$readback{$read_name}}) {
        print STDERR "$NAME: $read_name: read checksum mismatch [$read_md5]",
            " (expected ", join(",", @{$readback{$read_name}}), ")\n";
        push @{$mismatch{$file}}, 'read-mismatch';
    }
}



__DATA__
#########################################################################
#
#   These are the expected MD5 values when reading the data with rwcut
#
#   Values should not depend on compression or byte-order
#

##  FT_FLOWCAP uses 16-bits for elapsed-seconds; starting in v3 an
##  additional value was added for elapsed-msec-fraction.  Also,
##  FT_FLOWCAP clamps the field if it is too large.
#
#   FT_FLOWCAP-v2
#
#|sTime,eTime,duration
#|1616161616.000000000,1616162516.000000000,900.000000000
#|1616161616.000000000,1616164316.000000000,2700.000000000
#|1616161616.000000000,1616166115.000000000,4499.000000000
#|1616161616.000000000,1616227151.000000000,65535.000000000
#|1616161616.000000000,1616227151.000000000,65535.000000000
#
0a6384d74debd8f483fd4d004a626eb0 FT_FLOWCAP-v2
#
#   FT_FLOWCAP-v3,v4,v5,v6
#
#|sTime,eTime,duration
#|1616161616.249000000,1616162516.633000000,900.384000000
#|1616161616.249000000,1616164316.649000000,2700.400000000
#|1616161616.249000000,1616166116.039000000,4499.790000000
#|1616161616.249000000,1616227151.407000000,65535.158000000
#|1616161616.249000000,1616227151.571000000,65535.322000000
#
5bf319a146ad3dc5069b60ea88ad7c4f FT_FLOWCAP-v3
5bf319a146ad3dc5069b60ea88ad7c4f FT_FLOWCAP-v4
5bf319a146ad3dc5069b60ea88ad7c4f FT_FLOWCAP-v5
5bf319a146ad3dc5069b60ea88ad7c4f FT_FLOWCAP-v6


##  These old hourly-file format-versions (and the FT_RWFILTER format
##  for output from rwfilter) only store seconds and use 11-bits for
##  elapsed-seconds.
#
#|sTime,eTime,duration
#|1616161616.000000000,1616162516.000000000,900.000000000
#
75e4c2aac1bd881a0f4c83c57f221852 FT_RWFILTER-v1
75e4c2aac1bd881a0f4c83c57f221852 FT_RWFILTER-v2
75e4c2aac1bd881a0f4c83c57f221852 FT_RWNOTROUTED-v1
75e4c2aac1bd881a0f4c83c57f221852 FT_RWNOTROUTED-v2
75e4c2aac1bd881a0f4c83c57f221852 FT_RWROUTED-v1
75e4c2aac1bd881a0f4c83c57f221852 FT_RWROUTED-v2
75e4c2aac1bd881a0f4c83c57f221852 FT_RWSPLIT-v1
75e4c2aac1bd881a0f4c83c57f221852 FT_RWSPLIT-v2
75e4c2aac1bd881a0f4c83c57f221852 FT_RWWWW-v1
75e4c2aac1bd881a0f4c83c57f221852 FT_RWWWW-v2


##  These hourly-file format-versions use 22-bits for total elapsed
##  milliseconds, supporting a maximum elapsed time of 69.9 minutes.
#
#|sTime,eTime,duration
#|1616161616.249000000,1616162516.633000000,900.384000000
#|1616161616.249000000,1616164316.649000000,2700.400000000
#
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGMENTED-v1
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGMENTED-v2
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGMENTED-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGMENTED-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGROUTING-v1
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGROUTING-v2
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGROUTING-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGROUTING-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGSNMPOUT-v1
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGSNMPOUT-v2
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGSNMPOUT-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGSNMPOUT-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGWEB-v1
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGWEB-v2
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGWEB-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWAUGWEB-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWNOTROUTED-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWNOTROUTED-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWNOTROUTED-v5
53bb6083d042a33bf50e6c26fc8009f2 FT_RWROUTED-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWROUTED-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWROUTED-v5
53bb6083d042a33bf50e6c26fc8009f2 FT_RWSPLIT-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWSPLIT-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWSPLIT-v5
53bb6083d042a33bf50e6c26fc8009f2 FT_RWWWW-v3
53bb6083d042a33bf50e6c26fc8009f2 FT_RWWWW-v4
53bb6083d042a33bf50e6c26fc8009f2 FT_RWWWW-v5


##  These format-versions only store seconds and use 32-bits for the
##  elapsed time.
#
#|sTime,eTime,duration
#|1616161616.000000000,1616162516.000000000,900.000000000
#|1616161616.000000000,1616164316.000000000,2700.000000000
#|1616161616.000000000,1616166115.000000000,4499.000000000
#|1616161616.000000000,1620049616.000000000,3888000.000000000
#|1616161616.000000000,1621345616.000000000,5184000.000000000
#
e5630573b7151838f3ad71c7c75f6aef FT_RWFILTER-v3
e5630573b7151838f3ad71c7c75f6aef FT_RWGENERIC-v0
e5630573b7151838f3ad71c7c75f6aef FT_RWGENERIC-v1
e5630573b7151838f3ad71c7c75f6aef FT_RWGENERIC-v2


##  These format-versions use 32-bits for elapsed-seconds with a
##  separate value holding fractional-msec.
#
#|sTime,eTime,duration
#|1616161616.249000000,1616162516.633000000,900.384000000
#|1616161616.249000000,1616164316.649000000,2700.400000000
#|1616161616.249000000,1616166116.039000000,4499.790000000
#|1616161616.249000000,1620049616.407000000,3888000.158000000
#|1616161616.249000000,1621345616.571000000,5184000.322000000
#
4b8c0d5e41b10c81e284e0c1f599ff10 FT_RWFILTER-v4
4b8c0d5e41b10c81e284e0c1f599ff10 FT_RWFILTER-v5
4b8c0d5e41b10c81e284e0c1f599ff10 FT_RWGENERIC-v3
4b8c0d5e41b10c81e284e0c1f599ff10 FT_RWGENERIC-v4


##  This set of format-versions use 32-bits for total milliseconds,
##  supporting a maximum elapsed time of 4294967.296 seconds or 49.7
##  days.
#
#|sTime,eTime,duration
#|1616161616.249000000,1616162516.633000000,900.384000000
#|1616161616.249000000,1616164316.649000000,2700.400000000
#|1616161616.249000000,1616166116.039000000,4499.790000000
#|1616161616.249000000,1620049616.407000000,3888000.158000000
#
b849dad3e598465d19dd81d6d3d07c5f FT_RWAUGMENTED-v5
b849dad3e598465d19dd81d6d3d07c5f FT_RWAUGROUTING-v5
b849dad3e598465d19dd81d6d3d07c5f FT_RWAUGSNMPOUT-v5
b849dad3e598465d19dd81d6d3d07c5f FT_RWGENERIC-v5
b849dad3e598465d19dd81d6d3d07c5f FT_RWIPV6-v1
b849dad3e598465d19dd81d6d3d07c5f FT_RWIPV6-v2
b849dad3e598465d19dd81d6d3d07c5f FT_RWIPV6ROUTING-v1
b849dad3e598465d19dd81d6d3d07c5f FT_RWIPV6ROUTING-v2
b849dad3e598465d19dd81d6d3d07c5f FT_RWIPV6ROUTING-v3


##  FT_RWAUGWEB-v5 uses 30-bits for total-elapsed-millisec
##  (1073741.824 seconds or 12.4 days)
#
#   FT_RWAUGWEB-v5
#
#|sTime,eTime,duration
#|1616161616.249000000,1616162516.633000000,900.384000000
#|1616161616.249000000,1616164316.649000000,2700.400000000
#|1616161616.249000000,1616166116.039000000,4499.790000000
#
40c59cb75d091d6d368832c9b32a28c6 FT_RWAUGWEB-v5


##  These format-versions support nanoseconds and store sTime and
##  eTime.  (RWAUG* types are offsets from the hour).
#
#|sTime,eTime,duration
#|1616161616.249934881,1616162516.634751297,900.384816416
#|1616161616.249934881,1616164316.650488478,2700.400553597
#|1616161616.249934881,1616166116.040771022,4499.790836141
#|1616161616.249934881,1620049616.408011213,3888000.158076332
#|1616161616.249934881,1621345616.572620512,5184000.322685631
#
ab95571e56108e6054954ff9fe1bfe7c FT_RWAUGMENTED-v6
ab95571e56108e6054954ff9fe1bfe7c FT_RWAUGROUTING-v6
ab95571e56108e6054954ff9fe1bfe7c FT_RWAUGWEB-v6
ab95571e56108e6054954ff9fe1bfe7c FT_RWGENERIC-v6
ab95571e56108e6054954ff9fe1bfe7c FT_RWIPV6-v3
ab95571e56108e6054954ff9fe1bfe7c FT_RWIPV6ROUTING-v4
