/*
 *  Copyright 2007-2022 Carnegie Mellon University
 *  See license information in LICENSE.txt.
 */
/**
 *  @internal
 *
 *  @file smtpplugin.c
 *
 *  @brief this is a protocol classifier for the simple mail transport
 *  protocol (SMTP)
 *
 *  ------------------------------------------------------------------------
 *  Authors: Chris Inacio
 *  ------------------------------------------------------------------------
 *  @DISTRIBUTION_STATEMENT_BEGIN@
 *  YAF 3.0.0
 *
 *  Copyright 2022 Carnegie Mellon University.
 *
 *  NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
 *  INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
 *  UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
 *  AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF FITNESS FOR
 *  PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED FROM USE OF
 *  THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF
 *  ANY KIND WITH RESPECT TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT
 *  INFRINGEMENT.
 *
 *  Released under a GNU GPL 2.0-style license, please see license.txt or
 *  contact permission@sei.cmu.edu for full terms.
 *
 *  [DISTRIBUTION STATEMENT A] This material has been approved for public
 *  release and unlimited distribution.  Please see Copyright notice for
 *  non-US Government use and distribution.
 *
 *  Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
 *  Trademark Office by Carnegie Mellon University.
 *
 *  This Software includes and/or makes use of Third-Party Software subject
 *  to its own license.
 *
 *  DM22-0007
 *  @DISTRIBUTION_STATEMENT_END@
 *  ------------------------------------------------------------------------
 */


#define _YAF_SOURCE_
#include <yaf/autoinc.h>
#include <yaf/yafcore.h>
#include <yaf/decode.h>
#include <pcre.h>

#include <yaf/yafDPIPlugin.h>
#if YAF_ENABLE_DPI

/*  Identifiers for the three SMTP record templates.  Each *_TID value is
 *  passed to fbSubTemplateListInit() together with the matching template
 *  pointer when ydpProcessDPI() builds its lists.  The *_NAME and *_DESC
 *  values are not referenced in this part of the file; presumably they are
 *  used where the templates are registered -- TODO confirm. */

/*  Top-level SMTP flow record (yfSMTPFlow_t). */
#define YAF_SMTP_FLOW_TID   0xCB01
#define YAF_SMTP_FLOW_NAME  "yaf_smtp"
#define YAF_SMTP_FLOW_DESC  NULL

/*  One record per email message (yfSMTPMessage_t). */
#define YAF_SMTP_MESSAGE_TID   0xCB02
#define YAF_SMTP_MESSAGE_NAME  "yaf_smtp_message"
#define YAF_SMTP_MESSAGE_DESC  NULL

/*  One record per captured message header (yfSMTPHeader_t). */
#define YAF_SMTP_HEADER_TID   0xCB03
#define YAF_SMTP_HEADER_NAME  "yaf_smtp_header"
#define YAF_SMTP_HEADER_DESC  NULL

/*  Information element spec for the top-level SMTP flow record.  The
 *  entries are listed in the same order as the members of yfSMTPFlow_t;
 *  keep the two in sync.  All elements are variable length except
 *  smtpStartTLS, which is a single octet. */
static fbInfoElementSpec_t yaf_smtp_spec[] = {
    {"smtpHello",         FB_IE_VARLEN, 0 },
    {"smtpResponseList",  FB_IE_VARLEN, 0 },
    {"smtpMessageList",   FB_IE_VARLEN, 0 },
    {"smtpStartTLS",      1, 0 },
    FB_IESPEC_NULL
};

/*  Top-level SMTP record filled in by ydpProcessDPI().  Member order
 *  mirrors yaf_smtp_spec. */
typedef struct yfSMTPFlow_st {
    /* first captured Hello (DPI ID 26); buf stays NULL when none was seen */
    fbVarfield_t          smtpHello;
    /* basicList of varfields, one per captured response (DPI ID 30) */
    fbBasicList_t         smtpResponseList;
    /* subTemplateList of yfSMTPMessage_t, one entry per email message */
    fbSubTemplateList_t   smtpMessageList;
    /* set to 1 when a StartTLS capture (DPI ID 29) is present, else 0 */
    uint8_t               smtpStartTLS;
} yfSMTPFlow_t;

/*  Information element spec for a single email message.  The entries are
 *  listed in the same order as the members of yfSMTPMessage_t; keep the two
 *  in sync.  Note that the 4-octet "smtpMessageSize" element corresponds to
 *  the struct member named smtpSize. */
static fbInfoElementSpec_t yaf_smtp_message_spec[] = {
    {"smtpSubject",       FB_IE_VARLEN, 0 },
    {"smtpToList",        FB_IE_VARLEN, 0 },
    {"smtpFromList",      FB_IE_VARLEN, 0 },
    {"smtpFilenameList",  FB_IE_VARLEN, 0 },
    {"smtpURLList",       FB_IE_VARLEN, 0 },
    {"smtpHeaderList",    FB_IE_VARLEN, 0 },
    {"smtpMessageSize",   4,            0 },
    FB_IESPEC_NULL
};

/*  Per-message record; ydpProcessDPI() creates one for each message it
 *  finds and fills it from the DPI captures that fall within that message's
 *  bounds.  Member order mirrors yaf_smtp_message_spec. */
typedef struct yfSMTPMessage_st {
    /* first Subject capture (DPI ID 31) within the message */
    fbVarfield_t          smtpSubject;
    /* basicList of To captures (DPI ID 32) */
    fbBasicList_t         smtpToList;
    /* basicList of From captures (DPI ID 33) */
    fbBasicList_t         smtpFromList;
    /* basicList of filename captures (DPI ID 34) */
    fbBasicList_t         smtpFilenameList;
    /* basicList of URL captures (DPI ID 35) */
    fbBasicList_t         smtpURLList;
    /* subTemplateList of yfSMTPHeader_t, one per Header capture (DPI ID 36) */
    fbSubTemplateList_t   smtpHeaderList;
    /* Size capture (DPI ID 28) parsed as a base-10 number with strtoul() */
    uint32_t              smtpSize;
} yfSMTPMessage_t;

/*  Information element spec for one message header, stored as a
 *  variable-length key and a variable-length value.  Order matches the
 *  members of yfSMTPHeader_t. */
static fbInfoElementSpec_t yaf_smtp_header_spec[] = {
    {"smtpKey",           FB_IE_VARLEN, 0 },
    {"smtpValue",         FB_IE_VARLEN, 0 },
    FB_IESPEC_NULL
};

/*  A single message header produced by splitting a Header capture (DPI ID
 *  36) at its first ':'.  Member order mirrors yaf_smtp_header_spec. */
typedef struct yfSMTPHeader_st {
    /* the text of the capture that precedes the ':' */
    fbVarfield_t   smtpKey;
    /* the remainder of the capture following the key; ydpProcessDPI()
     * skips over the ':' and any whitespace when setting this */
    fbVarfield_t   smtpValue;
} yfSMTPHeader_t;

/*  Templates handed to fbSubTemplateListInit() by ydpProcessDPI() for the
 *  flow record, the per-message records, and the per-header records,
 *  respectively.  They are assigned outside the part of the file shown
 *  here. */
static fbTemplate_t     *smtpTemplate;
static fbTemplate_t     *smtpMessageTemplate;
static fbTemplate_t     *smtpHeaderTemplate;


/*  Maximum number of separate emails examined in a single payload; this
 *  sizes the per-message offset arrays in ydpScanPayload() and the message
 *  boundary array in ydpProcessDPI().  Note that these fill space in the DPI
 *  array that could be used by other DPI info. */
#define SMTP_MAX_EMAILS 5

/*  If the <CRLF>.<CRLF> to close a message is within this number of bytes of
 *  the payloadSize, assume the only remaining SMTP command from the client is
 *  "QUIT<CRLF>".  ydpScanPayload() also uses this threshold to decide
 *  whether trailing data without DATA/BDAT is treated as the start of
 *  another message. */
#define YF_BYTES_AFTER_DOT  12
#endif  /* YAF_ENABLE_DPI */


/*  Value returned by ydpScanPayload() when the payload is identified as
 *  SMTP; it is also compared against flow->appLabel and passed as the final
 *  argument of every ydRunPluginRegex() call. */
#define SMTP_PORT_NUMBER 25

/*  Size for PCRE capture vector. */
#define NUM_CAPT_VECTS 60

/*  Compiled regular expressions used by this plugin.  All start out NULL;
 *  the code that compiles them is outside the part of the file shown
 *  here. */

/*  Run against every payload by ydpScanPayload() to decide whether it is
 *  SMTP; needed whether or not DPI is enabled. */
static pcre        *smtpRegexApplabel = NULL;

#if YAF_ENABLE_DPI
/*  Regexes ydpScanPayload() uses to locate message boundaries: the final
 *  BDAT chunk, the blank line ending the header, the DATA/BDAT command, and
 *  the <CRLF>.<CRLF> ending DATA. */
static pcre        *smtpRegexBdatLast = NULL;
static pcre        *smtpRegexBlankLine = NULL;
static pcre        *smtpRegexDataBdat = NULL;
static pcre        *smtpRegexEndData = NULL;

/*  Regexes handed to ydRunPluginRegex() to capture DPI data.  The DPI IDs
 *  they are stored under: Filename 34, From 33, Header 36, Hello 26,
 *  Response 30, Size 28, StartTLS 29, Subject 31, To 32, URL 35. */
static pcre        *smtpRegexFilename = NULL;
static pcre        *smtpRegexFrom = NULL;
static pcre        *smtpRegexHeader = NULL;
static pcre        *smtpRegexHello = NULL;
static pcre        *smtpRegexResponse = NULL;
static pcre        *smtpRegexSize = NULL;
static pcre        *smtpRegexStartTLS = NULL;
static pcre        *smtpRegexSubject = NULL;
static pcre        *smtpRegexTo = NULL;
static pcre        *smtpRegexURL = NULL;

/*  Information elements given to fbBasicListInit() by ydpProcessDPI() for
 *  the filename, from, response, to, and URL basicLists. */
static const fbInfoElement_t *smtpElemFile = NULL;
static const fbInfoElement_t *smtpElemFrom = NULL;
static const fbInfoElement_t *smtpElemResponse = NULL;
static const fbInfoElement_t *smtpElemTo = NULL;
static const fbInfoElement_t *smtpElemURL = NULL;
#endif  /* YAF_ENABLE_DPI */


/*  Set YFP_DEBUG to a non-zero value at compile time to enable verbose
 *  g_debug() tracing in this plugin; it defaults to off. */
#ifndef YFP_DEBUG
#define YFP_DEBUG 0
#endif

#if !YFP_DEBUG
/*  With debugging off, both helper macros expand to nothing. */
#define YFP_DEBUG_STORE_COUNT(count_, flowctx_)
#define YFP_DEBUG_LOG_NEW(count_, flowctx_, what_)
#else

/*  Save the flow context's current number of DPI entries (dpinum) into
 *  *count_ so that a later YFP_DEBUG_LOG_NEW can report what was added. */
#define YFP_DEBUG_STORE_COUNT(count_, flowctx_)           \
    do { *(count_) = (flowctx_)->dpinum; } while(0)

/*  Log how many DPI entries were added since *count_ was last updated,
 *  print each new entry's captured data, and advance *count_ to the current
 *  dpinum.  `what_` is a string naming the check for the log message.  This
 *  macro relies on variables named `payload` and `payloadSize` being in
 *  scope at the point of use. */
#define YFP_DEBUG_LOG_NEW(count_, flowctx_, what_)              \
    do {                                                        \
        g_debug("SMTP %s check matched %u locations",           \
                what_, (flowctx_)->dpinum - *(count_));         \
        while (*(count_) < (flowctx_)->dpinum) {                \
            ydpPayloadPrinter(                                  \
                payload, payloadSize,                           \
                (flowctx_)->dpi[*(count_)].dpacketCapt,         \
                (flowctx_)->dpi[*(count_)].dpacketCaptLen,      \
                "    offset %u, len %u, data",                  \
                (flowctx_)->dpi[*(count_)].dpacketCapt,         \
                (flowctx_)->dpi[*(count_)].dpacketCaptLen);     \
            ++*(count_);                                        \
        }                                                       \
    } while(0)



/**
 *  Print `numPrint` octets of data contained in `payloadData` starting at
 *  octet position `offset`.  `payloadSize` is the size of `payloadData`.  The
 *  output is prefixed with text created by applying `format` to the remaining
 *  arguments, followed by ": ".
 *
 *  Octets that are not printable are shown as '.'.  Nothing is printed from
 *  the payload when `payloadData` is NULL or `offset` is at or beyond
 *  `payloadSize`; otherwise the amount printed is limited to the data
 *  available after `offset` and to PAYLOAD_PRINTER_ARRAY_LENGTH - 1 octets.
 *  The result is written with g_debug().
 *
 *  @param payloadData the payload to print from; may be NULL
 *  @param payloadSize number of octets in `payloadData`
 *  @param offset      position in `payloadData` of the first octet to print
 *  @param numPrint    maximum number of octets to print
 *  @param format      printf-style format for the prefix text
 */
static void
ydpPayloadPrinter(
    const uint8_t *payloadData,
    unsigned int   payloadSize,
    unsigned int   offset,
    unsigned int   numPrint,
    const char    *format,
    ...)
{
#define PAYLOAD_PRINTER_ARRAY_LENGTH 4096
    unsigned int loop;
    char         dumpArray[PAYLOAD_PRINTER_ARRAY_LENGTH];
    char         prefixString[PAYLOAD_PRINTER_ARRAY_LENGTH];
    va_list      args;

    va_start(args, format);
    vsnprintf(prefixString, sizeof(prefixString), format, args);
    va_end(args);

    if (NULL == payloadData || offset >= payloadSize) {
        numPrint = 0;
    } else {
        payloadSize -= offset;
        payloadData += offset;
        if (numPrint > payloadSize) {
            numPrint = payloadSize;
        }
        /* leave room in dumpArray for the terminating NUL */
        if (numPrint > PAYLOAD_PRINTER_ARRAY_LENGTH - 1) {
            numPrint = PAYLOAD_PRINTER_ARRAY_LENGTH - 1;
        }
    }
    for (loop = 0; loop < numPrint; ++loop) {
        if (isprint(*(payloadData + loop)) &&
            !iscntrl(*(payloadData + loop)))
        {
            dumpArray[loop] = (char)(*(payloadData + loop));
        } else {
            dumpArray[loop] = '.';
        }
    }
    dumpArray[loop] = '\0';

    g_debug("%s: \"%s\"", prefixString, dumpArray);
}
#endif /* if YFP_DEBUG */


/**
 * ydpScanPayload
 *
 * Decides whether a payload is SMTP by running the SMTP applabel regex over
 * it.  When DPI is enabled, the function also records DPI data for the flow
 * via ydRunPluginRegex(): for the client side of the conversation it first
 * locates the boundaries of up to SMTP_MAX_EMAILS messages and then runs the
 * per-message and whole-payload regexes; for the server side it captures the
 * response codes.
 *
 * @param payload the packet payload
 * @param payloadSize size of the packet payload
 * @param flow a pointer to the flow state structure
 * @param val a pointer to biflow state (used for forward vs reverse); not
 *        referenced by this function
 *
 *
 * @return SMTP_PORT_NUMBER if the applabel regex matched or the flow's
 *         appLabel is already SMTP_PORT_NUMBER; otherwise 0
 */
uint16_t
ydpScanPayload(
    const uint8_t  *payload,
    unsigned int    payloadSize,
    yfFlow_t       *flow,
    yfFlowVal_t    *val)
{
#if YFP_DEBUG && YAF_ENABLE_DPI
    const ypDPIFlowCtx_t *flowContext = (ypDPIFlowCtx_t *)(flow->dpictx);
    unsigned int prev;
#endif
    int rc;
    int vects[NUM_CAPT_VECTS];

#if YFP_DEBUG
    g_debug("smtpplugin scanning payload of flow %p\n", flow);
#endif

    rc = pcre_exec(smtpRegexApplabel, NULL, (const char *)payload, payloadSize,
                   0, 0, vects, NUM_CAPT_VECTS);
#if YFP_DEBUG
    ydpPayloadPrinter(payload, payloadSize, 0, 512,
                      "SMTP applabel check returned %d", rc);
#endif

#if YAF_ENABLE_DPI
    /* If pcre_exec() returns 1 this is the client-side of the conversation
     * and if 2 it is the server-side. */
    if (rc == 1) {
        /*
         * To limit the regexes to searching only the relevant parts of the
         * payload, we first find the positions of those relevant parts, while
         * being aware multiple messages may be sent during a single
         * connection.
         *
         * msgSplits[i] is start of the area where SMTP commands are allowed
         * and also marks the end of message i-1.
         * msgData[i] is boundary between SMTP commands and the message.
         * msgBegin[i] equals msgData[i] unless DATA/BDAT was not seen, in
         * which case it equals msgSplits[i].
         * hdrEnd[i] is the blank line between the msg's header and body.
         */
        int msgSplits[1 + SMTP_MAX_EMAILS];
        int msgData[SMTP_MAX_EMAILS];
        int msgBegin[SMTP_MAX_EMAILS];
        int hdrEnd[SMTP_MAX_EMAILS];
        int msgIndex = 0;
        int tmprc;
        int i;

        msgSplits[0] = 0;

        for (;;) {
            /* look for DATA or BDAT */
            tmprc = pcre_exec(smtpRegexDataBdat, NULL, (const char *)payload,
                              payloadSize, msgSplits[msgIndex],
                              0, vects, NUM_CAPT_VECTS);
#if YFP_DEBUG
            switch (tmprc) {
              case 1:
                ydpPayloadPrinter(
                    payload, payloadSize, vects[0], (1 + vects[1] - vects[0]),
                    ("SMTP data/bdat check returned %d at offset %d"
                     "; vects %d,%d; data"),
                    tmprc, msgSplits[msgIndex], vects[0], vects[1]);
                break;
              case 2:
                ydpPayloadPrinter(
                    payload, payloadSize, vects[0], (1 + vects[1] - vects[0]),
                    ("SMTP data/bdat check returned %d at offset %d"
                     "; vects %d,%d %d,%d; data"),
                    tmprc, msgSplits[msgIndex], vects[0], vects[1],
                    vects[2], vects[3]);
                break;
              default:
                ydpPayloadPrinter(
                    payload, payloadSize, MAX(0, msgSplits[msgIndex] - 10),
                    64, "SMTP data/bdat check returned %d at offset %d; data",
                    tmprc, msgSplits[msgIndex]);
                break;
            }
#endif  /* YFP_DEBUG */

            if (tmprc <= 0) {
                /* DATA/BDAT not found; if there are more than
                 * YF_BYTES_AFTER_DOT bytes of payload after the end of the
                 * last message, assume the payload contains the start of
                 * another "MAIL FROM:..." */
                if (payloadSize - msgSplits[msgIndex] > YF_BYTES_AFTER_DOT) {
                    msgData[msgIndex] = payloadSize;
                    msgBegin[msgIndex] = msgSplits[msgIndex];
                    hdrEnd[msgIndex] = payloadSize;
                    msgSplits[++msgIndex] = payloadSize;
                }
                break;
            }

            msgData[msgIndex] = msgBegin[msgIndex] = vects[1];
            /* assume email message goes to end of payload */
            msgSplits[msgIndex + 1] = payloadSize;

            if (tmprc == 2) {
                /* saw "BDAT <LENGTH>(| +LAST)"; if the character before
                 * vects[3] is not 'T', search for the last BDAT blob */
                if ('T' != payload[vects[3] - 1]) {
                    tmprc = pcre_exec(smtpRegexBdatLast, NULL,
                                      (const char *)payload,
                                      payloadSize, msgData[msgIndex], 0,
                                      vects, NUM_CAPT_VECTS);
#if YFP_DEBUG
                    g_debug("SMTP bdat last check returned %d at offset %d"
                            "; vects[0] is %d",
                            tmprc, msgData[msgIndex], vects[0]);
#endif
                }

                if (tmprc > 1) {
                    /* parse the length of the last BDAT blob to find the end
                     * of the message */
                    const char    *numStart = (const char *)payload + vects[2];
                    char          *ep = NULL;
                    unsigned long  len;

                    errno = 0;
                    len = strtoul(numStart, &ep, 10);
                    /* strtoul() sets ep to numStart when it parses no
                     * digits, so that is the value to compare against */
                    if (len > 0 || (0 == errno && ep != numStart)) {
                        /* the message ends `len` octets after the BDAT
                         * command but never beyond the payload; compare
                         * against the space remaining so that a huge length
                         * cannot wrap the sum around */
                        if (len >= payloadSize - (unsigned int)vects[1]) {
                            msgSplits[msgIndex + 1] = payloadSize;
                        } else {
                            msgSplits[msgIndex + 1] = vects[1] + (int)len;
                        }
                    }
#if YFP_DEBUG
                    else {
                        ydpPayloadPrinter(
                            payload, payloadSize, vects[0],
                            (1 + vects[1] - vects[0]),
                            "Unable to parse BDAT length: %s; data",
                            strerror(errno));
                    }
#endif  /* YFP_DEBUG */
                }
            } else {
                /* saw DATA; search for <CRLF>.<CRLF> to find the end of
                 * msg */
                tmprc = pcre_exec(smtpRegexEndData, NULL, (const char *)payload,
                                  payloadSize, msgData[msgIndex], 0,
                                  vects, NUM_CAPT_VECTS);
#if YFP_DEBUG
                g_debug("SMTP end data check returned %d at offset %d"
                        "; vects[0] is %d",
                        tmprc, msgData[msgIndex], vects[0]);
#endif
                if (tmprc > 0) {
                    msgSplits[msgIndex + 1] = vects[1];
                }
            }

            /* find the separator between headers and body; if not found, set
             * it to the next message split.  The search is limited to this
             * message by using the message's end as the subject length. */
            tmprc = pcre_exec(smtpRegexBlankLine, NULL, (const char *)payload,
                              msgSplits[msgIndex + 1], msgData[msgIndex], 0,
                              vects, NUM_CAPT_VECTS);
#if YFP_DEBUG
            g_debug("SMTP blank check returned %d at offset %d; vects[0] is %d",
                    tmprc, msgData[msgIndex], vects[0]);
#endif
            if (tmprc > 0) {
                hdrEnd[msgIndex] = vects[1];
            } else {
                hdrEnd[msgIndex] = msgSplits[msgIndex + 1];
            }

            /* stop once the arrays are full or the payload is used up */
            ++msgIndex;
            if (msgIndex >= SMTP_MAX_EMAILS ||
                msgSplits[msgIndex] >= (int)payloadSize)
            {
                break;
            }
        }

#if YFP_DEBUG
        g_debug("Found %d messages in payload of size %u:",
                msgIndex, payloadSize);
        for (i = 0; i < msgIndex; ++i) {
            g_debug("    msg# %d, smtpBegin %d, data %d, msgBegin %d,"
                    " blank %d, end %d",
                    i, msgSplits[i], msgData[i], msgBegin[i], hdrEnd[i],
                    msgSplits[i+1]);
        }
#endif  /* YFP_DEBUG */

        /* Capture headers in order of importance since we may run out of room
         * in the DPI array */

        /* Check for hello, from, to, and subject in each message */
        YFP_DEBUG_STORE_COUNT(&prev, flowContext);
        for (i = 0; i < msgIndex && msgSplits[i] < (int)payloadSize; ++i) {
            /* store the end of the message as a separator if it is not at or
             * near the end of the payload; ydpProcessDPI() treats the offset
             * stored under ID 38 as the boundary between two messages */
            if (msgSplits[i+1] + YF_BYTES_AFTER_DOT < (int)payloadSize) {
                ydRunPluginRegex(flow, payload, 2, NULL, msgSplits[i+1], 38,
                                 SMTP_PORT_NUMBER);
                YFP_DEBUG_LOG_NEW(&prev, flowContext, "msg separator");
            }

            /* SMTP commands are searched for between the start of the
             * command area and the start of the message data */
            ydRunPluginRegex(flow, payload, msgData[i], smtpRegexHello,
                             msgSplits[i], 26, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "hello");

            ydRunPluginRegex(flow, payload, msgData[i], smtpRegexFrom,
                             msgSplits[i], 33, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "from");

            ydRunPluginRegex(flow, payload, msgData[i], smtpRegexTo,
                             msgSplits[i], 32, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "to");

            /* the subject is searched for within the message's header */
            ydRunPluginRegex(flow, payload, hdrEnd[i], smtpRegexSubject,
                             msgBegin[i], 31, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "subject");
        }

        /* get filenames and urls throughout the payload */
        ydRunPluginRegex(flow, payload, payloadSize,
                         smtpRegexFilename, 0, 34, SMTP_PORT_NUMBER);
        YFP_DEBUG_LOG_NEW(&prev, flowContext, "filename");

        ydRunPluginRegex(flow, payload, payloadSize,
                         smtpRegexURL, 0, 35, SMTP_PORT_NUMBER);
        YFP_DEBUG_LOG_NEW(&prev, flowContext, "url");

        /* look for starttls, msg size, and headers per message */
        for (i = 0; i < msgIndex && msgSplits[i] < (int)payloadSize; ++i) {
            ydRunPluginRegex(flow, payload, msgData[i], smtpRegexStartTLS,
                             msgSplits[i], 29, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "starttls");

            ydRunPluginRegex(flow, payload, msgData[i], smtpRegexSize,
                             msgSplits[i], 28, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "msg size");

            ydRunPluginRegex(flow, payload, hdrEnd[i], smtpRegexHeader,
                             msgBegin[i], 36, SMTP_PORT_NUMBER);
            YFP_DEBUG_LOG_NEW(&prev, flowContext, "header");
        }
    } else if (rc > 0 || flow->appLabel == SMTP_PORT_NUMBER) {
        /* server side, or a flow already labeled as SMTP: capture the
         * response codes */
        YFP_DEBUG_STORE_COUNT(&prev, flowContext);
        ydRunPluginRegex(flow, payload, payloadSize, smtpRegexResponse, 0, 30,
                          SMTP_PORT_NUMBER);
        YFP_DEBUG_LOG_NEW(&prev, flowContext, "response");
    }
#endif /* if YAF_ENABLE_DPI */

    if (rc > 0 || flow->appLabel == SMTP_PORT_NUMBER) {
        return SMTP_PORT_NUMBER;
    }

    return 0;
}

#if YAF_ENABLE_DPI
void *
ydpProcessDPI(
    ypDPIFlowCtx_t       *flowContext,
    fbSubTemplateList_t  *stl,
    yfFlow_t             *flow,
    uint8_t               fwdcap,
    uint8_t               totalcap)
{
    yfDPIData_t   *dpi = flowContext->dpi;
    yfSMTPFlow_t  *rec = NULL;
    int            count;

    fbVarfield_t          *responseCode = NULL;
    fbVarfield_t          *smtpTo = NULL;
    fbVarfield_t          *smtpFrom = NULL;
    fbVarfield_t          *smtpFilename = NULL;
    fbVarfield_t          *smtpURL = NULL;
    yfSMTPMessage_t       *smtpEmail;
    yfSMTPHeader_t        *smtpHeader;

    /* DPI counts, one for each list */
    int      numMatchesTo;
    int      numMatchesFrom;
    int      numMatchesFile;
    int      numMatchesURL;
    int      numMatchesHeader;
    const uint8_t *msgBound[SMTP_MAX_EMAILS + 1];
    int      numMessages;
    int      msgIndex;

    unsigned int  maxMsgCapt = 0;
    const uint8_t *msgBegin;
    const uint8_t *msgEnd;
    const uint8_t *colon;

    const yfFlowVal_t *current;
    const yfFlowVal_t *msgData = NULL;

    /*
     * FIXME: Consider changing this function so it does not cache the
     * msgData.  While we expect all msgData to be on one side of the
     * connection, we cannot know what data will actually be matched by this
     * plugin.
     *
     * The current approach is to initialize msgData when data is matched and
     * refuse to change it to the other direction.  A different approach is to
     * store the number of messages and the msgBound[]s for both the forward
     * and reverse directions.
     */

#if YFP_DEBUG
    g_debug("smtpplugin processing dpi of flow %p\n", flow);
#endif

    rec = (yfSMTPFlow_t *)fbSubTemplateListInit(stl, 3, YAF_SMTP_FLOW_TID,
                                                smtpTemplate, 1);
    rec->smtpHello.buf = NULL;
    rec->smtpStartTLS = 0;

    /* Create an empty basicList of SMTP response codes; fill the list as we
     * scan the data. */
    fbBasicListInit(&rec->smtpResponseList, 3, smtpElemResponse, 0);

#if YFP_DEBUG
    {
        char buf[1024] = "";
        unsigned int pos = 0;
        int i;
        for (i = 0; i < totalcap && pos < sizeof(buf); ++i) {
            pos += snprintf(buf + pos, (sizeof(buf) - pos), "%s%u",
                            ((pos > 0) ? ", " : ""), dpi[i].dpacketID);
        }
        g_debug("totalcap = %d, fwdcap = %d, flowContext->startOffset = %d,"
                " dpacketID = [%s]",
                totalcap, fwdcap, flowContext->startOffset, buf);
    }
#endif  /* YFP_DEBUG */

    /* Assume one message */
    numMessages = 1;

    /* Capture top-level data; determine whether forward or reverse direction
     * captured the client; capture the response codes; note bounds between
     * messages when multiple in a single conversation */
    for (count = flowContext->startOffset; count < totalcap; ++count) {
        current = ((count < fwdcap) ? &flow->val : &flow->rval);
        switch (dpi[count].dpacketID) {
          case 26:   /* Hello */
            if (rec->smtpHello.buf == NULL) {
                rec->smtpHello.buf = current->payload + dpi[count].dpacketCapt;
                rec->smtpHello.len = dpi[count].dpacketCaptLen;
            }
            if (msgData != current) {
                if (NULL == msgData) {
                    msgData = current;
                } else {
#if YFP_DEBUG
                    g_debug("msgData appears in both directions;"
                            " count = %d, ID = %d",
                            count, dpi[count].dpacketID);
#endif
                    break;
                }
            }
            if (dpi[count].dpacketCapt > maxMsgCapt) {
                maxMsgCapt = dpi[count].dpacketCapt;
            }
            break;
          case 29:   /* StartTLS */
            rec->smtpStartTLS = 1;
            break;
          case 30:   /* Response codes */
            responseCode = (fbVarfield_t *)
                fbBasicListAddNewElements(&rec->smtpResponseList, 1);
            responseCode->buf = current->payload + dpi[count].dpacketCapt;
            responseCode->len = dpi[count].dpacketCaptLen;
#if YFP_DEBUG
            ydpPayloadPrinter(current->payload, current->paylen,
                              dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                              "Response: ");
#endif
            break;
          case 38:   /* End of one message / Start of another */
            if (msgData != current) {
                if (NULL == msgData) {
                    msgData = current;
                } else {
#if YFP_DEBUG
                    g_debug("msgData appears in both directions;"
                            " count = %d, ID = %d",
                            count, dpi[count].dpacketID);
#endif
                    break;
                }
            }
            msgBound[numMessages] = current->payload + dpi[count].dpacketCapt;
            ++numMessages;
            if (dpi[count].dpacketCapt > maxMsgCapt) {
                maxMsgCapt = dpi[count].dpacketCapt;
            }
#if YFP_DEBUG
            g_debug("message separator #%d at offset %d",
                    numMessages - 1, dpi[count].dpacketCapt);
#endif
            break;
          case 28:   /* Size */
          case 31:   /* Subject */
          case 32:   /* To */
          case 33:   /* From */
          case 34:   /* File */
          case 35:   /* URL */
          case 36:   /* Header */
            if (msgData != current) {
                if (NULL == msgData) {
                    msgData = current;
                } else {
#if YFP_DEBUG
                    g_debug("msgData appears in both directions;"
                            " count = %d, ID = %d",
                            count, dpi[count].dpacketID);
#endif
                    break;
                }
            }
            if (dpi[count].dpacketCapt > maxMsgCapt) {
                maxMsgCapt = dpi[count].dpacketCapt;
            }
            break;
        }
    }

#if YFP_DEBUG
    g_debug("fwd = %p, fwdlen = %u, rev = %p, revlen = %u, msgData = %p,"
            " maxMsgCapt = %u",
            &flow->val, flow->val.paylen, &flow->rval, flow->rval.paylen,
            msgData, maxMsgCapt);
#endif  /* YFP_DEBUG */

    if (NULL == msgData) {
        fbSubTemplateListInit(&rec->smtpMessageList, 3,
                              YAF_SMTP_MESSAGE_TID, smtpMessageTemplate, 0);
        return rec;
    }

    /* the first message begins at the start of the payload */
    msgBound[0] = msgData->payload;

    /* if no data was captured within the last bounded message, decrement the
     * number of messages; otherwise, set the bound of the final message to
     * the end of the payload */
    if (msgData->payload + maxMsgCapt <= msgBound[numMessages - 1]) {
#if YFP_DEBUG
        g_debug("numMessages = %d, maxMsgCapt = %d,"
                " msgBound[nm-1] = %ld, fwdlen = %u,"
                " decrementing numMessages",
                numMessages, maxMsgCapt,
                msgBound[numMessages - 1] - msgData->payload, flow->val.paylen);
#endif
        --numMessages;
    } else {
#if YFP_DEBUG
        g_debug("numMessages = %d, maxMsgCapt = %d,"
                " msgBound[nm-1] = %ld, fwdlen = %u,"
                " setting msgBound to paylen",
                numMessages, maxMsgCapt,
                msgBound[numMessages - 1] - msgData->payload, flow->val.paylen);
#endif
        msgBound[numMessages] = msgData->payload + msgData->paylen;
    }

    /* Create the STL of messages */
    smtpEmail = ((yfSMTPMessage_t *)fbSubTemplateListInit(
                     &rec->smtpMessageList, 3,
                     YAF_SMTP_MESSAGE_TID, smtpMessageTemplate,
                     numMessages));

    /* FIXME: Consider changing the following loop to build the lists in one
     * pass instead of looping through the data once to count things and then
     * again to fill the lists. */

    /* Process each message */
    for (msgIndex = 0; msgIndex < numMessages; ++msgIndex) {
        msgBegin = msgBound[msgIndex];
        msgEnd = msgBound[msgIndex + 1];

        /* for IEs stored in basicLists or STLs, count the number of items to
         * know how big to make the lists. */
        numMatchesTo = 0;
        numMatchesFrom = 0;
        numMatchesFile = 0;
        numMatchesURL = 0;
        numMatchesHeader = 0;

        for (count = flowContext->startOffset; count < totalcap; ++count) {
            if (msgData->payload + dpi[count].dpacketCapt >= msgBegin &&
                (msgData->payload + dpi[count].dpacketCapt <= msgEnd))
            {
                switch (dpi[count].dpacketID) {
                  case 32:   /* To */
                    numMatchesTo++;
                    break;
                  case 33:   /* From */
                    numMatchesFrom++;
                    break;
                  case 34:   /* File */
                    numMatchesFile++;
                    break;
                  case 35:   /* URL */
                    numMatchesURL++;
                    break;
                  case 36:   /* Header */
                    numMatchesHeader++;
                    break;
                }
            }
        }
#if YFP_DEBUG
        g_debug("Message #%d (%ld--%ld): numTo = %d, numFrom = %d,"
                " numFile = %d, numURL = %d, numHeader = %d",
                msgIndex,
                msgBegin - msgData->payload, msgEnd - msgData->payload,
                numMatchesTo, numMatchesFrom,
                numMatchesFile, numMatchesURL, numMatchesHeader);
#endif

        /* Create the basicLists and STLs */
        smtpTo = (fbVarfield_t *)fbBasicListInit(
            &smtpEmail->smtpToList, 3, smtpElemTo, numMatchesTo);

        smtpFrom = (fbVarfield_t *)fbBasicListInit(
            &smtpEmail->smtpFromList, 3, smtpElemFrom, numMatchesFrom);

        smtpFilename = (fbVarfield_t *)fbBasicListInit(
            &smtpEmail->smtpFilenameList, 3, smtpElemFile, numMatchesFile);

        smtpURL = (fbVarfield_t *)fbBasicListInit(
            &smtpEmail->smtpURLList, 3, smtpElemURL, numMatchesURL);

        smtpHeader = (yfSMTPHeader_t *)fbSubTemplateListInit(
            &smtpEmail->smtpHeaderList, 3,
            YAF_SMTP_HEADER_TID, smtpHeaderTemplate, numMatchesHeader);

        /* Fill the lists we just created */
        for (count = flowContext->startOffset; count < totalcap; ++count) {
            if (msgData->payload + dpi[count].dpacketCapt >= msgBegin &&
                msgData->payload + dpi[count].dpacketCapt <= msgEnd)
            {
                switch (dpi[count].dpacketID) {
                  case 28:   /* Size */
                    smtpEmail->smtpSize = (uint32_t)strtoul(
                        (char *)(msgData->payload + dpi[count].dpacketCapt),
                        NULL, 10);
                    break;
                  case 31:   /* Subject */
                    if (NULL == smtpEmail->smtpSubject.buf) {
                        smtpEmail->smtpSubject.buf =
                            msgData->payload + dpi[count].dpacketCapt;
                        smtpEmail->smtpSubject.len = dpi[count].dpacketCaptLen;
#if YFP_DEBUG
                        ydpPayloadPrinter(
                            msgData->payload, msgData->paylen,
                            dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                            "Subject");
#endif
                    }
                    break;
                  case 32:   /* To */
                    smtpTo->buf = msgData->payload + dpi[count].dpacketCapt;
                    smtpTo->len = dpi[count].dpacketCaptLen;
                    smtpTo = fbBasicListGetNextPtr(&smtpEmail->smtpToList,
                                                   smtpTo);
#if YFP_DEBUG
                    ydpPayloadPrinter(
                        msgData->payload, msgData->paylen,
                        dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                        "To");
#endif
                    break;
                  case 33:   /* From */
                    smtpFrom->buf = msgData->payload + dpi[count].dpacketCapt;
                    smtpFrom->len = dpi[count].dpacketCaptLen;
                    smtpFrom = fbBasicListGetNextPtr(&smtpEmail->smtpFromList,
                                                     smtpFrom);
#if YFP_DEBUG
                    ydpPayloadPrinter(
                        msgData->payload, msgData->paylen,
                        dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                        "From");
#endif
                    break;
                  case 34:   /* Filename */
                    smtpFilename->buf = msgData->payload +
                        dpi[count].dpacketCapt;
                    smtpFilename->len = dpi[count].dpacketCaptLen;
                    smtpFilename = fbBasicListGetNextPtr(
                        &smtpEmail->smtpFilenameList, smtpFilename);
#if YFP_DEBUG
                    ydpPayloadPrinter(
                        msgData->payload, msgData->paylen,
                        dpi[count].dpacketCapt - 50,
                        dpi[count].dpacketCaptLen + 60,
                        "Filename");
#endif
                    break;
                  case 35:   /* URL */
                    smtpURL->buf = msgData->payload + dpi[count].dpacketCapt;
                    smtpURL->len = dpi[count].dpacketCaptLen;
                    smtpURL = fbBasicListGetNextPtr(&smtpEmail->smtpURLList,
                                                    smtpURL);
#if YFP_DEBUG
                    ydpPayloadPrinter(
                        msgData->payload, msgData->paylen,
                        dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                        "URL");
#endif
                    break;
                  case 36:   /* Header: split it at the ':' */
#if YFP_DEBUG
                    ydpPayloadPrinter(
                        msgData->payload, msgData->paylen,
                        dpi[count].dpacketCapt, dpi[count].dpacketCaptLen,
                        "Header");
#endif
                    smtpHeader->smtpKey.buf =
                        msgData->payload + dpi[count].dpacketCapt;
                    colon = memchr(smtpHeader->smtpKey.buf, (int)(':'),
                                   dpi[count].dpacketCaptLen);
                    if (NULL == colon) {
                        smtpHeader->smtpKey.buf = NULL;
                        g_debug("Unable to find ':' in Email header");
                        break;
                    }
                    smtpHeader->smtpKey.len = colon - smtpHeader->smtpKey.buf;

                    /* initialze value length to remainder of capture len */
                    smtpHeader->smtpValue.len =
                        dpi[count].dpacketCaptLen - smtpHeader->smtpKey.len;

                    /* Move over the colon and any whitespace */
                    do {
                        ++colon;
                        --smtpHeader->smtpValue.len;
                    } while (isspace(*colon) && smtpHeader->smtpValue.len > 0);
                    smtpHeader->smtpValue.buf = (uint8_t *)colon;

                    smtpHeader = fbSubTemplateListGetNextPtr(
                        &smtpEmail->smtpHeaderList, smtpHeader);
                    break;
                }
            }
        }
        smtpEmail = fbSubTemplateListGetNextPtr(&rec->smtpMessageList,
                                                smtpEmail);
    }
    return (void *)rec;
}

/*
 *  Registers the three SMTP templates with `session`: the top-level
 *  yaf_smtp template, its child yaf_smtp_message template, and the
 *  grandchild yaf_smtp_header template.  Each template gets its own
 *  freshly allocated template-info object naming its parent.
 *
 *  Returns TRUE when all three ydInitTemplate() calls succeed; returns
 *  FALSE as soon as one of them fails.
 */
gboolean ydpAddTemplates(
    fbSession_t  *session)
{
    /* basicList elements carried by the yaf_smtp_message template, in
     * registration order: smtpTo, smtpFrom, smtpFilename, smtpURL */
    static const uint16_t  messageListIds[] = {164, 163, 167, 329};
    /* basicList element carried by the yaf_smtp template: smtpResponse */
    const uint16_t         responseListId = 169;
    const fbInfoElement_t *blElem;
    fbTemplateInfo_t      *tmplInfo;
    unsigned int           i;

    /* ---- yaf_smtp (top level) ---- */
    tmplInfo = fbTemplateInfoAlloc();
    fbTemplateInfoInit(tmplInfo, YAF_SMTP_FLOW_NAME, YAF_SMTP_FLOW_DESC,
                       SMTP_PORT_NUMBER, FB_TMPL_MD_LEVEL_1);

    /* the ruleset carries no IE information, so the basicList metadata
     * is attached by hand */
    blElem = ydLookupNamedBlByID(CERT_PEN, responseListId);
    if (NULL != blElem) {
        fbTemplateInfoAddBasicList(tmplInfo, blElem->ent, blElem->num,
                                   CERT_PEN, responseListId);
    }

    if (!ydInitTemplate(&smtpTemplate, session, yaf_smtp_spec,
                        tmplInfo, YAF_SMTP_FLOW_TID, 0))
    {
        return FALSE;
    }

    /* ---- yaf_smtp_message (child of yaf_smtp) ---- */
    tmplInfo = fbTemplateInfoAlloc();
    fbTemplateInfoInit(tmplInfo, YAF_SMTP_MESSAGE_NAME, YAF_SMTP_MESSAGE_DESC,
                       SMTP_PORT_NUMBER, YAF_SMTP_FLOW_TID);

    /* same manual metadata step, once per basicList in the message */
    for (i = 0; i < sizeof(messageListIds) / sizeof(messageListIds[0]); ++i) {
        blElem = ydLookupNamedBlByID(CERT_PEN, messageListIds[i]);
        if (NULL == blElem) {
            continue;
        }
        fbTemplateInfoAddBasicList(tmplInfo, blElem->ent, blElem->num,
                                   CERT_PEN, messageListIds[i]);
    }

    if (!ydInitTemplate(&smtpMessageTemplate, session, yaf_smtp_message_spec,
                        tmplInfo, YAF_SMTP_MESSAGE_TID, 0))
    {
        return FALSE;
    }

    /* ---- yaf_smtp_header (child of yaf_smtp_message) ---- */
    tmplInfo = fbTemplateInfoAlloc();
    fbTemplateInfoInit(tmplInfo, YAF_SMTP_HEADER_NAME, YAF_SMTP_HEADER_DESC,
                       SMTP_PORT_NUMBER, YAF_SMTP_MESSAGE_TID);

    if (!ydInitTemplate(&smtpHeaderTemplate, session, yaf_smtp_header_spec,
                        tmplInfo, YAF_SMTP_HEADER_TID, 0))
    {
        return FALSE;
    }

    return TRUE;
}

/*
 *  Clears the lists held by the SMTP record attached to `flowContext`:
 *  the response basicList, then the four basicLists and the header
 *  subTemplateList of every message, and finally the message
 *  subTemplateList itself.
 */
void ydpFreeRec(
    ypDPIFlowCtx_t  *flowContext)
{
    yfSMTPFlow_t    *smtpFlow = (yfSMTPFlow_t *)flowContext->rec;
    yfSMTPMessage_t *msg;

    fbBasicListClear(&smtpFlow->smtpResponseList);

    /* the per-message lists must be cleared while the message list still
     * holds the entries; the message list is cleared after the loop */
    for (msg = fbSubTemplateListGetNextPtr(&smtpFlow->smtpMessageList, NULL);
         NULL != msg;
         msg = fbSubTemplateListGetNextPtr(&smtpFlow->smtpMessageList, msg))
    {
        fbBasicListClear(&msg->smtpToList);
        fbBasicListClear(&msg->smtpFromList);
        fbBasicListClear(&msg->smtpFilenameList);
        fbBasicListClear(&msg->smtpURLList);
        fbSubTemplateListClear(&msg->smtpHeaderList);
    }

    fbSubTemplateListClear(&smtpFlow->smtpMessageList);
}
#endif  /* YAF_ENABLE_DPI */


/**
 * ydpInitialize
 *
 * Compiles the PCRE expressions used to recognize SMTP and, when YAF is
 * built with DPI support, the expressions used to extract SMTP fields
 * from the payload; in that build it also looks up the information
 * elements used for the SMTP basicLists.
 *
 * @param argc          not used by this implementation
 * @param argv          not used by this implementation
 * @param applabel      not used by this implementation
 * @param applabelOnly  not used by this implementation
 * @param err           not used by this implementation
 *
 * @sideeffect assigns the smtpRegex* variables and (DPI builds only) the
 * smtpElem* variables, all of which are declared outside this function
 *
 * @return 1 if every expression compiled and every element was found,
 * 0 otherwise
 */
int
ydpInitialize(
    int             argc,
    char           *argv[],
    uint16_t        applabel,
    gboolean        applabelOnly,
    GError        **err)
{
#if YAF_ENABLE_DPI
    /* many of these regexes use "[\\t -~]" to denote printable ASCII with
     * whitespace and "[!-~]" for printable ASCII without whitespace */

    /* this matches an email address: an optional source-route prefix
     * followed by either an unquoted or a double-quoted address.
     *
     * NOTE(review): the source-route prefix accepts exactly one character
     * per "@domain" entry; confirm whether a '+' quantifier was intended */
#define EMAIL_ADDR  ""                          \
        "(?:@[-A-Z0-9.](?:,@[-A-Z0-9.])*:)?"    \
        "(?:[!#-+\\-./0-9=?@A-Z^_`a-z{|}~]+|"   \
        "\"(?:[ !#-\\[\\]-~]+|\\\\[ -~])*\")"

    /* a regex to use in mail headers (fields) that matches a single
     * whitespace character on a line or a complete folded header */
#define FOLD_SPACE  "(?:[\\t ]|\\r\\n[\\t ])"

    /* a regex to match a single char in a field-name. RFC2822 2.2: field-name
     * is any ASCII from decimal 33(!) to 126(~) inclusive except 58(:) */
#define FIELD_NAME  "[!-9;-~]"
    /* a regex to use in fields that matches a complete folded header or a
     * single character in a field-body: printable ascii, space, tab */
#define FIELD_BODY  "(?:[\\t -~]|\\r\\n[\\t ])"

    const char  smtpStringDataBdat[] =
        "(?im)^(?:DATA|BDAT +(\\d+(?:| +LAST)))\\r\\n";
    const char  smtpStringBdatLast[] =
        "(?im)^BDAT +(\\d+) +LAST\\r\\n";
    const char  smtpStringEndData[] = "\\r\\n\\.\\r\\n";
    const char  smtpStringBlankLine[] = "\\r\\n\\r\\n";

    const char  smtpStringHello[] =
        "(?im)^((?:HELO|EHLO) [!-~]+)\\r\\n";
    const char  smtpStringSize[] = "(?im)^MAIL FROM:.+ SIZE=(\\d+)\\s";
    const char  smtpStringStartTLS[] = "(?im)^STARTTLS\\r\\n";
    /* limit responses to the 220 welcome banner and error codes.  The
     * line terminator is written as the regex escapes \r\n, like every
     * other pattern here, rather than as raw CR LF bytes in the pattern */
    const char  smtpStringResponse[] =
        "(?m)^((?:220|[45][0-5][0-9])[- ][\\t -~]*)\\r\\n";

    const char  smtpStringTo[] =
        "(?im)^RCPT TO: ?<?(" EMAIL_ADDR ")>?(?: |\\r\\n)";
    const char  smtpStringFrom[] =
        "(?im)^MAIL FROM: ?<?(" EMAIL_ADDR ")>?(?: |\\r\\n)";

    const char  smtpStringHeader[] =
        "(?m)^(" FIELD_NAME "+:" FIELD_BODY "+)\\r\\n";
    const char  smtpStringSubject[] =
        "(?im)^Subject:" FOLD_SPACE "*(" FIELD_BODY "+)\\r\\n";
    /* a filename may be in double quotes (which supports \-quoting of a
     * character) or unquoted with a restricted character set */
    const char  smtpStringFilename[] =
        "(?im)^Content-Disposition:" FIELD_BODY "*;" FOLD_SPACE "*filename=("
        "\"(?:[\\t !#-\\[\\]-~]|\\\\.|\\r\\n[\\t ])*\"|"
        "[!#-+\\-./0-9=?A-Z^_`a-z{|}~]+"
        ")";

    /* The word-boundary assertion must reach PCRE as the two characters
     * '\' 'b'; a single backslash in the C literal would instead embed a
     * backspace byte (0x08) in the pattern and keep it from matching
     * ordinary URLs. */
    const char  smtpStringURL[] =
        "https?://(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}"
        "\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_\\+.~#?&/=]*)";

    fbInfoModel_t *model = ydGetDPIInfoModel();

#endif  /* YAF_ENABLE_DPI */

    /* used for application labeling; compiled in every build */
    const char  smtpStringApplabel[] =
        "(?i)^\\s*(?:(?:HE|EH)LO\\b|MAIL FROM:|RCPT TO:|(2[25]0[ -].*E?SMTP))";

    smtpRegexApplabel = ydPcreCompile(smtpStringApplabel, 0);
    if (NULL == smtpRegexApplabel) {
        return 0;
    }

#if !YAF_ENABLE_DPI
    return 1;
#else
    /* compile everything first, then check the results in one place */
    smtpRegexBdatLast = ydPcreCompile(smtpStringBdatLast, 0);
    smtpRegexBlankLine = ydPcreCompile(smtpStringBlankLine, 0);
    smtpRegexDataBdat = ydPcreCompile(smtpStringDataBdat, 0);
    smtpRegexEndData = ydPcreCompile(smtpStringEndData, 0);

    smtpRegexFilename = ydPcreCompile(smtpStringFilename, 0);
    smtpRegexFrom = ydPcreCompile(smtpStringFrom, 0);
    smtpRegexHeader = ydPcreCompile(smtpStringHeader, 0);
    smtpRegexHello = ydPcreCompile(smtpStringHello, 0);

    smtpRegexResponse = ydPcreCompile(smtpStringResponse, 0);
    smtpRegexSize = ydPcreCompile(smtpStringSize, 0);
    smtpRegexStartTLS = ydPcreCompile(smtpStringStartTLS, 0);
    smtpRegexSubject = ydPcreCompile(smtpStringSubject, 0);

    smtpRegexTo = ydPcreCompile(smtpStringTo, 0);
    smtpRegexURL = ydPcreCompile(smtpStringURL, 0);

    /* elements used when building the SMTP basicLists */
    smtpElemFile = fbInfoModelGetElementByName(model, "smtpFilename");
    smtpElemFrom = fbInfoModelGetElementByName(model, "smtpFrom");
    smtpElemResponse = fbInfoModelGetElementByName(model, "smtpResponse");
    smtpElemTo = fbInfoModelGetElementByName(model, "smtpTo");
    smtpElemURL = fbInfoModelGetElementByName(model, "smtpURL");

    if (!smtpRegexBdatLast || !smtpRegexBlankLine || !smtpRegexDataBdat ||
        !smtpRegexEndData || !smtpRegexFilename || !smtpRegexFrom ||
        !smtpRegexHeader ||  !smtpRegexHello || !smtpRegexResponse ||
        !smtpRegexSize || !smtpRegexStartTLS || !smtpRegexSubject ||
        !smtpRegexTo || !smtpRegexURL)
    {
        return 0;
    }

    if (!smtpElemFile || !smtpElemFrom || !smtpElemResponse ||
        !smtpElemTo || !smtpElemURL)
    {
        g_warning("Unable to find all the smtp elements in the info model");
        return 0;
    }

    return 1;
#endif  /* #else of #if !YAF_ENABLE_DPI */
}
