/*
	SpamTestBuddy 0.95

	Copyright (C) 2007 Jem E. Berkes
	http://www.sysdesign.ca/
	
	This software may be used freely for personal, non-commercial use and
	educational use. It may not be re-distributed, re-sold, included in a
	commercial package or modified for redistribution without the author's
	permission. The user of the software assumes all risk associated with
	installation and use of this software. The author makes no guarantees
	of performance or fitness for a particular purpose. There is no warranty.
	
	Configuration is loaded from $HOME/.spamtestbuddy
	
	Returns 0 on success (configuration loaded, email processed) with result to stdout
	Returns 1 on failure (bad configuration) with errors to stderr
	
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "spamtestbuddy.h"

#define VERSION		"0.95"

/* #define VERBOSE if you want some useful debugging output to stderr */

/*
	Forward declarations for the functions defined below.
	show_config() is only defined when VERBOSE is defined.
*/
void process_headers();
int test_dns_problems(const char* ipstr);
void test_file_ip(const char* ipstr);
void test_header_substr(const char* linebuf);
void test_header_yes(const char* linebuf);
void test_header_float(const char* linebuf);
void test_dnsbl(unsigned int* ip);
const char* resolve_host(const char* host);
void output_test_results(const char* ipstr);
void show_matches(struct testentry* pos);
int ip_is_local(const char* ipstr);
int load_configuration(FILE* config);
int add_test(struct testentry** chain, float weight, const char* testname, const char* param);
int add_test_old(struct testentry** chain, const char* prefix, const char* testname, const char* param);
void show_config();

/* Global variables (program instance handles one email) */
/* Running total of the weights of all matched tests; compared against threshold when results are printed */
float score = 0;

/*
	Loaded from configuration by load_configuration().
	Each testentry pointer is the head of a linked list built by add_test();
	the trailing comment names the configuration keyword that fills it.
*/
float threshold = 0;				/* SpamThreshold */
/* List of address prefixes split on LIST_TOKENS; a Received IP starting with one of them is skipped */
char local_hops[MAXLINEBUF] = "";		/* SkipReceived */
/* Only the first entry of this chain is consulted when scoring (see process_headers) */
struct testentry* testdnsproblems = NULL;	/* TestDnsProblems */
struct testentry* testfileip = NULL;		/* TestFileIP */
struct testentry* testheadersubstr = NULL;	/* TestHeaderSubstr */
struct testentry* testheaderyes = NULL;		/* TestHeaderYes */
struct testentry* testheaderfloat = NULL;	/* TestHeaderFloat */
struct testentry* testdnsbl = NULL;		/* TestDNSBL */


/*
	Program entry point.

	Loads the configuration from $HOME/.spamtestbuddy, then filters the email
	arriving on stdin to stdout: the headers go through process_headers() and
	the remainder of the message is copied unchanged.

	Returns 0 when the configuration loaded, 1 (with a message on stderr) when
	HOME is unset, the file cannot be opened, or the configuration is rejected.
*/
int main()
{
	char configfn[MAXLINEBUF];
	char chunk[MAXLINEBUF];
	FILE* configfile;
	const char* home = getenv("HOME");

	if (home == NULL)
	{
		fprintf(stderr, "Can't load configuration, HOME undefined\n");
		return 1;
	}

	snprintf(configfn, sizeof(configfn), "%s/.spamtestbuddy", home);
	configfile = fopen(configfn, "r");
	if (configfile == NULL)
	{
		perror(configfn);
		return 1;
	}

	if (load_configuration(configfile) != 0)
	{
		fprintf(stderr, "load_configuration() failed, aborting\n");
		fclose(configfile);
		return 1;
	}
	fclose(configfile);

	#ifdef VERBOSE
	show_config();
	#endif

	/* Headers first (tests and result headers), then the body verbatim */
	process_headers();
	while (fgets(chunk, sizeof(chunk), stdin) != NULL)
		fputs(chunk, stdout);
	return 0;
}


/*
	Read email input from stdin until the blank line that ends the headers,
	echoing each header line to stdout as it is read.

	The sender IP is taken from the first "Received: from " line whose last
	'[' is followed by a dotted quad that ip_is_local() does not reject.
	Every other header line is handed to the TestHeader* functions.

	At the blank line the IP based tests are run (only if an IP was found),
	output_test_results() prints the result headers, and the function returns;
	the blank line itself is not echoed here. If stdin ends before a blank
	line is seen, the function returns without printing any results.

	Input is read in chunks of at most MAXLINEBUF-1 characters. A physical
	line longer than that arrives as several chunks; only the first chunk is
	examined, the others are just echoed. Without this, a leftover chunk
	holding only "\n" would be mistaken for the end of the headers.
*/
void process_headers()
{
	int got_ip = 0;
	int at_line_start = 1;			/* does the next chunk begin a new physical line? */
	unsigned int ip[4] = {0, 0, 0, 0};	/* sender's IP address */
	char linebuf[MAXLINEBUF], ipstr[MAXLINEBUF] = "none";
	while (fgets(linebuf, sizeof(linebuf), stdin))
	{
		size_t len = strlen(linebuf);
		int starts_line = at_line_start;

		/* A full buffer with no trailing newline means the line continues in the next chunk */
		at_line_start = !((len == sizeof(linebuf)-1) && (linebuf[len-1] != '\n'));

		if (!starts_line)
		{
			/* Remainder of an over-long line: pass it through untouched */
			printf("%s", linebuf);
			continue;
		}

		if (*linebuf == '\n')
		{
			/* Reached end of headers */
			/* TestDnsProblems */
			if (got_ip && testdnsproblems && test_dns_problems(ipstr))
			{
				testdnsproblems->match = 1;	/* yes, there is a DNS problem */
				score += testdnsproblems->weight;
			}
			/* TestFileIP */
			if (got_ip)
				test_file_ip(ipstr);	/* scores applied in function */
			/* TestDNSBL */
			if (got_ip)
				test_dnsbl(ip);		/* scores applied in function */

			output_test_results(ipstr);	/* if haven't got_ip uses default ipstr=none */
			return;
		}
		else if (!got_ip && (strncmp(linebuf, "Received: from ", 15)==0))
		{
			/* The address is expected in the last [a.b.c.d] on the line */
			char* bracket = strrchr(linebuf, '[');
			if (bracket)
			{
				if ((sscanf(bracket+1, "%u.%u.%u.%u", &ip[0], &ip[1], &ip[2], &ip[3]) == 4) &&
				(ip[0]<256) && (ip[1]<256) && (ip[2]<256) && (ip[3]<256))
				{
					snprintf(ipstr, sizeof(ipstr), "%u.%u.%u.%u", ip[0], ip[1], ip[2], ip[3]);
					if (ip_is_local(ipstr))
						strcpy(ipstr, "none");	/* skipped hop, keep looking */
					else
					{
						got_ip = 1;
						#ifdef VERBOSE
						fprintf(stderr, "Parsed sender IP %s\n", ipstr);
						#endif
					}
				}
			}
		}
		else
		{
			/* For the tests on headers, scoring is done in the test_ function */
			test_header_substr(linebuf);	/* TestHeaderSubstr */
			test_header_yes(linebuf);	/* TestHeaderYes */
			test_header_float(linebuf);	/* TestHeaderFloat */
		}
		printf("%s", linebuf);	/* output the line back to stdout */
	}
}




/*
	TestHeaderSubstr: compare one header line against every configured prefix.
	An entry matches when the line begins with its param; the entry is then
	flagged and its weight added to the score. Entries that already matched
	are skipped, so each one counts at most once per email.
*/
void test_header_substr(const char* linebuf)
{
	struct testentry* entry;
	for (entry = testheadersubstr; entry != NULL; entry = entry->next)
	{
		if (entry->match)
			continue;
		if (strncmp(linebuf, entry->param, strlen(entry->param)) != 0)
			continue;
		entry->match = 1;
		score += entry->weight;
	}
}



/*
	TestHeaderYes: for every entry whose param is a prefix of this header
	line, look at the first whitespace-delimited word after the prefix.
	If that word is "yes", "true" or "spam" (case-insensitive) the entry is
	flagged and its weight added to the score. Entries that already matched
	are skipped.
*/
void test_header_yes(const char* linebuf)
{
	struct testentry* pos;
	for (pos = testheaderyes; pos; pos = pos->next)
	{
		/*
			Large enough for every accepted keyword. The scan is width-limited
			so the copy cannot overrun whatever the length of the line; a
			longer word is cut at 15 characters and so cannot equal any of
			the keywords, which leaves the result unchanged.
		*/
		char word[16] = "";
		size_t namelen;

		if (pos->match)
			continue;
		namelen = strlen(pos->param);
		if (strncmp(linebuf, pos->param, namelen) != 0)
			continue;

		/* Found the right line */
		if ((sscanf(linebuf+namelen, "%15s", word) == 1) &&
		((strcasecmp(word, "yes")==0)||(strcasecmp(word, "true")==0)||(strcasecmp(word, "spam")==0)))
		{
			pos->match = 1;
			score += pos->weight;
		}
	}
}

/*
	TestHeaderFloat: for every entry whose param is a prefix of this header
	line, find the first digit or '.' after the prefix and parse a number
	there. If the number is in the range 0 - 1.0001 the entry is flagged and
	weight * number is added to the score. Entries that already matched are
	skipped.

	The text between the prefix and the number may be empty, so a value that
	directly follows the header name (e.g. "X-Prob:0.9") is accepted too.
*/
void test_header_float(const char* linebuf)
{
	struct testentry* pos;
	for (pos = testheaderfloat; pos; pos = pos->next)
	{
		const char* rest;
		float value = 0;
		size_t namelen;

		if (pos->match)
			continue;
		namelen = strlen(pos->param);
		if (strncmp(linebuf, pos->param, namelen) != 0)
			continue;

		/*
			Skip up to the number with strcspn rather than a "%*[^0-9.]"
			directive: a scanset has to match at least one character, so the
			directive fails when the number comes straight after the prefix.
		*/
		rest = linebuf + namelen;
		rest += strcspn(rest, "0123456789.");

		/* Start parsing on the number, which must be 0 - 1.0001 */
		if ((sscanf(rest, "%f", &value) == 1)
			&& (value >= 0) && (value < 1.0001))
		{
			pos->match = 1;
			score += (pos->weight * value);
		}
	}
}


/*
	Test IP against files containing IP lists
	The function is called once per email
*/
void test_file_ip(const char* ipstr)
{
	struct testentry* pos = testfileip;
	while (pos)
	{
		FILE* localfile = fopen(pos->param, "r");
		if (localfile)
		{
			char fileline[MAXLINEBUF];
			while (!pos->match && fgets(fileline, sizeof(fileline), localfile))
			{
				char* end = strchr(fileline, '\r');
				if (!end) end = strchr(fileline, '\n');
				if (end) *end = '\0';
				/* do exact comparison */
				if (strcmp(ipstr, fileline) == 0)
				{
					pos->match = 1;
					score += pos->weight;
				}
			}
			fclose(localfile);
		}
		else
			perror(pos->param);	/* error to stderr if could not open file */
		
		pos = pos->next;
	}
}


/*
	TestDNSBL: query every configured DNSBL zone for the sender IP and update
	scores. Called once per email, so all zones are checked here.

	ip points to the four octets of the address, in the order they are
	written in dotted form. The name looked up is the octets in reverse
	order followed by the zone. resolve_host() caches the last lookup, so
	consecutive entries that use the same zone cost one DNS query.

	An entry's param is one of:
		zone		response must start with the default "127.0."
		zone^str	response must start with str
		zone$str	response must end with str
	If both '^' and '$' appear, '^' is the operator and everything after it,
	including the '$', is the string to match at the start of the response.
*/
void test_dnsbl(unsigned int* ip)
{
	struct testentry* pos;
	for (pos = testdnsbl; pos; pos = pos->next)
	{
		char hostname[MAXLINEBUF*2];
		char zonespec[MAXLINEBUF];
		const char* matchstr = "127.0.";
		const char* response;
		char* marker;
		int right_aligned = 0;

		/* Work on a private copy because the operator is overwritten below */
		snprintf(zonespec, sizeof(zonespec), "%s", pos->param);

		marker = strchr(zonespec, '^');		/* e.g. example.com^127.0.0. */
		if (marker == NULL)
		{
			marker = strchr(zonespec, '$');	/* e.g. example.com$.5 */
			right_aligned = (marker != NULL);
		}
		if (marker)
		{
			/* zonespec now holds just the zone name */
			*marker = '\0';
			matchstr = marker+1;
		}

		/* construct reverse octet format hostname to look up under zone */
		snprintf(hostname, sizeof(hostname), "%u.%u.%u.%u.%s", ip[3], ip[2], ip[1], ip[0], zonespec);
		response = resolve_host(hostname);
		if (response)
		{
			size_t len = strlen(matchstr);
			size_t resplen = strlen(response);
			const char* start = response;

			/*
				For a right aligned match compare against the tail of the
				response. The lengths are compared first so that no pointer
				before the start of the response is ever formed; a response
				shorter than matchstr is compared from its beginning.
			*/
			if (right_aligned && (resplen > len))
				start = response + (resplen - len);

			if (strncmp(start, matchstr, len) == 0)
			{
				pos->match = 1;
				score += pos->weight;
			}
		}
		/* keep processing, check all DNSBLs for this IP */
	}
}


/*
	Resolve a host name to the text form of its first address.

	Returns a pointer to a static buffer holding the dotted address, or NULL
	when the lookup produced no address.

	The most recent query and its answer are remembered. A call with the same
	host as the previous call is answered from that memory without calling
	gethostbyname() again; this includes a previous empty answer, which again
	yields NULL. Used by test_dnsbl.
*/
const char* resolve_host(const char* host)
{
	static char query[MAXLINEBUF*2] = "";
	static char result[MAXLINEBUF] = "";

	int same_as_last = (strcmp(host, query) == 0);
	if (!same_as_last)
	{
		/* New question: remember it and ask DNS */
		struct hostent* entry;
		strncpy(query, host, sizeof(query)-1);
		entry = gethostbyname(query);

		*result = '\0';		/* stays empty unless an address is found */
		if (entry && (entry->h_addr_list) && (entry->h_addr_list[0]))
			strncpy(result, inet_ntoa(*(struct in_addr*)(entry->h_addr_list[0])), sizeof(result)-1);
	}

	/* An empty result stands for "no address" */
	return (*result) ? result : NULL;
}




/*
	Check that forward and reverse DNS agree for an address:
	1. the IP address resolves to a host name (reverse dns)
	2. one of that host name's addresses is this same IP (forward dns)

	Returns 0 when both hold. Returns 1 when ipstr is not a valid address,
	when there is no reverse name, when the name does not resolve, or when
	none of its addresses equals ipstr.
*/
int test_dns_problems(const char* ipstr)
{
	struct in_addr addr;
	struct hostent* entry;
	char hostname[MAXLINEBUF];
	int i;

	memset(&addr, 0, sizeof(addr));
	addr.s_addr = inet_addr(ipstr);
	if (addr.s_addr == INADDR_NONE)
		return 1;	/* not valid IP address */

	/* reverse dns */
	entry = gethostbyaddr((char*)&addr, sizeof(addr), AF_INET);
	if (!entry || !(entry->h_name))
		return 1;

	/* Take a private copy of the name before issuing the next lookup */
	memset(hostname, 0, sizeof(hostname));
	strncpy(hostname, entry->h_name, sizeof(hostname)-1);

	/* forward dns: the name could be fake, so resolve it back */
	entry = gethostbyname(hostname);
	if (!entry || !(entry->h_addr_list))
		return 1;

	for (i = 0; entry->h_addr_list[i] != NULL; i++)
	{
		const char* candidate = inet_ntoa(*(struct in_addr*)(entry->h_addr_list[i]));
		if (candidate && (strcmp(candidate, ipstr) == 0))
			return 0;	/* both reverse and matching forward dns exist */
	}
	return 1;		/* name resolves, but not to this address */
}


/*
	Tell whether ipstr belongs to one of the SkipReceived networks.

	local_hops is treated as a list of entries separated by any of the
	characters in LIST_TOKENS. Returns 1 as soon as an entry is found that
	ipstr begins with, 0 when no entry is a prefix of ipstr.
*/
int ip_is_local(const char* ipstr)
{
	const char* cursor = local_hops;
	for (;;)
	{
		size_t toklen;

		/* step over separators to the start of the next entry */
		cursor += strspn(cursor, LIST_TOKENS);
		if (*cursor == '\0')
			return 0;		/* ipstr does not match any */

		toklen = strcspn(cursor, LIST_TOKENS);
		if (strncmp(cursor, ipstr, toklen) == 0)
			return 1;		/* ipstr matches a network in the list */
		cursor += toklen;
	}
}


/*
	Show results of all tests done (scores already applied)
*/
void output_test_results(const char* ipstr)
{
	const char* label;
	if (score >= threshold)
		label = "OVER";
	else
		label = "UNDER";
	printf("X-SpamTestBuddy: %s score=%.4f threshold=%.4f ip=%s\n", label, score, threshold, ipstr);
	printf("X-SpamTestBuddy-Tests: ver=" VERSION " ");
	/* Show testnames that go along with all tests which matched */
	show_matches(testdnsproblems);
	show_matches(testfileip);
	show_matches(testheadersubstr);
	show_matches(testheaderyes);
	show_matches(testheaderfloat);
	show_matches(testdnsbl);
	puts("\n");
}


/*
	Print the testname of every matched entry in the chain starting at pos,
	each followed by a single space. Entries that did not match print nothing.
*/
void show_matches(struct testentry* pos)
{
	while (pos != NULL)
	{
		if (pos->match)
			printf("%s ", pos->testname);
		pos = pos->next;
	}
}


/*
	Load and parse the configuration file, one line at a time.

	Lines starting with '#', CR or LF are ignored. Every other line must be
	one of:
		SpamThreshold <number>
		SkipReceived <list>
		<weight> * <TestType> <name> [<param>]	(custom weight)
		+<TestType> <name> [<param>]		(old style)
		-<TestType> <name> [<param>]		(old style)
	TestDnsProblems takes no param. For TestHeaderSubstr the param is the
	rest of the line; for the other types it is a single word.

	Returns 0 when every line was accepted. Returns nonzero at the first line
	that fits none of the forms (reported on stderr) or when adding a test
	fails.
*/
int load_configuration(FILE* config)
{
	/* One row per test type; the rows are tried from top to bottom */
	static const struct testsyntax
	{
		const char* weighted;		/* scan format of the custom weight form */
		const char* legacy;		/* scan format of the old +/- form */
		struct testentry** chain;	/* list that receives the new test */
		int fields;			/* conversions that must succeed for a match */
	} syntax[] = {
		{ "%f * TestDnsProblems %s", "%[+-]TestDnsProblems %s", &testdnsproblems, 2 },
		{ "%f * TestFileIP %s %s", "%[+-]TestFileIP %s %s", &testfileip, 3 },
		{ "%f * TestHeaderSubstr %s %[^\r\n]", "%[+-]TestHeaderSubstr %s %[^\r\n]", &testheadersubstr, 3 },
		{ "%f * TestHeaderYes %s %s", "%[+-]TestHeaderYes %s %s", &testheaderyes, 3 },
		{ "%f * TestHeaderFloat %s %s", "%[+-]TestHeaderFloat %s %s", &testheaderfloat, 3 },
		{ "%f * TestDNSBL %s %s", "%[+-]TestDNSBL %s %s", &testdnsbl, 3 }
	};
	const size_t ntypes = sizeof(syntax) / sizeof(syntax[0]);
	char linebuf[MAXLINEBUF];

	while (fgets(linebuf, sizeof(linebuf), config))
	{
		float number;
		char prefix[MAXLINEBUF];
		char testname[MAXLINEBUF];
		char param[MAXLINEBUF];
		int handled = 0;
		size_t i;

		/* comments and empty lines */
		switch (*linebuf)
		{
			case '#':
			case '\r':
			case '\n':
				continue;
			default:
				break;
		}

		if (sscanf(linebuf, "SpamThreshold %f", &number) == 1)
		{
			threshold = number;
			continue;
		}
		if (sscanf(linebuf, "SkipReceived %[^\r\n]", param) == 1)
		{
			strncpy(local_hops, param, sizeof(local_hops));
			continue;
		}

		/* Scan for custom weighted test definitions first */
		for (i = 0; !handled && (i < ntypes); i++)
		{
			if (sscanf(linebuf, syntax[i].weighted, &number, testname, param) != syntax[i].fields)
				continue;
			/* a two-field definition carries no param */
			if (add_test(syntax[i].chain, number, testname, (syntax[i].fields == 3) ? param : ""))
				return 1;
			handled = 1;
		}

		/* Now scan for old style, no weight in test definition (defaults to unit weight) */
		for (i = 0; !handled && (i < ntypes); i++)
		{
			if (sscanf(linebuf, syntax[i].legacy, prefix, testname, param) != syntax[i].fields)
				continue;
			if (add_test_old(syntax[i].chain, prefix, testname, (syntax[i].fields == 3) ? param : ""))
				return 1;
			handled = 1;
		}

		if (!handled)
		{
			fprintf(stderr, "Bad configuration line: %s", linebuf);
			return 1;
		}
	}
	return 0;
}


/*
	Allocate a new test entry and append it to the end of a chain.

	chain		address of the list head; *chain may be NULL (empty list)
	weight		score contribution of the test when it matches
	testname	name shown in the results; stored with a leading '-' when
			weight is negative and a leading '+' otherwise
	param		test specific parameter, may be ""

	testname and param are copied, cut to fit the entry's buffers if
	necessary, and are always left NUL-terminated.

	Returns 0 on success, nonzero on failure (out of memory, reported on
	stderr; the chain is left unchanged).
*/
int add_test(struct testentry** chain, float weight, const char* testname, const char* param)
{
	struct testentry** link;
	struct testentry* newtest = calloc(1, sizeof(*newtest));

	if (newtest == NULL)
	{
		fprintf(stderr, "Out of memory adding test %s\n", testname);
		return 1;
	}

	newtest->weight = weight;
	/* Modify testname to include + or - for informative purposes */
	newtest->testname[0] = (weight < 0.0) ? '-' : '+';
	/* snprintf always terminates; strncpy would not when the source fills the buffer */
	snprintf(newtest->testname+1, sizeof(newtest->testname)-1, "%s", testname);
	snprintf(newtest->param, sizeof(newtest->param), "%s", param);

	/* Walk to the terminating NULL link, which is *chain itself for an empty list */
	for (link = chain; *link != NULL; link = &(*link)->next)
		;
	*link = newtest;
	return 0;
}


/*
	Add a test written in the syntax used before version 0.95, which had no
	custom score weight. The first character of prefix selects the weight:
	'+' gives 1.0 and '-' gives -1.0.

	Returns the result of add_test(), or 1 (with a message on stderr) when
	prefix starts with neither '+' nor '-'.
*/
int add_test_old(struct testentry** chain, const char* prefix, const char* testname, const char* param)
{
	switch (*prefix)
	{
		case '-':
			return add_test(chain, -1.0, testname, param);
		case '+':
			return add_test(chain, +1.0, testname, param);
		default:
			break;
	}
	fprintf(stderr, "Missing +/- in configuration test\n");
	return 1;
}


#ifdef VERBOSE
/*
	Print one line per entry of a chain to stderr. fmt is the complete line
	format and receives the entry's weight, testname and param, in that order.
*/
static void show_chain(const char* fmt, const struct testentry* pos)
{
	while (pos != NULL)
	{
		fprintf(stderr, fmt, pos->weight, pos->testname, pos->param);
		pos = pos->next;
	}
}

/*
	Debugging aid, compiled only when VERBOSE is defined.
	Dumps the loaded configuration to stderr: version, threshold, the
	SkipReceived entries one per line, then a table of the configured tests.
	For TestDnsProblems only the first entry of its chain is listed.
*/
void show_config()
{
	char networks[MAXLINEBUF];
	char* tok;

	/* strtok modifies its input, so split a copy of the list */
	strncpy(networks, local_hops, sizeof(networks));

	fprintf(stderr, "SpamTestBuddy " VERSION "\n\n");
	fprintf(stderr, "SpamThreshold = %f\n", threshold);
	fprintf(stderr, "SkipReceived =\n");
	for (tok = strtok(networks, LIST_TOKENS); tok != NULL; tok = strtok(NULL, LIST_TOKENS))
		fprintf(stderr, "\t%s\n", tok);

	fprintf(stderr, "\nWeight\tTest\t\tName\tParam\n\n");
	if (testdnsproblems)
		fprintf(stderr, "%+.4f\tTestDnsProblems\t%s\tN/A\n", testdnsproblems->weight, testdnsproblems->testname);
	show_chain("%+.4f\tTestFileIP\t%s\t%s\n", testfileip);
	show_chain("%+.4f\tTestHeaderSubstr\t%s\t%s\n", testheadersubstr);
	show_chain("%+.4f\tTestHeaderYes\t%s\t%s\n", testheaderyes);
	show_chain("%+.4f\tTestHeaderFloat\t%s\t%s\n", testheaderfloat);
	show_chain("%+.4f\tTestDNSBL\t%s\t%s\n", testdnsbl);
}

#endif
