/* thread-switcher.c - program to switch worker thread between cpus
 *
 * Copyright 2013,2014 Clark Williams <williams@redhat.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ***********************************************************************
 * This is an example program showing how to safely poll
 * in a tight loop on a multiprocessor Linux system without killing the OS.
 *
 * Linux has a number of operations that are dependent on the timer tick
 * periodically firing on each cpu to perform housekeeping operations
 * (e.g. workqueues, RCU, memory management, accounting operations, etc).
 * If these are held off by a high priority thread that runs continually,
 * the system becomes bogged down and eventually unstable, either locking
 * up or crashing.
 *
 * The idea here is to migrate the affinity of a worker thread across some
 * set of cpus so that after some period of time, it is moved from its
 * current cpu to another, giving each cpu some time to do housekeeping
 * tasks. Yes, there will be some performance hits during the migration
 * due to cache misses, but that's infinitely better than having the system
 * lock up, isn't it?
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#define __USE_GNU
#define __USE_UNIX98
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <getopt.h>

/* initial value of worker_priority, the worker thread's SCHED_FIFO priority */
#define DEFAULT_PRIORITY 2
/* initial value of the switching interval (seconds + nanoseconds) */
#define WORK_QUANTUM_SEC 5
#define WORK_QUANTUM_NSEC 0

/* nanosecond conversion factors; NSECS_PER_SEC (10^9) still fits in an int */
#define NSECS_PER_MICROSEC  1000
#define NSECS_PER_MILLISEC  (1000 * NSECS_PER_MICROSEC)
#define NSECS_PER_SEC       (1000 * NSECS_PER_MILLISEC)

/*
 * How long main() sleeps between moving the worker thread to the next
 * cpu. Starts at the WORK_QUANTUM_* default and may be overwritten by
 * the --interval command line option.
 */
struct timespec interval = {
	.tv_sec = WORK_QUANTUM_SEC,
	.tv_nsec = WORK_QUANTUM_NSEC
};

/* start-up parameters handed from main() to the worker thread routine */
struct thread_arguments {
	int prio;	/* SCHED_FIFO priority the worker sets for itself */
	int start_cpu;	/* cpu the worker pins itself to before it starts looping */
};

/*
 * Shutdown flag: set to 1 by the SIGINT handler and polled by both the
 * worker's busy loop and main()'s switching loop. It has to be volatile
 * so that each loop iteration re-reads it from memory.
 */
volatile unsigned int stop_work = 0;
/* list of cpu numbers the worker is rotated over, and its length */
unsigned int *cpus = NULL;
unsigned int ncpus = 0;
/* SCHED_FIFO priority of the worker; main() runs at worker_priority + 1 */
unsigned int worker_priority = DEFAULT_PRIORITY;

/*
 * Command line option parsing helper routine.
 *
 * Parses str as a list of non-negative decimal cpu numbers separated by
 * commas (e.g. "0,2,5") into a freshly allocated array.
 *
 * Returns the array, which the caller owns and must free(), or NULL if
 * str is NULL, empty, malformed, out of range, or memory runs out.
 *
 * Side effect: the global ncpus is set to the number of entries in the
 * returned array, i.e. it is 0 whenever NULL is returned. The count is
 * kept in a local while parsing so that a failed or repeated call can
 * never leave ncpus out of step with the array.
 */
int *getcpulist(char *str)
{
	char *endptr;
	char *ptr = str;
	long val;
	int *array = NULL;
	int *tmp;
	unsigned int count = 0;

	if (str == NULL)
		goto out_free;

	while (*ptr) {
		errno = 0;
		/* keep the result in a long so range errors are not truncated away */
		val = strtol(ptr, &endptr, 10);
		if (errno != 0) {
			perror("strtol");
			goto out_free;
		}
		/* no digits consumed: empty field or garbage in the list */
		if (endptr == ptr)
			goto out_free;
		if (val < 0 || val > INT_MAX) {
			fprintf(stderr, "invalid cpu number: %ld\n", val);
			goto out_free;
		}

		/* grow through a temporary so the old array can still be freed on failure */
		tmp = realloc(array, sizeof(*array) * (count + 1));
		if (tmp == NULL) {
			perror("realloc");
			goto out_free;
		}
		array = tmp;
		array[count++] = (int) val;

		ptr = endptr;
		if (*ptr == ',')
			ptr++;
	}
	ncpus = count;
	return array;

out_free:
	free(array);
	ncpus = 0;
	return NULL;
}

/*
 * Print a short summary of the command line options to stderr.
 *
 * The defaults quoted in the text are literals: keep them in step with
 * DEFAULT_PRIORITY and WORK_QUANTUM_SEC if those ever change.
 */
void usage(void)
{
	fputs("\nusage: thread-switcher [options]\n", stderr);
	fputs("     options:\n", stderr);
	fputs("              --cpus | -c     : list of cpus to use (default: all)\n", stderr);
	fputs("              --prio | -p     : priority to use for the worker thread (default: 2)\n", stderr);
	fputs("              --interval | -i : interval between switching cpus (default: 5s)\n", stderr);
	fputs("                                value suffixes are: m, s, ms, us\n", stderr);
}


/*
 * Long option table for getopt_long(). Every option takes an argument
 * and is reported through its short option character; the all-zero
 * entry terminates the table.
 */
struct option opts[] = {
	{ .name = "cpus",     .has_arg = required_argument, .flag = NULL, .val = 'c' },
	{ .name = "prio",     .has_arg = required_argument, .flag = NULL, .val = 'p' },
	{ .name = "interval", .has_arg = required_argument, .flag = NULL, .val = 'i' },
	{ .name = NULL,       .has_arg = 0,                 .flag = NULL, .val = 0 },
};

/*
 * Parse the command line and fill in the global run parameters.
 *
 *   --cpus | -c <list>    comma separated cpu list, stored in cpus/ncpus
 *   --prio | -p <n>       worker priority, stored in worker_priority
 *   --interval | -i <n>[suffix]
 *                         switching interval, stored in interval; the
 *                         suffix is s (seconds, also the default when no
 *                         suffix is given), m (minutes), ms or us
 *
 * An invalid priority falls back to DEFAULT_PRIORITY with a warning. An
 * invalid interval terminates the program. If no usable cpu list was
 * given (fewer than 2 entries or more entries than online cpus), all
 * online cpus 0..n-1 are used.
 *
 * On return cpus points to ncpus valid entries.
 */
void process_arguments (int argc, char **argv)
{
	int c;
	int option_index = 0;
	int cpus_given = 0;
	int saved_errno;
	long maxcpus;
	long value;
	char *ptr;
	unsigned int i;
	unsigned int *tmp;

	errno = 0;
	maxcpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (maxcpus < 1) {
		saved_errno = errno ? errno : EINVAL;
		fprintf(stderr, "unable to find number of online cpus: %s\n",
			strerror(saved_errno));
		exit(saved_errno);
	}

	while(1) {
		/* "i:" is required here, otherwise the short form -i is rejected */
		c = getopt_long(argc, argv, "c:p:i:", opts, &option_index);
		if (c == -1)
			break;
		switch(c) {
		case 'c':
			/* a repeated --cpus replaces the earlier list */
			cpus_given = 1;
			free(cpus);
			ncpus = 0;
			cpus = (unsigned int *) getcpulist(optarg);
			/* never leave a count behind without a list to go with it */
			if (cpus == NULL)
				ncpus = 0;
			break;
		case 'p':
			errno = 0;
			value = strtol(optarg, &ptr, 10);
			/*
			 * main runs one priority level above the worker and
			 * SCHED_FIFO tops out at 99, so the worker is
			 * limited to 98.
			 */
			if (errno || ptr == optarg || *ptr || value < 1 || value > 98) {
				fprintf(stderr, "invalid realtime priority specified: %s (defaulting to %d)\n",
					optarg, DEFAULT_PRIORITY);
				worker_priority = DEFAULT_PRIORITY;
			}
			else
				worker_priority = (unsigned int) value;
			break;
		case 'i':
			errno = 0;
			value = strtol(optarg, &ptr, 10);
			if (errno) {
				/* perror() may change errno, so save it for the exit code */
				saved_errno = errno;
				perror("strtol on --interval argument");
				exit(saved_errno);
			}
			if (ptr == optarg) {
				fprintf(stderr, "invalid or no value for --interval\n");
				exit(EINVAL);
			}
			/* a zero or negative sleep would make main spin */
			if (value <= 0) {
				fprintf(stderr, "invalid value for --interval: %ld (must be greater than zero)\n", value);
				exit(EINVAL);
			}
			if (*ptr == '\0' || *ptr == 's') {
				interval.tv_sec = value;
				interval.tv_nsec = 0;
			}
			else if (*ptr == 'm' && *(ptr+1) == 's') {
				/* split into sec/nsec up front: no overflow, tv_nsec < 1s */
				interval.tv_sec = value / (NSECS_PER_SEC / NSECS_PER_MILLISEC);
				interval.tv_nsec = (value % (NSECS_PER_SEC / NSECS_PER_MILLISEC)) * NSECS_PER_MILLISEC;
			}
			else if (*ptr == 'm') {
				if (value > LONG_MAX / 60) {
					fprintf(stderr, "value for --interval is too large: %s\n", optarg);
					exit(ERANGE);
				}
				interval.tv_sec = value * 60;
				interval.tv_nsec = 0;
			}
			else if (*ptr == 'u' && *(ptr+1) == 's') {
				interval.tv_sec = value / (NSECS_PER_SEC / NSECS_PER_MICROSEC);
				interval.tv_nsec = (value % (NSECS_PER_SEC / NSECS_PER_MICROSEC)) * NSECS_PER_MICROSEC;
			}
			else {
				fprintf(stderr, "invalid suffix for --interval value: %s\n", ptr);
				fprintf(stderr, "value suffixes are: m, s, ms, us\n");
				exit(EINVAL);
			}
			break;
		default:
			usage();
			exit(0);
		}
	}

	if (ncpus < 2 || ncpus > (unsigned long) maxcpus) {
		/* only complain when the user actually supplied a cpu list */
		if (cpus_given) {
			fprintf(stderr, "invalid value for ncpus: %u (must be between 2 and %ld)\n", ncpus, maxcpus);
			fprintf(stderr, "defaulting to using all online cpus\n");
		}
		ncpus = (unsigned int) maxcpus;
		tmp = realloc(cpus, sizeof(*cpus) * ncpus);
		if (tmp == NULL) {
			fprintf(stderr, "Error allocating memory for %ld cpus\n", maxcpus);
			exit(ENOMEM);
		}
		cpus = tmp;
		for (i = 0; i < ncpus; i++)
			cpus[i] = i;
	}
}


/*
 * Pin the calling thread to a single cpu.
 *
 * who: label used as a prefix in the progress and error messages
 * cpu: number of the cpu to run on
 *
 * A failure of sched_setaffinity() is reported on stderr but is not
 * fatal; the caller simply keeps its previous affinity.
 */
void set_my_affinity(char *who, int cpu)
{
	cpu_set_t mask;

	printf("%s: setting affinity to cpu %d\n", who, cpu);
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	/* pid 0 means "the caller" */
	if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
		fprintf(stderr, "%s: unable to set affinity to cpu %d: %s\n",
			who, cpu, strerror(errno));
}

/*
 * Set the calling thread's scheduling policy to SCHED_FIFO at priority
 * prio.
 *
 * who:  label used as a prefix in the progress and error messages
 * prio: realtime priority to request
 *
 * A failure of sched_setscheduler() (for example when the process lacks
 * the privilege to use realtime policies) is reported on stderr but is
 * not fatal; the caller keeps its previous policy and priority.
 */
void set_my_priority(char *who, int prio)
{
	struct sched_param sp = { .sched_priority = prio };

	printf("%s: setting priority to %d\n", who, prio);
	/* pid 0 means "the caller" */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0)
		fprintf(stderr, "%s: unable to set SCHED_FIFO priority %d: %s\n",
			who, prio, strerror(errno));
}

/*
 * Worker thread routine.
 *
 * data points to a struct thread_arguments giving the priority and the
 * first cpu to use. The thread sets its own priority and affinity and
 * then spins, incrementing a counter, until stop_work becomes non-zero.
 *
 * Always returns NULL.
 */
void *work(void *data)
{
	struct thread_arguments *args = data;
	unsigned long loopcount = 0;

	set_my_priority("worker", args->prio);
	set_my_affinity("worker", args->start_cpu);

	/* stand-in for a tight polling loop; never sleeps or yields */
	while (!stop_work)
		loopcount++;

	printf("worker: out of main loop, thread exiting\n");
	/* returning from the start routine ends the thread, like pthread_exit() */
	return NULL;
}

/*
 * SIGINT handler: announce the signal and ask all loops to stop by
 * setting stop_work.
 *
 * Only async-signal-safe calls are allowed in a signal handler, so the
 * message is emitted with write() rather than printf(). errno is saved
 * and restored so the interrupted code does not see it change.
 */
void interrupt(int sig)
{
	static const char msg[] = "got SIGINT, setting stop_work\n";
	int saved_errno = errno;
	ssize_t rc;

	(void) sig;
	/* nothing useful can be done here if the write fails */
	rc = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
	(void) rc;
	stop_work = 1;
	errno = saved_errno;
}

/*
 * Program entry point.
 *
 * Parses the command line, raises its own priority to one above the
 * worker's, installs the SIGINT handler and starts the worker thread.
 * It then sleeps for one interval at a time and moves the worker to
 * the next cpu in the list, until stop_work is set.
 *
 * Exits with 0 after a clean shutdown, or with an errno-style code if
 * the signal handler or the worker thread cannot be set up.
 */
int main(int argc, char **argv)
{
	int cpu, ret;
	unsigned int idx;
	pthread_t wt;
	cpu_set_t mask;
	struct sigaction sa;
	struct thread_arguments targs;

	process_arguments(argc, argv);

	/*
	 * make sure we're one higher than the worker's priority so that
	 * we can run.
	 */
	set_my_priority("main", worker_priority + 1);

	/* setup the thread arguments */
	idx = 0;
	targs.prio = worker_priority;
	targs.start_cpu = cpus[idx];

	/*
	 * setup a signal handler for SIGINT; the whole structure must be
	 * initialized, otherwise sa_mask and sa_flags hold stack garbage
	 */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = interrupt;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGINT, &sa, NULL) != 0) {
		ret = errno;
		fprintf(stderr, "error installing SIGINT handler: %s\n", strerror(ret));
		exit(ret);
	}

	printf("main: multiplexing worker over %u cpus\n", ncpus);

	/* fire up the polling thread */
	ret = pthread_create(&wt, NULL, work, (void *) &targs);
	if (ret) {
		fprintf(stderr, "error starting thread: %s\n", strerror(ret));
		exit(ret);
	}

	/*
	 * sleep for an interval, then switch the affinity of the
	 * worker thread to the next cpu
	 */
	while(!stop_work) {
		ret = clock_nanosleep(CLOCK_MONOTONIC, 0, &interval, NULL);
		/* SIGINT arrived while we slept: do not switch again */
		if (stop_work)
			break;
		if (ret != 0 && ret != EINTR) {
			/*
			 * a failing sleep would turn this loop into a
			 * tight spin above the worker's priority, so
			 * shut everything down instead
			 */
			fprintf(stderr, "clock_nanosleep: %s\n", strerror(ret));
			stop_work = 1;
			break;
		}
		if (++idx >= ncpus)
			idx = 0;
		cpu = cpus[idx];
		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		printf("switching worker to cpu %d\n", cpu);
		ret = pthread_setaffinity_np(wt, sizeof(mask), &mask);
		if (ret)
			fprintf(stderr, "unable to switch worker to cpu %d: %s\n",
				cpu, strerror(ret));
	}

	printf("main: out of main loop, calling pthread_join()\n");
	pthread_join(wt, NULL);

	exit(0);
}