Browse Source

First real balancing. Not tested.

Serj Kalichev 10 years ago
parent
commit
949f96d357
6 changed files with 88 additions and 11 deletions
  1. 60 4
      balance.c
  2. 2 1
      balance.h
  3. 22 3
      birq.c
  4. 2 1
      birq.h
  5. 1 1
      statistics.c
  6. 1 1
      statistics.h

+ 60 - 4
balance.c

@@ -33,7 +33,7 @@ static int move_irq_to_cpu(irq_t *irq, cpu_t *cpu)
 	return 0;
 }
 
-static cpu_t *choose_cpu(lub_list_t *cpus, irq_t *irq)
+static cpu_t *choose_cpu(lub_list_t *cpus, cpumask_t cpumask, float threshold)
 {
 	lub_list_node_t *iter;
 	lub_list_t * min_cpus = NULL;
@@ -44,7 +44,9 @@ static cpu_t *choose_cpu(lub_list_t *cpus, irq_t *irq)
 	for (iter = lub_list_iterator_init(cpus); iter;
 		iter = lub_list_iterator_next(iter)) {
 		cpu = (cpu_t *)lub_list_node__get_data(iter);
-		if (!cpu_isset(cpu->id, irq->local_cpus))
+		if (!cpu_isset(cpu->id, cpumask))
+			continue;
+		if (cpu->load >= threshold)
 			continue;
 		if ((!min_cpus) || (cpu->load < min_load)) {
 			min_load = cpu->load;
@@ -72,7 +74,7 @@ static cpu_t *choose_cpu(lub_list_t *cpus, irq_t *irq)
 	return cpu;
 }
 
-int balance(lub_list_t *cpus, lub_list_t *balance_irqs)
+int balance(lub_list_t *cpus, lub_list_t *balance_irqs, float threshold)
 {
 	lub_list_node_t *iter;
 
@@ -81,12 +83,23 @@ int balance(lub_list_t *cpus, lub_list_t *balance_irqs)
 		irq_t *irq;
 		cpu_t *cpu;
 		irq = (irq_t *)lub_list_node__get_data(iter);
-		cpu = choose_cpu(cpus, irq);
+		/* Try to find local CPU to move IRQ to.
+		   The local CPU is CPU with native NUMA node. */
+		cpu = choose_cpu(cpus, irq->local_cpus, threshold);
+		/* If local CPU is not found then try to use
+		   CPU from another NUMA node. It's better than
+		   overloaded CPUs. */
+		if (!cpu) {
+			cpumask_t complement;
+			cpus_complement(complement, irq->local_cpus);
+			cpu = choose_cpu(cpus, complement, threshold);
+		}
 		if (cpu) {
 			move_irq_to_cpu(irq, cpu);
 			printf("Move IRQ %u to CPU%u\n", irq->irq, cpu->id);
 		}
 	}
+
 	return 0;
 }
 
@@ -104,3 +117,46 @@ int apply_affinity(lub_list_t *balance_irqs)
 	}
 	return 0;
 }
+/* Pick one IRQ from the most loaded CPU (load >= threshold) and append it to balance_irqs; always returns 0. */
+int choose_irqs_to_move(lub_list_t *cpus, lub_list_t *balance_irqs, float threshold)
+{
+	lub_list_node_t *iter;
+	cpu_t *overloaded_cpu = NULL; /* stays NULL when no CPU qualifies */
+	irq_t *irq_to_move = NULL; /* candidate IRQ taken from overloaded_cpu->irqs */
+	float max_load = 0.0; /* highest load seen so far among qualifying CPUs */
+	unsigned long long max_intr = 0; /* largest irq->intr seen so far */
+
+	/* Search for the most overloaded CPU. Only CPUs whose load is
+	   at least threshold (load >= threshold) are considered. */
+	for (iter = lub_list_iterator_init(cpus); iter;
+		iter = lub_list_iterator_next(iter)) {
+		cpu_t *cpu = (cpu_t *)lub_list_node__get_data(iter);
+		if (cpu->load < threshold)
+			continue;
+		if (cpu->load > max_load) { /* strict: the first CPU with the top load is kept */
+			max_load = cpu->load;
+			overloaded_cpu = cpu;
+		}
+	}
+	/* No overloaded CPU was found: nothing to queue. */
+	if (!overloaded_cpu)
+		return 0;
+
+	/* Search the overloaded CPU's IRQ list for the entry with the largest
+	   intr value. A CPU holding at most one IRQ is left untouched. */
+	if (lub_list_len(overloaded_cpu->irqs) <= 1)
+		return 0;
+	for (iter = lub_list_iterator_init(overloaded_cpu->irqs); iter;
+		iter = lub_list_iterator_next(iter)) {
+		irq_t *irq = (irq_t *)lub_list_node__get_data(iter);
+		if (irq->intr >= max_intr) { /* >=: on a tie the later list entry wins */
+			max_intr = irq->intr;
+			irq_to_move = irq;
+		}
+	}
+
+	if (irq_to_move)
+		lub_list_add(balance_irqs, irq_to_move); /* result of lub_list_add is not checked */
+
+	return 0;
+}

+ 2 - 1
balance.h

@@ -5,7 +5,8 @@
 #include "irq.h"
 #include "cpu.h"
 
-int balance(lub_list_t *cpus, lub_list_t *balance_irqs);
+int balance(lub_list_t *cpus, lub_list_t *balance_irqs, float threshold);
 int apply_affinity(lub_list_t *balance_irqs);
+int choose_irqs_to_move(lub_list_t *cpus, lub_list_t *balance_irqs, float threshold);
 
 #endif

+ 22 - 3
birq.c

@@ -69,6 +69,7 @@ int main(int argc, char **argv)
 	int retval = -1;
 	struct options *opts = NULL;
 	int pidfd = -1;
+	int interval = BIRQ_SHORT_INTERVAL;
 
 	/* Signal vars */
 	struct sigaction sig_act;
@@ -170,7 +171,7 @@ int main(int argc, char **argv)
 		}
 
 		/* Timeout and poll for new devices */
-		while ((n = nl_poll(nl_fds, BIRQ_INTERVAL)) != 0) {
+		while ((n = nl_poll(nl_fds, interval)) != 0) {
 			if (-1 == n) {
 				fprintf(stderr,
 					"Error: Broken NetLink socket.\n");
@@ -186,10 +187,23 @@ int main(int argc, char **argv)
 
 		if (opts->debug)
 			printf("Some balancing...\n");
-		parse_proc_stat(cpus, irqs);
+		gather_statistics(cpus, irqs);
 		show_statistics(cpus);
-		balance(cpus, balance_irqs);
+		/* Choose IRQ to move to another CPU.
+		   Don't choose IRQ if we have new IRQs to balance */
+		if (lub_list_len(balance_irqs) == 0) {
+			choose_irqs_to_move(cpus, balance_irqs,
+				opts->threshold);
+		}
+		/* Nothing to balance */
+		if (lub_list_len(balance_irqs) == 0) {
+			interval = BIRQ_LONG_INTERVAL;
+			continue;
+		}
+		interval = BIRQ_SHORT_INTERVAL;
+		balance(cpus, balance_irqs, opts->threshold);
 		apply_affinity(balance_irqs);
+		/* Free list of balanced IRQs */
 		while ((node = lub_list__get_tail(balance_irqs))) {
 			lub_list_del(balance_irqs, node);
 			lub_list_node_free(node);
@@ -316,6 +330,11 @@ static int opts_parse(int argc, char *argv[], struct options *opts)
 			if (endptr == optarg)
 				thresh = opts->threshold;
 			opts->threshold = thresh;
+			if (thresh > 100.00) {
+				fprintf(stderr, "Error: Illegal threshold value %s.\n", optarg);
+				help(-1, argv[0]);
+				exit(-1);
+			}
 			}
 			break;
 		case 'h':

+ 2 - 1
birq.h

@@ -2,7 +2,8 @@
 #define _birq_h
 
 #define BIRQ_PIDFILE "/var/run/birq.pid"
-#define BIRQ_INTERVAL 3 /* in seconds */
+#define BIRQ_LONG_INTERVAL 5 /* in seconds */
+#define BIRQ_SHORT_INTERVAL 2 /* in seconds */
 #define BIRQ_DEFAULT_THRESHOLD 95.00
 
 #endif

+ 1 - 1
statistics.c

@@ -14,7 +14,7 @@
 #include "cpu.h"
 #include "irq.h"
 
-void parse_proc_stat(lub_list_t *cpus, lub_list_t *irqs)
+void gather_statistics(lub_list_t *cpus, lub_list_t *irqs)
 {
 	FILE *file;
 	char *line = NULL;

+ 1 - 1
statistics.h

@@ -3,7 +3,7 @@
 
 #include "lub/list.h"
 
-void parse_proc_stat(lub_list_t *cpus, lub_list_t *irqs);
+void gather_statistics(lub_list_t *cpus, lub_list_t *irqs);
 void show_statistics(lub_list_t *cpus);
 
 #endif