birq.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /*
  2. * birq
  3. *
  4. * Balance IRQ
  5. *
  6. */
  7. #ifdef HAVE_CONFIG_H
  8. #include "config.h"
  9. #endif /* HAVE_CONFIG_H */
  10. #include <stdio.h>
  11. #include <stdlib.h>
  12. #include <unistd.h>
  13. #include <sys/types.h>
  14. #include <errno.h>
  15. #include <assert.h>
  16. #include <string.h>
  17. #include <signal.h>
  18. #include <syslog.h>
  19. #include <fcntl.h>
  20. #include <time.h>
  21. #ifdef HAVE_GETOPT_H
  22. #include <getopt.h>
  23. #endif
  24. #include "birq.h"
  25. #include "lub/log.h"
  26. #include "lub/list.h"
  27. #include "irq.h"
  28. #include "numa.h"
  29. #include "cpu.h"
  30. #include "statistics.h"
  31. #include "balance.h"
  32. #include "pxm.h"
  33. #ifndef VERSION
  34. #define VERSION "1.0.0"
  35. #endif
  /* Signal handling */

  /*
   * Termination flag: set to 1 by sighandler() and polled by the main loop.
   * It is written from a signal handler, so it must be a
   * `volatile sig_atomic_t` object: that is the type the C standard allows
   * an asynchronous handler to assign to.
   */
  static volatile sig_atomic_t sigterm = 0; /* Exit if 1 */

  /* Forward declarations of the file-local helpers defined below */
  static void sighandler(int signo);
  static void help(int status, const char *argv0);
  static struct options *opts_init(void);
  static void opts_free(struct options *opts);
  static int opts_parse(int argc, char *argv[], struct options *opts);
  43. /* Command line options */
  44. struct options {
  45. char *pidfile;
  46. char *pxm; /* Proximity config file */
  47. int debug; /* Don't daemonize in debug mode */
  48. int log_facility;
  49. float threshold;
  50. int verbose;
  51. int ht;
  52. unsigned int long_interval;
  53. unsigned int short_interval;
  54. birq_choose_strategy_e strategy;
  55. };
  56. /*--------------------------------------------------------- */
  57. int main(int argc, char **argv)
  58. {
  59. int retval = -1;
  60. struct options *opts = NULL;
  61. int pidfd = -1;
  62. unsigned int interval;
  63. /* Signal vars */
  64. struct sigaction sig_act;
  65. sigset_t sig_set;
  66. /* IRQ list. It contain all found IRQs. */
  67. lub_list_t *irqs;
  68. /* IRQs need to be balanced */
  69. lub_list_t *balance_irqs;
  70. /* CPU list. It contain all found CPUs. */
  71. lub_list_t *cpus;
  72. /* NUMA list. It contain all found NUMA nodes. */
  73. lub_list_t *numas;
  74. /* Proximity list. */
  75. lub_list_t *pxms;
  76. /* Parse command line options */
  77. opts = opts_init();
  78. if (opts_parse(argc, argv, opts))
  79. goto err;
  80. /* Initialize syslog */
  81. openlog(argv[0], LOG_CONS, opts->log_facility);
  82. syslog(LOG_ERR, "Start daemon.\n");
  83. /* Fork the daemon */
  84. if (!opts->debug) {
  85. /* Daemonize */
  86. if (daemon(0, 0) < 0) {
  87. syslog(LOG_ERR, "Can't daemonize\n");
  88. goto err;
  89. }
  90. /* Write pidfile */
  91. if ((pidfd = open(opts->pidfile,
  92. O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
  93. S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
  94. syslog(LOG_WARNING, "Can't open pidfile %s: %s",
  95. opts->pidfile, strerror(errno));
  96. } else {
  97. char str[20];
  98. snprintf(str, sizeof(str), "%u\n", getpid());
  99. if (write(pidfd, str, strlen(str)) < 0)
  100. syslog(LOG_WARNING, "Can't write to %s: %s",
  101. opts->pidfile, strerror(errno));
  102. close(pidfd);
  103. }
  104. }
  105. /* Set signal handler */
  106. sigemptyset(&sig_set);
  107. sigaddset(&sig_set, SIGTERM);
  108. sigaddset(&sig_set, SIGINT);
  109. sigaddset(&sig_set, SIGQUIT);
  110. sig_act.sa_flags = 0;
  111. sig_act.sa_mask = sig_set;
  112. sig_act.sa_handler = &sighandler;
  113. sigaction(SIGTERM, &sig_act, NULL);
  114. sigaction(SIGINT, &sig_act, NULL);
  115. sigaction(SIGQUIT, &sig_act, NULL);
  116. /* Randomize */
  117. srand(time(NULL));
  118. /* Scan NUMA nodes */
  119. numas = lub_list_new(numa_list_compare);
  120. scan_numas(numas);
  121. if (opts->verbose)
  122. show_numas(numas);
  123. /* Scan CPUs */
  124. cpus = lub_list_new(cpu_list_compare);
  125. scan_cpus(cpus, opts->ht);
  126. if (opts->verbose)
  127. show_cpus(cpus);
  128. /* Prepare data structures */
  129. irqs = lub_list_new(irq_list_compare);
  130. balance_irqs = lub_list_new(irq_list_compare);
  131. /* Parse proximity file */
  132. pxms = lub_list_new(NULL);
  133. if (opts->pxm)
  134. parse_pxm_config(opts->pxm, pxms, numas);
  135. if (opts->verbose)
  136. show_pxms(pxms);
  137. /* Main loop */
  138. while (!sigterm) {
  139. lub_list_node_t *node;
  140. char outstr[10];
  141. time_t t;
  142. struct tm *tmp;
  143. t = time(NULL);
  144. tmp = localtime(&t);
  145. if (tmp) {
  146. strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
  147. printf("----[ %s ]----------------------------------------------------------------\n", outstr);
  148. }
  149. /* Rescan PCI devices for new IRQs. */
  150. scan_irqs(irqs, balance_irqs, pxms);
  151. if (opts->verbose)
  152. irq_list_show(irqs);
  153. /* Gather statistics on CPU load and number of interrupts. */
  154. gather_statistics(cpus, irqs);
  155. show_statistics(cpus, opts->verbose);
  156. /* Choose IRQ to move to another CPU.
  157. Don't choose IRQ if we already have new IRQs to balance */
  158. if (lub_list_len(balance_irqs) == 0) {
  159. choose_irqs_to_move(cpus, balance_irqs,
  160. opts->threshold, opts->strategy);
  161. }
  162. /* If nothing to balance */
  163. if (lub_list_len(balance_irqs) != 0) {
  164. /* Set short interval to make balancing faster. */
  165. interval = opts->short_interval;
  166. /* Choose new CPU for IRQs need to be balanced. */
  167. balance(cpus, balance_irqs, opts->threshold);
  168. /* Write new values to /proc/irq/<IRQ>/smp_affinity */
  169. apply_affinity(balance_irqs);
  170. /* Free list of balanced IRQs */
  171. while ((node = lub_list__get_tail(balance_irqs))) {
  172. lub_list_del(balance_irqs, node);
  173. lub_list_node_free(node);
  174. }
  175. } else {
  176. /* If nothing to balance */
  177. interval = opts->long_interval;
  178. }
  179. /* Wait before next iteration */
  180. sleep(interval);
  181. }
  182. /* Free data structures */
  183. irq_list_free(irqs);
  184. lub_list_free(balance_irqs);
  185. cpu_list_free(cpus);
  186. numa_list_free(numas);
  187. pxm_list_free(pxms);
  188. retval = 0;
  189. err:
  190. /* Remove pidfile */
  191. if (pidfd >= 0) {
  192. if (unlink(opts->pidfile) < 0) {
  193. syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
  194. opts->pidfile, strerror(errno));
  195. }
  196. }
  197. /* Free command line options */
  198. opts_free(opts);
  199. syslog(LOG_ERR, "Stop daemon.\n");
  200. return retval;
  201. }
  202. /*--------------------------------------------------------- */
  203. /*
  204. * Signal handler for temination signals (like SIGTERM, SIGINT, ...)
  205. */
  206. static void sighandler(int signo)
  207. {
  208. sigterm = 1;
  209. signo = signo; /* Happy compiler */
  210. }
  211. /*--------------------------------------------------------- */
  212. /* Initialize option structure by defaults */
  213. static struct options *opts_init(void)
  214. {
  215. struct options *opts = NULL;
  216. opts = malloc(sizeof(*opts));
  217. assert(opts);
  218. opts->debug = 0; /* daemonize by default */
  219. opts->pidfile = strdup(BIRQ_PIDFILE);
  220. opts->pxm = NULL;
  221. opts->log_facility = LOG_DAEMON;
  222. opts->threshold = BIRQ_DEFAULT_THRESHOLD;
  223. opts->verbose = 0;
  224. opts->ht = 0;
  225. opts->long_interval = BIRQ_LONG_INTERVAL;
  226. opts->short_interval = BIRQ_SHORT_INTERVAL;
  227. opts->strategy = BIRQ_CHOOSE_RND;
  228. return opts;
  229. }
  230. /*--------------------------------------------------------- */
  231. /* Free option structure */
  232. static void opts_free(struct options *opts)
  233. {
  234. if (opts->pidfile)
  235. free(opts->pidfile);
  236. if (opts->pxm)
  237. free(opts->pxm);
  238. free(opts);
  239. }
  240. /*--------------------------------------------------------- */
  241. /* Parse command line options */
  242. static int opts_parse(int argc, char *argv[], struct options *opts)
  243. {
  244. static const char *shortopts = "hp:dO:t:vri:I:s:x:";
  245. #ifdef HAVE_GETOPT_H
  246. static const struct option longopts[] = {
  247. {"help", 0, NULL, 'h'},
  248. {"pid", 1, NULL, 'p'},
  249. {"debug", 0, NULL, 'd'},
  250. {"facility", 1, NULL, 'O'},
  251. {"threshold", 1, NULL, 't'},
  252. {"verbose", 0, NULL, 'v'},
  253. {"ht", 0, NULL, 'r'},
  254. {"short-interval", 1, NULL, 'i'},
  255. {"long-interval", 1, NULL, 'i'},
  256. {"strategy", 1, NULL, 's'},
  257. {"pxm", 1, NULL, 'x'},
  258. {NULL, 0, NULL, 0}
  259. };
  260. #endif
  261. optind = 1;
  262. while(1) {
  263. int opt;
  264. #ifdef HAVE_GETOPT_H
  265. opt = getopt_long(argc, argv, shortopts, longopts, NULL);
  266. #else
  267. opt = getopt(argc, argv, shortopts);
  268. #endif
  269. if (-1 == opt)
  270. break;
  271. switch (opt) {
  272. case 'p':
  273. if (opts->pidfile)
  274. free(opts->pidfile);
  275. opts->pidfile = strdup(optarg);
  276. break;
  277. case 'x':
  278. if (opts->pxm)
  279. free(opts->pxm);
  280. opts->pxm = strdup(optarg);
  281. break;
  282. case 'd':
  283. opts->debug = 1;
  284. break;
  285. case 'v':
  286. opts->verbose = 1;
  287. break;
  288. case 'r':
  289. opts->ht = 1;
  290. break;
  291. case 'O':
  292. if (lub_log_facility(optarg, &(opts->log_facility))) {
  293. fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
  294. help(-1, argv[0]);
  295. exit(-1);
  296. }
  297. break;
  298. case 't':
  299. {
  300. char *endptr;
  301. float thresh;
  302. thresh = strtof(optarg, &endptr);
  303. if (endptr == optarg)
  304. thresh = opts->threshold;
  305. opts->threshold = thresh;
  306. if (thresh > 100.00) {
  307. fprintf(stderr, "Error: Illegal threshold value %s.\n", optarg);
  308. help(-1, argv[0]);
  309. exit(-1);
  310. }
  311. }
  312. break;
  313. case 'i':
  314. {
  315. char *endptr;
  316. unsigned long int val;
  317. val = strtoul(optarg, &endptr, 10);
  318. if (endptr != optarg)
  319. opts->short_interval = val;
  320. }
  321. break;
  322. case 'I':
  323. {
  324. char *endptr;
  325. unsigned long int val;
  326. val = strtoul(optarg, &endptr, 10);
  327. if (endptr != optarg)
  328. opts->long_interval = val;
  329. }
  330. break;
  331. case 'c':
  332. if (!strcmp(optarg, "max"))
  333. opts->strategy = BIRQ_CHOOSE_MAX;
  334. else if (!strcmp(optarg, "min"))
  335. opts->strategy = BIRQ_CHOOSE_MIN;
  336. else if (!strcmp(optarg, "rnd"))
  337. opts->strategy = BIRQ_CHOOSE_RND;
  338. else {
  339. fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
  340. help(-1, argv[0]);
  341. exit(-1);
  342. }
  343. break;
  344. case 'h':
  345. help(0, argv[0]);
  346. exit(0);
  347. break;
  348. default:
  349. help(-1, argv[0]);
  350. exit(-1);
  351. break;
  352. }
  353. }
  354. return 0;
  355. }
  356. /*--------------------------------------------------------- */
  357. /* Print help message */
  358. static void help(int status, const char *argv0)
  359. {
  360. const char *name = NULL;
  361. if (!argv0)
  362. return;
  363. /* Find the basename */
  364. name = strrchr(argv0, '/');
  365. if (name)
  366. name++;
  367. else
  368. name = argv0;
  369. if (status != 0) {
  370. fprintf(stderr, "Try `%s -h' for more information.\n",
  371. name);
  372. } else {
  373. printf("Version : %s\n", VERSION);
  374. printf("Usage : %s [options]\n", name);
  375. printf("Daemon to balance IRQs.\n");
  376. printf("Options :\n");
  377. printf("\t-h, --help\tPrint this help.\n");
  378. printf("\t-d, --debug\tDebug mode. Don't daemonize.\n");
  379. printf("\t-v, --verbose\tBe verbose.\n");
  380. printf("\t-r, --ht\tEnable hyper-threading. Not recommended.\n");
  381. printf("\t-p <path>, --pid=<path>\tFile to save daemon's PID to.\n");
  382. printf("\t-x <path>, --pxm=<path>\tProximity config file.\n");
  383. printf("\t-O, --facility\tSyslog facility. Default is DAEMON.\n");
  384. printf("\t-t <float>, --threshold=<float>\tThreshold to consider CPU is overloaded, in percents.\n");
  385. printf("\t-i <sec>, --short-interval=<sec>\tShort iteration interval.\n");
  386. printf("\t-I <sec>, --long-interval=<sec>\tLong iteration interval.\n");
  387. printf("\t-c <strategy>, --choose=<strategy>\tStrategy to choose IRQ to move (min/max/rnd).\n");
  388. }
  389. }