birq.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. /*
  2. * birq
  3. *
  4. * Balance IRQ
  5. *
  6. */
  7. #ifdef HAVE_CONFIG_H
  8. #include "config.h"
  9. #endif /* HAVE_CONFIG_H */
  10. #include <stdio.h>
  11. #include <stdlib.h>
  12. #include <unistd.h>
  13. #include <sys/types.h>
  14. #include <errno.h>
  15. #include <assert.h>
  16. #include <string.h>
  17. #include <signal.h>
  18. #include <syslog.h>
  19. #include <fcntl.h>
  20. #include <time.h>
  21. #ifdef HAVE_GETOPT_H
  22. #include <getopt.h>
  23. #endif
  24. #include "birq.h"
  25. #include "lub/log.h"
  26. #include "lub/list.h"
  27. #include "irq.h"
  28. #include "numa.h"
  29. #include "cpu.h"
  30. #include "statistics.h"
  31. #include "balance.h"
  32. #include "pxm.h"
  33. #ifndef VERSION
  34. #define VERSION "1.2.0"
  35. #endif
  36. /* Signal handlers */
  37. static volatile int sigterm = 0; /* Exit if 1 */
  38. static void sighandler(int signo);
  39. static void help(int status, const char *argv0);
  40. static struct options *opts_init(void);
  41. static void opts_free(struct options *opts);
  42. static int opts_parse(int argc, char *argv[], struct options *opts);
  43. static int parse_config(const char *fname, struct options *opts);
  44. /* Command line options */
  45. struct options {
  46. char *pidfile;
  47. char *pxm; /* Proximity config file */
  48. int debug; /* Don't daemonize in debug mode */
  49. int log_facility;
  50. float threshold;
  51. float load_limit;
  52. int verbose;
  53. int ht;
  54. unsigned int long_interval;
  55. unsigned int short_interval;
  56. birq_choose_strategy_e strategy;
  57. };
  58. /*--------------------------------------------------------- */
  59. int main(int argc, char **argv)
  60. {
  61. int retval = -1;
  62. struct options *opts = NULL;
  63. int pidfd = -1;
  64. unsigned int interval;
  65. /* Signal vars */
  66. struct sigaction sig_act;
  67. sigset_t sig_set;
  68. /* IRQ list. It contain all found IRQs. */
  69. lub_list_t *irqs;
  70. /* IRQs need to be balanced */
  71. lub_list_t *balance_irqs;
  72. /* CPU list. It contain all found CPUs. */
  73. lub_list_t *cpus;
  74. /* NUMA list. It contain all found NUMA nodes. */
  75. lub_list_t *numas;
  76. /* Proximity list. */
  77. lub_list_t *pxms;
  78. /* Parse command line options */
  79. opts = opts_init();
  80. if (opts_parse(argc, argv, opts))
  81. goto err;
  82. /* Initialize syslog */
  83. openlog(argv[0], LOG_CONS, opts->log_facility);
  84. syslog(LOG_ERR, "Start daemon.\n");
  85. /* Fork the daemon */
  86. if (!opts->debug) {
  87. /* Daemonize */
  88. if (daemon(0, 0) < 0) {
  89. syslog(LOG_ERR, "Can't daemonize\n");
  90. goto err;
  91. }
  92. /* Write pidfile */
  93. if ((pidfd = open(opts->pidfile,
  94. O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
  95. S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
  96. syslog(LOG_WARNING, "Can't open pidfile %s: %s",
  97. opts->pidfile, strerror(errno));
  98. } else {
  99. char str[20];
  100. snprintf(str, sizeof(str), "%u\n", getpid());
  101. str[sizeof(str) - 1] = '\0';
  102. if (write(pidfd, str, strlen(str)) < 0)
  103. syslog(LOG_WARNING, "Can't write to %s: %s",
  104. opts->pidfile, strerror(errno));
  105. close(pidfd);
  106. }
  107. }
  108. /* Set signal handler */
  109. sigemptyset(&sig_set);
  110. sigaddset(&sig_set, SIGTERM);
  111. sigaddset(&sig_set, SIGINT);
  112. sigaddset(&sig_set, SIGQUIT);
  113. sig_act.sa_flags = 0;
  114. sig_act.sa_mask = sig_set;
  115. sig_act.sa_handler = &sighandler;
  116. sigaction(SIGTERM, &sig_act, NULL);
  117. sigaction(SIGINT, &sig_act, NULL);
  118. sigaction(SIGQUIT, &sig_act, NULL);
  119. /* Randomize */
  120. srand(time(NULL));
  121. /* Scan NUMA nodes */
  122. numas = lub_list_new(numa_list_compare);
  123. scan_numas(numas);
  124. if (opts->verbose)
  125. show_numas(numas);
  126. /* Scan CPUs */
  127. cpus = lub_list_new(cpu_list_compare);
  128. scan_cpus(cpus, opts->ht);
  129. if (opts->verbose)
  130. show_cpus(cpus);
  131. /* Prepare data structures */
  132. irqs = lub_list_new(irq_list_compare);
  133. balance_irqs = lub_list_new(irq_list_compare);
  134. /* Parse proximity file */
  135. pxms = lub_list_new(NULL);
  136. if (opts->pxm)
  137. parse_pxm_config(opts->pxm, pxms, numas);
  138. if (opts->verbose)
  139. show_pxms(pxms);
  140. /* Main loop */
  141. while (!sigterm) {
  142. lub_list_node_t *node;
  143. char outstr[10];
  144. time_t t;
  145. struct tm *tmp;
  146. t = time(NULL);
  147. tmp = localtime(&t);
  148. if (tmp) {
  149. strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
  150. printf("----[ %s ]----------------------------------------------------------------\n", outstr);
  151. }
  152. /* Rescan PCI devices for new IRQs. */
  153. scan_irqs(irqs, balance_irqs, pxms);
  154. if (opts->verbose)
  155. irq_list_show(irqs);
  156. /* Link IRQs to CPUs due to real current smp affinity. */
  157. link_irqs_to_cpus(cpus, irqs);
  158. /* Gather statistics on CPU load and number of interrupts. */
  159. gather_statistics(cpus, irqs);
  160. show_statistics(cpus, opts->verbose);
  161. /* Choose IRQ to move to another CPU. */
  162. choose_irqs_to_move(cpus, balance_irqs,
  163. opts->threshold, opts->strategy);
  164. /* Balance IRQs */
  165. if (lub_list_len(balance_irqs) != 0) {
  166. /* Set short interval to make balancing faster. */
  167. interval = opts->short_interval;
  168. /* Choose new CPU for IRQs need to be balanced. */
  169. balance(cpus, balance_irqs, opts->load_limit);
  170. /* Write new values to /proc/irq/<IRQ>/smp_affinity */
  171. apply_affinity(balance_irqs);
  172. /* Free list of balanced IRQs */
  173. while ((node = lub_list__get_tail(balance_irqs))) {
  174. lub_list_del(balance_irqs, node);
  175. lub_list_node_free(node);
  176. }
  177. } else {
  178. /* If nothing to balance */
  179. interval = opts->long_interval;
  180. }
  181. /* Wait before next iteration */
  182. sleep(interval);
  183. }
  184. /* Free data structures */
  185. irq_list_free(irqs);
  186. lub_list_free(balance_irqs);
  187. cpu_list_free(cpus);
  188. numa_list_free(numas);
  189. pxm_list_free(pxms);
  190. retval = 0;
  191. err:
  192. /* Remove pidfile */
  193. if (pidfd >= 0) {
  194. if (unlink(opts->pidfile) < 0) {
  195. syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
  196. opts->pidfile, strerror(errno));
  197. }
  198. }
  199. /* Free command line options */
  200. opts_free(opts);
  201. syslog(LOG_ERR, "Stop daemon.\n");
  202. return retval;
  203. }
  204. /*--------------------------------------------------------- */
  205. /*
  206. * Signal handler for temination signals (like SIGTERM, SIGINT, ...)
  207. */
  208. static void sighandler(int signo)
  209. {
  210. sigterm = 1;
  211. signo = signo; /* Happy compiler */
  212. }
  213. /*--------------------------------------------------------- */
  214. /* Initialize option structure by defaults */
  215. static struct options *opts_init(void)
  216. {
  217. struct options *opts = NULL;
  218. opts = malloc(sizeof(*opts));
  219. assert(opts);
  220. opts->debug = 0; /* daemonize by default */
  221. opts->pidfile = strdup(BIRQ_PIDFILE);
  222. opts->pxm = NULL;
  223. opts->log_facility = LOG_DAEMON;
  224. opts->threshold = BIRQ_DEFAULT_THRESHOLD;
  225. opts->load_limit = BIRQ_DEFAULT_LOAD_LIMIT;
  226. opts->verbose = 0;
  227. opts->ht = 0;
  228. opts->long_interval = BIRQ_LONG_INTERVAL;
  229. opts->short_interval = BIRQ_SHORT_INTERVAL;
  230. opts->strategy = BIRQ_CHOOSE_RND;
  231. return opts;
  232. }
  233. /*--------------------------------------------------------- */
  234. /* Free option structure */
  235. static void opts_free(struct options *opts)
  236. {
  237. if (opts->pidfile)
  238. free(opts->pidfile);
  239. if (opts->pxm)
  240. free(opts->pxm);
  241. free(opts);
  242. }
  243. /*--------------------------------------------------------- */
  244. /* Parse command line options */
  245. static int opts_parse(int argc, char *argv[], struct options *opts)
  246. {
  247. static const char *shortopts = "hp:dO:t:l:vri:I:s:x:";
  248. #ifdef HAVE_GETOPT_H
  249. static const struct option longopts[] = {
  250. {"help", 0, NULL, 'h'},
  251. {"pid", 1, NULL, 'p'},
  252. {"debug", 0, NULL, 'd'},
  253. {"facility", 1, NULL, 'O'},
  254. {"threshold", 1, NULL, 't'},
  255. {"load-limit", 1, NULL, 't'},
  256. {"verbose", 0, NULL, 'v'},
  257. {"ht", 0, NULL, 'r'},
  258. {"short-interval", 1, NULL, 'i'},
  259. {"long-interval", 1, NULL, 'i'},
  260. {"strategy", 1, NULL, 's'},
  261. {"pxm", 1, NULL, 'x'},
  262. {NULL, 0, NULL, 0}
  263. };
  264. #endif
  265. optind = 1;
  266. while(1) {
  267. int opt;
  268. #ifdef HAVE_GETOPT_H
  269. opt = getopt_long(argc, argv, shortopts, longopts, NULL);
  270. #else
  271. opt = getopt(argc, argv, shortopts);
  272. #endif
  273. if (-1 == opt)
  274. break;
  275. switch (opt) {
  276. case 'p':
  277. if (opts->pidfile)
  278. free(opts->pidfile);
  279. opts->pidfile = strdup(optarg);
  280. break;
  281. case 'x':
  282. if (opts->pxm)
  283. free(opts->pxm);
  284. opts->pxm = strdup(optarg);
  285. break;
  286. case 'd':
  287. opts->debug = 1;
  288. break;
  289. case 'v':
  290. opts->verbose = 1;
  291. break;
  292. case 'r':
  293. opts->ht = 1;
  294. break;
  295. case 'O':
  296. if (lub_log_facility(optarg, &(opts->log_facility))) {
  297. fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
  298. help(-1, argv[0]);
  299. exit(-1);
  300. }
  301. break;
  302. case 't':
  303. {
  304. char *endptr;
  305. float thresh;
  306. thresh = strtof(optarg, &endptr);
  307. if (endptr == optarg)
  308. thresh = opts->threshold;
  309. opts->threshold = thresh;
  310. if (thresh > 100.00) {
  311. fprintf(stderr, "Error: Illegal threshold value %s.\n", optarg);
  312. help(-1, argv[0]);
  313. exit(-1);
  314. }
  315. }
  316. break;
  317. case 'l':
  318. {
  319. char *endptr;
  320. float limit;
  321. limit = strtof(optarg, &endptr);
  322. if (endptr == optarg)
  323. limit = opts->load_limit;
  324. opts->load_limit = limit;
  325. if (limit > 100.00) {
  326. fprintf(stderr, "Error: Illegal load limit value %s.\n", optarg);
  327. help(-1, argv[0]);
  328. exit(-1);
  329. }
  330. }
  331. break;
  332. case 'i':
  333. {
  334. char *endptr;
  335. unsigned long int val;
  336. val = strtoul(optarg, &endptr, 10);
  337. if (endptr != optarg)
  338. opts->short_interval = val;
  339. }
  340. break;
  341. case 'I':
  342. {
  343. char *endptr;
  344. unsigned long int val;
  345. val = strtoul(optarg, &endptr, 10);
  346. if (endptr != optarg)
  347. opts->long_interval = val;
  348. }
  349. break;
  350. case 's':
  351. if (!strcmp(optarg, "max"))
  352. opts->strategy = BIRQ_CHOOSE_MAX;
  353. else if (!strcmp(optarg, "min"))
  354. opts->strategy = BIRQ_CHOOSE_MIN;
  355. else if (!strcmp(optarg, "rnd"))
  356. opts->strategy = BIRQ_CHOOSE_RND;
  357. else {
  358. fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
  359. help(-1, argv[0]);
  360. exit(-1);
  361. }
  362. break;
  363. case 'h':
  364. help(0, argv[0]);
  365. exit(0);
  366. break;
  367. default:
  368. help(-1, argv[0]);
  369. exit(-1);
  370. break;
  371. }
  372. }
  373. /* Check threshold and load limit */
  374. if (opts->load_limit > opts->threshold) {
  375. fprintf(stderr, "Error: The load limit is greater than threshold.\n");
  376. help(-1, argv[0]);
  377. exit(-1);
  378. }
  379. return 0;
  380. }
  381. /*--------------------------------------------------------- */
  382. /* Print help message */
  383. static void help(int status, const char *argv0)
  384. {
  385. const char *name = NULL;
  386. if (!argv0)
  387. return;
  388. /* Find the basename */
  389. name = strrchr(argv0, '/');
  390. if (name)
  391. name++;
  392. else
  393. name = argv0;
  394. if (status != 0) {
  395. fprintf(stderr, "Try `%s -h' for more information.\n",
  396. name);
  397. } else {
  398. printf("Version : %s\n", VERSION);
  399. printf("Usage : %s [options]\n", name);
  400. printf("Daemon to balance IRQs.\n");
  401. printf("Options :\n");
  402. printf("\t-h, --help Print this help.\n");
  403. printf("\t-d, --debug Debug mode. Don't daemonize.\n");
  404. printf("\t-v, --verbose Be verbose.\n");
  405. printf("\t-r, --ht Enable Hyper Threading.\n");
  406. printf("\t-p <path>, --pid=<path> File to save daemon's PID to.\n");
  407. printf("\t-x <path>, --pxm=<path> Proximity config file.\n");
  408. printf("\t-O, --facility Syslog facility. Default is DAEMON.\n");
  409. printf("\t-t <float>, --threshold=<float> Threshold to consider CPU is overloaded, in percents. Default threhold is %.2f.\n",
  410. BIRQ_DEFAULT_THRESHOLD);
  411. printf("\t-l <float>, --load-limit=<float> Don't move IRQs to CPUs loaded more than this limit, in percents. Default limit is %.2f.\n",
  412. BIRQ_DEFAULT_LOAD_LIMIT);
  413. printf("\t-i <sec>, --short-interval=<sec> Short iteration interval.\n");
  414. printf("\t-I <sec>, --long-interval=<sec> Long iteration interval.\n");
  415. printf("\t-s <strategy>, --strategy=<strategy> Strategy to choose IRQ to move (min/max/rnd).\n");
  416. }
  417. }
  418. /*--------------------------------------------------------- */
  419. /* Parse config file */
  420. static int parse_config(const char *fname, struct options *opts)
  421. {
  422. return 0;
  423. }