birq.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. /*
  2. * birq
  3. *
  4. * Balance IRQ
  5. *
  6. */
  7. #ifdef HAVE_CONFIG_H
  8. #include "config.h"
  9. #endif /* HAVE_CONFIG_H */
  10. #include <stdio.h>
  11. #include <stdlib.h>
  12. #include <unistd.h>
  13. #include <sys/types.h>
  14. #include <errno.h>
  15. #include <assert.h>
  16. #include <string.h>
  17. #include <signal.h>
  18. #include <syslog.h>
  19. #include <fcntl.h>
  20. #include <time.h>
  21. #ifdef HAVE_GETOPT_H
  22. #include <getopt.h>
  23. #endif
  24. #include "birq.h"
  25. #include "lub/log.h"
  26. #include "lub/list.h"
  27. #include "lub/ini.h"
  28. #include "irq.h"
  29. #include "numa.h"
  30. #include "cpu.h"
  31. #include "statistics.h"
  32. #include "balance.h"
  33. #include "pxm.h"
  34. #ifndef VERSION
  35. #define VERSION "1.2.0"
  36. #endif
  37. /* Signal handlers */
  38. static volatile int sigterm = 0; /* Exit if 1 */
  39. static void sighandler(int signo);
  40. static volatile int sighup = 0; /* Re-read config file */
  41. static void sighup_handler(int signo);
  42. static void help(int status, const char *argv0);
  43. static struct options *opts_init(void);
  44. static void opts_free(struct options *opts);
  45. static int opts_parse(int argc, char *argv[], struct options *opts);
  46. static int parse_config(const char *fname, struct options *opts);
  47. /* Command line options */
  48. struct options {
  49. char *pidfile;
  50. char *cfgfile;
  51. int cfgfile_userdefined;
  52. char *pxm; /* Proximity config file */
  53. int debug; /* Don't daemonize in debug mode */
  54. int log_facility;
  55. float threshold;
  56. float load_limit;
  57. int verbose;
  58. int ht;
  59. int non_local_cpus;
  60. unsigned int long_interval;
  61. unsigned int short_interval;
  62. birq_choose_strategy_e strategy;
  63. cpumask_t exclude_cpus;
  64. };
  65. /*--------------------------------------------------------- */
  66. int main(int argc, char **argv)
  67. {
  68. int retval = -1;
  69. struct options *opts = NULL;
  70. int pidfd = -1;
  71. unsigned int interval;
  72. /* Signal vars */
  73. struct sigaction sig_act;
  74. sigset_t sig_set;
  75. /* IRQ list. It contain all found IRQs. */
  76. lub_list_t *irqs;
  77. /* IRQs need to be balanced */
  78. lub_list_t *balance_irqs;
  79. /* CPU list. It contain all found CPUs. */
  80. lub_list_t *cpus;
  81. /* NUMA list. It contain all found NUMA nodes. */
  82. lub_list_t *numas;
  83. /* Proximity list. */
  84. lub_list_t *pxms;
  85. /* Parse command line options */
  86. opts = opts_init();
  87. if (opts_parse(argc, argv, opts))
  88. goto err;
  89. /* Parse config file */
  90. if (!access(opts->cfgfile, R_OK)) {
  91. if (parse_config(opts->cfgfile, opts))
  92. goto err;
  93. } else if (opts->cfgfile_userdefined) {
  94. fprintf(stderr, "Error: Can't find config file %s\n",
  95. opts->cfgfile);
  96. goto err;
  97. }
  98. /* Validate threshold and load limit */
  99. /* if (opts->load_limit > opts->threshold) {
  100. fprintf(stderr, "Error: The load limit is greater than threshold.\n");
  101. goto err;
  102. }
  103. */
  104. /* Initialize syslog */
  105. openlog(argv[0], LOG_CONS, opts->log_facility);
  106. syslog(LOG_ERR, "Start daemon.\n");
  107. /* Fork the daemon */
  108. if (!opts->debug) {
  109. /* Daemonize */
  110. if (daemon(0, 0) < 0) {
  111. syslog(LOG_ERR, "Can't daemonize\n");
  112. goto err;
  113. }
  114. /* Write pidfile */
  115. if ((pidfd = open(opts->pidfile,
  116. O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
  117. S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
  118. syslog(LOG_WARNING, "Can't open pidfile %s: %s\n",
  119. opts->pidfile, strerror(errno));
  120. } else {
  121. char str[20];
  122. snprintf(str, sizeof(str), "%u\n", getpid());
  123. str[sizeof(str) - 1] = '\0';
  124. if (write(pidfd, str, strlen(str)) < 0)
  125. syslog(LOG_WARNING, "Can't write to %s: %s\n",
  126. opts->pidfile, strerror(errno));
  127. close(pidfd);
  128. }
  129. }
  130. /* Set signal handler */
  131. sigemptyset(&sig_set);
  132. sigaddset(&sig_set, SIGTERM);
  133. sigaddset(&sig_set, SIGINT);
  134. sigaddset(&sig_set, SIGQUIT);
  135. sig_act.sa_flags = 0;
  136. sig_act.sa_mask = sig_set;
  137. sig_act.sa_handler = &sighandler;
  138. sigaction(SIGTERM, &sig_act, NULL);
  139. sigaction(SIGINT, &sig_act, NULL);
  140. sigaction(SIGQUIT, &sig_act, NULL);
  141. /* SIGHUP handler */
  142. sigemptyset(&sig_set);
  143. sigaddset(&sig_set, SIGHUP);
  144. sig_act.sa_flags = 0;
  145. sig_act.sa_mask = sig_set;
  146. sig_act.sa_handler = &sighup_handler;
  147. sigaction(SIGHUP, &sig_act, NULL);
  148. /* Randomize */
  149. srand(time(NULL));
  150. /* Scan NUMA nodes */
  151. numas = lub_list_new(numa_list_compare);
  152. scan_numas(numas);
  153. if (opts->verbose)
  154. show_numas(numas);
  155. /* Scan CPUs */
  156. cpus = lub_list_new(cpu_list_compare);
  157. scan_cpus(cpus, opts->ht);
  158. if (opts->verbose)
  159. show_cpus(cpus);
  160. /* Prepare data structures */
  161. irqs = lub_list_new(irq_list_compare);
  162. balance_irqs = lub_list_new(irq_list_compare);
  163. /* Parse proximity file */
  164. pxms = lub_list_new(NULL);
  165. if (opts->pxm)
  166. parse_pxm_config(opts->pxm, pxms, numas);
  167. if (opts->verbose)
  168. show_pxms(pxms);
  169. /* Main loop */
  170. while (!sigterm) {
  171. lub_list_node_t *node;
  172. char outstr[10];
  173. time_t t;
  174. struct tm *tmp;
  175. t = time(NULL);
  176. tmp = localtime(&t);
  177. if (tmp) {
  178. strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
  179. printf("----[ %s ]----------------------------------------------------------------\n", outstr);
  180. }
  181. /* Re-read config file on SIGHUP */
  182. if (sighup) {
  183. if (!access(opts->cfgfile, R_OK)) {
  184. syslog(LOG_ERR, "Re-reading config file\n");
  185. if (parse_config(opts->cfgfile, opts))
  186. syslog(LOG_ERR, "Error while config file parsing.\n");
  187. } else if (opts->cfgfile_userdefined)
  188. syslog(LOG_ERR, "Can't find config file.\n");
  189. sighup = 0;
  190. }
  191. /* Rescan PCI devices for new IRQs. */
  192. scan_irqs(irqs, balance_irqs, pxms);
  193. if (opts->verbose)
  194. irq_list_show(irqs);
  195. /* Link IRQs to CPUs due to real current smp affinity. */
  196. link_irqs_to_cpus(cpus, irqs);
  197. /* Gather statistics on CPU load and number of interrupts. */
  198. gather_statistics(cpus, irqs);
  199. show_statistics(cpus, opts->verbose);
  200. /* Choose IRQ to move to another CPU. */
  201. choose_irqs_to_move(cpus, balance_irqs,
  202. opts->threshold, opts->strategy, &opts->exclude_cpus);
  203. /* Balance IRQs */
  204. if (lub_list_len(balance_irqs) != 0) {
  205. /* Set short interval to make balancing faster. */
  206. interval = opts->short_interval;
  207. /* Choose new CPU for IRQs need to be balanced. */
  208. balance(cpus, balance_irqs, opts->load_limit,
  209. &opts->exclude_cpus, opts->non_local_cpus);
  210. /* Write new values to /proc/irq/<IRQ>/smp_affinity */
  211. apply_affinity(balance_irqs);
  212. /* Free list of balanced IRQs */
  213. while ((node = lub_list__get_tail(balance_irqs))) {
  214. lub_list_del(balance_irqs, node);
  215. lub_list_node_free(node);
  216. }
  217. } else {
  218. /* If nothing to balance */
  219. interval = opts->long_interval;
  220. }
  221. /* Wait before next iteration */
  222. sleep(interval);
  223. }
  224. /* Free data structures */
  225. irq_list_free(irqs);
  226. lub_list_free(balance_irqs);
  227. cpu_list_free(cpus);
  228. numa_list_free(numas);
  229. pxm_list_free(pxms);
  230. retval = 0;
  231. err:
  232. /* Remove pidfile */
  233. if (pidfd >= 0) {
  234. if (unlink(opts->pidfile) < 0) {
  235. syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
  236. opts->pidfile, strerror(errno));
  237. }
  238. }
  239. /* Free command line options */
  240. opts_free(opts);
  241. syslog(LOG_ERR, "Stop daemon.\n");
  242. return retval;
  243. }
  244. /*--------------------------------------------------------- */
  245. /* Signal handler for temination signals (like SIGTERM, SIGINT, ...) */
  246. static void sighandler(int signo)
  247. {
  248. sigterm = 1;
  249. signo = signo; /* Happy compiler */
  250. }
  251. /*--------------------------------------------------------- */
  252. /* Re-read config file on SIGHUP */
  253. static void sighup_handler(int signo)
  254. {
  255. sighup = 1;
  256. signo = signo; /* Happy compiler */
  257. }
  258. /*--------------------------------------------------------- */
  259. /* Initialize option structure by defaults */
  260. static struct options *opts_init(void)
  261. {
  262. struct options *opts = NULL;
  263. opts = malloc(sizeof(*opts));
  264. assert(opts);
  265. opts->debug = 0; /* daemonize by default */
  266. opts->pidfile = strdup(BIRQ_PIDFILE);
  267. opts->cfgfile = strdup(BIRQ_CFGFILE);
  268. opts->cfgfile_userdefined = 0;
  269. opts->pxm = NULL;
  270. opts->log_facility = LOG_DAEMON;
  271. opts->threshold = BIRQ_DEFAULT_THRESHOLD;
  272. opts->load_limit = BIRQ_DEFAULT_LOAD_LIMIT;
  273. opts->verbose = 0;
  274. opts->ht = 1; /* It's 1 since 1.5.0 */
  275. opts->non_local_cpus = 0;
  276. opts->long_interval = BIRQ_LONG_INTERVAL;
  277. opts->short_interval = BIRQ_SHORT_INTERVAL;
  278. opts->strategy = BIRQ_CHOOSE_RND;
  279. cpus_init(opts->exclude_cpus);
  280. cpus_clear(opts->exclude_cpus);
  281. return opts;
  282. }
  283. /*--------------------------------------------------------- */
  284. /* Free option structure */
  285. static void opts_free(struct options *opts)
  286. {
  287. if (opts->pidfile)
  288. free(opts->pidfile);
  289. if (opts->cfgfile)
  290. free(opts->cfgfile);
  291. if (opts->pxm)
  292. free(opts->pxm);
  293. cpus_free(opts->exclude_cpus);
  294. free(opts);
  295. }
  296. /* Parse y/n options */
  297. static int opt_parse_y_n(const char *optarg, int *flag)
  298. {
  299. assert(optarg);
  300. assert(flag);
  301. if (!strcmp(optarg, "y"))
  302. *flag = 1;
  303. else if (!strcmp(optarg, "yes"))
  304. *flag = 1;
  305. else if (!strcmp(optarg, "n"))
  306. *flag = 0;
  307. else if (!strcmp(optarg, "no"))
  308. *flag = 0;
  309. else {
  310. fprintf(stderr, "Error: Illegal flag value %s.\n", optarg);
  311. return -1;
  312. }
  313. return 0;
  314. }
  315. /* Parse 'strategy' option */
  316. static int opt_parse_strategy(const char *optarg, birq_choose_strategy_e *strategy)
  317. {
  318. assert(optarg);
  319. assert(strategy);
  320. if (!strcmp(optarg, "max"))
  321. *strategy = BIRQ_CHOOSE_MAX;
  322. else if (!strcmp(optarg, "min"))
  323. *strategy = BIRQ_CHOOSE_MIN;
  324. else if (!strcmp(optarg, "rnd"))
  325. *strategy = BIRQ_CHOOSE_RND;
  326. else {
  327. fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
  328. return -1;
  329. }
  330. return 0;
  331. }
  332. /* Parse 'threshold' and 'load-limit' options */
  333. static int opt_parse_threshold(const char *optarg, float *threshold)
  334. {
  335. char *endptr;
  336. float thresh;
  337. assert(optarg);
  338. assert(threshold);
  339. thresh = strtof(optarg, &endptr);
  340. if (endptr == optarg) {
  341. fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
  342. return -1;
  343. }
  344. if (thresh > 100.00) {
  345. fprintf(stderr, "Error: The threshold/load-limit value %s > 100.\n", optarg);
  346. return -1;
  347. }
  348. *threshold = thresh;
  349. return 0;
  350. }
  351. /* Parse 'short-interval' and 'long-interval' options */
  352. static int opt_parse_interval(const char *optarg, unsigned int *interval)
  353. {
  354. char *endptr;
  355. unsigned long int val;
  356. assert(optarg);
  357. assert(interval);
  358. val = strtoul(optarg, &endptr, 10);
  359. if (endptr == optarg) {
  360. fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
  361. return -1;
  362. }
  363. *interval = val;
  364. return 0;
  365. }
  366. /*--------------------------------------------------------- */
  367. /* Parse command line options */
  368. static int opts_parse(int argc, char *argv[], struct options *opts)
  369. {
  370. static const char *shortopts = "hp:c:dO:t:l:vri:I:s:x:";
  371. #ifdef HAVE_GETOPT_H
  372. static const struct option longopts[] = {
  373. {"help", 0, NULL, 'h'},
  374. {"pid", 1, NULL, 'p'},
  375. {"conf", 1, NULL, 'c'},
  376. {"debug", 0, NULL, 'd'},
  377. {"facility", 1, NULL, 'O'},
  378. {"threshold", 1, NULL, 't'},
  379. {"load-limit", 1, NULL, 't'},
  380. {"verbose", 0, NULL, 'v'},
  381. {"ht", 0, NULL, 'r'},
  382. {"short-interval", 1, NULL, 'i'},
  383. {"long-interval", 1, NULL, 'I'},
  384. {"strategy", 1, NULL, 's'},
  385. {"pxm", 1, NULL, 'x'},
  386. {NULL, 0, NULL, 0}
  387. };
  388. #endif
  389. optind = 1;
  390. while(1) {
  391. int opt;
  392. #ifdef HAVE_GETOPT_H
  393. opt = getopt_long(argc, argv, shortopts, longopts, NULL);
  394. #else
  395. opt = getopt(argc, argv, shortopts);
  396. #endif
  397. if (-1 == opt)
  398. break;
  399. switch (opt) {
  400. case 'p':
  401. if (opts->pidfile)
  402. free(opts->pidfile);
  403. opts->pidfile = strdup(optarg);
  404. break;
  405. case 'c':
  406. if (opts->cfgfile)
  407. free(opts->cfgfile);
  408. opts->cfgfile = strdup(optarg);
  409. opts->cfgfile_userdefined = 1;
  410. break;
  411. case 'x':
  412. if (opts->pxm)
  413. free(opts->pxm);
  414. opts->pxm = strdup(optarg);
  415. break;
  416. case 'd':
  417. opts->debug = 1;
  418. break;
  419. case 'v':
  420. opts->verbose = 1;
  421. break;
  422. case 'r':
  423. fprintf(stderr, "Warning: The -r option is obsoleted. The HT is enabled by default.\n");
  424. break;
  425. case 'O':
  426. if (lub_log_facility(optarg, &(opts->log_facility))) {
  427. fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
  428. exit(-1);
  429. }
  430. break;
  431. case 't':
  432. if (opt_parse_threshold(optarg, &opts->threshold))
  433. exit(-1);
  434. break;
  435. case 'l':
  436. if (opt_parse_threshold(optarg, &opts->load_limit))
  437. exit(-1);
  438. break;
  439. case 'i':
  440. if (opt_parse_interval(optarg, &opts->short_interval))
  441. exit(-1);
  442. break;
  443. case 'I':
  444. if (opt_parse_interval(optarg, &opts->long_interval))
  445. exit(-1);
  446. break;
  447. case 's':
  448. if (opt_parse_strategy(optarg, &opts->strategy) < 0)
  449. exit(-1);
  450. break;
  451. case 'h':
  452. help(0, argv[0]);
  453. exit(0);
  454. break;
  455. default:
  456. help(-1, argv[0]);
  457. exit(-1);
  458. break;
  459. }
  460. }
  461. return 0;
  462. }
  463. /*--------------------------------------------------------- */
  464. /* Print help message */
  465. static void help(int status, const char *argv0)
  466. {
  467. const char *name = NULL;
  468. if (!argv0)
  469. return;
  470. /* Find the basename */
  471. name = strrchr(argv0, '/');
  472. if (name)
  473. name++;
  474. else
  475. name = argv0;
  476. if (status != 0) {
  477. fprintf(stderr, "Try `%s -h' for more information.\n",
  478. name);
  479. } else {
  480. printf("Version : %s\n", VERSION);
  481. printf("Usage : %s [options]\n", name);
  482. printf("Daemon to balance IRQs.\n");
  483. printf("Options :\n");
  484. printf("\t-h, --help Print this help.\n");
  485. printf("\t-d, --debug Debug mode. Don't daemonize.\n");
  486. printf("\t-v, --verbose Be verbose.\n");
  487. printf("\t-r, --ht This option is obsoleted. The Hyper Threading is enabled by default.\n");
  488. printf("\t-p <path>, --pid=<path> File to save daemon's PID to (" BIRQ_PIDFILE ").\n");
  489. printf("\t-c <path>, --conf=<path> Config file (" BIRQ_CFGFILE ").\n");
  490. printf("\t-x <path>, --pxm=<path> Proximity config file.\n");
  491. printf("\t-O, --facility Syslog facility (DAEMON).\n");
  492. printf("\t-t <float>, --threshold=<float> Threshold to consider CPU is overloaded, in percents. Default threhold is %.2f.\n",
  493. BIRQ_DEFAULT_THRESHOLD);
  494. printf("\t-l <float>, --load-limit=<float> Don't move IRQs to CPUs loaded more than this limit, in percents. Default limit is %.2f.\n",
  495. BIRQ_DEFAULT_LOAD_LIMIT);
  496. printf("\t-i <sec>, --short-interval=<sec> Short iteration interval.\n");
  497. printf("\t-I <sec>, --long-interval=<sec> Long iteration interval.\n");
  498. printf("\t-s <strategy>, --strategy=<strategy> Strategy to choose IRQ to move (min/max/rnd).\n");
  499. }
  500. }
  501. /*--------------------------------------------------------- */
  502. /* Parse config file */
  503. static int parse_config(const char *fname, struct options *opts)
  504. {
  505. int ret = -1; /* Pessimistic retval */
  506. lub_ini_t *ini;
  507. const char *tmp = NULL;
  508. unsigned int mask_opts_num = 0;
  509. ini = lub_ini_new();
  510. if (lub_ini_parse_file(ini, fname)) {
  511. lub_ini_free(ini);
  512. return -1;
  513. }
  514. if ((tmp = lub_ini_find(ini, "strategy")))
  515. if (opt_parse_strategy(tmp, &opts->strategy) < 0)
  516. goto err;
  517. if ((tmp = lub_ini_find(ini, "threshold")))
  518. if (opt_parse_threshold(tmp, &opts->threshold))
  519. goto err;
  520. if ((tmp = lub_ini_find(ini, "load-limit")))
  521. if (opt_parse_threshold(tmp, &opts->load_limit))
  522. goto err;
  523. if ((tmp = lub_ini_find(ini, "short-interval")))
  524. if (opt_parse_interval(tmp, &opts->short_interval))
  525. goto err;
  526. if ((tmp = lub_ini_find(ini, "long-interval")))
  527. if (opt_parse_interval(tmp, &opts->long_interval))
  528. goto err;
  529. if ((tmp = lub_ini_find(ini, "exclude-cpus"))) {
  530. if (cpumask_parse_user(tmp, strlen(tmp), opts->exclude_cpus)) {
  531. fprintf(stderr, "Error: Can't parse exclude-cpus option \"%s\".\n", tmp);
  532. goto err;
  533. }
  534. mask_opts_num++;
  535. }
  536. if ((tmp = lub_ini_find(ini, "use-cpus"))) {
  537. if (cpumask_parse_user(tmp, strlen(tmp), opts->exclude_cpus)) {
  538. fprintf(stderr, "Error: Can't parse use-cpus option \"%s\".\n", tmp);
  539. goto err;
  540. }
  541. /* The exclude-cpus option was implemented first. So the
  542. * programm is based on it. The use-cpus options really
  543. * says to exclude all the cpus that is not within bitmask.
  544. * So invert use-cpus and we'll get exclude-cpus mask.
  545. */
  546. cpus_complement(opts->exclude_cpus, opts->exclude_cpus);
  547. mask_opts_num++;
  548. }
  549. /* Check if cpus mask was defined more than once. It's error. */
  550. if (mask_opts_num > 1) {
  551. fprintf(stderr, "Error: Can't use use-cpus and exclude-cpus options together.\n");
  552. goto err;
  553. }
  554. if ((tmp = lub_ini_find(ini, "ht")))
  555. if (opt_parse_y_n(tmp, &opts->ht))
  556. goto err;
  557. if ((tmp = lub_ini_find(ini, "non-local-cpus")))
  558. if (opt_parse_y_n(tmp, &opts->non_local_cpus))
  559. goto err;
  560. ret = 0;
  561. err:
  562. lub_ini_free(ini);
  563. return ret;
  564. }