/*
 * birq
 *
 * Balance IRQ
 *
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <signal.h>
#include <syslog.h>
#include <fcntl.h>
#include <time.h>
#include <limits.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "birq.h"
#include "lub/log.h"
#include "lub/list.h"
#include "lub/ini.h"
#include "irq.h"
#include "numa.h"
#include "cpu.h"
#include "statistics.h"
#include "balance.h"
#include "pxm.h"
  34. #ifndef VERSION
  35. #define VERSION "1.2.0"
  36. #endif
  37. /* Signal handlers */
  38. static volatile int sigterm = 0; /* Exit if 1 */
  39. static void sighandler(int signo);
  40. static void help(int status, const char *argv0);
  41. static struct options *opts_init(void);
  42. static void opts_free(struct options *opts);
  43. static int opts_parse(int argc, char *argv[], struct options *opts);
  44. static int parse_config(const char *fname, struct options *opts);
  45. /* Command line options */
  46. struct options {
  47. char *pidfile;
  48. char *cfgfile;
  49. int cfgfile_userdefined;
  50. char *pxm; /* Proximity config file */
  51. int debug; /* Don't daemonize in debug mode */
  52. int log_facility;
  53. float threshold;
  54. float load_limit;
  55. int verbose;
  56. int ht;
  57. unsigned int long_interval;
  58. unsigned int short_interval;
  59. birq_choose_strategy_e strategy;
  60. };
  61. /*--------------------------------------------------------- */
  62. int main(int argc, char **argv)
  63. {
  64. int retval = -1;
  65. struct options *opts = NULL;
  66. int pidfd = -1;
  67. unsigned int interval;
  68. /* Signal vars */
  69. struct sigaction sig_act;
  70. sigset_t sig_set;
  71. /* IRQ list. It contain all found IRQs. */
  72. lub_list_t *irqs;
  73. /* IRQs need to be balanced */
  74. lub_list_t *balance_irqs;
  75. /* CPU list. It contain all found CPUs. */
  76. lub_list_t *cpus;
  77. /* NUMA list. It contain all found NUMA nodes. */
  78. lub_list_t *numas;
  79. /* Proximity list. */
  80. lub_list_t *pxms;
  81. /* Parse command line options */
  82. opts = opts_init();
  83. if (opts_parse(argc, argv, opts))
  84. goto err;
  85. /* Parse config file */
  86. if (!access(opts->cfgfile, R_OK)) {
  87. if (parse_config(opts->cfgfile, opts))
  88. goto err;
  89. } else if (opts->cfgfile_userdefined) {
  90. fprintf(stderr, "Error: Can't find config file %s\n",
  91. opts->cfgfile);
  92. goto err;
  93. }
  94. /* Validate threshold and load limit */
  95. if (opts->load_limit > opts->threshold) {
  96. fprintf(stderr, "Error: The load limit is greater than threshold.\n");
  97. goto err;
  98. }
  99. /* Initialize syslog */
  100. openlog(argv[0], LOG_CONS, opts->log_facility);
  101. syslog(LOG_ERR, "Start daemon.\n");
  102. /* Fork the daemon */
  103. if (!opts->debug) {
  104. /* Daemonize */
  105. if (daemon(0, 0) < 0) {
  106. syslog(LOG_ERR, "Can't daemonize\n");
  107. goto err;
  108. }
  109. /* Write pidfile */
  110. if ((pidfd = open(opts->pidfile,
  111. O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
  112. S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
  113. syslog(LOG_WARNING, "Can't open pidfile %s: %s",
  114. opts->pidfile, strerror(errno));
  115. } else {
  116. char str[20];
  117. snprintf(str, sizeof(str), "%u\n", getpid());
  118. str[sizeof(str) - 1] = '\0';
  119. if (write(pidfd, str, strlen(str)) < 0)
  120. syslog(LOG_WARNING, "Can't write to %s: %s",
  121. opts->pidfile, strerror(errno));
  122. close(pidfd);
  123. }
  124. }
  125. /* Set signal handler */
  126. sigemptyset(&sig_set);
  127. sigaddset(&sig_set, SIGTERM);
  128. sigaddset(&sig_set, SIGINT);
  129. sigaddset(&sig_set, SIGQUIT);
  130. sig_act.sa_flags = 0;
  131. sig_act.sa_mask = sig_set;
  132. sig_act.sa_handler = &sighandler;
  133. sigaction(SIGTERM, &sig_act, NULL);
  134. sigaction(SIGINT, &sig_act, NULL);
  135. sigaction(SIGQUIT, &sig_act, NULL);
  136. /* Randomize */
  137. srand(time(NULL));
  138. /* Scan NUMA nodes */
  139. numas = lub_list_new(numa_list_compare);
  140. scan_numas(numas);
  141. if (opts->verbose)
  142. show_numas(numas);
  143. /* Scan CPUs */
  144. cpus = lub_list_new(cpu_list_compare);
  145. scan_cpus(cpus, opts->ht);
  146. if (opts->verbose)
  147. show_cpus(cpus);
  148. /* Prepare data structures */
  149. irqs = lub_list_new(irq_list_compare);
  150. balance_irqs = lub_list_new(irq_list_compare);
  151. /* Parse proximity file */
  152. pxms = lub_list_new(NULL);
  153. if (opts->pxm)
  154. parse_pxm_config(opts->pxm, pxms, numas);
  155. if (opts->verbose)
  156. show_pxms(pxms);
  157. /* Main loop */
  158. while (!sigterm) {
  159. lub_list_node_t *node;
  160. char outstr[10];
  161. time_t t;
  162. struct tm *tmp;
  163. t = time(NULL);
  164. tmp = localtime(&t);
  165. if (tmp) {
  166. strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
  167. printf("----[ %s ]----------------------------------------------------------------\n", outstr);
  168. }
  169. /* Rescan PCI devices for new IRQs. */
  170. scan_irqs(irqs, balance_irqs, pxms);
  171. if (opts->verbose)
  172. irq_list_show(irqs);
  173. /* Link IRQs to CPUs due to real current smp affinity. */
  174. link_irqs_to_cpus(cpus, irqs);
  175. /* Gather statistics on CPU load and number of interrupts. */
  176. gather_statistics(cpus, irqs);
  177. show_statistics(cpus, opts->verbose);
  178. /* Choose IRQ to move to another CPU. */
  179. choose_irqs_to_move(cpus, balance_irqs,
  180. opts->threshold, opts->strategy);
  181. /* Balance IRQs */
  182. if (lub_list_len(balance_irqs) != 0) {
  183. /* Set short interval to make balancing faster. */
  184. interval = opts->short_interval;
  185. /* Choose new CPU for IRQs need to be balanced. */
  186. balance(cpus, balance_irqs, opts->load_limit);
  187. /* Write new values to /proc/irq/<IRQ>/smp_affinity */
  188. apply_affinity(balance_irqs);
  189. /* Free list of balanced IRQs */
  190. while ((node = lub_list__get_tail(balance_irqs))) {
  191. lub_list_del(balance_irqs, node);
  192. lub_list_node_free(node);
  193. }
  194. } else {
  195. /* If nothing to balance */
  196. interval = opts->long_interval;
  197. }
  198. /* Wait before next iteration */
  199. sleep(interval);
  200. }
  201. /* Free data structures */
  202. irq_list_free(irqs);
  203. lub_list_free(balance_irqs);
  204. cpu_list_free(cpus);
  205. numa_list_free(numas);
  206. pxm_list_free(pxms);
  207. retval = 0;
  208. err:
  209. /* Remove pidfile */
  210. if (pidfd >= 0) {
  211. if (unlink(opts->pidfile) < 0) {
  212. syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
  213. opts->pidfile, strerror(errno));
  214. }
  215. }
  216. /* Free command line options */
  217. opts_free(opts);
  218. syslog(LOG_ERR, "Stop daemon.\n");
  219. return retval;
  220. }
  221. /*--------------------------------------------------------- */
  222. /*
  223. * Signal handler for temination signals (like SIGTERM, SIGINT, ...)
  224. */
  225. static void sighandler(int signo)
  226. {
  227. sigterm = 1;
  228. signo = signo; /* Happy compiler */
  229. }
  230. /*--------------------------------------------------------- */
  231. /* Initialize option structure by defaults */
  232. static struct options *opts_init(void)
  233. {
  234. struct options *opts = NULL;
  235. opts = malloc(sizeof(*opts));
  236. assert(opts);
  237. opts->debug = 0; /* daemonize by default */
  238. opts->pidfile = strdup(BIRQ_PIDFILE);
  239. opts->cfgfile = strdup(BIRQ_CFGFILE);
  240. opts->cfgfile_userdefined = 0;
  241. opts->pxm = NULL;
  242. opts->log_facility = LOG_DAEMON;
  243. opts->threshold = BIRQ_DEFAULT_THRESHOLD;
  244. opts->load_limit = BIRQ_DEFAULT_LOAD_LIMIT;
  245. opts->verbose = 0;
  246. opts->ht = 0;
  247. opts->long_interval = BIRQ_LONG_INTERVAL;
  248. opts->short_interval = BIRQ_SHORT_INTERVAL;
  249. opts->strategy = BIRQ_CHOOSE_RND;
  250. return opts;
  251. }
  252. /*--------------------------------------------------------- */
  253. /* Free option structure */
  254. static void opts_free(struct options *opts)
  255. {
  256. if (opts->pidfile)
  257. free(opts->pidfile);
  258. if (opts->cfgfile)
  259. free(opts->cfgfile);
  260. if (opts->pxm)
  261. free(opts->pxm);
  262. free(opts);
  263. }
  264. /* Parse 'strategy' option */
  265. static int opt_parse_strategy(const char *optarg, birq_choose_strategy_e *strategy)
  266. {
  267. assert(optarg);
  268. assert(strategy);
  269. if (!strcmp(optarg, "max"))
  270. *strategy = BIRQ_CHOOSE_MAX;
  271. else if (!strcmp(optarg, "min"))
  272. *strategy = BIRQ_CHOOSE_MIN;
  273. else if (!strcmp(optarg, "rnd"))
  274. *strategy = BIRQ_CHOOSE_RND;
  275. else {
  276. fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
  277. return -1;
  278. }
  279. return 0;
  280. }
  281. /* Parse 'threshold' and 'load-limit' options */
  282. static int opt_parse_threshold(const char *optarg, float *threshold)
  283. {
  284. char *endptr;
  285. float thresh;
  286. assert(optarg);
  287. assert(threshold);
  288. thresh = strtof(optarg, &endptr);
  289. if (endptr == optarg) {
  290. fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
  291. return -1;
  292. }
  293. if (thresh > 100.00) {
  294. fprintf(stderr, "Error: The threshold/load-limit value %s > 100.\n", optarg);
  295. return -1;
  296. }
  297. *threshold = thresh;
  298. return 0;
  299. }
  300. /* Parse 'short-interval' and 'long-interval' options */
  301. static int opt_parse_interval(const char *optarg, unsigned int *interval)
  302. {
  303. char *endptr;
  304. unsigned long int val;
  305. assert(optarg);
  306. assert(interval);
  307. val = strtoul(optarg, &endptr, 10);
  308. if (endptr == optarg) {
  309. fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
  310. return -1;
  311. }
  312. *interval = val;
  313. return 0;
  314. }
  315. /*--------------------------------------------------------- */
  316. /* Parse command line options */
  317. static int opts_parse(int argc, char *argv[], struct options *opts)
  318. {
  319. static const char *shortopts = "hp:c:dO:t:l:vri:I:s:x:";
  320. #ifdef HAVE_GETOPT_H
  321. static const struct option longopts[] = {
  322. {"help", 0, NULL, 'h'},
  323. {"pid", 1, NULL, 'p'},
  324. {"conf", 1, NULL, 'c'},
  325. {"debug", 0, NULL, 'd'},
  326. {"facility", 1, NULL, 'O'},
  327. {"threshold", 1, NULL, 't'},
  328. {"load-limit", 1, NULL, 't'},
  329. {"verbose", 0, NULL, 'v'},
  330. {"ht", 0, NULL, 'r'},
  331. {"short-interval", 1, NULL, 'i'},
  332. {"long-interval", 1, NULL, 'I'},
  333. {"strategy", 1, NULL, 's'},
  334. {"pxm", 1, NULL, 'x'},
  335. {NULL, 0, NULL, 0}
  336. };
  337. #endif
  338. optind = 1;
  339. while(1) {
  340. int opt;
  341. #ifdef HAVE_GETOPT_H
  342. opt = getopt_long(argc, argv, shortopts, longopts, NULL);
  343. #else
  344. opt = getopt(argc, argv, shortopts);
  345. #endif
  346. if (-1 == opt)
  347. break;
  348. switch (opt) {
  349. case 'p':
  350. if (opts->pidfile)
  351. free(opts->pidfile);
  352. opts->pidfile = strdup(optarg);
  353. break;
  354. case 'c':
  355. if (opts->cfgfile)
  356. free(opts->cfgfile);
  357. opts->cfgfile = strdup(optarg);
  358. opts->cfgfile_userdefined = 1;
  359. break;
  360. case 'x':
  361. if (opts->pxm)
  362. free(opts->pxm);
  363. opts->pxm = strdup(optarg);
  364. break;
  365. case 'd':
  366. opts->debug = 1;
  367. break;
  368. case 'v':
  369. opts->verbose = 1;
  370. break;
  371. case 'r':
  372. opts->ht = 1;
  373. break;
  374. case 'O':
  375. if (lub_log_facility(optarg, &(opts->log_facility))) {
  376. fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
  377. exit(-1);
  378. }
  379. break;
  380. case 't':
  381. if (opt_parse_threshold(optarg, &opts->threshold))
  382. exit(-1);
  383. break;
  384. case 'l':
  385. if (opt_parse_threshold(optarg, &opts->load_limit))
  386. exit(-1);
  387. break;
  388. case 'i':
  389. if (opt_parse_interval(optarg, &opts->short_interval))
  390. exit(-1);
  391. break;
  392. case 'I':
  393. if (opt_parse_interval(optarg, &opts->long_interval))
  394. exit(-1);
  395. break;
  396. case 's':
  397. if (opt_parse_strategy(optarg, &opts->strategy) < 0)
  398. exit(-1);
  399. break;
  400. case 'h':
  401. help(0, argv[0]);
  402. exit(0);
  403. break;
  404. default:
  405. help(-1, argv[0]);
  406. exit(-1);
  407. break;
  408. }
  409. }
  410. return 0;
  411. }
  412. /*--------------------------------------------------------- */
  413. /* Print help message */
  414. static void help(int status, const char *argv0)
  415. {
  416. const char *name = NULL;
  417. if (!argv0)
  418. return;
  419. /* Find the basename */
  420. name = strrchr(argv0, '/');
  421. if (name)
  422. name++;
  423. else
  424. name = argv0;
  425. if (status != 0) {
  426. fprintf(stderr, "Try `%s -h' for more information.\n",
  427. name);
  428. } else {
  429. printf("Version : %s\n", VERSION);
  430. printf("Usage : %s [options]\n", name);
  431. printf("Daemon to balance IRQs.\n");
  432. printf("Options :\n");
  433. printf("\t-h, --help Print this help.\n");
  434. printf("\t-d, --debug Debug mode. Don't daemonize.\n");
  435. printf("\t-v, --verbose Be verbose.\n");
  436. printf("\t-r, --ht Enable Hyper Threading.\n");
  437. printf("\t-p <path>, --pid=<path> File to save daemon's PID to (" BIRQ_PIDFILE ").\n");
  438. printf("\t-c <path>, --conf=<path> Config file (" BIRQ_CFGFILE ").\n");
  439. printf("\t-x <path>, --pxm=<path> Proximity config file.\n");
  440. printf("\t-O, --facility Syslog facility (DAEMON).\n");
  441. printf("\t-t <float>, --threshold=<float> Threshold to consider CPU is overloaded, in percents. Default threhold is %.2f.\n",
  442. BIRQ_DEFAULT_THRESHOLD);
  443. printf("\t-l <float>, --load-limit=<float> Don't move IRQs to CPUs loaded more than this limit, in percents. Default limit is %.2f.\n",
  444. BIRQ_DEFAULT_LOAD_LIMIT);
  445. printf("\t-i <sec>, --short-interval=<sec> Short iteration interval.\n");
  446. printf("\t-I <sec>, --long-interval=<sec> Long iteration interval.\n");
  447. printf("\t-s <strategy>, --strategy=<strategy> Strategy to choose IRQ to move (min/max/rnd).\n");
  448. }
  449. }
  450. /*--------------------------------------------------------- */
  451. /* Parse config file */
  452. static int parse_config(const char *fname, struct options *opts)
  453. {
  454. lub_ini_t *ini;
  455. const char *tmp = NULL;
  456. ini = lub_ini_new();
  457. if (lub_ini_parse_file(ini, opts->cfgfile)) {
  458. lub_ini_free(ini);
  459. return -1;
  460. }
  461. if ((tmp = lub_ini_find(ini, "strategy")))
  462. if (opt_parse_strategy(tmp, &opts->strategy) < 0)
  463. goto err;
  464. if ((tmp = lub_ini_find(ini, "threshold")))
  465. if (opt_parse_threshold(tmp, &opts->threshold))
  466. goto err;
  467. if ((tmp = lub_ini_find(ini, "load-limit")))
  468. if (opt_parse_threshold(tmp, &opts->load_limit))
  469. goto err;
  470. if ((tmp = lub_ini_find(ini, "short-interval")))
  471. if (opt_parse_interval(tmp, &opts->short_interval))
  472. goto err;
  473. if ((tmp = lub_ini_find(ini, "long-interval")))
  474. if (opt_parse_interval(tmp, &opts->long_interval))
  475. goto err;
  476. return 0;
  477. err:
  478. lub_ini_free(ini);
  479. return -1;
  480. }