source: trunk/packages/xen-common/xen-common/tools/xenmon/xenbaked.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 17 years ago

Add xen and xen-common

File size: 30.6 KB
RevLine 
[34]1/******************************************************************************
2 * tools/xenbaked.c
3 *
4 * Tool for collecting raw trace buffer data from Xen and
5 *  performing some accumulation operations and other processing
6 *  on it.
7 *
8 * Copyright (C) 2004 by Intel Research Cambridge
9 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
10 * Copyright (C) 2006 by Hewlett Packard Fort Collins
11 *
12 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
13 *          Rob Gardner, rob.gardner@hp.com
14 *          Lucy Cherkasova, lucy.cherkasova.hp.com
15 * Much code based on xentrace, authored by Mark Williamson, mark.a.williamson@intel.com
16 * Date:   November, 2005
17 *
18 *  This program is free software; you can redistribute it and/or modify
19 *  it under the terms of the GNU General Public License as published by
20 *  the Free Software Foundation; under version 2 of the License.
21 *
22 *  This program is distributed in the hope that it will be useful,
23 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
24 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 *  GNU General Public License for more details.
26 *
27 *  You should have received a copy of the GNU General Public License
28 *  along with this program; if not, write to the Free Software
29 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30 */
31
32#include <time.h>
33#include <stdlib.h>
34#include <stdio.h>
35#include <sys/mman.h>
36#include <fcntl.h>
37#include <unistd.h>
38#include <errno.h>
39#include <argp.h>
40#include <signal.h>
41#include <xenctrl.h>
42#include <xen/xen.h>
43#include <string.h>
44#include <sys/select.h>
45
/*
 * PERROR - print "ERROR: <message> (<errno> = <strerror text>)" to stderr.
 * @_m: printf-style format; must be a string literal because it is pasted
 *      between two other literals.
 * @_a: optional arguments for the format.
 *
 * errno is captured before fprintf() runs and restored afterwards, so the
 * caller still sees the value that triggered the report.
 */
#define PERROR(_m, _a...)                                       \
do {                                                            \
    int __saved_errno = errno;                                  \
    fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,       \
            __saved_errno, strerror(__saved_errno));            \
    errno = __saved_errno;                                      \
} while (0)
53
/* Minimal atomic_t and plain (non-atomic) read accessor, defined ahead of
 * the <xen/trace.h> include that follows -- presumably because that header
 * refers to them (TODO confirm). */
typedef struct { int counter; } atomic_t;
#define _atomic_read(v)         ((v).counter)
56
57#include <xen/trace.h>
58#include "xenbaked.h"
59
60extern FILE *stderr;
61
62/***** Compile time configuration of defaults ********************************/
63
/* Default for opts.new_data_thresh (the -t option): number of waiting
 * records that triggers logging to the output. */
#define NEW_DATA_THRESH 1

/* Default for opts.poll_sleep (the -s option): milliseconds to sleep between
 * checks of the trace buffers when polling. */
#define POLL_SLEEP_MILLIS 100

/* Default for opts.ms_per_sample (the -m option): length, in milliseconds,
 * of the time period represented by each sample. */
#define MS_PER_SAMPLE 100

/* Fallback CPU frequency in MHz.  MHZ expands to nothing and only labels
 * the unit.  main() stores this in opts.cpu_freq; get_num_cpus() later
 * overwrites it with the value reported by Xen. */
#define MHZ
#define CPU_FREQ 2660 MHZ
76
77/***** The code **************************************************************/
78
/* Run-time settings: defaults are filled in by main(), then overridden by
 * the command-line parser (cmd_parser). */
typedef struct settings_st {
    char *outfile;                  /* first positional argument, if any    */
    struct timespec poll_sleep;     /* sleep used by wait_for_event() when
                                     * no event channel is available        */
    unsigned long new_data_thresh;  /* value of the -t option               */
    unsigned long ms_per_sample;    /* sample length in milliseconds (-m)   */
    double cpu_freq;                /* CPU frequency in MHz; used to convert
                                     * record cycle counts to nanoseconds   */
} settings_t;

/* The single global settings instance. */
settings_t opts;
88
int interrupted = 0; /* set by close_handler() on SIGHUP, SIGTERM or SIGINT */
int rec_count = 0;   /* trace records handled by process_record()          */
int wakeups = 0;     /* returns from wait_for_event() in monitor_tbufs()   */
time_t start_time;   /* wall-clock time recorded at the start of main()    */
int dom0_flips = 0;  /* bumped by qos_count_packets(); reset to 0 when
                      * domain 0 is switched in                            */

/* qos data of the cpu whose record is being processed; set on entry to
 * process_record() and reset to NULL when it returns */
_new_qos_data *new_qos;
/* one shared-memory qos area per cpu, set up by alloc_qos_data() */
_new_qos_data **cpu_qos_data;

/* cpu number and timestamp (ns) of the record currently being processed */
int global_cpu;
uint64_t global_now;

// array of currently running domains, indexed by cpu
int *running = NULL;

// number of cpu's on this platform
int NCPU = 0;
106
107
108void init_current(int ncpu)
109{
110  running = calloc(ncpu, sizeof(int));
111  NCPU = ncpu;
112  printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
113}
114
115int is_current(int domain, int cpu)
116{
117  //  int i;
118 
119  //  for (i=0; i<NCPU; i++)
120    if (running[cpu] == domain)
121      return 1;
122  return 0;
123}
124
125
126// return the domain that's currently running on the given cpu
127int current(int cpu)
128{
129  return running[cpu];
130}
131
132void set_current(int cpu, int domain)
133{
134  running[cpu] = domain;
135}
136
137
138
139void close_handler(int signal)
140{
141    interrupted = 1;
142}
143
#if 0
/* Debugging aid (compiled out): print the cpu number and the cycles, event
 * and first data word of a trace record. */
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
            cpu, x->cycles, x->event, x->data[0]);
}
#endif
151
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis:             time interval in milliseconds
 *
 * Returns the interval split into whole seconds (tv_sec) and the remaining
 * fraction expressed in nanoseconds (tv_nsec).
 */
struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* 1 ms = 1,000,000 ns; the remainder is < 1000, so tv_nsec stays
     * below one second as nanosleep() requires */
    spec.tv_nsec = (long)(millis % 1000) * 1000000L;

    return spec;
}
165
166
/* One row of the per-event statistics table (stat_map). */
typedef struct 
{
    int event_count;    /* how many records with this event id were seen */
    int event_id;       /* trace event id this row counts                */
    char *text;         /* label printed by dump_stats(); NULL marks the
                         * end of the table                              */
} stat_map_t;
173
/* Event counters, updated by log_event() and printed by dump_stats().
 * Row 0 ("Other") collects every event id that has no row of its own; the
 * final all-zero row (text == 0) terminates the table. */
stat_map_t stat_map[] = {
    { 0,       0,           "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE,  "Wake" },
    { 0, TRC_SCHED_BLOCK,  "Block" },
    { 0, TRC_SCHED_SWITCH,  "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV,  "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT,  "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP,  "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP,  "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER,  "Page Transfer" },
    { 0,      0,                 0  }
};
190
191
/* Consistency check called at the end of dump_stats().  The whole body is
 * compiled out, so the function currently does nothing.  The disabled code
 * would print, for each cpu, the value returned by total_ns_gotten() as an
 * absolute figure and as a percentage of the interval. */
void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
      new_qos = cpu_qos_data[i];
      ns = billion;
      sum = total_ns_gotten(&ns);

      printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
              i, sum, ns);
      percent = (double) sum;
      percent = (100.0*percent) / (double)ns;
      printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
213
214
215
216void dump_stats(void) 
217{
218    stat_map_t *smt = stat_map;
219    time_t end_time, run_time;
220
221    time(&end_time);
222
223    run_time = end_time - start_time;
224
225    printf("Event counts:\n");
226    while (smt->text != NULL) {
227        printf("%08d\t%s\n", smt->event_count, smt->text);
228        smt++;
229    }
230
231    printf("processed %d total records in %d seconds (%ld per second)\n",
232            rec_count, (int)run_time, rec_count/run_time);
233
234    printf("woke up %d times in %d seconds (%ld per second)\n", wakeups,
235           (int) run_time, wakeups/run_time);
236
237    check_gotten_sum();
238}
239
240void log_event(int event_id) 
241{
242    stat_map_t *smt = stat_map;
243
244    //  printf("event_id = 0x%x\n", event_id);
245
246    while (smt->text != NULL) {
247        if (smt->event_id == event_id) {
248            smt->event_count++;
249            return;
250        }
251        smt++;
252    }
253    if (smt->text == NULL)
254        stat_map[0].event_count++;      // other
255}
256
/* local port returned by xc_evtchn_bind_virq() for VIRQ_TBUF */
int virq_port;
/* event channel handle; while it is negative, wait_for_event() sleeps for
 * opts.poll_sleep instead of waiting on the event channel */
int xce_handle = -1;
259
260/* Returns the event channel handle. */
261/* Stolen from xenstore code */
262int eventchn_init(void)
263{
264  int rc;
265 
266  // to revert to old way:
267  if (0)
268    return -1;
269 
270  xce_handle = xc_evtchn_open();
271
272  if (xce_handle < 0)
273    perror("Failed to open evtchn device");
274 
275  if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
276    perror("Failed to bind to domain exception virq port");
277  virq_port = rc;
278 
279  return xce_handle;
280}
281
282void wait_for_event(void)
283{
284  int ret;
285  fd_set inset;
286  evtchn_port_t port;
287  struct timeval tv;
288  int evtchn_fd;
289 
290  if (xce_handle < 0) {
291    nanosleep(&opts.poll_sleep, NULL);
292    return;
293  }
294
295  evtchn_fd = xc_evtchn_fd(xce_handle);
296
297  FD_ZERO(&inset);
298  FD_SET(evtchn_fd, &inset);
299  tv.tv_sec = 1;
300  tv.tv_usec = 0;
301  // tv = millis_to_timespec(&opts.poll_sleep);
302  ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
303 
304  if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
305    if ((port = xc_evtchn_pending(xce_handle)) == -1)
306      perror("Failed to read from event fd");
307   
308    //    if (port == virq_port)
309    //      printf("got the event I was looking for\r\n");
310
311    if (xc_evtchn_unmask(xce_handle, port) == -1)
312      perror("Failed to write to event fd");
313  }
314}
315
316static void get_tbufs(unsigned long *mfn, unsigned long *size)
317{
318    int xc_handle = xc_interface_open();
319    int ret;
320
321    if ( xc_handle < 0 ) 
322    {
323        exit(EXIT_FAILURE);
324    }
325
326    ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);
327
328    if ( ret != 0 )
329    {
330        perror("Couldn't enable trace buffers");
331        exit(1);
332    }
333
334    xc_interface_close(xc_handle);
335}
336
/*
 * disable_tracing - turn the Xen trace buffers off again
 *
 * Failures are reported on stderr but are not fatal: this runs during
 * shutdown, after the statistics have already been printed.
 */
void disable_tracing(void)
{
  int xc_handle = xc_interface_open();

  if (xc_handle < 0) {
    PERROR("Failed to open the xc interface; trace buffers left enabled");
    return;
  }

  if (xc_tbuf_disable(xc_handle) != 0)
    PERROR("Couldn't disable trace buffers");

  xc_interface_close(xc_handle);
}
343
344/**
345 * map_tbufs - memory map Xen trace buffers into user space
346 * @tbufs_mfn: mfn of the trace buffers
347 * @num:       number of trace buffers to map
348 * @size:      size of each trace buffer
349 *
350 * Maps the Xen trace buffers them into process address space.
351 */
352struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
353                        unsigned long size)
354{
355    int xc_handle;
356    struct t_buf *tbufs_mapped;
357
358    xc_handle = xc_interface_open();
359
360    if ( xc_handle < 0 ) 
361    {
362        exit(EXIT_FAILURE);
363    }
364
365    tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
366                                        size * num, PROT_READ | PROT_WRITE,
367                                        tbufs_mfn);
368
369    xc_interface_close(xc_handle);
370
371    if ( tbufs_mapped == 0 ) 
372    {
373        PERROR("Failed to mmap trace buffers");
374        exit(EXIT_FAILURE);
375    }
376
377    return tbufs_mapped;
378}
379
/**
 * init_bufs_ptrs - initialises an array of pointers to the trace buffers
 * @bufs_mapped:    the userspace address where the trace buffers are mapped
 * @num:            number of trace buffers
 * @size:           trace buffer size
 *
 * Initialises an array of pointers to individual trace buffers within the
 * mapped region containing all trace buffers.  Buffer i starts @size * i
 * bytes after @bufs_mapped.  The caller owns the returned array and
 * releases it with free().  Exits the program if allocation fails.
 */
struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
        unsigned long size)
{
    unsigned int i;
    struct t_buf **user_ptrs;

    user_ptrs = calloc(num, sizeof(*user_ptrs));
    if ( user_ptrs == NULL )
    {
        /* no trailing newline here: PERROR appends the errno text and the
         * newline itself */
        PERROR("Failed to allocate memory for buffer pointers");
        exit(EXIT_FAILURE);
    }

    /* initialise pointers to the trace buffers - given the size of a trace
     * buffer and the value of bufs_mapped, we can easily calculate these */
    for ( i = 0; i < num; i++ )
        user_ptrs[i] = (struct t_buf *)((char *)bufs_mapped + size * i);

    return user_ptrs;
}
409
410
411/**
412 * init_rec_ptrs - initialises data area pointers to locations in user space
413 * @tbufs_mfn:     base mfn of the trace buffer area
414 * @tbufs_mapped:  user virtual address of base of trace buffer area
415 * @meta:          array of user-space pointers to struct t_buf's of metadata
416 * @num:           number of trace buffers
417 *
418 * Initialises data area pointers to the locations that data areas have been
419 * mapped in user space.  Note that the trace buffer metadata contains machine
420 * pointers - the array returned allows more convenient access to them.
421 */
422struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
423{
424    int i;
425    struct t_rec **data;
426   
427    data = calloc(num, sizeof(struct t_rec *));
428    if ( data == NULL )
429    {
430        PERROR("Failed to allocate memory for data pointers\n");
431        exit(EXIT_FAILURE);
432    }
433
434    for ( i = 0; i < num; i++ )
435        data[i] = (struct t_rec *)(meta[i] + 1);
436
437    return data;
438}
439
440
441
442/**
443 * get_num_cpus - get the number of logical CPUs
444 */
445unsigned int get_num_cpus(void)
446{
447    xc_physinfo_t physinfo;
448    int xc_handle = xc_interface_open();
449    int ret;
450
451    ret = xc_physinfo(xc_handle, &physinfo);
452
453    if ( ret != 0 )
454    {
455        PERROR("Failure to get logical CPU count from Xen");
456        exit(EXIT_FAILURE);
457    }
458
459    xc_interface_close(xc_handle);
460    opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
461
462    return (physinfo.threads_per_core *
463            physinfo.cores_per_socket *
464            physinfo.sockets_per_node *
465            physinfo.nr_nodes);
466}
467
468
469/**
470 * monitor_tbufs - monitor the contents of tbufs
471 */
472int monitor_tbufs(void)
473{
474    int i;
475    extern void process_record(int, struct t_rec *);
476    extern void alloc_qos_data(int ncpu);
477
478    void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
479    struct t_buf **meta;         /* pointers to the trace buffer metadata    */
480    struct t_rec **data;         /* pointers to the trace buffer data areas
481                                  * where they are mapped into user space.   */
482    unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
483    unsigned int  num;           /* number of trace buffers / logical CPUS   */
484    unsigned long size;          /* size of a single trace buffer            */
485
486    int size_in_recs;
487
488    /* get number of logical CPUs (and therefore number of trace buffers) */
489    num = get_num_cpus();
490
491    init_current(num);
492    alloc_qos_data(num);
493
494    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);
495   
496    /* setup access to trace buffers */
497    get_tbufs(&tbufs_mfn, &size);
498
499    //    printf("from dom0op: %ld, t_buf: %d, t_rec: %d\n",
500    //            size, sizeof(struct t_buf), sizeof(struct t_rec));
501
502    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
503
504    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
505    //    fprintf(stderr, "size_in_recs = %d\n", size_in_recs);
506
507    /* build arrays of convenience ptrs */
508    meta  = init_bufs_ptrs (tbufs_mapped, num, size);
509    data  = init_rec_ptrs(meta, num);
510
511    // Set up event channel for select()
512    if (eventchn_init() < 0) {
513      fprintf(stderr, "Failed to initialize event channel; Using POLL method\r\n");
514    }
515
516    /* now, scan buffers for events */
517    while ( !interrupted )
518    {
519        for ( i = 0; ( i < num ) && !interrupted; i++ )
520            while ( meta[i]->cons != meta[i]->prod )
521            {
522                rmb(); /* read prod, then read item. */
523                process_record(i, data[i] + meta[i]->cons % size_in_recs);
524                mb(); /* read item, then update cons. */
525                meta[i]->cons++;
526            }
527
528        wait_for_event();
529        wakeups++;
530    }
531
532    /* cleanup */
533    free(meta);
534    free(data);
535    /* don't need to munmap - cleanup is automatic */
536
537    return 0;
538}
539
540
541/******************************************************************************
542 * Various declarations / definitions GNU argp needs to do its work
543 *****************************************************************************/
544
545
546/* command parser for GNU argp - see GNU docs for more info */
547error_t cmd_parser(int key, char *arg, struct argp_state *state)
548{
549    settings_t *setup = (settings_t *)state->input;
550
551    switch ( key )
552    {
553        case 't': /* set new records threshold for logging */
554            {
555                char *inval;
556                setup->new_data_thresh = strtol(arg, &inval, 0);
557                if ( inval == arg )
558                    argp_usage(state);
559            }
560            break;
561
562        case 's': /* set sleep time (given in milliseconds) */
563            {
564                char *inval;
565                setup->poll_sleep = millis_to_timespec(strtol(arg, &inval, 0));
566                if ( inval == arg )
567                    argp_usage(state);
568            }
569            break;
570
571        case 'm': /* set ms_per_sample */
572            {
573                char *inval;
574                setup->ms_per_sample = strtol(arg, &inval, 0);
575                if ( inval == arg )
576                    argp_usage(state);
577            }
578            break;
579
580        case ARGP_KEY_ARG:
581            {
582                if ( state->arg_num == 0 )
583                    setup->outfile = arg;
584                else
585                    argp_usage(state);
586            }
587            break;
588
589        default:
590            return ARGP_ERR_UNKNOWN;
591    }
592
593    return 0;
594}
595
596#define SHARED_MEM_FILE "/tmp/xenq-shm"
597void alloc_qos_data(int ncpu)
598{
599    int i, n, pgsize, off=0;
600    char *dummy;
601    int qos_fd;
602    void advance_next_datapoint(uint64_t);
603
604    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));
605
606
607    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
608    if (qos_fd < 0) {
609        PERROR(SHARED_MEM_FILE);
610        exit(2);
611    }
612    pgsize = getpagesize();
613    dummy = malloc(pgsize);
614
615    for (n=0; n<ncpu; n++) {
616
617      for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
618          if ((write(qos_fd, dummy, pgsize)) != pgsize) {
619              PERROR(SHARED_MEM_FILE);
620              exit(2);
621          }
622
623      new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE, 
624                                       MAP_SHARED, qos_fd, off);
625      off += i;
626      if (new_qos == NULL) {
627        PERROR("mmap");
628        exit(3);
629      }
630      //  printf("new_qos = %p\n", new_qos);
631      memset(new_qos, 0, sizeof(_new_qos_data));
632      new_qos->next_datapoint = 0;
633      advance_next_datapoint(0);
634      new_qos->structlen = i;
635      new_qos->ncpu = ncpu;
636      //      printf("structlen = 0x%x\n", i);
637      cpu_qos_data[n] = new_qos;
638    }
639    free(dummy);
640    new_qos = NULL;
641}
642
643
/* Two-level stringification: xstr(M) expands the macro M first and then
 * turns the result into a string literal; str() alone would quote the
 * macro's name instead of its value. */
#define xstr(x) str(x)
#define str(x) #x
646
/* Option table for GNU argp.  The keys 't', 's' and 'm' are handled in
 * cmd_parser(); each help text embeds its compile-time default via xstr().
 * The all-zero entry terminates the table. */
const struct argp_option cmd_opts[] =
{
    { .name = "log-thresh", .key='t', .arg="l",
        .doc =
            "Set number, l, of new records required to trigger a write to output "
            "(default " xstr(NEW_DATA_THRESH) ")." },

    { .name = "poll-sleep", .key='s', .arg="p",
        .doc = 
            "Set sleep time, p, in milliseconds between polling the trace buffer "
            "for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },

    { .name = "ms_per_sample", .key='m', .arg="MS",
        .doc = 
            "Specify the number of milliseconds per sample "
            " (default " xstr(MS_PER_SAMPLE) ")." },

    {0}
};
666
/* Top-level argp parser description passed to argp_parse() in main().  In
 * the doc string, the text before "\v" is printed ahead of the option list
 * and the text after it follows the options. */
const struct argp parser_def =
{
    .options = cmd_opts,
    .parser = cmd_parser,
    //    .args_doc = "[output file]",
    .doc =
        "Tool to capture and partially process Xen trace buffer data"
        "\v"
        "This tool is used to capture trace buffer data from Xen.  The data is "
        "saved in a shared memory structure to be further processed by xenmon."
};
678
679
/* Well-known argp globals: argp uses them for --version and for the bug
 * report address shown in --help output. */
const char *argp_program_version     = "xenbaked v1.4";
const char *argp_program_bug_address = "<rob.gardner@hp.com>";
682
683
684int main(int argc, char **argv)
685{
686    int ret;
687    struct sigaction act;
688
689    time(&start_time);
690    opts.outfile = 0;
691    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
692    opts.new_data_thresh = NEW_DATA_THRESH;
693    opts.ms_per_sample = MS_PER_SAMPLE;
694    opts.cpu_freq = CPU_FREQ;
695
696    argp_parse(&parser_def, argc, argv, 0, 0, &opts);
697    fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);
698
699
700    /* ensure that if we get a signal, we'll do cleanup, then exit */
701    act.sa_handler = close_handler;
702    act.sa_flags = 0;
703    sigemptyset(&act.sa_mask);
704    sigaction(SIGHUP,  &act, NULL);
705    sigaction(SIGTERM, &act, NULL);
706    sigaction(SIGINT,  &act, NULL);
707
708    ret = monitor_tbufs();
709
710    dump_stats();
711    msync(new_qos, sizeof(_new_qos_data), MS_SYNC);
712    disable_tracing();
713
714    return ret;
715}
716
717void qos_init_domain(int domid, int idx)
718{
719  int i;
720
721  memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
722  new_qos->domain_info[idx].last_update_time = global_now;
723  //  runnable_start_time[idx] = 0;
724  new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
725  new_qos->domain_info[idx].in_use = 1;
726  new_qos->domain_info[idx].blocked_start_time = 0;
727  new_qos->domain_info[idx].id = domid;
728  if (domid == IDLE_DOMAIN_ID)
729    sprintf(new_qos->domain_info[idx].name, "Idle Task%d", global_cpu);
730  else
731    sprintf(new_qos->domain_info[idx].name, "Domain#%d", domid);
732 
733  for (i=0; i<NSAMPLES; i++) {
734    new_qos->qdata[i].ns_gotten[idx] = 0;
735    new_qos->qdata[i].ns_allocated[idx] = 0;
736    new_qos->qdata[i].ns_waiting[idx] = 0;
737    new_qos->qdata[i].ns_blocked[idx] = 0;
738    new_qos->qdata[i].switchin_count[idx] = 0;
739    new_qos->qdata[i].io_count[idx] = 0;
740  }
741}
742
743void global_init_domain(int domid, int idx) 
744{
745  int cpu;
746  _new_qos_data *saved_qos;
747 
748  saved_qos = new_qos;
749 
750  for (cpu=0; cpu<NCPU; cpu++) {
751    new_qos = cpu_qos_data[cpu];
752    qos_init_domain(domid, idx);
753  }
754  new_qos = saved_qos;
755}
756
757
758// give index of this domain in the qos data array
759int indexof(int domid)
760{
761  int idx;
762  xc_dominfo_t dominfo[NDOMAINS];
763  int xc_handle, ndomains;
764  extern void qos_kill_thread(int domid);
765 
766  if (domid < 0) {      // shouldn't happen
767    printf("bad domain id: %d\r\n", domid);
768    return 0;
769  }
770
771  for (idx=0; idx<NDOMAINS; idx++)
772    if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
773      return idx;
774
775  // not found, make a new entry
776  for (idx=0; idx<NDOMAINS; idx++)
777    if (new_qos->domain_info[idx].in_use == 0) {
778      global_init_domain(domid, idx);
779      return idx;
780    }
781
782  // call domaininfo hypercall to try and garbage collect unused entries
783  xc_handle = xc_interface_open();
784  ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
785  xc_interface_close(xc_handle);
786
787  // for each domain in our data, look for it in the system dominfo structure
788  // and purge the domain's data from our state if it does not exist in the
789  // dominfo structure
790  for (idx=0; idx<NDOMAINS; idx++) {
791    int domid = new_qos->domain_info[idx].id;
792    int jdx;
793   
794    for (jdx=0; jdx<ndomains; jdx++) {
795      if (dominfo[jdx].domid == domid)
796        break;
797    }
798    if (jdx == ndomains)        // we didn't find domid in the dominfo struct
799      if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not
800                                   // contained in dominfo
801        qos_kill_thread(domid); // purge our stale data
802  }
803 
804  // look again for a free slot
805  for (idx=0; idx<NDOMAINS; idx++)
806    if (new_qos->domain_info[idx].in_use == 0) {
807      global_init_domain(domid, idx);
808      return idx;
809    }
810
811  // still no space found, so bail
812  fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
813  exit(2);
814}
815
816int domain_runnable(int domid)
817{
818    return new_qos->domain_info[indexof(domid)].runnable;
819}
820
821
822void update_blocked_time(int domid, uint64_t now)
823{
824    uint64_t t_blocked;
825    int id = indexof(domid);
826
827    if (new_qos->domain_info[id].blocked_start_time != 0) {
828        if (now >= new_qos->domain_info[id].blocked_start_time)
829            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
830        else
831            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
832        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
833    }
834
835    if (domain_runnable(domid))
836        new_qos->domain_info[id].blocked_start_time = 0;
837    else
838        new_qos->domain_info[id].blocked_start_time = now;
839}
840
841
842// advance to next datapoint for all domains
843void advance_next_datapoint(uint64_t now)
844{
845    int new, old, didx;
846
847    old = new_qos->next_datapoint;
848    new = QOS_INCR(old);
849    new_qos->next_datapoint = new;
850    //  memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
851    for (didx = 0; didx < NDOMAINS; didx++) {
852        new_qos->qdata[new].ns_gotten[didx] = 0;
853        new_qos->qdata[new].ns_allocated[didx] = 0;
854        new_qos->qdata[new].ns_waiting[didx] = 0;
855        new_qos->qdata[new].ns_blocked[didx] = 0;
856        new_qos->qdata[new].switchin_count[didx] = 0;
857        new_qos->qdata[new].io_count[didx] = 0;
858    }
859    new_qos->qdata[new].ns_passed = 0;
860    new_qos->qdata[new].lost_records = 0;
861    new_qos->qdata[new].flip_free_periods = 0;
862
863    new_qos->qdata[new].timestamp = now;
864}
865
866
867
/*
 * qos_update_thread - bring one domain's accounting up to time 'now'
 * @cpu:   cpu whose record is being processed
 * @domid: domain to update
 * @now:   current timestamp in ns
 *
 * Works on the global new_qos.  Credits run time to the domain if it was
 * runnable at the last update and is the current domain on @cpu, refreshes
 * its runnable and blocked bookkeeping, and advances ns_passed and the
 * timestamp of the current datapoint.  Returns without changing anything
 * when the timestamp looks like an out-of-order record.
 */
void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = indexof(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    // unsigned difference stored in a signed variable: a 'now' that lies
    // before last_update_time shows up as a negative value
    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
      // what happened here? either a timestamp wraparound, or more likely,
      // a slight inconsistency among timestamps from various cpu's
      if (-time_since_update < billion) {
        // fairly small difference: use its magnitude as the elapsed time
        // ('now' itself is not modified)
        time_since_update = -time_since_update;
      }
      else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
        // difference is huge, must be a wraparound
        // last_update time should be "near" ~0ULL,
        // and now should be "near" 0
        time_since_update = now + (~0ULL - last_update_time);
        printf("time wraparound\n");
      }
      else {
        // none of the above, may be an out of order record
        // no good solution, just ignore and update again later
        return;
      }
    }
        
    new_qos->domain_info[id].last_update_time = now;

    // only a domain that was runnable at the previous update and is the
    // current domain on this cpu is credited with cpu time
    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) {              // wrapped around
            run_time = now + (~0ULL - start);
            // this could happen if there is nothing going on within a cpu;
            // in this case the idle domain would run forever
        }
        else
            run_time = now - start;
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;

        new_qos->qdata[n].ns_gotten[id] += run_time;
    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around: ns_passed is deliberately left unchanged
    }
    new_qos->qdata[n].timestamp = now;
}
943
944
945// called by dump routines to update all structures
946void qos_update_all(uint64_t now, int cpu)
947{
948    int i;
949
950    for (i=0; i<NDOMAINS; i++)
951        if (new_qos->domain_info[i].in_use)
952            qos_update_thread(cpu, new_qos->domain_info[i].id, now); 
953}
954
955
956void qos_update_thread_stats(int cpu, int domid, uint64_t now)
957{
958    if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
959        qos_update_all(now, cpu);
960        advance_next_datapoint(now);
961        return;
962    }
963    qos_update_thread(cpu, domid, now);
964}
965
966
967
968// called when a new thread gets the cpu
969void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
970{
971    int idx = indexof(domid);
972
973    new_qos->domain_info[idx].runnable = 1;
974    update_blocked_time(domid, now);
975    new_qos->domain_info[idx].blocked_start_time = 0; // invalidate
976    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
977    //runnable_start_time[idx] = 0;
978
979    new_qos->domain_info[idx].start_time = now;
980    new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
981    new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
982    new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
983    qos_update_thread_stats(cpu, domid, now);
984    set_current(cpu, domid);
985
986    // count up page flips for dom0 execution
987    if (domid == 0)
988      dom0_flips = 0;
989}
990
991// called when the current thread is taken off the cpu
992void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
993{
994    int idx = indexof(domid);
995    int n;
996
997    if (!is_current(domid, cpu)) {
998        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
999    }
1000
1001    if (gotten == 0) {
1002        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
1003    }
1004
1005    if (gotten < 100) {
1006        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
1007    }
1008
1009
1010    n = new_qos->next_datapoint;
1011#if 0
1012    new_qos->qdata[n].ns_gotten[idx] += gotten;
1013    if (gotten > new_qos->qdata[n].ns_passed)
1014      printf("inconsistency #257, diff = %lld\n",
1015            gotten - new_qos->qdata[n].ns_passed );
1016#endif
1017    new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
1018    new_qos->domain_info[idx].runnable_start_time = now;
1019    //  runnable_start_time[id] = now;
1020    qos_update_thread_stats(cpu, domid, now);
1021
1022    // process dom0 page flips
1023    if (domid == 0)
1024      if (dom0_flips == 0)
1025        new_qos->qdata[n].flip_free_periods++;
1026}
1027
1028// called when domain is put to sleep, may also be called
1029// when thread is already asleep
1030void qos_state_sleeping(int cpu, int domid, uint64_t now) 
1031{
1032    int idx;
1033
1034    if (!domain_runnable(domid))        // double call?
1035        return;
1036
1037    idx = indexof(domid);
1038    new_qos->domain_info[idx].runnable = 0;
1039    new_qos->domain_info[idx].blocked_start_time = now;
1040    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
1041    //  runnable_start_time[idx] = 0; // invalidate
1042    qos_update_thread_stats(cpu, domid, now);
1043}
1044
1045
1046
1047// domain died, presume it's dead on all cpu's, not just mostly dead
1048void qos_kill_thread(int domid)
1049{
1050  int cpu;
1051 
1052  for (cpu=0; cpu<NCPU; cpu++) {
1053    cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
1054  }
1055 
1056}
1057
1058
1059// called when thread becomes runnable, may also be called
1060// when thread is already runnable
1061void qos_state_runnable(int cpu, int domid, uint64_t now)
1062{
1063   int idx;
1064 
1065
1066    qos_update_thread_stats(cpu, domid, now);
1067
1068    if (domain_runnable(domid)) // double call?
1069        return;
1070
1071    idx = indexof(domid);
1072    new_qos->domain_info[idx].runnable = 1;
1073    update_blocked_time(domid, now);
1074
1075    new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
1076    new_qos->domain_info[idx].runnable_start_time = now;
1077    //  runnable_start_time[id] = now;
1078}
1079
1080
1081void qos_count_packets(domid_t domid, uint64_t now)
1082{
1083  int i, idx = indexof(domid);
1084  _new_qos_data *cpu_data;
1085
1086  for (i=0; i<NCPU; i++) {
1087    cpu_data = cpu_qos_data[i];
1088    if (cpu_data->domain_info[idx].in_use) {
1089      cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
1090    }
1091  }
1092
1093  new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
1094  dom0_flips++;
1095}
1096
1097
1098void process_record(int cpu, struct t_rec *r)
1099{
1100  uint64_t now;
1101
1102  new_qos = cpu_qos_data[cpu];
1103
1104  rec_count++;
1105
1106  now = ((double)r->cycles) / (opts.cpu_freq / 1000.0);
1107
1108  global_now = now;
1109  global_cpu = cpu;
1110
1111  log_event(r->event);
1112
1113  switch (r->event) {
1114
1115  case TRC_SCHED_SWITCH_INFPREV:
1116    // domain data[0] just switched out and received data[1] ns of cpu time
1117    qos_switch_out(cpu, r->data[0], now, r->data[1]);
1118    //    printf("ns_gotten %ld\n", r->data[1]);
1119    break;
1120   
1121  case TRC_SCHED_SWITCH_INFNEXT:
1122    // domain data[0] just switched in and
1123    // waited data[1] ns, and was allocated data[2] ns of cpu time
1124    qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]);
1125    break;
1126   
1127  case TRC_SCHED_DOM_ADD:
1128    (void) indexof(r->data[0]);
1129    break;
1130   
1131  case TRC_SCHED_DOM_REM:
1132    qos_kill_thread(r->data[0]);
1133    break;
1134   
1135  case TRC_SCHED_SLEEP:
1136    qos_state_sleeping(cpu, r->data[0], now);
1137    break;
1138   
1139  case TRC_SCHED_WAKE:
1140    qos_state_runnable(cpu, r->data[0], now);
1141    break;
1142   
1143  case TRC_SCHED_BLOCK:
1144    qos_state_sleeping(cpu, r->data[0], now);
1145    break;
1146   
1147  case TRC_MEM_PAGE_GRANT_TRANSFER:
1148    qos_count_packets(r->data[0], now);
1149    break;
1150   
1151  default:
1152    break;
1153  }
1154  new_qos = NULL;
1155}
1156
1157
1158
Note: See TracBrowser for help on using the repository browser.