intel_gpu_top: initialize monitoring statistics at startup

This patch initializes last_stats[] for the statistics registers before
the monitoring itself starts. This way, the first measurement already reports
the difference from the previous value instead of an uninitialized value.

Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
This commit is contained in:
Eugeni Dodonov 2011-09-05 16:33:04 -03:00
parent 4b0a15e45d
commit 431fe7803d
2 changed files with 188 additions and 39 deletions

View File

@ -16,8 +16,21 @@ privilege to map the graphics device.
.B -s [samples per second] .B -s [samples per second]
number of samples to acquire per second number of samples to acquire per second
.TP .TP
.B -o [output file]
run non-interactively and collect usage statistics to [file]
.TP
.B -e ["command to profile"]
execute a command, and leave when it is finished. Note that the entire command
with all parameters should be included as one parameter.
.TP
.B -h .B -h
show usage notes show usage notes
.SH EXAMPLES
.TP
intel_gpu_top -o "cairo-trace-gvim.log" -s 100 -e "cairo-perf-trace /tmp/gvim"
will run cairo-perf-trace with /tmp/gvim trace, non-interactively, saving the
statistics into cairo-trace-gvim.log file, and collecting 100 samples per
second.
.PP .PP
Note that idle units are not Note that idle units are not
displayed, so an entirely idle GPU will only display the ring status and displayed, so an entirely idle GPU will only display the ring status and

View File

@ -33,6 +33,8 @@
#include <err.h> #include <err.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/wait.h>
#include <string.h>
#include "intel_gpu_tools.h" #include "intel_gpu_tools.h"
#include "instdone.h" #include "instdone.h"
@ -373,24 +375,39 @@ static void ring_sample(struct ring *ring)
ring->full += full; ring->full += full;
} }
/*
 * Write the column titles for one ring to 'out': the ring name, cut to at
 * most six characters and followed by a literal '%', then "ops". Each title
 * is terminated by a tab and no newline is written.
 */
static void ring_print_header(FILE *out, struct ring *ring)
{
	const char *title = ring->name;

	fprintf(out, "%.6s%%\tops\t", title);
}
static void ring_print(struct ring *ring, unsigned long samples_per_sec, static void ring_print(struct ring *ring, unsigned long samples_per_sec,
FILE *output) FILE *output)
{ {
int samples_to_percent_ratio, percent, len; int samples_to_percent_ratio, percent, len;
if (!ring->size)
return;
/* Calculate current value of samples_to_percent_ratio */ /* Calculate current value of samples_to_percent_ratio */
samples_to_percent_ratio = (ring->idle * 100) / samples_per_sec; samples_to_percent_ratio = (ring->idle * 100) / samples_per_sec;
percent = 100 - samples_to_percent_ratio; percent = 100 - samples_to_percent_ratio;
len = fprintf(output, "%25s busy: %3d%%: ", ring->name, percent);
print_percentage_bar (percent, len); if (output == stdout) {
fprintf(output, "%24s space: %d/%d (%d%%)\n", if (!ring->size)
ring->name, return;
(int)(ring->full / samples_per_sec),
ring->size, len = fprintf(output, "%25s busy: %3d%%: ", ring->name, percent);
(int)((ring->full / samples_to_percent_ratio) / ring->size)); print_percentage_bar (percent, len);
fprintf(output, "%24s space: %d/%d (%d%%)\n",
ring->name,
(int)(ring->full / samples_per_sec),
ring->size,
(int)((ring->full / samples_to_percent_ratio) / ring->size));
} else {
fprintf(output, "%3d\t%d\t",
(ring->size) ? 100 - ring->idle / samples_to_percent_ratio : -1,
(ring->size) ? (int)(ring->full / samples_per_sec) : -1
);
}
} }
static void static void
@ -402,6 +419,7 @@ usage(const char *appname)
"\n" "\n"
"The following parameters apply:\n" "The following parameters apply:\n"
"[-s <samples>] samples per seconds (default %d)\n" "[-s <samples>] samples per seconds (default %d)\n"
"[-o <file>] output to file (default to stdio)\n"
"[-h] show this help screen\n" "[-h] show this help screen\n"
"\n", "\n",
appname, appname,
@ -429,17 +447,29 @@ int main(int argc, char **argv)
int i, ch; int i, ch;
int samples_per_sec = SAMPLES_PER_SEC; int samples_per_sec = SAMPLES_PER_SEC;
FILE *output = stdout; FILE *output = stdout;
double elapsed_time=0;
int print_headers=1;
pid_t child_pid=-1;
int child_stat;
char *cmd=NULL;
/* Parse options? */ /* Parse options? */
while ((ch = getopt(argc, argv, "s:h")) != -1) while ((ch = getopt(argc, argv, "s:o:h")) != -1) {
{
switch (ch) { switch (ch) {
case 'e': cmd = strdup(optarg);
break;
case 's': samples_per_sec = atoi(optarg); case 's': samples_per_sec = atoi(optarg);
if (samples_per_sec < 100) { if (samples_per_sec < 100) {
fprintf(stderr, "Error: samples per second must be >= 100\n"); fprintf(stderr, "Error: samples per second must be >= 100\n");
exit(1); exit(1);
} }
break; break;
case 'o': output = fopen(optarg, "w");
if (!output) {
perror("fopen");
exit(1);
}
break;
case 'h': case 'h':
usage(argv[0]); usage(argv[0]);
exit(0); exit(0);
@ -454,6 +484,37 @@ int main(int argc, char **argv)
argc -= optind; argc -= optind;
argv += optind; argv += optind;
/* Do we have a command to run? */
if (cmd != NULL)
{
if (output != stdout) {
fprintf(output, "# Profiling: %s\n", cmd);
fflush(output);
}
child_pid = fork();
if (child_pid < 0)
{
perror("fork");
exit(1);
}
else if (child_pid == 0) {
int res;
res = system(cmd);
free(cmd);
if (res < 0)
perror("running command");
if (output != stdout) {
fflush(output);
fprintf(output, "# %s exited with status %d\n", cmd, res);
fflush(output);
}
exit(0);
}
else {
free(cmd);
}
}
pci_dev = intel_get_pci_device(); pci_dev = intel_get_pci_device();
devid = pci_dev->device_id; devid = pci_dev->device_id;
intel_get_mmio(pci_dev); intel_get_mmio(pci_dev);
@ -473,9 +534,25 @@ int main(int argc, char **argv)
ring_init(&blt_ring); ring_init(&blt_ring);
} }
/* Initialize GPU stats */
if (HAS_STATS_REGS(devid)) {
for (i = 0; i < STATS_COUNT; i++) {
uint32_t stats_high, stats_low, stats_high_2;
do {
stats_high = INREG(stats_regs[i] + 4);
stats_low = INREG(stats_regs[i]);
stats_high_2 = INREG(stats_regs[i] + 4);
} while (stats_high != stats_high_2);
last_stats[i] = (uint64_t)stats_high << 32 |
stats_low;
}
}
for (;;) { for (;;) {
int j; int j;
unsigned long long t1, ti, tf; unsigned long long t1, ti, tf, t2;
unsigned long long def_sleep = 1000000 / samples_per_sec; unsigned long long def_sleep = 1000000 / samples_per_sec;
unsigned long long last_samples_per_sec = samples_per_sec; unsigned long long last_samples_per_sec = samples_per_sec;
char clear_screen[] = {0x1b, '[', 'H', char clear_screen[] = {0x1b, '[', 'H',
@ -546,39 +623,82 @@ int main(int argc, char **argv)
if (max_lines >= num_instdone_bits) if (max_lines >= num_instdone_bits)
max_lines = num_instdone_bits; max_lines = num_instdone_bits;
fprintf(output, "%s", clear_screen); t2 = gettime();
elapsed_time += (t2 - t1) / 1000000.0;
print_clock_info(pci_dev); if (output == stdout) {
fprintf(output, "%s", clear_screen);
print_clock_info(pci_dev);
ring_print(&render_ring, last_samples_per_sec, output); ring_print(&render_ring, last_samples_per_sec, output);
ring_print(&bsd_ring, last_samples_per_sec, output); ring_print(&bsd_ring, last_samples_per_sec, output);
ring_print(&bsd6_ring, last_samples_per_sec, output); ring_print(&bsd6_ring, last_samples_per_sec, output);
ring_print(&blt_ring, last_samples_per_sec, output); ring_print(&blt_ring, last_samples_per_sec, output);
fprintf(output, "\n%30s %s\n", "task", "percent busy"); fprintf(output, "\n%30s %s\n", "task", "percent busy");
for (i = 0; i < max_lines; i++) { for (i = 0; i < max_lines; i++) {
if (top_bits_sorted[i]->count > 0) { if (top_bits_sorted[i]->count > 0) {
percent = (top_bits_sorted[i]->count * 100) / percent = (top_bits_sorted[i]->count * 100) /
last_samples_per_sec; last_samples_per_sec;
len = fprintf(output, "%30s: %3d%%: ", len = fprintf(output, "%30s: %3d%%: ",
top_bits_sorted[i]->bit->name, top_bits_sorted[i]->bit->name,
percent); percent);
print_percentage_bar (percent, len); print_percentage_bar (percent, len);
} else { } else {
fprintf(output, "%*s", PERCENTAGE_BAR_END, ""); fprintf(output, "%*s", PERCENTAGE_BAR_END, "");
}
if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
fprintf(output, "%13s: %llu (%lld/sec)",
stats_reg_names[i],
stats[i],
stats[i] - last_stats[i]);
last_stats[i] = stats[i];
} else {
if (!top_bits_sorted[i]->count)
break;
}
fprintf(output, "\n");
}
} else {
/* Print headers for columns at first run */
if (print_headers) {
fprintf(output, "# time\t");
ring_print_header(output, &render_ring);
ring_print_header(output, &bsd_ring);
ring_print_header(output, &bsd6_ring);
ring_print_header(output, &blt_ring);
for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
fprintf(output, "%.6s\t",
stats_reg_names[i]
);
}
if (!top_bits[i].count)
continue;
}
fprintf(output, "\n");
print_headers = 0;
} }
if (i < STATS_COUNT && HAS_STATS_REGS(devid)) { /* Print statistics */
fprintf(output, "%13s: %llu (%lld/sec)", fprintf(output, "%.2f\t", elapsed_time);
stats_reg_names[i], ring_print(&render_ring, last_samples_per_sec, output);
stats[i], ring_print(&bsd_ring, last_samples_per_sec, output);
stats[i] - last_stats[i]); ring_print(&bsd6_ring, last_samples_per_sec, output);
last_stats[i] = stats[i]; ring_print(&blt_ring, last_samples_per_sec, output);
} else {
if (!top_bits_sorted[i]->count) for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
break; if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
fprintf(output, "%lu\t",
stats[i] - last_stats[i]);
last_stats[i] = stats[i];
}
if (!top_bits[i].count)
continue;
} }
fprintf(output, "\n"); fprintf(output, "\n");
fflush(output);
} }
for (i = 0; i < num_instdone_bits; i++) { for (i = 0; i < num_instdone_bits; i++) {
@ -587,7 +707,23 @@ int main(int argc, char **argv)
if (i < STATS_COUNT) if (i < STATS_COUNT)
last_stats[i] = stats[i]; last_stats[i] = stats[i];
} }
/* Check if child has gone */
if (child_pid > 0)
{
int res;
if ((res = waitpid(child_pid, &child_stat, WNOHANG)) == -1) {
perror("waitpid");
exit(1);
}
if (res == 0)
continue;
if (WIFEXITED(child_stat))
break;
}
} }
fclose(output);
return 0; return 0;
} }