From c7edb06b4597451976a3a8bb0e56a2c8e894d95f Mon Sep 17 00:00:00 2001 From: Tiago Vignatti Date: Tue, 29 Sep 2015 20:58:08 -0300 Subject: [PATCH] HACK: benchmarks: Read BOs to measure performance against VGEM --- benchmarks/Makefile.sources | 1 + benchmarks/intel_upload_blit_large.c | 3 +- benchmarks/intel_upload_blit_large_gtt.c | 3 +- benchmarks/intel_upload_blit_large_map.c | 21 +- benchmarks/intel_upload_blit_large_vgem.c | 238 ++++++++++++++++++++++ 5 files changed, 259 insertions(+), 7 deletions(-) create mode 100644 benchmarks/intel_upload_blit_large_vgem.c diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources index a456e05b..3a2785b3 100644 --- a/benchmarks/Makefile.sources +++ b/benchmarks/Makefile.sources @@ -4,6 +4,7 @@ benchmarks_PROGRAMS = \ intel_upload_blit_large \ intel_upload_blit_large_gtt \ intel_upload_blit_large_map \ + intel_upload_blit_large_vgem \ intel_upload_blit_small \ gem_create \ gem_exec_ctx \ diff --git a/benchmarks/intel_upload_blit_large.c b/benchmarks/intel_upload_blit_large.c index 1984bfde..9bfd6954 100644 --- a/benchmarks/intel_upload_blit_large.c +++ b/benchmarks/intel_upload_blit_large.c @@ -84,12 +84,13 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, drm_intel_bo *src_bo; int i; static uint32_t seed = 1; + int x = 0; /* Generate some junk. Real workloads would be doing a lot more * work to generate the junk. */ for (i = 0; i < width * height; i++) { - data[i] = seed++; + x = data[i]; } /* Upload the junk. 
*/ diff --git a/benchmarks/intel_upload_blit_large_gtt.c b/benchmarks/intel_upload_blit_large_gtt.c index d62a01ea..bb1cc458 100644 --- a/benchmarks/intel_upload_blit_large_gtt.c +++ b/benchmarks/intel_upload_blit_large_gtt.c @@ -82,6 +82,7 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, drm_intel_bo *src_bo; int i; static uint32_t seed = 1; + int x = 0; src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096); @@ -89,7 +90,7 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, data = src_bo->virtual; for (i = 0; i < width * height; i++) { - data[i] = seed++; + x = data[i]; } drm_intel_gem_bo_unmap_gtt(src_bo); diff --git a/benchmarks/intel_upload_blit_large_map.c b/benchmarks/intel_upload_blit_large_map.c index 03bf760f..90ffd00c 100644 --- a/benchmarks/intel_upload_blit_large_map.c +++ b/benchmarks/intel_upload_blit_large_map.c @@ -67,6 +67,8 @@ #define OBJECT_WIDTH 1280 #define OBJECT_HEIGHT 720 +int fd; + static double get_time_in_secs(void) { @@ -82,17 +84,27 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, drm_intel_bo *dst_bo, int width, int height) { uint32_t *data; - drm_intel_bo *src_bo; - int i; + drm_intel_bo *src_bo, *tmp_bo; + int i, prime_fd; static uint32_t seed = 1; + int x = 0; src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096); - +#if 0 + // this is here just to see overhead it takes and compare with vgem's. 
+ { + drm_intel_bo_gem_export_to_prime(src_bo, &prime_fd); + if (drmPrimeFDToHandle(fd, prime_fd, &tmp_bo)) { + fprintf(stderr, "failed to import handle\n"); + return; + } + } +#endif drm_intel_bo_map(src_bo, 1); data = src_bo->virtual; for (i = 0; i < width * height; i++) { - data[i] = seed++; + x = data[i]; } drm_intel_bo_unmap(src_bo); @@ -117,7 +129,6 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, int main(int argc, char **argv) { - int fd; int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4; double start_time, end_time; drm_intel_bo *dst_bo; diff --git a/benchmarks/intel_upload_blit_large_vgem.c b/benchmarks/intel_upload_blit_large_vgem.c new file mode 100644 index 00000000..a486f55e --- /dev/null +++ b/benchmarks/intel_upload_blit_large_vgem.c @@ -0,0 +1,238 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +/** + * Roughly simulates repeatedly uploading frames of images, by uploading + * the data all at once with pwrite, and then blitting it to another buffer. + * + * You might think of this like a movie player, but that wouldn't be entirely + * accurate, since the access patterns of the memory would be different + * (generally, smaller source image, upscaled, and thus different memory access + * pattern in both texel fetch for the stretching and the destination writes). + * However, some things like swfdec would be doing something like this since + * they compute their data in host memory and upload the full sw rendered + * frame. + * + * Additionally, those applications should be rendering at the screen refresh + * rate, while this test has no limits, and so can get itself into the + * working set larger than aperture size performance disaster. + * + * The current workload we have that does large drm_intel_bo_map() + * uploads is texture upload for OpenGL (as it frequently is doing + * reformatting as it uploads the user's data, making bo_subdata less + * suitable) + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "drm.h" +#include "i915_drm.h" +#include "drmtest.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" +#include "intel_io.h" +#include "intel_chipset.h" + +#define OBJECT_WIDTH 1280 +#define OBJECT_HEIGHT 720 + + +int vgem_fd; + +const char g_sys_card_path_format[] = +"/sys/bus/platform/devices/vgem/drm/card%d"; +const char g_dev_card_path_format[] = +"/dev/dri/card%d"; + +static int drm_open_vgem(void) { + char *name; + int i, fd; + + for (i = 0; i < 16; i++) { + struct stat _stat; + int ret; + ret = asprintf(&name, g_sys_card_path_format, i); + assert(ret != -1); + + if (stat(name, &_stat) == -1) { + free(name); + continue; + } + + free(name); + ret = asprintf(&name, g_dev_card_path_format, i); + assert(ret != -1); + + fd = 
open(name, O_RDWR); + free(name); + if (fd == -1) { + continue; + } + return fd; + } + return -1; +} + +static void *mmap_dumb_bo(int fd, int handle, size_t size) { + struct drm_mode_map_dumb mmap_arg; + void *ptr; + int ret; + + memset(&mmap_arg, 0, sizeof(mmap_arg)); + + mmap_arg.handle = handle; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &mmap_arg); + assert(ret == 0); + assert(mmap_arg.offset != 0); + + ptr = mmap(NULL, size, (PROT_READ|PROT_WRITE), MAP_SHARED, fd, + mmap_arg.offset); + + assert(ptr != MAP_FAILED); + + return ptr; +} + + static double +get_time_in_secs(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + + return (double)tv.tv_sec + tv.tv_usec / 1000000.0; +} + +static void +do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch, + drm_intel_bo *dst_bo, int width, int height) +{ + uint32_t *data; + drm_intel_bo *src_bo; + int i, prime_fd; + static uint32_t seed = 1; + int x = 0; + uint32_t vgem_bo; + uint32_t *bo_ptr; + volatile uint32_t *ptr; + + src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096); + + drm_intel_bo_gem_export_to_prime(src_bo, &prime_fd); + if (drmPrimeFDToHandle(vgem_fd, prime_fd, &vgem_bo)) { + fprintf(stderr, "failed to import handle\n"); + return; + } + + bo_ptr = mmap_dumb_bo(vgem_fd, vgem_bo, width * height * 4); + ptr = bo_ptr; + + for (i = 0; i < width * height; i++) { + x = ptr[i]; + } + + munmap(bo_ptr, width * height * 4); + + /* Render the junk to the dst. 
*/ + BLIT_COPY_BATCH_START(0); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + (width * 4) /* dst pitch */); + OUT_BATCH(0); /* dst x1,y1 */ + OUT_BATCH((height << 16) | width); /* dst x2,y2 */ + OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(0); /* src x1,y1 */ + OUT_BATCH(width * 4); /* src pitch */ + OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush(batch); + + drm_intel_bo_unreference(src_bo); +} + +int main(int argc, char **argv) +{ + int fd; + int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4; + double start_time, end_time; + drm_intel_bo *dst_bo; + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + int i; + + fd = drm_open_driver(DRIVER_INTEL); + + vgem_fd = drm_open_vgem(); + if (vgem_fd < 0) { + fprintf(stderr, "failed to open vgem card\n"); + close(fd); + return 1; + } + + bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); + drm_intel_bufmgr_gem_enable_reuse(bufmgr); + + batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + + dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096); + + /* Prep loop to get us warmed up. */ + for (i = 0; i < 60; i++) { + do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT); + } + drm_intel_bo_wait_rendering(dst_bo); + + /* Do the actual timing. */ + start_time = get_time_in_secs(); + for (i = 0; i < 200; i++) { + do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT); + } + drm_intel_bo_wait_rendering(dst_bo); + end_time = get_time_in_secs(); + + printf("%d iterations in %.03f secs: %.01f MB/sec\n", i, + end_time - start_time, + (double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 / + (end_time - start_time)); + + intel_batchbuffer_free(batch); + drm_intel_bufmgr_destroy(bufmgr); + + close(fd); + + return 0; +}