Sfoglia il codice sorgente

optimize rendering, part one:

1. improve merging of same-material draw calls
2. disregard materials for far geometry
Ivan Avdeev 6 anni fa
parent
commit
91d54c4cba
6 file modificati con 132 aggiunte e 117 eliminazioni
  1. 9 9
      src/OpenSource.c
  2. 71 41
      src/bsp.c
  3. 8 2
      src/bsp.h
  4. 1 1
      src/profiler.c
  5. 42 62
      src/render.c
  6. 1 2
      src/render.h

+ 9 - 9
src/OpenSource.c

@@ -145,7 +145,7 @@ static enum BSPLoadResult loadMap(struct Map *map, struct ICollection *collectio
 		return result;
 	}
 
-	aAppDebugPrintf("Loaded %s to %u draw calls", map->name, map->model.draws_count);
+	aAppDebugPrintf("Loaded %s to %u draw calls", map->name, map->model.detailed.draws_count);
 	aAppDebugPrintf("AABB (%f, %f, %f) - (%f, %f, %f)",
 			map->model.aabb.min.x,
 			map->model.aabb.min.y,
@@ -232,21 +232,21 @@ static void opensrcPaint(ATimeUs timestamp, float dt) {
 
 	renderClear();
 
+	int triangles = 0;
 	for (struct Map *map = g.maps_begin; map; map = map->next) {
 		if (!map->loaded)
 			continue;
 
 		const struct AMat4f mvp = aMat4fMul(g.camera.view_projection, aMat4fTranslation(map->offset));
 
-		renderModelDraw(&mvp, g.lmn, &map->model);
+		renderModelDraw(&mvp, aVec3fSub(g.camera.pos, map->offset), g.lmn, &map->model);
 
-		if (profilerFrame(&stack_temp)) {
-			int triangles = 0;
-			for (int i = 0; i < map->model.draws_count; ++i) {
-				triangles += map->model.draws[i].count / 3;
-			}
-			PRINTF("Total triangles: %d", triangles);
-		}
+		for (int i = 0; i < map->model.detailed.draws_count; ++i)
+			triangles += map->model.detailed.draws[i].count / 3;
+	}
+
+	if (profilerFrame(&stack_temp)) {
+		PRINTF("Total triangles: %d", triangles);
 	}
 }
 

+ 71 - 41
src/bsp.c

@@ -552,8 +552,19 @@ static void bspLoadFace(
 	}
 }
 
+static int faceMaterialCompare(const void *a, const void *b) {
+	const struct Face *fa = a, *fb = b;
+
+	if (fa->material == fb->material)
+		return 0;
+
+	return fa->material->base_texture[0] - fb->material->base_texture[0];
+}
+
 static enum BSPLoadResult bspLoadModelDraws(const struct LoadModelContext *ctx, struct Stack *persistent,
 		struct BSPModel *model) {
+	void * const tmp_cursor = stackGetCursor(ctx->tmp);
+
 	struct BSPModelVertex * const vertices_buffer
 		= stackAlloc(ctx->tmp, sizeof(struct BSPModelVertex) * ctx->max_draw_vertices);
 	if (!vertices_buffer) return BSPLoadResult_ErrorTempMemory;
@@ -562,22 +573,71 @@ static enum BSPLoadResult bspLoadModelDraws(const struct LoadModelContext *ctx,
 	uint16_t * const indices_buffer = stackAlloc(ctx->tmp, sizeof(uint16_t) * ctx->indices);
 	if (!indices_buffer) return BSPLoadResult_ErrorTempMemory;
 
-	int vertex_pos = 0;
-	int draw_indices_start = 0, indices_pos = 0;
+	qsort(ctx->faces, ctx->faces_count, sizeof(*ctx->faces), faceMaterialCompare);
 
-	model->draws_count = ctx->faces_count;
-	model->draws = stackAlloc(persistent, sizeof(struct BSPDraw) * model->draws_count);
+	{
+		int vbo_offset = 0, vertex_pos = 0;
+		model->detailed.draws_count = 1;
+		model->coarse.draws_count = 1;
+		for (int iface = 0; iface < ctx->faces_count; ++iface) {
+			const struct Face *face = ctx->faces + iface;
+
+			const int update_vbo_offset = (vertex_pos - vbo_offset) + face->vertices >= c_max_draw_vertices;
+			if (update_vbo_offset || (iface > 0 && faceMaterialCompare(ctx->faces+iface-1,face) != 0)) {
+				//PRINTF("%p -> %p", (void*)ctx->faces[iface-1].material->base_texture[0], (void*)face->material->base_texture[0]);
+				++model->detailed.draws_count;
+			}
 
+			if (update_vbo_offset) {
+				vbo_offset = vertex_pos;
+				++model->coarse.draws_count;
+			}
+
+			vertex_pos += face->vertices;
+		}
+	}
+
+	PRINTF("Faces: %d -> %d detailed draws", ctx->faces_count, model->detailed.draws_count);
+
+	model->detailed.draws = stackAlloc(persistent, sizeof(struct BSPDraw) * model->detailed.draws_count);
+	model->coarse.draws = stackAlloc(persistent, sizeof(struct BSPDraw) * model->coarse.draws_count);
+
+	int vertex_pos = 0;
+	int draw_indices_start = 0, indices_pos = 0;
 	int vbo_offset = 0;
 	int idraw = 0;
+	struct BSPDraw *detailed_draw = model->detailed.draws - 1,
+								 *coarse_draw = model->coarse.draws - 1;
+
 	for (int iface = 0; iface < ctx->faces_count/* + 1*/; ++iface) {
 		const struct Face *face = ctx->faces + iface;
 
-		if ((vertex_pos - vbo_offset) + face->vertices >= c_max_draw_vertices) {
+		const int update_vbo_offset = (vertex_pos - vbo_offset) + face->vertices >= c_max_draw_vertices;
+
+		if (update_vbo_offset) {
 			PRINTF("vbo_offset %d -> %d", vbo_offset, vertex_pos);
 			vbo_offset = vertex_pos;
 		}
 
+		if (update_vbo_offset || iface == 0 || faceMaterialCompare(ctx->faces+iface-1,face) != 0) {
+			++detailed_draw;
+			detailed_draw->start = draw_indices_start;
+			detailed_draw->count = 0;
+			detailed_draw->vbo_offset = vbo_offset;
+			detailed_draw->material = face->material;
+
+			++idraw;
+			ASSERT(idraw <= model->detailed.draws_count);
+		}
+
+		if (update_vbo_offset || iface == 0) {
+			++coarse_draw;
+			coarse_draw->start = draw_indices_start;
+			coarse_draw->count = 0;
+			coarse_draw->vbo_offset = vbo_offset;
+			coarse_draw->material = face->material; /* FIXME */
+		}
+
 		if (face->dispinfo) {
 			bspLoadDisplacement(ctx, face, vertices_buffer + vertex_pos, indices_buffer + indices_pos, vertex_pos - vbo_offset);
 		} else {
@@ -587,48 +647,18 @@ static enum BSPLoadResult bspLoadModelDraws(const struct LoadModelContext *ctx,
 		vertex_pos += face->vertices;
 		indices_pos += face->indices;
 
-		struct BSPDraw *draw = model->draws + idraw;
-		memset(draw, 0, sizeof *draw);
-		draw->count = indices_pos - draw_indices_start;
-		draw->start = draw_indices_start;
-		draw->vbo_offset = vbo_offset;
-
-		//PRINTF("Adding draw=%u start=%u count=%u", idraw, draw->start, draw->count);
-
-		draw->material = face->material;
-
-		/*
-		PRINTF("Got texture size %dx%d",
-				draw->material->base_texture[0]->gltex.width,
-				draw->material->base_texture[0]->gltex.height);
-		*/
+		detailed_draw->count += indices_pos - draw_indices_start;
+		coarse_draw->count += indices_pos - draw_indices_start;
 
 		//vertex_pos = 0;
 		draw_indices_start = indices_pos;
-		++idraw;
-		ASSERT(idraw <= model->draws_count);
-	}
-
-	PRINTF("%d %d", idraw, model->draws_count);
-	ASSERT(idraw == model->draws_count);
-
-	renderModelOptimize(model);
-
-	uint16_t *tmp_indices = stackAlloc(ctx->tmp, sizeof(uint16_t) * ctx->indices);
-	if (!tmp_indices) {
-		return BSPLoadResult_ErrorTempMemory;
 	}
-	int tmp_indices_offset = 0;
-	for (int i = 0; i < model->draws_count; ++i) {
-		struct BSPDraw *d = model->draws + i;
-		memcpy(tmp_indices + tmp_indices_offset, indices_buffer + d->start, sizeof(uint16_t) * d->count);
-		d->start = tmp_indices_offset;
-		tmp_indices_offset += d->count;
-	}
-	ASSERT(tmp_indices_offset == ctx->indices);
-	renderBufferCreate(&model->ibo, RBufferType_Index, sizeof(uint16_t) * ctx->indices, tmp_indices);
+	ASSERT(idraw == model->detailed.draws_count);
 
+	renderBufferCreate(&model->ibo, RBufferType_Index, sizeof(uint16_t) * ctx->indices, indices_buffer);
 	renderBufferCreate(&model->vbo, RBufferType_Vertex, sizeof(struct BSPModelVertex) * vertex_pos, vertices_buffer);
+
+	stackFreeUpToPosition(ctx->tmp, tmp_cursor);
 	return BSPLoadResult_Success;
 }
 

+ 8 - 2
src/bsp.h

@@ -41,12 +41,18 @@ struct BSPLandmark {
 	struct AVec3f origin;
 };
 
+struct BSPDrawSet {
+	int draws_count;
+	struct BSPDraw *draws;
+};
+
 struct BSPModel {
 	struct AABB aabb;
 	RTexture lightmap;
 	RBuffer vbo, ibo;
-	int draws_count;
-	struct BSPDraw *draws;
+
+	struct BSPDrawSet detailed;
+	struct BSPDrawSet coarse;
 
 	struct BSPLandmark landmarks[BSP_MAX_LANDMARKS];
 	int landmarks_count;

+ 1 - 1
src/profiler.c

@@ -75,7 +75,7 @@ int profilerFrame(struct Stack *stack_temp) {
 	++profiler.counted_frame;
 	++profiler.frame;
 
-	if (start - profiler.last_print_time > 60000000) {
+	if (start - profiler.last_print_time > 1000000) {
 		PRINT("=================================================");
 		const ATimeUs dt = profiler.frame_deltas / profiler.counted_frame;
 		PRINTF("avg frame = %dus (fps = %f)", dt, 1000000. / dt);

+ 42 - 62
src/render.c

@@ -344,25 +344,38 @@ int renderInit() {
 	return 1;
 }
 
-static int drawCompare(const void *left, const void *right) {
-	const struct BSPDraw *l = left, *r = right;
-	const ptrdiff_t diff = l->material - r->material;
-
-	if (l->vbo_offset != r->vbo_offset)
-		return (int)l->vbo_offset - (int)r->vbo_offset;
-
-	if (diff == 0)
-		return l->material->base_texture[0] - r->material->base_texture[0];
+static void renderDraw(const struct BSPDraw *draw) {
+	const struct Material *m = draw->material;
+	if (m->base_texture[0]) {
+		const RTexture *t = &m->base_texture[0]->texture;
+		if (t != r.current_tex0) {
+			GL_CALL(glBindTexture(GL_TEXTURE_2D, t->gl_name));
+			GL_CALL(glUniform2f(lmgen_uniforms[4].location, (float)t->width, (float)t->height));
+			r.current_tex0 = t;
+		}
+	}
 
-	return diff;
+	GL_CALL(glDrawElements(GL_TRIANGLES, draw->count, GL_UNSIGNED_SHORT, (void*)(sizeof(uint16_t) * draw->start)));
 }
 
-void renderModelOptimize(struct BSPModel *model) {
-	qsort(model->draws, model->draws_count, sizeof(struct BSPDraw), drawCompare);
+static void renderDrawSet(const struct BSPModel *model, const struct BSPDrawSet *drawset) {
+	unsigned int vbo_offset = 0;
+	for (int i = 0; i < drawset->draws_count; ++i) {
+		const struct BSPDraw *draw = drawset->draws + i;
+		if (i == 0 || draw->vbo_offset != vbo_offset) {
+			vbo_offset = draw->vbo_offset;
+			renderApplyAttribs(lmgen_attribs, &model->vbo, draw->vbo_offset);
+		}
+		renderDraw(draw);
+	}
 }
 
-void renderModelDraw(const struct AMat4f *mvp, float lmn, const struct BSPModel *model) {
-	if (!model->draws_count) return;
+static float aMaxf(float a, float b) { return a > b ? a : b; }
+//static float aMinf(float a, float b) { return a < b ? a : b; }
+
+void renderModelDraw(const struct AMat4f *mvp, struct AVec3f camera_position, float lmn, const struct BSPModel *model) {
+	if (!model->detailed.draws_count) return;
+
 	GL_CALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, model->ibo.gl_name));
 	GL_CALL(glUseProgram(r.lmgen_program));
 	GL_CALL(glActiveTexture(GL_TEXTURE0));
@@ -376,54 +389,21 @@ void renderModelDraw(const struct AMat4f *mvp, float lmn, const struct BSPModel
 
 	GL_CALL(glActiveTexture(GL_TEXTURE0 + 1));
 
-	int start = model->draws[0].start;
-	int count = model->draws[0].count;
-
-	const struct Material *m = model->draws[0].material;
-	if (m->base_texture[0]) {
-		const RTexture *t = &m->base_texture[0]->texture;
-		if (t != r.current_tex0) {
-			GL_CALL(glBindTexture(GL_TEXTURE_2D, t->gl_name));
-			GL_CALL(glUniform2f(lmgen_uniforms[4].location, (float)t->width, (float)t->height));
-			r.current_tex0 = t;
-		}
-	}
-
-	renderApplyAttribs(lmgen_attribs, &model->vbo, model->draws->vbo_offset);
-
-	for (int i = 1; i < model->draws_count; ++i) {
-		const struct BSPDraw *d = model->draws + i;
-
-		if (drawCompare(d - 1, d) != 0) {
-			if (-1[d].vbo_offset != d->vbo_offset)
-				renderApplyAttribs(lmgen_attribs, &model->vbo, d->vbo_offset);
-
-			GL_CALL(glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_SHORT, (void*)(sizeof(uint16_t) * start)));
-			//PRINTF("DRAW start=%d, count=%d", start, count);
-
-			const struct Material *m = d->material;
-			if (m->base_texture[0]) {
-				const RTexture *t = &m->base_texture[0]->texture;
-				if (t != r.current_tex0) {
-					GL_CALL(glBindTexture(GL_TEXTURE_2D, t->gl_name));
-					GL_CALL(glUniform2f(lmgen_uniforms[4].location, (float)t->width, (float)t->height));
-					r.current_tex0 = t;
-				}
-			}
-			start = d->start;
-			count = d->count;
-		} else {
-			//PRINTF("start=%d, count=%d; d->start=%d, d->count=%d", start, count, d->start, d->count);
-			ASSERT(start + count == (int)d->start);
-			count += d->count;
-		}
-
-		//GL_CALL(glDrawElements(GL_TRIANGLES, d->count, GL_UNSIGNED_SHORT, (void*)(sizeof(uint16_t) * d->start)));
-	}
-
-	//PRINTF("start=%d, count=%d", start, count);
-	if (count)
-		GL_CALL(glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_SHORT, (void*)(sizeof(uint16_t) * start)));
+	const float distance =
+		aMaxf(aMaxf(
+			aMaxf(camera_position.x - model->aabb.max.x, model->aabb.min.x - camera_position.x),
+			aMaxf(camera_position.y - model->aabb.max.y, model->aabb.min.y - camera_position.y)),
+			aMaxf(camera_position.z - model->aabb.max.z, model->aabb.min.z - camera_position.z));
+
+	/*
+	PRINTF("%f %f %f -> %f",
+			camera_position.x, camera_position.y, camera_position.z, distance);
+	*/
+
+	if (distance < 5000.f)
+		renderDrawSet(model, &model->detailed);
+	else
+		renderDrawSet(model, &model->coarse);
 }
 
 void renderClear() {

+ 1 - 2
src/render.h

@@ -68,5 +68,4 @@ void renderBufferCreate(RBuffer *buffer, RBufferType type, int size, const void
 
 struct BSPModel;
 
-void renderModelOptimize(struct BSPModel *model);
-void renderModelDraw(const struct AMat4f *mvp, float lmn, const struct BSPModel *model);
+void renderModelDraw(const struct AMat4f *mvp, struct AVec3f camera_position, float lmn, const struct BSPModel *model);