shithub: cstory

Download patch

ref: 9bd21cca125ca528453e678bbd1868de0629e196
parent: bd876e9309acfa451fa31f3576a9bca71a5e9759
parent: 5e6658847cc387e306b511f7c8622d7841345a16
author: Clownacy <[email protected]>
date: Wed Jan 29 17:12:49 EST 2020

Merge pull request #95 from GabrielRavier/improvePerformance

Improve sound performance and add option for native optimizations

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,8 @@
 set(RENDERER "SDLTexture" CACHE STRING "Which renderer the game should use: 'OpenGL3' for an OpenGL 3.2 renderer, 'OpenGLES2' for an OpenGL ES 2.0 renderer, 'SDLTexture' for SDL2's hardware-accelerated Texture API, 'SDLSurface' for SDL2's software-rendered Surface API, or 'Software' for a handwritten software renderer")
 
 option(LTO "Enable link-time optimisation" OFF)
+option(NATIVE_OPTIMIZATIONS "Enable processor-specific optimisations (executable might not work on other architectures) (GCC-compatible compilers only)" OFF)
+
 option(WARNINGS "Enable common compiler warnings (for GCC-compatible compilers and MSVC only)" OFF)
 option(WARNINGS_ALL "Enable ALL compiler warnings (for Clang and MSVC only)" OFF)
 option(WARNINGS_FATAL "Stop compilation on any compiler warning (for GCC-compatible compilers and MSVC only)" OFF)
@@ -446,6 +448,25 @@
 		check_ipo_supported(RESULT result)
 		if(result)
 			set_target_properties(CSE2 PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
+		endif()
+	endif()
+endif()
+
+# Processor-specific optimisations: apply the first host-tuning flag the C++ compiler accepts
+if(NATIVE_OPTIMIZATIONS)
+	include(CheckCXXCompilerFlag)
+	check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)	# GCC spelling
+	if(COMPILER_SUPPORTS_MARCH_NATIVE)
+		target_compile_options(CSE2 PRIVATE "-march=native")
+	else()
+		check_cxx_compiler_flag("-xHost" COMPILER_SUPPORTS_XHOST)	# ICC spelling on Linux
+		check_cxx_compiler_flag("/QxHost" COMPILER_SUPPORTS_QXHOST)	# ICC spelling on Windows
+		if(COMPILER_SUPPORTS_XHOST)
+			target_compile_options(CSE2 PRIVATE "-xHost")
+		elseif(COMPILER_SUPPORTS_QXHOST)
+			target_compile_options(CSE2 PRIVATE "/QxHost")
+		else()
+			message(WARNING "Couldn't activate native optimizations ! (Unsupported compiler)")
 		endif()
 	endif()
 endif()
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@
 Name | Function
 --------|--------
 `-DLTO=ON` | Enable link-time optimisation
+`-DNATIVE_OPTIMIZATIONS=ON` | Enable processor-specific optimisations (executable might not work on other architectures) (GCC-compatible compilers only)
 `-DJAPANESE=ON` | Enable the Japanese-language build (instead of the unofficial Aeon Genesis English translation)
 `-DFIX_BUGS=ON` | Fix various bugs in the game
 `-DDEBUG_SAVE=ON` | Re-enable the ability to drag-and-drop save files onto the window
--- a/src/Backends/Audio/SDL2.cpp
+++ b/src/Backends/Audio/SDL2.cpp
@@ -12,6 +12,7 @@
 
 #include "../../Organya.h"
 #include "../../WindowsWrapper.h"
+#include "../../CommonDefines.h"
 
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -70,7 +71,8 @@
 	sound->volume_r = sound->pan_r * sound->volume;
 }
 
-static void MixSounds(float *stream, unsigned int frames_total)
+// Most CPU-intensive function in the game (about two thirds of CPU time, in my experience), so it is marked with attrHot to make the compiler treat it as the hot spot it is when optimising
+attrHot static void MixSounds(float *stream, unsigned int frames_total)
 {
 	for (AudioBackend_Sound *sound = sound_list_head; sound != NULL; sound = sound->next)
 	{
@@ -85,7 +87,7 @@
 				const float sample2 = (sound->samples[(size_t)sound->position + 1] - 128.0f) / 128.0f;
 
 				// Perform linear interpolation
-				const float interpolated_sample = sample1 + ((sample2 - sample1) * (float)fmod(sound->position, 1.0));
+				const float interpolated_sample = sample1 + ((sample2 - sample1) * (float)fmod((float)sound->position, 1.0f));
 
 				*steam_pointer++ += interpolated_sample * sound->volume_l;
 				*steam_pointer++ += interpolated_sample * sound->volume_r;
--- a/src/CommonDefines.h
+++ b/src/CommonDefines.h
@@ -10,6 +10,13 @@
 #define TILES_TO_UNITS(x) ((int)((x) * (0x200 * 0x10)))
 #define UNITS_TO_TILES(x) ((int)((x) / (0x200 * 0x10)))
 
+// attrHot tags a function as a hot spot on GCC-compatible compilers; elsewhere it expands to nothing so that code using it still compiles
+#ifdef __GNUC__
+#define attrHot __attribute__((hot))
+#else
+#define attrHot
+#endif
+
 enum Collisions
 {
 	COLL_LEFT_WALL = 1,     // Touching a left wall