diff options
-rwxr-xr-x | .travis-build.sh | 2 | ||||
-rwxr-xr-x | .travis-deps.sh | 7 | ||||
-rw-r--r-- | .travis.yml | 4 | ||||
-rw-r--r-- | src/core/hle/service/gsp_gpu.cpp | 2 | ||||
-rw-r--r-- | src/video_core/pica.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 30 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 3 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 17 |
9 files changed, 49 insertions, 31 deletions
diff --git a/.travis-build.sh b/.travis-build.sh index 3310bacc5..df6e236b6 100755 --- a/.travis-build.sh +++ b/.travis-build.sh @@ -12,7 +12,7 @@ fi # Only run clang-format on Linux because we don't have 4.0 on OS X images if [ "$TRAVIS_OS_NAME" = "linux" ]; then # Default clang-format points to default 3.5 version one - CLANG_FORMAT=clang-format-4.0 + CLANG_FORMAT=clang-format-3.9 $CLANG_FORMAT --version if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then diff --git a/.travis-deps.sh b/.travis-deps.sh index 9fd21cc57..1404fe19f 100755 --- a/.travis-deps.sh +++ b/.travis-deps.sh @@ -27,6 +27,13 @@ if [ "$TRAVIS_OS_NAME" = "linux" -o -z "$TRAVIS_OS_NAME" ]; then echo "Using cached SDL2" fi + export DEBIAN_FRONTEND=noninteractive + # Amazing placebo security + curl http://apt.llvm.org/llvm-snapshot.gpg.key | sudo -E apt-key add - + sudo -E add-apt-repository "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-3.9 main" + sudo -E apt-get -yq update + sudo -E apt-get -yq install clang-format-3.9 + elif [ "$TRAVIS_OS_NAME" = "osx" ]; then brew update brew install qt5 sdl2 dylibbundler diff --git a/.travis.yml b/.travis.yml index a9e7aadd2..cc34e039c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ language: cpp matrix: include: - os: linux - sudo: true + sudo: required dist: trusty - os: osx sudo: false @@ -17,7 +17,6 @@ addons: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-precise packages: - gcc-6 - g++-6 @@ -25,7 +24,6 @@ addons: - libqt5opengl5-dev - xorg-dev - lib32stdc++6 # For CMake - - clang-format-4.0 cache: directories: diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 710e0e485..78cb761be 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -346,7 +346,7 @@ static void SetAxiConfigQoSMode(Service::Interface* self) { cmd_buff[1] = RESULT_SUCCESS.raw; // No error - LOG_WARNING(Service_GSP, "(STUBBED) called mode=0x%08X", mode); + LOG_DEBUG(Service_GSP, "(STUBBED) called mode=0x%08X", mode); } /** diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 99bd59a69..b2db609ec 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -40,7 +40,7 @@ namespace Pica { // field offset. Otherwise, the compiler will fail to compile this code. #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \ - size_t>::type) PICA_REG_INDEX(field_name)) + size_t>::type)PICA_REG_INDEX(field_name)) #endif // _MSC_VER struct Regs { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1b734aaa5..3f2255e06 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -715,7 +715,11 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe CachedSurface src_params; src_params.addr = config.GetPhysicalInputAddress(); - src_params.width = config.output_width; + // It's important to use the correct source input width to properly skip over parts of the input + // image which will be cropped from the output but still affect the stride of the input image. + src_params.width = config.input_width; + // Using the output's height is fine because we don't read or skip over the remaining part of + // the image, and it allows for smaller texture cache lookup rectangles. src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format); @@ -736,6 +740,11 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe return false; } + // Adjust the source rectangle to take into account parts of the input lines being cropped + if (config.input_width > config.output_width) { + src_rect.right -= (config.input_width - config.output_width) * src_surface->res_scale_width; + } + // Require destination surface to have same resolution scale as source to preserve scaling dst_params.res_scale_width = src_surface->res_scale_width; dst_params.res_scale_height = src_surface->res_scale_height; @@ -938,7 +947,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con src_params.addr = framebuffer_addr; src_params.width = config.width; src_params.height = config.height; - src_params.stride = pixel_stride; + src_params.pixel_stride = pixel_stride; src_params.is_tiled = false; src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbad9b43..61f6e767f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -158,24 +158,21 @@ bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } - if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - return false; - } + bool can_blit = OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE && + OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE; - if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - return false; + if (can_blit) { + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, + dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); } - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - // Restore previous framebuffer bindings cur_state.draw.read_framebuffer = old_fbs[0]; cur_state.draw.draw_framebuffer = old_fbs[1]; cur_state.Apply(); - return true; + return can_blit; } bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, @@ -291,6 +288,9 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); + // Stride only applies to linear images. + ASSERT(params.pixel_stride == 0 || !params.is_tiled); + std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>(); new_surface->addr = params.addr; @@ -299,7 +299,7 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo new_surface->texture.Create(); new_surface->width = params.width; new_surface->height = params.height; - new_surface->stride = params.stride; + new_surface->pixel_stride = params.pixel_stride; new_surface->res_scale_width = params.res_scale_width; new_surface->res_scale_height = params.res_scale_height; @@ -325,14 +325,15 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo cur_state.Apply(); glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride); if (!new_surface->is_tiled) { // TODO: Ensure this will always be a color format, not a depth or other format ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; + glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, tuple.format, tuple.type, texture_src_data); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } else { SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { @@ -391,7 +392,6 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo 0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); } } - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { @@ -701,13 +701,14 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { cur_state.Apply(); glActiveTexture(GL_TEXTURE0); - glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride); if (!surface->is_tiled) { // TODO: Ensure this will always be a color format, not a depth or other format ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; + glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); } else { SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { @@ -750,7 +751,6 @@ void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { false); } } - glPixelStorei(GL_PACK_ROW_LENGTH, 0); surface->dirty = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 849530d86..32abfbaf5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -171,7 +171,8 @@ struct CachedSurface { OGLTexture texture; u32 width; u32 height; - u32 stride = 0; + /// Stride between lines, in pixels. Only valid for images in linear format. + u32 pixel_stride = 0; float res_scale_width = 1.f; float res_scale_height = 1.f; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 211c703ab..c96110bb2 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -102,11 +102,11 @@ static const X64Reg SETUP = R9; /// The two 32-bit VS address offset registers set by the MOVA instruction static const X64Reg ADDROFFS_REG_0 = R10; static const X64Reg ADDROFFS_REG_1 = R11; -/// VS loop count register +/// VS loop count register (Multiplied by 16) static const X64Reg LOOPCOUNT_REG = R12; /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker) static const X64Reg LOOPCOUNT = RSI; -/// Number to increment LOOPCOUNT_REG by on each loop iteration +/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) static const X64Reg LOOPINC = RDI; /// Result of the previous CMP instruction for the X-component comparison static const X64Reg COND0 = R13; @@ -491,7 +491,7 @@ void JitShader::Compile_FLR(Instruction instr) { if (Common::GetCPUCaps().sse4_1) { ROUNDFLOORPS(SRC1, R(SRC1)); } else { - CVTPS2DQ(SRC1, R(SRC1)); + CVTTPS2DQ(SRC1, R(SRC1)); CVTDQ2PS(SRC1, R(SRC1)); } @@ -718,15 +718,18 @@ void JitShader::Compile_LOOP(Instruction instr) { looping = true; + // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. + // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by + // 4 bits) to be used as an offset into the 16-byte vector registers later int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); - SHR(32, R(LOOPCOUNT_REG), Imm8(8)); - AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start + SHR(32, R(LOOPCOUNT_REG), Imm8(4)); + AND(32, R(LOOPCOUNT_REG), Imm32(0xFF0)); // Y-component is the start MOV(32, R(LOOPINC), R(LOOPCOUNT)); - SHR(32, R(LOOPINC), Imm8(16)); - MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer + SHR(32, R(LOOPINC), Imm8(12)); + AND(32, R(LOOPINC), Imm32(0xFF0)); // Z-component is the incrementer MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 |