commit 584a7c6533f66948e587a149c678f0cdbddbbf9f Author: Joshua Goins Date: Fri Sep 22 15:54:45 2023 -0400 Add initial files diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e9638d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.directory +*build*/ +.idea/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..acc8bce --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.27) +project(dxbc LANGUAGES CXX) + +add_library(windows-headers INTERFACE) +target_include_directories(windows-headers INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include/windows) + +find_package(Vulkan REQUIRED) + +add_subdirectory(src/util) +add_subdirectory(src/spirv) + +add_library(dxbc STATIC) +target_sources(dxbc PRIVATE + src/dxbc/dxbc_analysis.cpp + src/dxbc/dxbc_analysis.h + src/dxbc/dxbc_chunk_isgn.cpp + src/dxbc/dxbc_chunk_isgn.h + src/dxbc/dxbc_chunk_shex.cpp + src/dxbc/dxbc_chunk_shex.h + src/dxbc/dxbc_common.cpp + src/dxbc/dxbc_common.h + src/dxbc/dxbc_compiler.cpp + src/dxbc/dxbc_compiler.h + src/dxbc/dxbc_decoder.cpp + src/dxbc/dxbc_decoder.h + src/dxbc/dxbc_defs.cpp + src/dxbc/dxbc_defs.h + src/dxbc/dxbc_enums.h + src/dxbc/dxbc_header.cpp + src/dxbc/dxbc_header.h + src/dxbc/dxbc_include.h + src/dxbc/dxbc_modinfo.h + src/dxbc/dxbc_module.cpp + src/dxbc/dxbc_module.h + src/dxbc/dxbc_names.cpp + src/dxbc/dxbc_names.h + src/dxbc/dxbc_options.cpp + src/dxbc/dxbc_options.h + src/dxbc/dxbc_reader.cpp + src/dxbc/dxbc_reader.h + src/dxbc/dxbc_tag.h + src/dxbc/dxbc_util.cpp + src/dxbc/dxbc_util.h +) +target_include_directories(dxbc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) +target_link_libraries(dxbc PUBLIC dxbc-util dxbc-spirv Vulkan::Vulkan) + +add_subdirectory(example) \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..434ca51 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ + Copyright (c) 2017 Philip Rebohle + Copyright (c) 2019 Joshua Ashton + + 
zlib/libpng license + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +– The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + +– Altered source versions must be plainly marked as such, and must not + be misrepresented as being the original software. + +– This notice may not be removed or altered from any source distribution. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0225ea0 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# dxbc + +This is the `dxbc` decompiler pulled directly from the [DXVK source tree](https://github.com/doitsujin/dxvk). It is for my personal use, so do not expect me to keep it updated. + +See an example of how to use the library in `example/`. + +All credit goes to the DXVK developers, of course. 
\ No newline at end of file diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 0000000..4169fab --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable(dxbc-example) +target_sources(dxbc-example PRIVATE main.cpp) +target_link_libraries(dxbc-example PRIVATE dxbc spirv-cross-core spirv-cross-glsl) \ No newline at end of file diff --git a/example/main.cpp b/example/main.cpp new file mode 100644 index 0000000..bdd0c0d --- /dev/null +++ b/example/main.cpp @@ -0,0 +1,35 @@ +#include + +#include +#include + +#include + +dxvk::Logger dxvk::Logger::s_instance("dxbc.log"); + +int main(int argc, char *argv[]) { + // pass in the DXVK binary you want to decompile + const std::string inputFile = argv[1]; + std::ifstream infile(inputFile, std::ios_base::binary); + + std::vector buffer((std::istreambuf_iterator(infile)), + std::istreambuf_iterator()); + + dxvk::DxbcReader reader(buffer.data(), buffer.size()); + + dxvk::DxbcModule module(reader); + + dxvk::DxbcModuleInfo info; + auto result = module.compile(info, "test"); + + spirv_cross::CompilerGLSL glsl(result.code.data(), result.code.dwords()); + + spirv_cross::CompilerGLSL::Options options; + options.version = 310; + options.es = true; + glsl.set_common_options(options); + + std::cout << glsl.compile() << std::endl; + + return 0; +} \ No newline at end of file diff --git a/include/windows/oaidl.h b/include/windows/oaidl.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/oaidl.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. \ No newline at end of file diff --git a/include/windows/objbase.h b/include/windows/objbase.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/objbase.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. 
\ No newline at end of file diff --git a/include/windows/ocidl.h b/include/windows/ocidl.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/ocidl.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. \ No newline at end of file diff --git a/include/windows/ole2.h b/include/windows/ole2.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/ole2.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. \ No newline at end of file diff --git a/include/windows/poppack.h b/include/windows/poppack.h new file mode 100644 index 0000000..163af22 --- /dev/null +++ b/include/windows/poppack.h @@ -0,0 +1,8 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#if !(defined(lint) || defined(RC_INVOKED)) +#pragma pack(pop) +#endif diff --git a/include/windows/pshpack4.h b/include/windows/pshpack4.h new file mode 100644 index 0000000..59fdbbc --- /dev/null +++ b/include/windows/pshpack4.h @@ -0,0 +1,8 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#if !(defined(lint) || defined(RC_INVOKED)) +#pragma pack(push,4) +#endif diff --git a/include/windows/rpc.h b/include/windows/rpc.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/rpc.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. \ No newline at end of file diff --git a/include/windows/rpcndr.h b/include/windows/rpcndr.h new file mode 100644 index 0000000..549fe36 --- /dev/null +++ b/include/windows/rpcndr.h @@ -0,0 +1,3 @@ +#pragma once + +// Don't care. 
\ No newline at end of file diff --git a/include/windows/unknwn.h b/include/windows/unknwn.h new file mode 100644 index 0000000..71216db --- /dev/null +++ b/include/windows/unknwn.h @@ -0,0 +1,52 @@ +#pragma once + +#include "windows_base.h" + +typedef interface IUnknown IUnknown; + +DEFINE_GUID(IID_IUnknown, 0x00000000,0x0000,0x0000,0xC0,0x00,0x00,0x00,0x00,0x00,0x00,0x46) + +#ifdef __cplusplus +struct IUnknown { + +public: + + virtual HRESULT QueryInterface(REFIID riid, void** ppvObject) = 0; + template + HRESULT STDMETHODCALLTYPE QueryInterface(Q **pp) { + return QueryInterface(__uuidof(Q), (void **)pp); + } + + virtual ULONG AddRef() = 0; + virtual ULONG Release() = 0; + +}; +#else +typedef struct IUnknownVtbl +{ +BEGIN_INTERFACE + + HRESULT (STDMETHODCALLTYPE *QueryInterface)( + IUnknown *This, + REFIID riid, + void **ppvObject + ); + ULONG (STDMETHODCALLTYPE *AddRef)(IUnknown *This); + ULONG (STDMETHODCALLTYPE *Release)(IUnknown *This); + +END_INTERFACE +} IUnknownVtbl; + +interface IUnknown +{ + CONST_VTBL struct IUnknownVtbl *lpVtbl; +}; + +#define IUnknown_AddRef(This) ((This)->lpVtbl->AddRef(This)) +#define IUnknown_Release(This) ((This)->lpVtbl->Release(This)) + +#endif // __cplusplus + +DECLARE_UUIDOF_HELPER(IUnknown, 0x00000000,0x0000,0x0000,0xC0,0x00,0x00,0x00,0x00,0x00,0x00,0x46) + +#define IID_PPV_ARGS(ppType) __uuidof(decltype(**(ppType))), [](auto** pp) { (void)static_cast(*pp); return reinterpret_cast(pp); }(ppType) diff --git a/include/windows/windows.h b/include/windows/windows.h new file mode 100644 index 0000000..5ffa7b2 --- /dev/null +++ b/include/windows/windows.h @@ -0,0 +1,4 @@ +#pragma once + +#include "windows_base.h" +#include "unknwn.h" \ No newline at end of file diff --git a/include/windows/windows_base.h b/include/windows/windows_base.h new file mode 100644 index 0000000..fe1a2a0 --- /dev/null +++ b/include/windows/windows_base.h @@ -0,0 +1,389 @@ +#pragma once + +#ifdef __cplusplus +#include +#include +#else +#include +#include 
+#include +#endif // __cplusplus + +// GCC complains about the COM interfaces +// not having virtual destructors + +// and class conversion for C...DESC helper types +#if defined(__GNUC__) && defined(__cplusplus) +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wclass-conversion" +#endif // __GNUC__ && __cplusplus + +typedef int32_t INT; +typedef uint32_t UINT; + +typedef int32_t LONG; +typedef uint32_t ULONG; +typedef int32_t *LPLONG; + +typedef int32_t HRESULT; + +typedef wchar_t WCHAR; +typedef WCHAR *NWPSTR, *LPWSTR, *PWSTR; +typedef unsigned char UCHAR, *PUCHAR; + +typedef char CHAR; +typedef const CHAR *LPCSTR, *PCSTR; + +typedef INT BOOL; +typedef BOOL WINBOOL; + +typedef uint16_t UINT16; +typedef uint32_t UINT32; +typedef uint64_t UINT64; +typedef void VOID; +typedef void* PVOID; +typedef void* LPVOID; +typedef const void* LPCVOID; + +typedef size_t SIZE_T; + +typedef int8_t INT8; +typedef uint8_t UINT8; +typedef uint8_t BYTE; + +typedef int16_t SHORT; +typedef uint16_t USHORT; + +typedef int64_t LONGLONG; +typedef int64_t INT64; + +typedef uint64_t ULONGLONG; +typedef uint64_t UINT64; + +typedef intptr_t LONG_PTR; +typedef uintptr_t ULONG_PTR; + +typedef float FLOAT; + +#ifndef GUID_DEFINED +#define GUID_DEFINED +typedef struct GUID { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} GUID; +#endif // GUID_DEFINED + +typedef GUID UUID; +typedef GUID IID; +#ifdef __cplusplus +#define REFIID const IID& +#define REFGUID const GUID& +#define REFCLSID const GUID& +#else +#define REFIID const IID* +#define REFGUID const GUID* +#define REFCLSID const GUID* const +#endif // __cplusplus + +#ifdef __cplusplus + +template +constexpr GUID __uuidof_helper(); + +#define __uuidof(T) __uuidof_helper() +#define __uuidof_var(T) __uuidof_helper() + +inline bool operator==(const GUID& a, const GUID& b) { return std::memcmp(&a, &b, sizeof(GUID)) == 0; } +inline bool operator!=(const GUID& a, const GUID& b) { 
return std::memcmp(&a, &b, sizeof(GUID)) != 0; } + +#endif // __cplusplus + +typedef uint32_t DWORD; +typedef uint16_t WORD; +typedef DWORD *LPDWORD; + +typedef void* HANDLE; +typedef HANDLE HMONITOR; +typedef HANDLE HDC; +typedef HANDLE HMODULE; +typedef HANDLE HINSTANCE; +typedef HANDLE HWND; +typedef HANDLE HKEY; +typedef HANDLE *LPHANDLE; +typedef DWORD COLORREF; + +#if INTPTR_MAX == INT64_MAX +typedef int64_t INT_PTR; +typedef uint64_t UINT_PTR; +#else +typedef int32_t INT_PTR; +typedef uint32_t UINT_PTR; +#endif +typedef INT_PTR* PINT_PTR; +typedef UINT_PTR* PUINT_PTR; + +#ifdef STRICT +#define DECLARE_HANDLE(a) typedef struct a##__ { int unused; } *a +#else /*STRICT*/ +#define DECLARE_HANDLE(a) typedef HANDLE a +#endif /*STRICT*/ + +typedef char* LPSTR; +typedef wchar_t* LPWSTR; +typedef const char* LPCSTR; +typedef const wchar_t* LPCWSTR; + +typedef struct LUID { + DWORD LowPart; + LONG HighPart; +} LUID; + +typedef struct POINT { + LONG x; + LONG y; +} POINT; + +typedef POINT* LPPOINT; + +typedef struct RECT { + LONG left; + LONG top; + LONG right; + LONG bottom; +} RECT,*PRECT,*NPRECT,*LPRECT; + +typedef struct SIZE { + LONG cx; + LONG cy; +} SIZE,*PSIZE,*LPSIZE; + +typedef union { + struct { + DWORD LowPart; + LONG HighPart; + }; + + struct { + DWORD LowPart; + LONG HighPart; + } u; + + LONGLONG QuadPart; +} LARGE_INTEGER; + +typedef struct MEMORYSTATUS { + DWORD dwLength; + SIZE_T dwTotalPhys; +} MEMORYSTATUS; + +typedef struct SECURITY_ATTRIBUTES { + DWORD nLength; + void* lpSecurityDescriptor; + BOOL bInheritHandle; +} SECURITY_ATTRIBUTES; + +typedef struct PALETTEENTRY { + BYTE peRed; + BYTE peGreen; + BYTE peBlue; + BYTE peFlags; +} PALETTEENTRY, *PPALETTEENTRY, *LPPALETTEENTRY; + +typedef struct RGNDATAHEADER { + DWORD dwSize; + DWORD iType; + DWORD nCount; + DWORD nRgnSize; + RECT rcBound; +} RGNDATAHEADER; + +typedef struct RGNDATA { + RGNDATAHEADER rdh; + char Buffer[1]; +} RGNDATA,*PRGNDATA,*NPRGNDATA,*LPRGNDATA; + +// Ignore these. 
+#define STDMETHODCALLTYPE +#define __stdcall + +#define CONST const +#define CONST_VTBL const + +#define TRUE 1 +#define FALSE 0 + +#define WAIT_TIMEOUT 0x00000102 +#define WAIT_FAILED 0xffffffff +#define WAIT_OBJECT_0 0 +#define WAIT_ABANDONED 0x00000080 + +#define interface struct +#define MIDL_INTERFACE(x) struct + +#ifdef __cplusplus + +#define DEFINE_GUID(iid, a, b, c, d, e, f, g, h, i, j, k) \ + constexpr GUID iid = {a,b,c,{d,e,f,g,h,i,j,k}}; + +#define DECLARE_UUIDOF_HELPER(type, a, b, c, d, e, f, g, h, i, j, k) \ + extern "C++" { template <> constexpr GUID __uuidof_helper() { return GUID{a,b,c,{d,e,f,g,h,i,j,k}}; } } \ + extern "C++" { template <> constexpr GUID __uuidof_helper() { return __uuidof_helper(); } } \ + extern "C++" { template <> constexpr GUID __uuidof_helper() { return __uuidof_helper(); } } \ + extern "C++" { template <> constexpr GUID __uuidof_helper() { return __uuidof_helper(); } } \ + extern "C++" { template <> constexpr GUID __uuidof_helper() { return __uuidof_helper(); } } + +#else +#define DEFINE_GUID(iid, a, b, c, d, e, f, g, h, i, j, k) \ + static const GUID iid = {a,b,c,{d,e,f,g,h,i,j,k}}; +#define DECLARE_UUIDOF_HELPER(type, a, b, c, d, e, f, g, h, i, j, k) +#endif // __cplusplus + +#define __CRT_UUID_DECL(type, a, b, c, d, e, f, g, h, i, j, k) DECLARE_UUIDOF_HELPER(type, a, b, c, d, e, f, g, h, i, j, k) + +#define S_OK 0 +#define S_FALSE 1 + +#define E_INVALIDARG ((HRESULT)0x80070057) +#define E_FAIL ((HRESULT)0x80004005) +#define E_NOINTERFACE ((HRESULT)0x80004002) +#define E_NOTIMPL ((HRESULT)0x80004001) +#define E_OUTOFMEMORY ((HRESULT)0x8007000E) +#define E_POINTER ((HRESULT)0x80004003) + +#define DXGI_STATUS_OCCLUDED ((HRESULT)0x087a0001) +#define DXGI_STATUS_CLIPPED ((HRESULT)0x087a0002) +#define DXGI_STATUS_NO_REDIRECTION ((HRESULT)0x087a0004) +#define DXGI_STATUS_NO_DESKTOP_ACCESS ((HRESULT)0x087a0005) +#define DXGI_STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE ((HRESULT)0x087a0006) +#define DXGI_STATUS_MODE_CHANGED 
((HRESULT)0x087a0007) +#define DXGI_STATUS_MODE_CHANGE_IN_PROGRESS ((HRESULT)0x087a0008) +#define DXGI_STATUS_UNOCCLUDED ((HRESULT)0x087a0009) +#define DXGI_STATUS_DDA_WAS_STILL_DRAWING ((HRESULT)0x087a000a) +#define DXGI_STATUS_PRESENT_REQUIRED ((HRESULT)0x087a002f) + +#define DXGI_ERROR_INVALID_CALL ((HRESULT)0x887A0001) +#define DXGI_ERROR_NOT_FOUND ((HRESULT)0x887A0002) +#define DXGI_ERROR_MORE_DATA ((HRESULT)0x887A0003) +#define DXGI_ERROR_UNSUPPORTED ((HRESULT)0x887A0004) +#define DXGI_ERROR_DEVICE_REMOVED ((HRESULT)0x887A0005) +#define DXGI_ERROR_DEVICE_HUNG ((HRESULT)0x887A0006) +#define DXGI_ERROR_DEVICE_RESET ((HRESULT)0x887A0007) +#define DXGI_ERROR_WAS_STILL_DRAWING ((HRESULT)0x887A000A) +#define DXGI_ERROR_FRAME_STATISTICS_DISJOINT ((HRESULT)0x887A000B) +#define DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE ((HRESULT)0x887A000C) +#define DXGI_ERROR_DRIVER_INTERNAL_ERROR ((HRESULT)0x887A0020) +#define DXGI_ERROR_NONEXCLUSIVE ((HRESULT)0x887A0021) +#define DXGI_ERROR_NOT_CURRENTLY_AVAILABLE ((HRESULT)0x887A0022) +#define DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED ((HRESULT)0x887A0023) +#define DXGI_ERROR_REMOTE_OUTOFMEMORY ((HRESULT)0x887A0024) +#define DXGI_ERROR_ACCESS_LOST ((HRESULT)0x887A0026) +#define DXGI_ERROR_WAIT_TIMEOUT ((HRESULT)0x887A0027) +#define DXGI_ERROR_SESSION_DISCONNECTED ((HRESULT)0x887A0028) +#define DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE ((HRESULT)0x887A0029) +#define DXGI_ERROR_CANNOT_PROTECT_CONTENT ((HRESULT)0x887A002A) +#define DXGI_ERROR_ACCESS_DENIED ((HRESULT)0x887A002B) +#define DXGI_ERROR_NAME_ALREADY_EXISTS ((HRESULT)0x887A002C) +#define DXGI_ERROR_SDK_COMPONENT_MISSING ((HRESULT)0x887A002D) + +#define WINAPI +#define WINUSERAPI + +#define RGB(r,g,b) ((COLORREF)(((BYTE)(r)|((WORD)((BYTE)(g))<<8))|(((DWORD)(BYTE)(b))<<16))) + +#define MAKE_HRESULT(sev,fac,code) \ + ((HRESULT) (((unsigned long)(sev)<<31) | ((unsigned long)(fac)<<16) | ((unsigned long)(code))) ) + +#ifdef __cplusplus +#define STDMETHOD(name) virtual HRESULT name +#define 
STDMETHOD_(type, name) virtual type name +#else +#define STDMETHOD(name) HRESULT (STDMETHODCALLTYPE *name) +#define STDMETHOD_(type, name) type (STDMETHODCALLTYPE *name) +#endif // __cplusplus + +#define THIS_ +#define THIS + +#define __C89_NAMELESSSTRUCTNAME +#define __C89_NAMELESSUNIONNAME +#define __C89_NAMELESSUNIONNAME1 +#define __C89_NAMELESSUNIONNAME2 +#define __C89_NAMELESSUNIONNAME3 +#define __C89_NAMELESSUNIONNAME4 +#define __C89_NAMELESSUNIONNAME5 +#define __C89_NAMELESSUNIONNAME6 +#define __C89_NAMELESSUNIONNAME7 +#define __C89_NAMELESSUNIONNAME8 +#define __C89_NAMELESS +#define DUMMYUNIONNAME +#define DUMMYSTRUCTNAME +#define DUMMYUNIONNAME1 +#define DUMMYUNIONNAME2 +#define DUMMYUNIONNAME3 +#define DUMMYUNIONNAME4 +#define DUMMYUNIONNAME5 +#define DUMMYUNIONNAME6 +#define DUMMYUNIONNAME7 +#define DUMMYUNIONNAME8 +#define DUMMYUNIONNAME9 + +#ifdef __cplusplus +#define DECLARE_INTERFACE(x) struct x +#define DECLARE_INTERFACE_(x, y) struct x : public y +#else +#define DECLARE_INTERFACE(x) \ + typedef interface x { \ + const struct x##Vtbl *lpVtbl; \ + } x; \ + typedef const struct x##Vtbl x##Vtbl; \ + const struct x##Vtbl +#define DECLARE_INTERFACE_(x, y) DECLARE_INTERFACE(x) +#endif // __cplusplus + +#define BEGIN_INTERFACE +#define END_INTERFACE + +#ifdef __cplusplus +#define PURE = 0 +#else +#define PURE +#endif // __cplusplus + +#define DECLSPEC_SELECTANY + +#define __MSABI_LONG(x) x + +#define ENUM_CURRENT_SETTINGS ((DWORD)-1) +#define ENUM_REGISTRY_SETTINGS ((DWORD)-2) + +#define INVALID_HANDLE_VALUE ((HANDLE)-1) + +#define DUPLICATE_CLOSE_SOURCE ((DWORD)0x1) +#define DUPLICATE_SAME_ACCESS ((DWORD)0x2) + +#define FAILED(hr) ((HRESULT)(hr) < 0) +#define SUCCEEDED(hr) ((HRESULT)(hr) >= 0) + +#define RtlZeroMemory(Destination,Length) memset((Destination),0,(Length)) +#define ZeroMemory RtlZeroMemory + +#ifndef DEFINE_ENUM_FLAG_OPERATORS + +#ifdef __cplusplus +# define DEFINE_ENUM_FLAG_OPERATORS(type) \ +extern "C++" \ +{ \ + inline type operator 
&(type x, type y) { return (type)((int)x & (int)y); } \ + inline type operator &=(type &x, type y) { return (type &)((int &)x &= (int)y); } \ + inline type operator ~(type x) { return (type)~(int)x; } \ + inline type operator |(type x, type y) { return (type)((int)x | (int)y); } \ + inline type operator |=(type &x, type y) { return (type &)((int &)x |= (int)y); } \ + inline type operator ^(type x, type y) { return (type)((int)x ^ (int)y); } \ + inline type operator ^=(type &x, type y) { return (type &)((int &)x ^= (int)y); } \ +} +#else +# define DEFINE_ENUM_FLAG_OPERATORS(type) +#endif +#endif /* DEFINE_ENUM_FLAG_OPERATORS */ diff --git a/src/dxbc/dxbc_analysis.cpp b/src/dxbc/dxbc_analysis.cpp new file mode 100644 index 0000000..8a8f44b --- /dev/null +++ b/src/dxbc/dxbc_analysis.cpp @@ -0,0 +1,115 @@ +#include "dxbc_analysis.h" + +namespace dxvk { + + DxbcAnalyzer::DxbcAnalyzer( + const DxbcModuleInfo& moduleInfo, + const DxbcProgramInfo& programInfo, + const Rc& isgn, + const Rc& osgn, + const Rc& psgn, + DxbcAnalysisInfo& analysis) + : m_isgn (isgn), + m_osgn (osgn), + m_psgn (psgn), + m_analysis(&analysis) { + // Get number of clipping and culling planes from the + // input and output signatures. We will need this to + // declare the shader input and output interfaces. 
+ m_analysis->clipCullIn = getClipCullInfo(m_isgn); + m_analysis->clipCullOut = getClipCullInfo(m_osgn); + } + + + DxbcAnalyzer::~DxbcAnalyzer() { + + } + + // Records what a single decoded instruction implies for the analysis result: UAV/SRV access flags, typed-load, atomic and sparse-feedback usage, derivative and discard usage, and the write masks of indexable temp registers. + void DxbcAnalyzer::processInstruction(const DxbcShaderInstruction& ins) { + switch (ins.opClass) { + case DxbcInstClass::Atomic: { + const uint32_t operandId = ins.dstCount - 1; // only the last destination operand is inspected + + if (ins.dst[operandId].type == DxbcOperandType::UnorderedAccessView) { + const uint32_t registerId = ins.dst[operandId].idx[0].offset; + m_analysis->uavInfos[registerId].accessAtomicOp = true; + m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + } + } break; + + case DxbcInstClass::TextureSample: + case DxbcInstClass::TextureGather: + case DxbcInstClass::TextureQueryLod: + case DxbcInstClass::VectorDeriv: { + m_analysis->usesDerivatives = true; + } break; + + case DxbcInstClass::ControlFlow: { + if (ins.op == DxbcOpcode::Discard) + m_analysis->usesKill = true; + } break; + + case DxbcInstClass::BufferLoad: { + uint32_t operandId = ins.op == DxbcOpcode::LdStructured ? 
2 : 1; + bool sparseFeedback = ins.dstCount == 2; // a second destination operand sets the sparse-feedback flag + + if (ins.src[operandId].type == DxbcOperandType::UnorderedAccessView) { + const uint32_t registerId = ins.src[operandId].idx[0].offset; + m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT; + m_analysis->uavInfos[registerId].sparseFeedback |= sparseFeedback; + } else if (ins.src[operandId].type == DxbcOperandType::Resource) { + const uint32_t registerId = ins.src[operandId].idx[0].offset; + m_analysis->srvInfos[registerId].sparseFeedback |= sparseFeedback; + } + } break; + + case DxbcInstClass::BufferStore: { + if (ins.dst[0].type == DxbcOperandType::UnorderedAccessView) { + const uint32_t registerId = ins.dst[0].idx[0].offset; + m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT; + } + } break; + + case DxbcInstClass::TypedUavLoad: { + const uint32_t registerId = ins.src[1].idx[0].offset; + m_analysis->uavInfos[registerId].accessTypedLoad = true; + m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT; + } break; + + case DxbcInstClass::TypedUavStore: { + const uint32_t registerId = ins.dst[0].idx[0].offset; + m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT; + } break; + + default: + break; + } + + for (uint32_t i = 0; i < ins.dstCount; i++) { + if (ins.dst[i].type == DxbcOperandType::IndexableTemp) { // examine every destination operand, not only dst[0] + uint32_t index = ins.dst[i].idx[0].offset; + m_analysis->xRegMasks[index] |= ins.dst[i].mask; + } + } + } + + + DxbcClipCullInfo DxbcAnalyzer::getClipCullInfo(const Rc& sgn) const { + DxbcClipCullInfo result; + + if (sgn != nullptr) { + for (auto e = sgn->begin(); e != sgn->end(); e++) { + const uint32_t componentCount = e->componentMask.popCount(); + + if (e->systemValue == DxbcSystemValue::ClipDistance) + result.numClipPlanes += componentCount; + if (e->systemValue == DxbcSystemValue::CullDistance) + result.numCullPlanes += componentCount; + } + } + + return result; + } + +} diff --git a/src/dxbc/dxbc_analysis.h 
b/src/dxbc/dxbc_analysis.h new file mode 100644 index 0000000..fcbc1dd --- /dev/null +++ b/src/dxbc/dxbc_analysis.h @@ -0,0 +1,100 @@ +#pragma once + +#include "dxbc_chunk_isgn.h" +#include "dxbc_decoder.h" +#include "dxbc_defs.h" +#include "dxbc_names.h" +#include "dxbc_modinfo.h" +#include "dxbc_util.h" + +namespace dxvk { + + /** + * \brief Info about unordered access views + * + * Stores whether an UAV is accessed with typed + * read or atomic instructions. This information + * will be used to generate image types. + */ + struct DxbcUavInfo { + bool accessTypedLoad = false; + bool accessAtomicOp = false; + bool sparseFeedback = false; + VkAccessFlags accessFlags = 0; + }; + + /** + * \brief Info about shader resource views + * + * Stores whether an SRV is accessed with + * sparse feedback. Useful for buffers. + */ + struct DxbcSrvInfo { + bool sparseFeedback = false; + }; + + /** + * \brief Counts cull and clip distances + */ + struct DxbcClipCullInfo { + uint32_t numClipPlanes = 0; + uint32_t numCullPlanes = 0; + }; + + /** + * \brief Shader analysis info + */ + struct DxbcAnalysisInfo { + std::array uavInfos; + std::array srvInfos; + std::array xRegMasks; + + DxbcClipCullInfo clipCullIn; + DxbcClipCullInfo clipCullOut; + + bool usesDerivatives = false; + bool usesKill = false; + }; + + /** + * \brief DXBC shader analysis pass + * + * Collects information about the shader itself + * and the resources used by the shader, which + * will later be used by the actual compiler. 
+ */ + class DxbcAnalyzer { + + public: + + DxbcAnalyzer( + const DxbcModuleInfo& moduleInfo, + const DxbcProgramInfo& programInfo, + const Rc& isgn, + const Rc& osgn, + const Rc& psgn, + DxbcAnalysisInfo& analysis); + + ~DxbcAnalyzer(); + + /** + * \brief Processes a single instruction + * \param [in] ins The instruction + */ + void processInstruction( + const DxbcShaderInstruction& ins); + + private: + + Rc m_isgn; + Rc m_osgn; + Rc m_psgn; + + DxbcAnalysisInfo* m_analysis = nullptr; + + DxbcClipCullInfo getClipCullInfo( + const Rc& sgn) const; + + }; + +} diff --git a/src/dxbc/dxbc_chunk_isgn.cpp b/src/dxbc/dxbc_chunk_isgn.cpp new file mode 100644 index 0000000..47f4eef --- /dev/null +++ b/src/dxbc/dxbc_chunk_isgn.cpp @@ -0,0 +1,122 @@ +#include "dxbc_chunk_isgn.h" + +namespace dxvk { + + DxbcIsgn::DxbcIsgn(DxbcReader reader, DxbcTag tag) { + uint32_t elementCount = reader.readu32(); + reader.skip(sizeof(uint32_t)); + + std::array componentTypes = { + DxbcScalarType::Uint32, DxbcScalarType::Uint32, + DxbcScalarType::Sint32, DxbcScalarType::Float32, + }; + + // https://github.com/DarkStarSword/3d-fixes/blob/master/dx11shaderanalyse.py#L101 + bool hasStream = (tag == "ISG1") || (tag == "OSG1") || (tag == "PSG1") || (tag == "OSG5"); + bool hasPrecision = (tag == "ISG1") || (tag == "OSG1") || (tag == "PSG1"); + + for (uint32_t i = 0; i < elementCount; i++) { + DxbcSgnEntry entry; + entry.streamId = hasStream ? 
reader.readu32() : 0; + entry.semanticName = reader.clone(reader.readu32()).readString(); + entry.semanticIndex = reader.readu32(); + entry.systemValue = static_cast(reader.readu32()); + entry.componentType = componentTypes.at(reader.readu32()); + entry.registerId = reader.readu32(); + entry.componentMask = bit::extract(reader.readu32(), 0, 3); + + if (hasPrecision) + reader.readu32(); + + m_entries.push_back(entry); + } + } + + + DxbcIsgn::~DxbcIsgn() { + + } + + + const DxbcSgnEntry* DxbcIsgn::findByRegister(uint32_t registerId) const { + for (auto e = this->begin(); e != this->end(); e++) { + if (e->registerId == registerId) + return &(*e); + } + + return nullptr; + } + + + const DxbcSgnEntry* DxbcIsgn::find( + const std::string& semanticName, + uint32_t semanticIndex, + uint32_t streamId) const { + for (auto e = this->begin(); e != this->end(); e++) { + if (e->semanticIndex == semanticIndex + && e->streamId == streamId + && compareSemanticNames(semanticName, e->semanticName)) + return &(*e); + } + + return nullptr; + } + + + DxbcRegMask DxbcIsgn::regMask( + uint32_t registerId) const { + DxbcRegMask mask; + + for (auto e = this->begin(); e != this->end(); e++) { + if (e->registerId == registerId) + mask |= e->componentMask; + } + + return mask; + } + + + uint32_t DxbcIsgn::maxRegisterCount() const { + uint32_t result = 0; + for (auto e = this->begin(); e != this->end(); e++) + result = std::max(result, e->registerId + 1); + return result; + } + + void DxbcIsgn::printEntries() const { + for (auto entry = this->begin(); entry != this->end(); entry++) { + Logger::debug(str::format("SGN Entry:\n\t", + "semanticName: ", entry->semanticName, "\n\t", + "semanticIndex: ", entry->semanticIndex, "\n\t", + "registerId: ", entry->registerId, "\n\t", + "componentMask: ", entry->componentMask.maskString(), "\n\t", + "componentType: ", entry->componentType, "\n\t", + "systemValue: ", entry->systemValue, "\n\t", + "streamId: ", entry->streamId, "\n", + "\n")); + } + } + + + 
bool DxbcIsgn::compareSemanticNames( + const std::string& a, const std::string& b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); i++) { + char ac = a[i]; + char bc = b[i]; + + if (ac != bc) { + if (ac >= 'A' && ac <= 'Z') ac += 'a' - 'A'; + if (bc >= 'A' && bc <= 'Z') bc += 'a' - 'A'; + + if (ac != bc) + return false; + } + } + + return true; + } + +} diff --git a/src/dxbc/dxbc_chunk_isgn.h b/src/dxbc/dxbc_chunk_isgn.h new file mode 100644 index 0000000..1dce64a --- /dev/null +++ b/src/dxbc/dxbc_chunk_isgn.h @@ -0,0 +1,69 @@ +#pragma once + +#include + +#include "dxbc_common.h" +#include "dxbc_decoder.h" +#include "dxbc_enums.h" +#include "dxbc_reader.h" + +namespace dxvk { + + /** + * \brief Signature entry + * + * Stores the semantic name of an input or + * output and the corresponding register. + */ + struct DxbcSgnEntry { + std::string semanticName; + uint32_t semanticIndex; + uint32_t registerId; + DxbcRegMask componentMask; + DxbcScalarType componentType; + DxbcSystemValue systemValue; + uint32_t streamId; + }; + + /** + * \brief Input/Output signature chunk + * + * Stores information about the input and + * output registers used by the shader stage. 
+ */ + class DxbcIsgn : public RcObject { + + public: + + DxbcIsgn(DxbcReader reader, DxbcTag tag); + ~DxbcIsgn(); + + auto begin() const { return m_entries.cbegin(); } + auto end () const { return m_entries.cend(); } + + const DxbcSgnEntry* findByRegister( + uint32_t registerId) const; + + const DxbcSgnEntry* find( + const std::string& semanticName, + uint32_t semanticIndex, + uint32_t streamIndex) const; + + DxbcRegMask regMask( + uint32_t registerId) const; + + uint32_t maxRegisterCount() const; + + void printEntries() const; + + static bool compareSemanticNames( + const std::string& a, + const std::string& b); + + private: + + std::vector m_entries; + + }; + +} diff --git a/src/dxbc/dxbc_chunk_shex.cpp b/src/dxbc/dxbc_chunk_shex.cpp new file mode 100644 index 0000000..552329b --- /dev/null +++ b/src/dxbc/dxbc_chunk_shex.cpp @@ -0,0 +1,24 @@ +#include "dxbc_chunk_shex.h" + +namespace dxvk { + + DxbcShex::DxbcShex(DxbcReader reader) { + // The shader version and type are stored in a 32-bit unit, + // where the first byte contains the major and minor version + // numbers, and the high word contains the program type. + reader.skip(2); + auto pType = reader.readEnum(); + m_programInfo = DxbcProgramInfo(pType); + + // Read the actual shader code as an array of DWORDs. + auto codeLength = reader.readu32() - 2; + m_code.resize(codeLength); + reader.read(m_code.data(), codeLength * sizeof(uint32_t)); + } + + + DxbcShex::~DxbcShex() { + + } + +} diff --git a/src/dxbc/dxbc_chunk_shex.h b/src/dxbc/dxbc_chunk_shex.h new file mode 100644 index 0000000..8deecc8 --- /dev/null +++ b/src/dxbc/dxbc_chunk_shex.h @@ -0,0 +1,39 @@ +#pragma once + +#include "dxbc_common.h" +#include "dxbc_decoder.h" +#include "dxbc_reader.h" + +namespace dxvk { + + /** + * \brief Shader code chunk + * + * Stores the DXBC shader code itself, as well + * as some meta info about the shader, i.e. what + * type of shader this is. 
+ */ + class DxbcShex : public RcObject { + + public: + + DxbcShex(DxbcReader reader); + ~DxbcShex(); + + DxbcProgramInfo programInfo() const { + return m_programInfo; + } + + DxbcCodeSlice slice() const { + return DxbcCodeSlice(m_code.data(), + m_code.data() + m_code.size()); + } + + private: + + DxbcProgramInfo m_programInfo; + std::vector m_code; + + }; + +} diff --git a/src/dxbc/dxbc_common.cpp b/src/dxbc/dxbc_common.cpp new file mode 100644 index 0000000..d150c58 --- /dev/null +++ b/src/dxbc/dxbc_common.cpp @@ -0,0 +1,32 @@ +#include "dxbc_common.h" + +namespace dxvk { + + VkShaderStageFlagBits DxbcProgramInfo::shaderStage() const { + switch (m_type) { + case DxbcProgramType::PixelShader : return VK_SHADER_STAGE_FRAGMENT_BIT; + case DxbcProgramType::VertexShader : return VK_SHADER_STAGE_VERTEX_BIT; + case DxbcProgramType::GeometryShader : return VK_SHADER_STAGE_GEOMETRY_BIT; + case DxbcProgramType::HullShader : return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + case DxbcProgramType::DomainShader : return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + case DxbcProgramType::ComputeShader : return VK_SHADER_STAGE_COMPUTE_BIT; + } + + throw DxvkError("DxbcProgramInfo::shaderStage: Unsupported program type"); + } + + + spv::ExecutionModel DxbcProgramInfo::executionModel() const { + switch (m_type) { + case DxbcProgramType::PixelShader : return spv::ExecutionModelFragment; + case DxbcProgramType::VertexShader : return spv::ExecutionModelVertex; + case DxbcProgramType::GeometryShader : return spv::ExecutionModelGeometry; + case DxbcProgramType::HullShader : return spv::ExecutionModelTessellationControl; + case DxbcProgramType::DomainShader : return spv::ExecutionModelTessellationEvaluation; + case DxbcProgramType::ComputeShader : return spv::ExecutionModelGLCompute; + } + + throw DxvkError("DxbcProgramInfo::executionModel: Unsupported program type"); + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_common.h b/src/dxbc/dxbc_common.h new file mode 100644 
/**
 * \brief DXBC Program type
 * 
 * Defines the shader stage that a DXBC
 * module has been compiled from.
 */
enum class DxbcProgramType : uint16_t {
  PixelShader    = 0,
  VertexShader   = 1,
  GeometryShader = 2,
  HullShader     = 3,
  DomainShader   = 4,
  ComputeShader  = 5,
};


/**
 * \brief DXBC shader info
 * 
 * Stores the shader program type and maps it to the
 * matching Vulkan stage and SPIR-V execution model.
 */
class DxbcProgramInfo {
  
public:
  
  // A default-constructed object reports the pixel shader
  // type, see the member initializer of m_type below.
  DxbcProgramInfo() { }
  DxbcProgramInfo(DxbcProgramType type)
  : m_type(type) { }
  
  /**
   * \brief Program type
   * \returns Program type
   */
  DxbcProgramType type() const {
    return m_type;
  }
  
  /**
   * \brief Vulkan shader stage
   * 
   * The \c VkShaderStageFlagBits constant
   * that corresponds to the program type.
   * \returns Vulkan shader stage
   */
  VkShaderStageFlagBits shaderStage() const;
  
  /**
   * \brief SPIR-V execution model
   * 
   * The execution model that corresponds
   * to the Vulkan shader stage.
   * \returns SPIR-V execution model
   */
  spv::ExecutionModel executionModel() const;
  
private:
  
  DxbcProgramType m_type = DxbcProgramType::PixelShader;
  
};
+ m_module.enableCapability( + spv::CapabilityVulkanMemoryModel); + + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelVulkan); + + // Make sure our interface registers are clear + for (uint32_t i = 0; i < DxbcMaxInterfaceRegs; i++) { + m_vRegs.at(i) = DxbcRegisterPointer { }; + m_oRegs.at(i) = DxbcRegisterPointer { }; + } + + this->emitInit(); + } + + + DxbcCompiler::~DxbcCompiler() { + + } + + + void DxbcCompiler::processInstruction(const DxbcShaderInstruction& ins) { + m_lastOp = m_currOp; + m_currOp = ins.op; + + switch (ins.opClass) { + case DxbcInstClass::Declaration: + return this->emitDcl(ins); + + case DxbcInstClass::CustomData: + return this->emitCustomData(ins); + + case DxbcInstClass::Atomic: + return this->emitAtomic(ins); + + case DxbcInstClass::AtomicCounter: + return this->emitAtomicCounter(ins); + + case DxbcInstClass::Barrier: + return this->emitBarrier(ins); + + case DxbcInstClass::BitExtract: + return this->emitBitExtract(ins); + + case DxbcInstClass::BitInsert: + return this->emitBitInsert(ins); + + case DxbcInstClass::BitScan: + return this->emitBitScan(ins); + + case DxbcInstClass::BufferQuery: + return this->emitBufferQuery(ins); + + case DxbcInstClass::BufferLoad: + return this->emitBufferLoad(ins); + + case DxbcInstClass::BufferStore: + return this->emitBufferStore(ins); + + case DxbcInstClass::ConvertFloat16: + return this->emitConvertFloat16(ins); + + case DxbcInstClass::ConvertFloat64: + return this->emitConvertFloat64(ins); + + case DxbcInstClass::ControlFlow: + return this->emitControlFlow(ins); + + case DxbcInstClass::GeometryEmit: + return this->emitGeometryEmit(ins); + + case DxbcInstClass::HullShaderPhase: + return this->emitHullShaderPhase(ins); + + case DxbcInstClass::HullShaderInstCnt: + return this->emitHullShaderInstCnt(ins); + + case DxbcInstClass::Interpolate: + return this->emitInterpolate(ins); + + case DxbcInstClass::NoOperation: + return; + + case DxbcInstClass::SparseCheckAccess: + return 
this->emitSparseCheckAccess(ins); + + case DxbcInstClass::TextureQuery: + return this->emitTextureQuery(ins); + + case DxbcInstClass::TextureQueryLod: + return this->emitTextureQueryLod(ins); + + case DxbcInstClass::TextureQueryMs: + return this->emitTextureQueryMs(ins); + + case DxbcInstClass::TextureQueryMsPos: + return this->emitTextureQueryMsPos(ins); + + case DxbcInstClass::TextureFetch: + return this->emitTextureFetch(ins); + + case DxbcInstClass::TextureGather: + return this->emitTextureGather(ins); + + case DxbcInstClass::TextureSample: + return this->emitTextureSample(ins); + + case DxbcInstClass::TypedUavLoad: + return this->emitTypedUavLoad(ins); + + case DxbcInstClass::TypedUavStore: + return this->emitTypedUavStore(ins); + + case DxbcInstClass::VectorAlu: + return this->emitVectorAlu(ins); + + case DxbcInstClass::VectorCmov: + return this->emitVectorCmov(ins); + + case DxbcInstClass::VectorCmp: + return this->emitVectorCmp(ins); + + case DxbcInstClass::VectorDeriv: + return this->emitVectorDeriv(ins); + + case DxbcInstClass::VectorDot: + return this->emitVectorDot(ins); + + case DxbcInstClass::VectorIdiv: + return this->emitVectorIdiv(ins); + + case DxbcInstClass::VectorImul: + return this->emitVectorImul(ins); + + case DxbcInstClass::VectorMsad: + return this->emitVectorMsad(ins); + + case DxbcInstClass::VectorShift: + return this->emitVectorShift(ins); + + case DxbcInstClass::VectorSinCos: + return this->emitVectorSinCos(ins); + + default: + Logger::warn( + str::format("DxbcCompiler: Unhandled opcode class: ", + ins.op)); + } + } + + + void DxbcCompiler::processXfbPassthrough() { + m_module.setExecutionMode (m_entryPointId, spv::ExecutionModeInputPoints); + m_module.setExecutionMode (m_entryPointId, spv::ExecutionModeOutputPoints); + m_module.setOutputVertices(m_entryPointId, 1); + m_module.setInvocations (m_entryPointId, 1); + + for (auto e = m_isgn->begin(); e != m_isgn->end(); e++) { + emitDclInput(e->registerId, 1, + e->componentMask, 
DxbcSystemValue::None, + DxbcInterpolationMode::Undefined); + } + + // Figure out which streams to enable + uint32_t streamMask = 0; + + for (size_t i = 0; i < m_xfbVars.size(); i++) + streamMask |= 1u << m_xfbVars[i].streamId; + + for (uint32_t streamId : bit::BitMask(streamMask)) { + emitXfbOutputSetup(streamId, true); + m_module.opEmitVertex(m_module.constu32(streamId)); + } + + // End the main function + emitFunctionEnd(); + } + + + DxbcCompiler::ShaderCreateInfo DxbcCompiler::finalize() { + // Depending on the shader type, this will prepare + // input registers, call various shader functions + // and write back the output registers. + switch (m_programInfo.type()) { + case DxbcProgramType::VertexShader: this->emitVsFinalize(); break; + case DxbcProgramType::HullShader: this->emitHsFinalize(); break; + case DxbcProgramType::DomainShader: this->emitDsFinalize(); break; + case DxbcProgramType::GeometryShader: this->emitGsFinalize(); break; + case DxbcProgramType::PixelShader: this->emitPsFinalize(); break; + case DxbcProgramType::ComputeShader: this->emitCsFinalize(); break; + } + + // Emit float control mode if the extension is supported + this->emitFloatControl(); + + // Declare the entry point, we now have all the + // information we need, including the interfaces + m_module.addEntryPoint(m_entryPointId, + m_programInfo.executionModel(), "main"); + m_module.setDebugName(m_entryPointId, "main"); + + // Create the shader object + ShaderCreateInfo info; + info.stage = m_programInfo.shaderStage(); + //info.bindingCount = m_bindings.size(); + //info.bindings = m_bindings.data(); + info.inputMask = m_inputMask; + info.outputMask = m_outputMask; + info.uniformSize = m_immConstData.size(); + info.uniformData = m_immConstData.data(); + info.outputTopology = m_outputTopology; + + if (m_programInfo.type() == DxbcProgramType::HullShader) + info.patchVertexCount = m_hs.vertexCountIn; + + if (m_programInfo.type() == DxbcProgramType::PixelShader && m_ps.pushConstantId) + 
info.pushConstSize = sizeof(DxbcPushConstants); + + if (m_moduleInfo.xfb) { + info.xfbRasterizedStream = m_moduleInfo.xfb->rasterizedStream; + + for (uint32_t i = 0; i < 4; i++) + info.xfbStrides[i] = m_moduleInfo.xfb->strides[i]; + } + + info.code = m_module.compile(); + + return info; + } + + + void DxbcCompiler::emitDcl(const DxbcShaderInstruction& ins) { + switch (ins.op) { + case DxbcOpcode::DclGlobalFlags: + return this->emitDclGlobalFlags(ins); + + case DxbcOpcode::DclIndexRange: + return; // not needed for anything + + case DxbcOpcode::DclTemps: + return this->emitDclTemps(ins); + + case DxbcOpcode::DclIndexableTemp: + return this->emitDclIndexableTemp(ins); + + case DxbcOpcode::DclInput: + case DxbcOpcode::DclInputSgv: + case DxbcOpcode::DclInputSiv: + case DxbcOpcode::DclInputPs: + case DxbcOpcode::DclInputPsSgv: + case DxbcOpcode::DclInputPsSiv: + case DxbcOpcode::DclOutput: + case DxbcOpcode::DclOutputSgv: + case DxbcOpcode::DclOutputSiv: + return this->emitDclInterfaceReg(ins); + + case DxbcOpcode::DclConstantBuffer: + return this->emitDclConstantBuffer(ins); + + case DxbcOpcode::DclSampler: + return this->emitDclSampler(ins); + + case DxbcOpcode::DclStream: + return this->emitDclStream(ins); + + case DxbcOpcode::DclUavTyped: + case DxbcOpcode::DclResource: + return this->emitDclResourceTyped(ins); + + case DxbcOpcode::DclUavRaw: + case DxbcOpcode::DclResourceRaw: + case DxbcOpcode::DclUavStructured: + case DxbcOpcode::DclResourceStructured: + return this->emitDclResourceRawStructured(ins); + + case DxbcOpcode::DclThreadGroupSharedMemoryRaw: + case DxbcOpcode::DclThreadGroupSharedMemoryStructured: + return this->emitDclThreadGroupSharedMemory(ins); + + case DxbcOpcode::DclGsInputPrimitive: + return this->emitDclGsInputPrimitive(ins); + + case DxbcOpcode::DclGsOutputPrimitiveTopology: + return this->emitDclGsOutputTopology(ins); + + case DxbcOpcode::DclMaxOutputVertexCount: + return this->emitDclMaxOutputVertexCount(ins); + + case 
DxbcOpcode::DclInputControlPointCount: + return this->emitDclInputControlPointCount(ins); + + case DxbcOpcode::DclOutputControlPointCount: + return this->emitDclOutputControlPointCount(ins); + + case DxbcOpcode::DclHsMaxTessFactor: + return this->emitDclHsMaxTessFactor(ins); + + case DxbcOpcode::DclTessDomain: + return this->emitDclTessDomain(ins); + + case DxbcOpcode::DclTessPartitioning: + return this->emitDclTessPartitioning(ins); + + case DxbcOpcode::DclTessOutputPrimitive: + return this->emitDclTessOutputPrimitive(ins); + + case DxbcOpcode::DclThreadGroup: + return this->emitDclThreadGroup(ins); + + case DxbcOpcode::DclGsInstanceCount: + return this->emitDclGsInstanceCount(ins); + + default: + Logger::warn( + str::format("DxbcCompiler: Unhandled opcode: ", + ins.op)); + } + } + + + void DxbcCompiler::emitDclGlobalFlags(const DxbcShaderInstruction& ins) { + const DxbcGlobalFlags flags = ins.controls.globalFlags(); + + if (flags.test(DxbcGlobalFlag::RefactoringAllowed)) + m_precise = false; + + if (flags.test(DxbcGlobalFlag::EarlyFragmentTests)) + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeEarlyFragmentTests); + } + + + void DxbcCompiler::emitDclTemps(const DxbcShaderInstruction& ins) { + // dcl_temps has one operand: + // (imm0) Number of temp registers + + // Ignore this and declare temps on demand. + } + + + void DxbcCompiler::emitDclIndexableTemp(const DxbcShaderInstruction& ins) { + // dcl_indexable_temps has three operands: + // (imm0) Array register index (x#) + // (imm1) Number of vectors stored in the array + // (imm2) Component count of each individual vector. This is + // always 4 in fxc-generated binaries and therefore useless. 
+ const uint32_t regId = ins.imm[0].u32; + + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = m_analysis->xRegMasks.at(regId).minComponents(); + info.type.alength = ins.imm[1].u32; + info.sclass = spv::StorageClassPrivate; + + if (regId >= m_xRegs.size()) + m_xRegs.resize(regId + 1); + + m_xRegs.at(regId).ccount = info.type.ccount; + m_xRegs.at(regId).alength = info.type.alength; + m_xRegs.at(regId).varId = emitNewVariable(info); + + m_module.setDebugName(m_xRegs.at(regId).varId, + str::format("x", regId).c_str()); + } + + + void DxbcCompiler::emitDclInterfaceReg(const DxbcShaderInstruction& ins) { + switch (ins.dst[0].type) { + case DxbcOperandType::InputControlPoint: + if (m_programInfo.type() != DxbcProgramType::HullShader) + break; + [[fallthrough]]; + + case DxbcOperandType::Input: + case DxbcOperandType::Output: { + // dcl_input and dcl_output instructions + // have the following operands: + // (dst0) The register to declare + // (imm0) The system value (optional) + uint32_t regDim = 0; + uint32_t regIdx = 0; + + // In the vertex and fragment shader stage, the + // operand indices will have the following format: + // (0) Register index + // + // In other stages, the input and output registers + // may be declared as arrays of a fixed size: + // (0) Array length + // (1) Register index + if (ins.dst[0].idxDim == 2) { + regDim = ins.dst[0].idx[0].offset; + regIdx = ins.dst[0].idx[1].offset; + } else if (ins.dst[0].idxDim == 1) { + regIdx = ins.dst[0].idx[0].offset; + } else { + Logger::err(str::format( + "DxbcCompiler: ", ins.op, + ": Invalid index dimension")); + return; + } + + // This declaration may map an output register to a system + // value. If that is the case, the system value type will + // be stored in the second operand. 
+ const bool hasSv = + ins.op == DxbcOpcode::DclInputSgv + || ins.op == DxbcOpcode::DclInputSiv + || ins.op == DxbcOpcode::DclInputPsSgv + || ins.op == DxbcOpcode::DclInputPsSiv + || ins.op == DxbcOpcode::DclOutputSgv + || ins.op == DxbcOpcode::DclOutputSiv; + + DxbcSystemValue sv = DxbcSystemValue::None; + + if (hasSv) + sv = static_cast(ins.imm[0].u32); + + // In the pixel shader, inputs are declared with an + // interpolation mode that is part of the op token. + const bool hasInterpolationMode = + ins.op == DxbcOpcode::DclInputPs + || ins.op == DxbcOpcode::DclInputPsSiv; + + DxbcInterpolationMode im = DxbcInterpolationMode::Undefined; + + if (hasInterpolationMode) + im = ins.controls.interpolation(); + + // Declare the actual input/output variable + switch (ins.op) { + case DxbcOpcode::DclInput: + case DxbcOpcode::DclInputSgv: + case DxbcOpcode::DclInputSiv: + case DxbcOpcode::DclInputPs: + case DxbcOpcode::DclInputPsSgv: + case DxbcOpcode::DclInputPsSiv: + this->emitDclInput(regIdx, regDim, ins.dst[0].mask, sv, im); + break; + + case DxbcOpcode::DclOutput: + case DxbcOpcode::DclOutputSgv: + case DxbcOpcode::DclOutputSiv: + this->emitDclOutput(regIdx, regDim, ins.dst[0].mask, sv, im); + break; + + default: + Logger::err(str::format( + "DxbcCompiler: Unexpected opcode: ", + ins.op)); + } + } break; + + case DxbcOperandType::InputThreadId: { + m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 3, 0 }, + spv::StorageClassInput }, + spv::BuiltInGlobalInvocationId, + "vThreadId"); + } break; + + case DxbcOperandType::InputThreadGroupId: { + m_cs.builtinWorkgroupId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 3, 0 }, + spv::StorageClassInput }, + spv::BuiltInWorkgroupId, + "vThreadGroupId"); + } break; + + case DxbcOperandType::InputThreadIdInGroup: { + m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 3, 0 }, + spv::StorageClassInput }, + spv::BuiltInLocalInvocationId, + 
"vThreadIdInGroup"); + } break; + + case DxbcOperandType::InputThreadIndexInGroup: { + m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInLocalInvocationIndex, + "vThreadIndexInGroup"); + } break; + + case DxbcOperandType::InputCoverageMask: { + m_ps.builtinSampleMaskIn = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 1 }, + spv::StorageClassInput }, + spv::BuiltInSampleMask, + "vCoverage"); + } break; + + case DxbcOperandType::OutputCoverageMask: { + m_ps.builtinSampleMaskOut = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 1 }, + spv::StorageClassOutput }, + spv::BuiltInSampleMask, + "oMask"); + } break; + + case DxbcOperandType::OutputDepth: { + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeDepthReplacing); + m_ps.builtinDepth = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInFragDepth, + "oDepth"); + } break; + + case DxbcOperandType::OutputStencilRef: { + m_module.enableExtension("SPV_EXT_shader_stencil_export"); + m_module.enableCapability(spv::CapabilityStencilExportEXT); + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeStencilRefReplacingEXT); + m_ps.builtinStencilRef = emitNewBuiltinVariable({ + { DxbcScalarType::Sint32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInFragStencilRefEXT, + "oStencilRef"); + } break; + + case DxbcOperandType::OutputDepthGe: { + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthReplacing); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthGreater); + m_ps.builtinDepth = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInFragDepth, + "oDepthGe"); + } break; + + case DxbcOperandType::OutputDepthLe: { + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthReplacing); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDepthLess); 
+ m_ps.builtinDepth = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInFragDepth, + "oDepthLe"); + } break; + + case DxbcOperandType::InputPrimitiveId: { + m_primitiveIdIn = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInPrimitiveId, + "vPrim"); + } break; + + case DxbcOperandType::InputDomainPoint: { + m_ds.builtinTessCoord = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 3, 0 }, + spv::StorageClassInput }, + spv::BuiltInTessCoord, + "vDomain"); + } break; + + case DxbcOperandType::InputForkInstanceId: + case DxbcOperandType::InputJoinInstanceId: { + auto phase = this->getCurrentHsForkJoinPhase(); + + phase->instanceIdPtr = m_module.newVar( + m_module.defPointerType( + m_module.defIntType(32, 0), + spv::StorageClassFunction), + spv::StorageClassFunction); + + m_module.opStore(phase->instanceIdPtr, phase->instanceId); + m_module.setDebugName(phase->instanceIdPtr, + ins.dst[0].type == DxbcOperandType::InputForkInstanceId + ? "vForkInstanceId" : "vJoinInstanceId"); + } break; + + case DxbcOperandType::OutputControlPointId: { + // This system value map to the invocation + // ID, which has been declared already. + } break; + + case DxbcOperandType::InputPatchConstant: + case DxbcOperandType::OutputControlPoint: { + // These have been declared as global input and + // output arrays, so there's nothing left to do. + } break; + + case DxbcOperandType::InputGsInstanceId: { + m_gs.builtinInvocationId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInInvocationId, + "vInstanceID"); + } break; + + case DxbcOperandType::InputInnerCoverage: { + m_module.enableExtension("SPV_EXT_fragment_fully_covered"); + m_module.enableCapability(spv::CapabilityFragmentFullyCoveredEXT); + + // This is bool in SPIR-V but uint32 in DXBC. A bool value of + // false must be 0, and bit 1 must be set to represent true. 
+ uint32_t builtinId = emitNewBuiltinVariable({ + { DxbcScalarType::Bool, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInFullyCoveredEXT, + nullptr); + + m_ps.builtinInnerCoverageId = emitNewVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassPrivate }); + + m_module.setDebugName(m_ps.builtinInnerCoverageId, "vInnerCoverage"); + + uint32_t boolTypeId = m_module.defBoolType(); + uint32_t uintTypeId = m_module.defIntType(32, 0); + + m_module.opStore(m_ps.builtinInnerCoverageId, + m_module.opSelect(uintTypeId, + m_module.opLoad(boolTypeId, builtinId), + m_module.constu32(1), + m_module.constu32(0))); + } break; + + default: + Logger::err(str::format( + "DxbcCompiler: Unsupported operand type declaration: ", + ins.dst[0].type)); + + } + } + + + void DxbcCompiler::emitDclInput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im) { + // Avoid declaring the same variable multiple times. + // This may happen when multiple system values are + // mapped to different parts of the same register. 
+ if (m_vRegs.at(regIdx).id == 0 && sv == DxbcSystemValue::None) { + const DxbcVectorType regType = getInputRegType(regIdx); + + DxbcRegisterInfo info; + info.type.ctype = regType.ctype; + info.type.ccount = regType.ccount; + info.type.alength = regDim; + info.sclass = spv::StorageClassInput; + + const uint32_t varId = emitNewVariable(info); + + m_module.decorateLocation(varId, regIdx); + m_module.setDebugName(varId, str::format("v", regIdx).c_str()); + + m_vRegs.at(regIdx) = { regType, varId }; + + // Interpolation mode, used in pixel shaders + if (im == DxbcInterpolationMode::Constant) + m_module.decorate(varId, spv::DecorationFlat); + + if (im == DxbcInterpolationMode::LinearCentroid + || im == DxbcInterpolationMode::LinearNoPerspectiveCentroid) + m_module.decorate(varId, spv::DecorationCentroid); + + if (im == DxbcInterpolationMode::LinearNoPerspective + || im == DxbcInterpolationMode::LinearNoPerspectiveCentroid + || im == DxbcInterpolationMode::LinearNoPerspectiveSample) + m_module.decorate(varId, spv::DecorationNoPerspective); + + if (im == DxbcInterpolationMode::LinearSample + || im == DxbcInterpolationMode::LinearNoPerspectiveSample) { + m_module.enableCapability(spv::CapabilitySampleRateShading); + m_module.decorate(varId, spv::DecorationSample); + } + + if (m_moduleInfo.options.forceSampleRateShading) { + if (im == DxbcInterpolationMode::Linear + || im == DxbcInterpolationMode::LinearNoPerspective) { + m_module.enableCapability(spv::CapabilitySampleRateShading); + m_module.decorate(varId, spv::DecorationSample); + } + } + + // Declare the input slot as defined + m_inputMask |= 1u << regIdx; + m_vArrayLength = std::max(m_vArrayLength, regIdx + 1); + } else if (sv != DxbcSystemValue::None) { + // Add a new system value mapping if needed + bool skipSv = sv == DxbcSystemValue::ClipDistance + || sv == DxbcSystemValue::CullDistance; + + if (!skipSv) + m_vMappings.push_back({ regIdx, regMask, sv }); + } + } + + + void DxbcCompiler::emitDclOutput( + uint32_t 
regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im) { + // Add a new system value mapping if needed. Clip + // and cull distances are handled separately. + if (sv != DxbcSystemValue::None + && sv != DxbcSystemValue::ClipDistance + && sv != DxbcSystemValue::CullDistance) + m_oMappings.push_back({ regIdx, regMask, sv }); + + if (m_programInfo.type() == DxbcProgramType::HullShader) { + // Hull shaders don't use standard outputs + if (getCurrentHsForkJoinPhase() != nullptr) + m_hs.outputPerPatchMask |= 1 << regIdx; + } else if (m_oRegs.at(regIdx).id == 0) { + // Avoid declaring the same variable multiple times. + // This may happen when multiple system values are + // mapped to different parts of the same register. + const DxbcVectorType regType = getOutputRegType(regIdx); + + DxbcRegisterInfo info; + info.type.ctype = regType.ctype; + info.type.ccount = regType.ccount; + info.type.alength = regDim; + info.sclass = spv::StorageClassOutput; + + // In xfb mode, we set up the actual + // output vars when emitting a vertex + if (m_moduleInfo.xfb != nullptr) + info.sclass = spv::StorageClassPrivate; + + // In geometry shaders, don't duplicate system value outputs + // to stay within device limits. The pixel shader will read + // all GS system value outputs as system value inputs. + if (m_programInfo.type() == DxbcProgramType::GeometryShader && sv != DxbcSystemValue::None) + info.sclass = spv::StorageClassPrivate; + + const uint32_t varId = this->emitNewVariable(info); + m_module.setDebugName(varId, str::format("o", regIdx).c_str()); + + if (info.sclass == spv::StorageClassOutput) { + m_module.decorateLocation(varId, regIdx); + + // Add index decoration for potential dual-source blending + if (m_programInfo.type() == DxbcProgramType::PixelShader) + m_module.decorateIndex(varId, 0); + + // Declare vertex positions in all stages as invariant, even if + // this is not the last stage, to help with potential Z fighting. 
+ if (sv == DxbcSystemValue::Position && m_moduleInfo.options.invariantPosition) + m_module.decorate(varId, spv::DecorationInvariant); + } + + m_oRegs.at(regIdx) = { regType, varId }; + + // Declare the output slot as defined + m_outputMask |= 1u << regIdx; + } + } + + + void DxbcCompiler::emitDclConstantBuffer(const DxbcShaderInstruction& ins) { + // dcl_constant_buffer has one operand with two indices: + // (0) Constant buffer register ID (cb#) + // (1) Number of constants in the buffer + const uint32_t bufferId = ins.dst[0].idx[0].offset; + const uint32_t elementCount = ins.dst[0].idx[1].offset; + + this->emitDclConstantBufferVar(bufferId, elementCount, + str::format("cb", bufferId).c_str()); + } + + + void DxbcCompiler::emitDclConstantBufferVar( + uint32_t regIdx, + uint32_t numConstants, + const char* name) { + // Uniform buffer data is stored as a fixed-size array + // of 4x32-bit vectors. SPIR-V requires explicit strides. + const uint32_t arrayType = m_module.defArrayTypeUnique( + getVectorTypeId({ DxbcScalarType::Float32, 4 }), + m_module.constu32(numConstants)); + m_module.decorateArrayStride(arrayType, 16); + + // SPIR-V requires us to put that array into a + // struct and decorate that struct as a block. + const uint32_t structType = m_module.defStructTypeUnique(1, &arrayType); + + m_module.decorate(structType, spv::DecorationBlock); + m_module.memberDecorateOffset(structType, 0, 0); + + m_module.setDebugName (structType, str::format(name, "_t").c_str()); + m_module.setDebugMemberName (structType, 0, "m"); + + // Variable that we'll use to access the buffer + const uint32_t varId = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(varId, name); + + // Compute the DXVK binding slot index for the buffer. + // D3D11 needs to bind the actual buffers to this slot. 
+ uint32_t bindingId = computeConstantBufferBinding( + m_programInfo.type(), regIdx); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + DxbcConstantBuffer buf; + buf.varId = varId; + buf.size = numConstants; + m_constantBuffers.at(regIdx) = buf; + + // Store descriptor info for the shader interface + /*DxvkBindingInfo binding = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }; + binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + binding.access = VK_ACCESS_UNIFORM_READ_BIT; + binding.resourceBinding = bindingId; + binding.uboSet = VK_TRUE; + m_bindings.push_back(binding);*/ + } + + + void DxbcCompiler::emitDclSampler(const DxbcShaderInstruction& ins) { + // dclSampler takes one operand: + // (dst0) The sampler register to declare + const uint32_t samplerId = ins.dst[0].idx[0].offset; + + // The sampler type is opaque, but we still have to + // define a pointer and a variable in order to use it + const uint32_t samplerType = m_module.defSamplerType(); + const uint32_t samplerPtrType = m_module.defPointerType( + samplerType, spv::StorageClassUniformConstant); + + // Define the sampler variable + const uint32_t varId = m_module.newVar(samplerPtrType, + spv::StorageClassUniformConstant); + m_module.setDebugName(varId, + str::format("s", samplerId).c_str()); + + m_samplers.at(samplerId).varId = varId; + m_samplers.at(samplerId).typeId = samplerType; + + // Compute binding slot index for the sampler + uint32_t bindingId = computeSamplerBinding( + m_programInfo.type(), samplerId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Store descriptor info for the shader interface + /*DxvkBindingInfo binding = { VK_DESCRIPTOR_TYPE_SAMPLER }; + binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + binding.resourceBinding = bindingId; + m_bindings.push_back(binding);*/ + } + + + void DxbcCompiler::emitDclStream(const DxbcShaderInstruction& ins) { + if (ins.dst[0].idx[0].offset != 0 && m_moduleInfo.xfb 
== nullptr) + Logger::err("Dxbc: Multiple streams not supported"); + } + + + void DxbcCompiler::emitDclResourceTyped(const DxbcShaderInstruction& ins) { + // dclResource takes two operands: + // (dst0) The resource register ID + // (imm0) The resource return type + const uint32_t registerId = ins.dst[0].idx[0].offset; + + // We also handle unordered access views here + const bool isUav = ins.op == DxbcOpcode::DclUavTyped; + + if (isUav) { + if (m_moduleInfo.options.supportsTypedUavLoadR32) + m_module.enableCapability(spv::CapabilityStorageImageReadWithoutFormat); + m_module.enableCapability(spv::CapabilityStorageImageWriteWithoutFormat); + } + + // Defines the type of the resource (texture2D, ...) + const DxbcResourceDim resourceType = ins.controls.resourceDim(); + + // Defines the type of a read operation. DXBC has the ability + // to define four different types whereas SPIR-V only allows + // one, but in practice this should not be much of a problem. + auto xType = static_cast( + bit::extract(ins.imm[0].u32, 0, 3)); + auto yType = static_cast( + bit::extract(ins.imm[0].u32, 4, 7)); + auto zType = static_cast( + bit::extract(ins.imm[0].u32, 8, 11)); + auto wType = static_cast( + bit::extract(ins.imm[0].u32, 12, 15)); + + if ((xType != yType) || (xType != zType) || (xType != wType)) + Logger::warn("DxbcCompiler: dcl_resource: Ignoring resource return types"); + + // Declare the actual sampled type + const DxbcScalarType sampledType = [xType] { + switch (xType) { + // FIXME is this correct? There's no documentation about it + case DxbcResourceReturnType::Mixed: return DxbcScalarType::Uint32; + // FIXME do we have to manually clamp writes to SNORM/UNORM resources? 
+ case DxbcResourceReturnType::Snorm: return DxbcScalarType::Float32; + case DxbcResourceReturnType::Unorm: return DxbcScalarType::Float32; + case DxbcResourceReturnType::Float: return DxbcScalarType::Float32; + case DxbcResourceReturnType::Sint: return DxbcScalarType::Sint32; + case DxbcResourceReturnType::Uint: return DxbcScalarType::Uint32; + default: throw DxvkError(str::format("DxbcCompiler: Invalid sampled type: ", xType)); + } + }(); + + // Declare the resource type + const uint32_t sampledTypeId = getScalarTypeId(sampledType); + const DxbcImageInfo typeInfo = getResourceType(resourceType, isUav); + + // Declare additional capabilities if necessary + switch (resourceType) { + case DxbcResourceDim::Buffer: + m_module.enableCapability(isUav + ? spv::CapabilityImageBuffer + : spv::CapabilitySampledBuffer); + break; + + case DxbcResourceDim::Texture1D: + case DxbcResourceDim::Texture1DArr: + m_module.enableCapability(isUav + ? spv::CapabilityImage1D + : spv::CapabilitySampled1D); + break; + + case DxbcResourceDim::TextureCubeArr: + m_module.enableCapability( + spv::CapabilitySampledCubeArray); + break; + + default: + // No additional capabilities required + break; + } + + // If the read-without-format capability is not set and this + // image is accessed via a typed load, or if atomic operations + // are used, we must define the image format explicitly. + spv::ImageFormat imageFormat = spv::ImageFormatUnknown; + + if (isUav) { + if ((m_analysis->uavInfos[registerId].accessAtomicOp) + || (m_analysis->uavInfos[registerId].accessTypedLoad + && !m_moduleInfo.options.supportsTypedUavLoadR32)) + imageFormat = getScalarImageFormat(sampledType); + } + + // We do not know whether the image is going to be used as + // a color image or a depth image yet, but we can pick the + // correct type when creating a sampled image object. 
+ const uint32_t imageTypeId = m_module.defImageType(sampledTypeId, + typeInfo.dim, 0, typeInfo.array, typeInfo.ms, typeInfo.sampled, + imageFormat); + + // We'll declare the texture variable with the color type + // and decide which one to use when the texture is sampled. + const uint32_t resourcePtrType = m_module.defPointerType( + imageTypeId, spv::StorageClassUniformConstant); + + const uint32_t varId = m_module.newVar(resourcePtrType, + spv::StorageClassUniformConstant); + + m_module.setDebugName(varId, + str::format(isUav ? "u" : "t", registerId).c_str()); + + // Compute the DXVK binding slot index for the resource. + // D3D11 needs to bind the actual resource to this slot. + uint32_t bindingId = isUav + ? computeUavBinding(m_programInfo.type(), registerId) + : computeSrvBinding(m_programInfo.type(), registerId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Declare a specialization constant which will + // store whether or not the resource is bound. 
+ if (isUav) { + DxbcUav uav; + uav.type = DxbcResourceType::Typed; + uav.imageInfo = typeInfo; + uav.varId = varId; + uav.ctrId = 0; + uav.sampledType = sampledType; + uav.sampledTypeId = sampledTypeId; + uav.imageTypeId = imageTypeId; + uav.structStride = 0; + uav.coherence = getUavCoherence(registerId, ins.controls.uavFlags()); + uav.isRawSsbo = false; + m_uavs.at(registerId) = uav; + } else { + DxbcShaderResource res; + res.type = DxbcResourceType::Typed; + res.imageInfo = typeInfo; + res.varId = varId; + res.sampledType = sampledType; + res.sampledTypeId = sampledTypeId; + res.imageTypeId = imageTypeId; + res.colorTypeId = imageTypeId; + res.depthTypeId = 0; + res.structStride = 0; + res.isRawSsbo = false; + + if ((sampledType == DxbcScalarType::Float32) + && (resourceType == DxbcResourceDim::Texture2D + || resourceType == DxbcResourceDim::Texture2DArr + || resourceType == DxbcResourceDim::TextureCube + || resourceType == DxbcResourceDim::TextureCubeArr)) { + res.depthTypeId = m_module.defImageType(sampledTypeId, + typeInfo.dim, 1, typeInfo.array, typeInfo.ms, typeInfo.sampled, + spv::ImageFormatUnknown); + } + + m_textures.at(registerId) = res; + } + + // Store descriptor info for the shader interface + /*DxvkBindingInfo binding = { }; + binding.viewType = typeInfo.vtype; + binding.resourceBinding = bindingId; + + if (isUav) { + binding.descriptorType = resourceType == DxbcResourceDim::Buffer + ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER + : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + binding.access = m_analysis->uavInfos[registerId].accessFlags; + + if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT)) + m_module.decorate(varId, spv::DecorationNonWritable); + if (!(binding.access & VK_ACCESS_SHADER_READ_BIT)) + m_module.decorate(varId, spv::DecorationNonReadable); + } else { + binding.descriptorType = resourceType == DxbcResourceDim::Buffer + ? 
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + binding.access = VK_ACCESS_SHADER_READ_BIT; + } + + m_bindings.push_back(binding);*/ + } + + + void DxbcCompiler::emitDclResourceRawStructured(const DxbcShaderInstruction& ins) { + // dcl_resource_raw and dcl_uav_raw take one argument: + // (dst0) The resource register ID + // dcl_resource_structured and dcl_uav_structured take two arguments: + // (dst0) The resource register ID + // (imm0) Structure stride, in bytes + const uint32_t registerId = ins.dst[0].idx[0].offset; + + const bool isUav = ins.op == DxbcOpcode::DclUavRaw + || ins.op == DxbcOpcode::DclUavStructured; + + const bool isStructured = ins.op == DxbcOpcode::DclUavStructured + || ins.op == DxbcOpcode::DclResourceStructured; + + const DxbcScalarType sampledType = DxbcScalarType::Uint32; + const uint32_t sampledTypeId = getScalarTypeId(sampledType); + + const DxbcImageInfo typeInfo = { spv::DimBuffer, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_MAX_ENUM }; + + // Declare the resource type + uint32_t resTypeId = 0; + uint32_t varId = 0; + + // Write back resource info + DxbcResourceType resType = isStructured + ? DxbcResourceType::Structured + : DxbcResourceType::Raw; + + uint32_t resStride = isStructured + ? ins.imm[0].u32 + : 0; + + uint32_t resAlign = isStructured + ? (resStride & -resStride) + : 16; + + // Compute the DXVK binding slot index for the resource. + uint32_t bindingId = isUav + ? computeUavBinding(m_programInfo.type(), registerId) + : computeSrvBinding(m_programInfo.type(), registerId); + + // Test whether we should use a raw SSBO for this resource + bool hasSparseFeedback = isUav + ? 
m_analysis->uavInfos[registerId].sparseFeedback + : m_analysis->srvInfos[registerId].sparseFeedback; + + bool useRawSsbo = m_moduleInfo.options.minSsboAlignment <= resAlign && !hasSparseFeedback; + + if (useRawSsbo) { + uint32_t elemType = getScalarTypeId(DxbcScalarType::Uint32); + uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(elemType); + uint32_t structType = m_module.defStructTypeUnique(1, &arrayType); + uint32_t ptrType = m_module.defPointerType(structType, spv::StorageClassStorageBuffer); + + resTypeId = m_module.defPointerType(elemType, spv::StorageClassStorageBuffer); + varId = m_module.newVar(ptrType, spv::StorageClassStorageBuffer); + + m_module.decorateArrayStride(arrayType, sizeof(uint32_t)); + m_module.decorate(structType, spv::DecorationBlock); + m_module.memberDecorateOffset(structType, 0, 0); + + m_module.setDebugName(structType, + str::format(isUav ? "u" : "t", registerId, "_t").c_str()); + m_module.setDebugMemberName(structType, 0, "m"); + } else { + // Structured and raw buffers are represented as + // texel buffers consisting of 32-bit integers. + m_module.enableCapability(isUav + ? spv::CapabilityImageBuffer + : spv::CapabilitySampledBuffer); + + resTypeId = m_module.defImageType(sampledTypeId, + typeInfo.dim, 0, typeInfo.array, typeInfo.ms, typeInfo.sampled, + spv::ImageFormatR32ui); + + varId = m_module.newVar( + m_module.defPointerType(resTypeId, spv::StorageClassUniformConstant), + spv::StorageClassUniformConstant); + } + + m_module.setDebugName(varId, + str::format(isUav ? 
"u" : "t", registerId).c_str()); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + if (isUav) { + DxbcUav uav; + uav.type = resType; + uav.imageInfo = typeInfo; + uav.varId = varId; + uav.ctrId = 0; + uav.sampledType = sampledType; + uav.sampledTypeId = sampledTypeId; + uav.imageTypeId = resTypeId; + uav.structStride = resStride; + uav.coherence = getUavCoherence(registerId, ins.controls.uavFlags()); + uav.isRawSsbo = useRawSsbo; + m_uavs.at(registerId) = uav; + } else { + DxbcShaderResource res; + res.type = resType; + res.imageInfo = typeInfo; + res.varId = varId; + res.sampledType = sampledType; + res.sampledTypeId = sampledTypeId; + res.imageTypeId = resTypeId; + res.colorTypeId = resTypeId; + res.depthTypeId = 0; + res.structStride = resStride; + res.isRawSsbo = useRawSsbo; + m_textures.at(registerId) = res; + } + + // Store descriptor info for the shader interface + /*DxvkBindingInfo binding = { }; + binding.descriptorType = useRawSsbo + ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + : (isUav ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); + binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + binding.resourceBinding = bindingId; + binding.access = isUav + ? 
m_analysis->uavInfos[registerId].accessFlags + : VkAccessFlags(VK_ACCESS_SHADER_READ_BIT); + + if (useRawSsbo || isUav) { + if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT)) + m_module.decorate(varId, spv::DecorationNonWritable); + if (!(binding.access & VK_ACCESS_SHADER_READ_BIT)) + m_module.decorate(varId, spv::DecorationNonReadable); + } + + m_bindings.push_back(binding);*/ + } + + + void DxbcCompiler::emitDclThreadGroupSharedMemory(const DxbcShaderInstruction& ins) { + // dcl_tgsm_raw takes two arguments: + // (dst0) The resource register ID + // (imm0) Block size, in bytes + // dcl_tgsm_structured takes three arguments: + // (dst0) The resource register ID + // (imm0) Structure stride, in bytes + // (imm1) Structure count + const bool isStructured = ins.op == DxbcOpcode::DclThreadGroupSharedMemoryStructured; + + const uint32_t regId = ins.dst[0].idx[0].offset; + + if (regId >= m_gRegs.size()) + m_gRegs.resize(regId + 1); + + const uint32_t elementStride = isStructured ? ins.imm[0].u32 : 0; + const uint32_t elementCount = isStructured ? ins.imm[1].u32 : ins.imm[0].u32; + + DxbcRegisterInfo varInfo; + varInfo.type.ctype = DxbcScalarType::Uint32; + varInfo.type.ccount = 1; + varInfo.type.alength = isStructured + ? elementCount * elementStride / 4 + : elementCount / 4; + varInfo.sclass = spv::StorageClassWorkgroup; + + m_gRegs[regId].type = isStructured + ? DxbcResourceType::Structured + : DxbcResourceType::Raw; + m_gRegs[regId].elementStride = elementStride; + m_gRegs[regId].elementCount = elementCount; + m_gRegs[regId].varId = emitNewVariable(varInfo); + + m_module.setDebugName(m_gRegs[regId].varId, + str::format("g", regId).c_str()); + } + + + void DxbcCompiler::emitDclGsInputPrimitive(const DxbcShaderInstruction& ins) { + // The input primitive type is stored within in the + // control bits of the opcode token. In SPIR-V, we + // have to define an execution mode. 
+ const spv::ExecutionMode mode = [&] { + switch (ins.controls.primitive()) { + case DxbcPrimitive::Point: return spv::ExecutionModeInputPoints; + case DxbcPrimitive::Line: return spv::ExecutionModeInputLines; + case DxbcPrimitive::Triangle: return spv::ExecutionModeTriangles; + case DxbcPrimitive::LineAdj: return spv::ExecutionModeInputLinesAdjacency; + case DxbcPrimitive::TriangleAdj: return spv::ExecutionModeInputTrianglesAdjacency; + default: throw DxvkError("DxbcCompiler: Unsupported primitive type"); + } + }(); + + m_gs.inputPrimitive = ins.controls.primitive(); + m_module.setExecutionMode(m_entryPointId, mode); + + const uint32_t vertexCount + = primitiveVertexCount(m_gs.inputPrimitive); + + emitDclInputArray(vertexCount); + } + + + void DxbcCompiler::emitDclGsOutputTopology(const DxbcShaderInstruction& ins) { + // The output primitive topology is stored within the + // control bits of the opcode token. In SPIR-V, we have + // to define an execution mode. + auto mode = [&] { + switch (ins.controls.primitiveTopology()) { + case DxbcPrimitiveTopology::PointList: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_POINT_LIST, spv::ExecutionModeOutputPoints); + case DxbcPrimitiveTopology::LineStrip: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_LINE_LIST, spv::ExecutionModeOutputLineStrip); + case DxbcPrimitiveTopology::TriangleStrip: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, spv::ExecutionModeOutputTriangleStrip); + default: throw DxvkError("DxbcCompiler: Unsupported primitive topology"); + } + }(); + + m_outputTopology = mode.first; + m_module.setExecutionMode(m_entryPointId, mode.second); + } + + + void DxbcCompiler::emitDclMaxOutputVertexCount(const DxbcShaderInstruction& ins) { + // dcl_max_output_vertex_count has one operand: + // (imm0) The maximum number of vertices + m_gs.outputVertexCount = ins.imm[0].u32; + + m_module.setOutputVertices(m_entryPointId, m_gs.outputVertexCount); + } + + + void DxbcCompiler::emitDclInputControlPointCount(const 
DxbcShaderInstruction& ins) { + // dcl_input_control_points has the control point + // count embedded within the opcode token. + if (m_programInfo.type() == DxbcProgramType::HullShader) { + m_hs.vertexCountIn = ins.controls.controlPointCount(); + + emitDclInputArray(m_hs.vertexCountIn); + } else { + m_ds.vertexCountIn = ins.controls.controlPointCount(); + + m_ds.inputPerPatch = emitTessInterfacePerPatch (spv::StorageClassInput); + m_ds.inputPerVertex = emitTessInterfacePerVertex(spv::StorageClassInput, m_ds.vertexCountIn); + } + } + + + void DxbcCompiler::emitDclOutputControlPointCount(const DxbcShaderInstruction& ins) { + // dcl_output_control_points has the control point + // count embedded within the opcode token. + m_hs.vertexCountOut = ins.controls.controlPointCount(); + + m_hs.outputPerPatch = emitTessInterfacePerPatch(spv::StorageClassPrivate); + m_hs.outputPerVertex = emitTessInterfacePerVertex(spv::StorageClassOutput, m_hs.vertexCountOut); + + m_module.setOutputVertices(m_entryPointId, m_hs.vertexCountOut); + } + + + void DxbcCompiler::emitDclHsMaxTessFactor(const DxbcShaderInstruction& ins) { + m_hs.maxTessFactor = ins.imm[0].f32; + } + + + void DxbcCompiler::emitDclTessDomain(const DxbcShaderInstruction& ins) { + auto mode = [&] { + switch (ins.controls.tessDomain()) { + case DxbcTessDomain::Isolines: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_LINE_LIST, spv::ExecutionModeIsolines); + case DxbcTessDomain::Triangles: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, spv::ExecutionModeTriangles); + case DxbcTessDomain::Quads: return std::make_pair(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, spv::ExecutionModeQuads); + default: throw DxvkError("Dxbc: Invalid tess domain"); + } + }(); + + m_outputTopology = mode.first; + m_module.setExecutionMode(m_entryPointId, mode.second); + } + + + void DxbcCompiler::emitDclTessPartitioning(const DxbcShaderInstruction& ins) { + const spv::ExecutionMode executionMode = [&] { + switch (ins.controls.tessPartitioning()) { 
+ case DxbcTessPartitioning::Pow2: + case DxbcTessPartitioning::Integer: return spv::ExecutionModeSpacingEqual; + case DxbcTessPartitioning::FractOdd: return spv::ExecutionModeSpacingFractionalOdd; + case DxbcTessPartitioning::FractEven: return spv::ExecutionModeSpacingFractionalEven; + default: throw DxvkError("Dxbc: Invalid tess partitioning"); + } + }(); + + m_module.setExecutionMode(m_entryPointId, executionMode); + } + + + void DxbcCompiler::emitDclTessOutputPrimitive(const DxbcShaderInstruction& ins) { + switch (ins.controls.tessOutputPrimitive()) { + case DxbcTessOutputPrimitive::Point: + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModePointMode); + break; + + case DxbcTessOutputPrimitive::Line: + break; + + case DxbcTessOutputPrimitive::TriangleCw: + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeVertexOrderCw); + break; + + case DxbcTessOutputPrimitive::TriangleCcw: + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeVertexOrderCcw); + break; + + default: + throw DxvkError("Dxbc: Invalid tess output primitive"); + } + } + + + void DxbcCompiler::emitDclThreadGroup(const DxbcShaderInstruction& ins) { + // dcl_thread_group has three operands: + // (imm0) Number of threads in X dimension + // (imm1) Number of threads in Y dimension + // (imm2) Number of threads in Z dimension + m_cs.workgroupSizeX = ins.imm[0].u32; + m_cs.workgroupSizeY = ins.imm[1].u32; + m_cs.workgroupSizeZ = ins.imm[2].u32; + + m_module.setLocalSize(m_entryPointId, + ins.imm[0].u32, ins.imm[1].u32, ins.imm[2].u32); + } + + + void DxbcCompiler::emitDclGsInstanceCount(const DxbcShaderInstruction& ins) { + // dcl_gs_instance_count has one operand: + // (imm0) Number of geometry shader invocations + m_module.setInvocations(m_entryPointId, ins.imm[0].u32); + m_gs.invocationCount = ins.imm[0].u32; + } + + + uint32_t DxbcCompiler::emitDclUavCounter(uint32_t regId) { + // Declare a structure type which holds the UAV counter + if (m_uavCtrStructType == 0) { + const 
uint32_t t_u32 = m_module.defIntType(32, 0); + const uint32_t t_struct = m_module.defStructTypeUnique(1, &t_u32); + + m_module.decorate(t_struct, spv::DecorationBlock); + m_module.memberDecorateOffset(t_struct, 0, 0); + + m_module.setDebugName (t_struct, "uav_meta"); + m_module.setDebugMemberName(t_struct, 0, "ctr"); + + m_uavCtrStructType = t_struct; + m_uavCtrPointerType = m_module.defPointerType( + t_struct, spv::StorageClassStorageBuffer); + } + + // Declare the buffer variable + const uint32_t varId = m_module.newVar( + m_uavCtrPointerType, spv::StorageClassStorageBuffer); + + m_module.setDebugName(varId, + str::format("u", regId, "_meta").c_str()); + + uint32_t bindingId = computeUavCounterBinding( + m_programInfo.type(), regId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Declare the storage buffer binding (registration currently disabled) + /*DxvkBindingInfo binding = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; + binding.resourceBinding = bindingId; + binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + binding.access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + m_bindings.push_back(binding);*/ + + return varId; + } + + + void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) { + if (m_immConstBuf != 0) + throw DxvkError("DxbcCompiler: Immediate constant buffer already declared"); + + if ((ins.customDataSize & 0x3) != 0) + throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs"); + + if (ins.customDataSize <= Icb_MaxBakedDwords) { + this->emitDclImmediateConstantBufferBaked( + ins.customDataSize, ins.customData); + } else { + this->emitDclImmediateConstantBufferUbo( + ins.customDataSize, ins.customData); + } + } + + + void DxbcCompiler::emitDclImmediateConstantBufferBaked( + uint32_t dwordCount, + const uint32_t* dwordArray) { + // Declare individual vector constants as 4x32-bit vectors + std::array vectorIds; + + DxbcVectorType vecType; + vecType.ctype = 
DxbcScalarType::Uint32; + vecType.ccount = 4; + + const uint32_t vectorTypeId = getVectorTypeId(vecType); + const uint32_t vectorCount = dwordCount / 4; + + for (uint32_t i = 0; i < vectorCount; i++) { + std::array scalarIds = { + m_module.constu32(dwordArray[4 * i + 0]), + m_module.constu32(dwordArray[4 * i + 1]), + m_module.constu32(dwordArray[4 * i + 2]), + m_module.constu32(dwordArray[4 * i + 3]), + }; + + vectorIds.at(i) = m_module.constComposite( + vectorTypeId, scalarIds.size(), scalarIds.data()); + } + + // Declare the array that contains all the vectors + DxbcArrayType arrInfo; + arrInfo.ctype = DxbcScalarType::Uint32; + arrInfo.ccount = 4; + arrInfo.alength = vectorCount; + + const uint32_t arrayTypeId = getArrayTypeId(arrInfo); + const uint32_t arrayId = m_module.constComposite( + arrayTypeId, vectorCount, vectorIds.data()); + + // Declare the variable that will hold the constant + // data and initialize it with the constant array. + const uint32_t pointerTypeId = m_module.defPointerType( + arrayTypeId, spv::StorageClassPrivate); + + m_immConstBuf = m_module.newVarInit( + pointerTypeId, spv::StorageClassPrivate, + arrayId); + + m_module.setDebugName(m_immConstBuf, "icb"); + m_module.decorate(m_immConstBuf, spv::DecorationNonWritable); + } + + + void DxbcCompiler::emitDclImmediateConstantBufferUbo( + uint32_t dwordCount, + const uint32_t* dwordArray) { + this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb"); + m_immConstData.resize(dwordCount * sizeof(uint32_t)); + std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size()); + } + + + void DxbcCompiler::emitCustomData(const DxbcShaderInstruction& ins) { + switch (ins.customDataType) { + case DxbcCustomDataClass::ImmConstBuf: + return emitDclImmediateConstantBuffer(ins); + + default: + Logger::warn(str::format( + "DxbcCompiler: Unsupported custom data block: ", + ins.customDataType)); + } + } + + + void DxbcCompiler::emitVectorAlu(const DxbcShaderInstruction& ins) { + 
std::array src; + + for (uint32_t i = 0; i < ins.srcCount; i++) + src.at(i) = emitRegisterLoad(ins.src[i], ins.dst[0].mask); + + DxbcRegisterValue dst; + dst.type.ctype = ins.dst[0].dataType; + dst.type.ccount = ins.dst[0].mask.popCount(); + + if (isDoubleType(ins.dst[0].dataType)) + dst.type.ccount /= 2; + + const uint32_t typeId = getVectorTypeId(dst.type); + + switch (ins.op) { + ///////////////////// + // Move instructions + case DxbcOpcode::Mov: + case DxbcOpcode::DMov: + dst.id = src.at(0).id; + break; + + ///////////////////////////////////// + // ALU operations on float32 numbers + case DxbcOpcode::Add: + case DxbcOpcode::DAdd: + dst.id = m_module.opFAdd(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Div: + case DxbcOpcode::DDiv: + dst.id = m_module.opFDiv(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Exp: + dst.id = m_module.opExp2( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Frc: + dst.id = m_module.opFract( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Log: + dst.id = m_module.opLog2( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Mad: + case DxbcOpcode::DFma: + dst.id = m_module.opFFma(typeId, + src.at(0).id, src.at(1).id, src.at(2).id); + break; + + case DxbcOpcode::Max: + case DxbcOpcode::DMax: + dst.id = m_module.opNMax(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Min: + case DxbcOpcode::DMin: + dst.id = m_module.opNMin(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Mul: + case DxbcOpcode::DMul: + dst.id = m_module.opFMul(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Rcp: + dst.id = m_module.opFDiv(typeId, + emitBuildConstVecf32( + 1.0f, 1.0f, 1.0f, 1.0f, + ins.dst[0].mask).id, + src.at(0).id); + break; + + case DxbcOpcode::DRcp: + dst.id = m_module.opFDiv(typeId, + emitBuildConstVecf64(1.0, 1.0, + ins.dst[0].mask).id, + src.at(0).id); + break; + + case DxbcOpcode::RoundNe: + dst.id = m_module.opRoundEven( + 
typeId, src.at(0).id); + break; + + case DxbcOpcode::RoundNi: + dst.id = m_module.opFloor( + typeId, src.at(0).id); + break; + + case DxbcOpcode::RoundPi: + dst.id = m_module.opCeil( + typeId, src.at(0).id); + break; + + case DxbcOpcode::RoundZ: + dst.id = m_module.opTrunc( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Rsq: + dst.id = m_module.opInverseSqrt( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Sqrt: + dst.id = m_module.opSqrt( + typeId, src.at(0).id); + break; + + ///////////////////////////////////// + // ALU operations on signed integers + case DxbcOpcode::IAdd: + dst.id = m_module.opIAdd(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IMad: + case DxbcOpcode::UMad: + dst.id = m_module.opIAdd(typeId, + m_module.opIMul(typeId, + src.at(0).id, src.at(1).id), + src.at(2).id); + break; + + case DxbcOpcode::IMax: + dst.id = m_module.opSMax(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IMin: + dst.id = m_module.opSMin(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::INeg: + dst.id = m_module.opSNegate( + typeId, src.at(0).id); + break; + + /////////////////////////////////////// + // ALU operations on unsigned integers + case DxbcOpcode::UMax: + dst.id = m_module.opUMax(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::UMin: + dst.id = m_module.opUMin(typeId, + src.at(0).id, src.at(1).id); + break; + + /////////////////////////////////////// + // Bit operations on unsigned integers + case DxbcOpcode::And: + dst.id = m_module.opBitwiseAnd(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Not: + dst.id = m_module.opNot( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Or: + dst.id = m_module.opBitwiseOr(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Xor: + dst.id = m_module.opBitwiseXor(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::CountBits: + dst.id = m_module.opBitCount( + typeId, 
src.at(0).id); + break; + + case DxbcOpcode::BfRev: + dst.id = m_module.opBitReverse( + typeId, src.at(0).id); + break; + + /////////////////////////// + // Conversion instructions + case DxbcOpcode::ItoF: + dst.id = m_module.opConvertStoF( + typeId, src.at(0).id); + break; + + case DxbcOpcode::UtoF: + dst.id = m_module.opConvertUtoF( + typeId, src.at(0).id); + break; + + case DxbcOpcode::FtoI: + dst.id = m_module.opConvertFtoS( + typeId, src.at(0).id); + break; + + case DxbcOpcode::FtoU: + dst.id = m_module.opConvertFtoU( + typeId, src.at(0).id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + if (ins.controls.precise() || m_precise) + m_module.decorate(dst.id, spv::DecorationNoContraction); + + // Store computed value + dst = emitDstOperandModifiers(dst, ins.modifiers); + emitRegisterStore(ins.dst[0], dst); + } + + + void DxbcCompiler::emitVectorCmov(const DxbcShaderInstruction& ins) { + // movc and swapc have the following operands: + // (dst0) The first destination register + // (dst1) The second destination register (swapc only) + // (src0) The condition vector + // (src1) Vector to select from if the condition is not 0 + // (src2) Vector to select from if the condition is 0 + DxbcRegMask condMask = ins.dst[0].mask; + + if (ins.dst[0].dataType == DxbcScalarType::Float64) { + condMask = DxbcRegMask( + condMask[0] && condMask[1], + condMask[2] && condMask[3], + false, false); + } + + const DxbcRegisterValue condition = emitRegisterLoad(ins.src[0], condMask); + const DxbcRegisterValue selectTrue = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + const DxbcRegisterValue selectFalse = emitRegisterLoad(ins.src[2], ins.dst[0].mask); + + uint32_t componentCount = condMask.popCount(); + + // We'll compare against a vector of zeroes to generate a + // boolean vector, which in turn will be used by OpSelect + uint32_t zeroType = m_module.defIntType(32, 0); + uint32_t boolType = 
m_module.defBoolType(); + + uint32_t zero = m_module.constu32(0); + + if (componentCount > 1) { + zeroType = m_module.defVectorType(zeroType, componentCount); + boolType = m_module.defVectorType(boolType, componentCount); + + const std::array zeroVec = { zero, zero, zero, zero }; + zero = m_module.constComposite(zeroType, componentCount, zeroVec.data()); + } + + // In case of swapc, the second destination operand receives + // the output that a cmov instruction would normally get + const uint32_t trueIndex = ins.op == DxbcOpcode::Swapc ? 1 : 0; + + for (uint32_t i = 0; i < ins.dstCount; i++) { + DxbcRegisterValue result; + result.type.ctype = ins.dst[i].dataType; + result.type.ccount = componentCount; + result.id = m_module.opSelect( + getVectorTypeId(result.type), + m_module.opINotEqual(boolType, condition.id, zero), + i == trueIndex ? selectTrue.id : selectFalse.id, + i != trueIndex ? selectTrue.id : selectFalse.id); + + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[i], result); + } + } + + void DxbcCompiler::emitVectorCmp(const DxbcShaderInstruction& ins) { + // Compare instructions have three operands: + // (dst0) The destination register + // (src0) The first vector to compare + // (src1) The second vector to compare + uint32_t componentCount = ins.dst[0].mask.popCount(); + + // For 64-bit operations, we'll return a 32-bit + // vector, so we have to adjust the read mask + DxbcRegMask srcMask = ins.dst[0].mask; + + if (isDoubleType(ins.src[0].dataType)) { + srcMask = DxbcRegMask( + componentCount > 0, componentCount > 0, + componentCount > 1, componentCount > 1); + } + + const std::array src = { + emitRegisterLoad(ins.src[0], srcMask), + emitRegisterLoad(ins.src[1], srcMask), + }; + + // Condition, which is a boolean vector used + // to select between the ~0u and 0u vectors. 
+ uint32_t condition = 0; + uint32_t conditionType = m_module.defBoolType(); + + if (componentCount > 1) + conditionType = m_module.defVectorType(conditionType, componentCount); + + bool invert = false; + + switch (ins.op) { + case DxbcOpcode::Ne: + case DxbcOpcode::DNe: + invert = true; + [[fallthrough]]; + + case DxbcOpcode::Eq: + case DxbcOpcode::DEq: + condition = m_module.opFOrdEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Ge: + case DxbcOpcode::DGe: + condition = m_module.opFOrdGreaterThanEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Lt: + case DxbcOpcode::DLt: + condition = m_module.opFOrdLessThan( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IEq: + condition = m_module.opIEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IGe: + condition = m_module.opSGreaterThanEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::ILt: + condition = m_module.opSLessThan( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::INe: + condition = m_module.opINotEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::UGe: + condition = m_module.opUGreaterThanEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::ULt: + condition = m_module.opULessThan( + conditionType, src.at(0).id, src.at(1).id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // Generate constant vectors for selection + uint32_t sFalse = m_module.constu32( 0u); + uint32_t sTrue = m_module.constu32(~0u); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = componentCount; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (componentCount > 1) { + const std::array vFalse = { sFalse, sFalse, sFalse, sFalse }; + const std::array vTrue = { 
sTrue, sTrue, sTrue, sTrue }; + + sFalse = m_module.constComposite(typeId, componentCount, vFalse.data()); + sTrue = m_module.constComposite(typeId, componentCount, vTrue .data()); + } + + if (invert) + std::swap(sFalse, sTrue); + + // Perform component-wise mask selection + // based on the condition evaluated above. + result.id = m_module.opSelect( + typeId, condition, sTrue, sFalse); + + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitVectorDeriv(const DxbcShaderInstruction& ins) { + // Derivative instructions have two operands: + // (dst0) Destination register for the derivative + // (src0) The operand to compute the derivative of + DxbcRegisterValue value = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + const uint32_t typeId = getVectorTypeId(value.type); + + switch (ins.op) { + case DxbcOpcode::DerivRtx: + value.id = m_module.opDpdx(typeId, value.id); + break; + + case DxbcOpcode::DerivRty: + value.id = m_module.opDpdy(typeId, value.id); + break; + + case DxbcOpcode::DerivRtxCoarse: + value.id = m_module.opDpdxCoarse(typeId, value.id); + break; + + case DxbcOpcode::DerivRtyCoarse: + value.id = m_module.opDpdyCoarse(typeId, value.id); + break; + + case DxbcOpcode::DerivRtxFine: + value.id = m_module.opDpdxFine(typeId, value.id); + break; + + case DxbcOpcode::DerivRtyFine: + value.id = m_module.opDpdyFine(typeId, value.id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + value = emitDstOperandModifiers(value, ins.modifiers); + emitRegisterStore(ins.dst[0], value); + } + + + void DxbcCompiler::emitVectorDot(const DxbcShaderInstruction& ins) { + const DxbcRegMask srcMask(true, + ins.op >= DxbcOpcode::Dp2, + ins.op >= DxbcOpcode::Dp3, + ins.op >= DxbcOpcode::Dp4); + + const std::array src = { + emitRegisterLoad(ins.src[0], srcMask), + emitRegisterLoad(ins.src[1], srcMask), + }; + + DxbcRegisterValue dst; + dst.type.ctype = ins.dst[0].dataType; + dst.type.ccount = 1; 
+ + dst.id = m_module.opDot( + getVectorTypeId(dst.type), + src.at(0).id, + src.at(1).id); + + if (ins.controls.precise() || m_precise) + m_module.decorate(dst.id, spv::DecorationNoContraction); + + dst = emitDstOperandModifiers(dst, ins.modifiers); + emitRegisterStore(ins.dst[0], dst); + } + + + void DxbcCompiler::emitVectorIdiv(const DxbcShaderInstruction& ins) { + // udiv has four operands: + // (dst0) Quotient destination register + // (dst1) Remainder destination register + // (src0) The first vector to compare + // (src1) The second vector to compare + if (ins.dst[0].type == DxbcOperandType::Null + && ins.dst[1].type == DxbcOperandType::Null) + return; + + // FIXME support this if applications require it + if (ins.dst[0].type != DxbcOperandType::Null + && ins.dst[1].type != DxbcOperandType::Null + && ins.dst[0].mask != ins.dst[1].mask) { + Logger::warn("DxbcCompiler: Idiv with different destination masks not supported"); + return; + } + + // Load source operands as integers with the + // mask of one non-NULL destination operand + const DxbcRegMask srcMask = + ins.dst[0].type != DxbcOperandType::Null + ? ins.dst[0].mask + : ins.dst[1].mask; + + const std::array src = { + emitRegisterLoad(ins.src[0], srcMask), + emitRegisterLoad(ins.src[1], srcMask), + }; + + // Division by zero will return 0xffffffff for both results + auto bvecId = getVectorTypeId({ DxbcScalarType::Bool, srcMask.popCount() }); + + DxbcRegisterValue const0 = emitBuildConstVecu32( 0u, 0u, 0u, 0u, srcMask); + DxbcRegisterValue constff = emitBuildConstVecu32(~0u, ~0u, ~0u, ~0u, srcMask); + + uint32_t cmpValue = m_module.opINotEqual(bvecId, src.at(1).id, const0.id); + + // Compute results only if the destination + // operands are not NULL. 
+ if (ins.dst[0].type != DxbcOperandType::Null) { + DxbcRegisterValue quotient; + quotient.type.ctype = ins.dst[0].dataType; + quotient.type.ccount = ins.dst[0].mask.popCount(); + + quotient.id = m_module.opUDiv( + getVectorTypeId(quotient.type), + src.at(0).id, src.at(1).id); + + quotient.id = m_module.opSelect( + getVectorTypeId(quotient.type), + cmpValue, quotient.id, constff.id); + + quotient = emitDstOperandModifiers(quotient, ins.modifiers); + emitRegisterStore(ins.dst[0], quotient); + } + + if (ins.dst[1].type != DxbcOperandType::Null) { + DxbcRegisterValue remainder; + remainder.type.ctype = ins.dst[1].dataType; + remainder.type.ccount = ins.dst[1].mask.popCount(); + + remainder.id = m_module.opUMod( + getVectorTypeId(remainder.type), + src.at(0).id, src.at(1).id); + + remainder.id = m_module.opSelect( + getVectorTypeId(remainder.type), + cmpValue, remainder.id, constff.id); + + remainder = emitDstOperandModifiers(remainder, ins.modifiers); + emitRegisterStore(ins.dst[1], remainder); + } + } + + + void DxbcCompiler::emitVectorImul(const DxbcShaderInstruction& ins) { + // imul and umul have four operands: + // (dst0) High destination register + // (dst1) Low destination register + // (src0) The first vector to compare + // (src1) The second vector to compare + if (ins.dst[0].type == DxbcOperandType::Null) { + if (ins.dst[1].type == DxbcOperandType::Null) + return; + + // If dst0 is NULL, this instruction behaves just + // like any other three-operand ALU instruction + const std::array src = { + emitRegisterLoad(ins.src[0], ins.dst[1].mask), + emitRegisterLoad(ins.src[1], ins.dst[1].mask), + }; + + DxbcRegisterValue result; + result.type.ctype = ins.dst[1].dataType; + result.type.ccount = ins.dst[1].mask.popCount(); + result.id = m_module.opIMul( + getVectorTypeId(result.type), + src.at(0).id, src.at(1).id); + + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[1], result); + } else { + // TODO implement this + 
Logger::warn("DxbcCompiler: Extended Imul not yet supported"); + } + } + + + void DxbcCompiler::emitVectorMsad(const DxbcShaderInstruction& ins) { + // msad has four operands: + // (dst0) Destination + // (src0) Reference (packed uint8) + // (src1) Source (packed uint8) + // (src2) Accumulator + DxbcRegisterValue refReg = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + DxbcRegisterValue srcReg = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + DxbcRegisterValue result = emitRegisterLoad(ins.src[2], ins.dst[0].mask); + + auto typeId = getVectorTypeId(result.type); + auto bvecId = getVectorTypeId({ DxbcScalarType::Bool, result.type.ccount }); + + for (uint32_t i = 0; i < 4; i++) { + auto shift = m_module.constu32(8 * i); + auto count = m_module.constu32(8); + + auto ref = m_module.opBitFieldUExtract(typeId, refReg.id, shift, count); + auto src = m_module.opBitFieldUExtract(typeId, srcReg.id, shift, count); + + auto zero = emitBuildConstVecu32(0, 0, 0, 0, ins.dst[0].mask); + auto mask = m_module.opINotEqual(bvecId, ref, zero.id); + + auto diff = m_module.opSAbs(typeId, m_module.opISub(typeId, ref, src)); + result.id = m_module.opSelect(typeId, mask, m_module.opIAdd(typeId, result.id, diff), result.id); + } + + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitVectorShift(const DxbcShaderInstruction& ins) { + // Shift operations have three operands: + // (dst0) The destination register + // (src0) The register to shift + // (src1) The shift amount (scalar) + DxbcRegisterValue shiftReg = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + DxbcRegisterValue countReg = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + + if (ins.src[1].type != DxbcOperandType::Imm32) + countReg = emitRegisterMaskBits(countReg, 0x1F); + + if (countReg.type.ccount == 1) + countReg = emitRegisterExtend(countReg, shiftReg.type.ccount); + + DxbcRegisterValue result; + result.type.ctype = ins.dst[0].dataType; + 
result.type.ccount = ins.dst[0].mask.popCount(); + + switch (ins.op) { + case DxbcOpcode::IShl: + result.id = m_module.opShiftLeftLogical( + getVectorTypeId(result.type), + shiftReg.id, countReg.id); + break; + + case DxbcOpcode::IShr: + result.id = m_module.opShiftRightArithmetic( + getVectorTypeId(result.type), + shiftReg.id, countReg.id); + break; + + case DxbcOpcode::UShr: + result.id = m_module.opShiftRightLogical( + getVectorTypeId(result.type), + shiftReg.id, countReg.id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitVectorSinCos(const DxbcShaderInstruction& ins) { + // sincos has three operands: + // (dst0) Destination register for sin(x) + // (dst1) Destination register for cos(x) + // (src0) Source operand x + + // Load source operand as 32-bit float vector. + const DxbcRegisterValue srcValue = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, true, true, true)); + + // Either output may be DxbcOperandType::Null, in + // which case we don't have to generate any code. 
+ if (ins.dst[0].type != DxbcOperandType::Null) { + const DxbcRegisterValue sinInput = + emitRegisterExtract(srcValue, ins.dst[0].mask); + + DxbcRegisterValue sin; + sin.type = sinInput.type; + sin.id = m_module.opSin( + getVectorTypeId(sin.type), + sinInput.id); + + emitRegisterStore(ins.dst[0], sin); + } + + if (ins.dst[1].type != DxbcOperandType::Null) { + const DxbcRegisterValue cosInput = + emitRegisterExtract(srcValue, ins.dst[1].mask); + + DxbcRegisterValue cos; + cos.type = cosInput.type; + cos.id = m_module.opCos( + getVectorTypeId(cos.type), + cosInput.id); + + emitRegisterStore(ins.dst[1], cos); + } + } + + + void DxbcCompiler::emitGeometryEmit(const DxbcShaderInstruction& ins) { + // In xfb mode we might have multiple streams, so + // we have to figure out which stream to write to + uint32_t streamId = 0; + uint32_t streamVar = 0; + + if (m_moduleInfo.xfb != nullptr) { + streamId = ins.dstCount > 0 ? ins.dst[0].idx[0].offset : 0; + streamVar = m_module.constu32(streamId); + } + + // Checking the negation is easier for EmitThenCut/EmitThenCutStream + bool doEmit = ins.op != DxbcOpcode::Cut && ins.op != DxbcOpcode::CutStream; + bool doCut = ins.op != DxbcOpcode::Emit && ins.op != DxbcOpcode::EmitStream; + + if (doEmit) { + if (m_gs.needsOutputSetup) + emitOutputSetup(); + emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances); + emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances); + emitXfbOutputSetup(streamId, false); + m_module.opEmitVertex(streamVar); + } + + if (doCut) + m_module.opEndPrimitive(streamVar); + } + + + void DxbcCompiler::emitAtomic(const DxbcShaderInstruction& ins) { + // atomic_* operations have the following operands: + // (dst0) Destination u# or g# register + // (src0) Index into the texture or buffer + // (src1) The source value for the operation + // (src2) Second source operand (optional) + // imm_atomic_* operations have the following operands: + // (dst0) Register that receives the result + // (dst1) 
Destination u# or g# register + // (srcX) As above + const DxbcBufferInfo bufferInfo = getBufferInfo(ins.dst[ins.dstCount - 1]); + + bool isImm = ins.dstCount == 2; + bool isUav = ins.dst[ins.dstCount - 1].type == DxbcOperandType::UnorderedAccessView; + bool isSsbo = bufferInfo.isSsbo; + + // Retrieve destination pointer for the atomic operation> + const DxbcRegisterPointer pointer = emitGetAtomicPointer( + ins.dst[ins.dstCount - 1], ins.src[0]); + + // Load source values + std::array src; + + for (uint32_t i = 1; i < ins.srcCount; i++) { + src[i - 1] = emitRegisterBitcast( + emitRegisterLoad(ins.src[i], DxbcRegMask(true, false, false, false)), + pointer.type.ctype); + } + + // Define memory scope and semantics based on the operands + uint32_t scope = 0; + uint32_t semantics = 0; + + if (isUav) { + scope = spv::ScopeQueueFamily; + semantics = spv::MemorySemanticsAcquireReleaseMask; + + semantics |= isSsbo + ? spv::MemorySemanticsUniformMemoryMask + : spv::MemorySemanticsImageMemoryMask; + } else { + scope = spv::ScopeWorkgroup; + semantics = spv::MemorySemanticsWorkgroupMemoryMask + | spv::MemorySemanticsAcquireReleaseMask; + } + + const uint32_t scopeId = m_module.constu32(scope); + const uint32_t semanticsId = m_module.constu32(semantics); + + // Perform the atomic operation on the given pointer + DxbcRegisterValue value; + value.type = pointer.type; + value.id = 0; + + // The result type, which is a scalar integer + const uint32_t typeId = getVectorTypeId(value.type); + + switch (ins.op) { + case DxbcOpcode::AtomicCmpStore: + case DxbcOpcode::ImmAtomicCmpExch: + value.id = m_module.opAtomicCompareExchange( + typeId, pointer.id, scopeId, semanticsId, + m_module.constu32(spv::MemorySemanticsMaskNone), + src[1].id, src[0].id); + break; + + case DxbcOpcode::ImmAtomicExch: + value.id = m_module.opAtomicExchange(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicIAdd: + case DxbcOpcode::ImmAtomicIAdd: + value.id = 
m_module.opAtomicIAdd(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicAnd: + case DxbcOpcode::ImmAtomicAnd: + value.id = m_module.opAtomicAnd(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicOr: + case DxbcOpcode::ImmAtomicOr: + value.id = m_module.opAtomicOr(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicXor: + case DxbcOpcode::ImmAtomicXor: + value.id = m_module.opAtomicXor(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicIMin: + case DxbcOpcode::ImmAtomicIMin: + value.id = m_module.opAtomicSMin(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicIMax: + case DxbcOpcode::ImmAtomicIMax: + value.id = m_module.opAtomicSMax(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicUMin: + case DxbcOpcode::ImmAtomicUMin: + value.id = m_module.opAtomicUMin(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + case DxbcOpcode::AtomicUMax: + case DxbcOpcode::ImmAtomicUMax: + value.id = m_module.opAtomicUMax(typeId, + pointer.id, scopeId, semanticsId, + src[0].id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // Write back the result to the destination + // register if this is an imm_atomic_* opcode. 
+ if (isImm) + emitRegisterStore(ins.dst[0], value); + } + + + void DxbcCompiler::emitAtomicCounter(const DxbcShaderInstruction& ins) { + // imm_atomic_alloc and imm_atomic_consume have the following operands: + // (dst0) The register that will hold the old counter value + // (dst1) The UAV whose counter is going to be modified + const uint32_t registerId = ins.dst[1].idx[0].offset; + + if (m_uavs.at(registerId).ctrId == 0) + m_uavs.at(registerId).ctrId = emitDclUavCounter(registerId); + + // Only use subgroup ops on compute to avoid having to + // deal with helper invocations or hardware limitations + bool useSubgroupOps = m_moduleInfo.options.useSubgroupOpsForAtomicCounters + && m_programInfo.type() == DxbcProgramType::ComputeShader; + + // Current block ID used in a phi later on + uint32_t baseBlockId = m_module.getBlockId(); + + // In case we have subgroup ops enabled, we need to + // count the number of active lanes, the lane index, + // and we need to perform the atomic op conditionally + uint32_t laneCount = 0; + uint32_t laneIndex = 0; + + DxbcConditional elect; + + if (useSubgroupOps) { + m_module.enableCapability(spv::CapabilityGroupNonUniform); + m_module.enableCapability(spv::CapabilityGroupNonUniformBallot); + + uint32_t ballot = m_module.opGroupNonUniformBallot( + getVectorTypeId({ DxbcScalarType::Uint32, 4 }), + m_module.constu32(spv::ScopeSubgroup), + m_module.constBool(true)); + + laneCount = m_module.opGroupNonUniformBallotBitCount( + getScalarTypeId(DxbcScalarType::Uint32), + m_module.constu32(spv::ScopeSubgroup), + spv::GroupOperationReduce, ballot); + + laneIndex = m_module.opGroupNonUniformBallotBitCount( + getScalarTypeId(DxbcScalarType::Uint32), + m_module.constu32(spv::ScopeSubgroup), + spv::GroupOperationExclusiveScan, ballot); + + // Elect one lane to perform the atomic op + uint32_t election = m_module.opGroupNonUniformElect( + m_module.defBoolType(), + m_module.constu32(spv::ScopeSubgroup)); + + elect.labelIf = m_module.allocateId(); + 
elect.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(elect.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(election, elect.labelIf, elect.labelEnd); + + m_module.opLabel(elect.labelIf); + } else { + // We're going to use this for the increment + laneCount = m_module.constu32(1); + } + + // Get a pointer to the atomic counter in question + DxbcRegisterInfo ptrType; + ptrType.type.ctype = DxbcScalarType::Uint32; + ptrType.type.ccount = 1; + ptrType.type.alength = 0; + ptrType.sclass = spv::StorageClassStorageBuffer; + + uint32_t zeroId = m_module.consti32(0); + uint32_t ptrId = m_module.opAccessChain( + getPointerTypeId(ptrType), + m_uavs.at(registerId).ctrId, + 1, &zeroId); + + // Define memory scope and semantics based on the operands + uint32_t scope = spv::ScopeQueueFamily; + uint32_t semantics = spv::MemorySemanticsUniformMemoryMask + | spv::MemorySemanticsAcquireReleaseMask; + + uint32_t scopeId = m_module.constu32(scope); + uint32_t semanticsId = m_module.constu32(semantics); + + // Compute the result value + DxbcRegisterValue value; + value.type.ctype = DxbcScalarType::Uint32; + value.type.ccount = 1; + + uint32_t typeId = getVectorTypeId(value.type); + + switch (ins.op) { + case DxbcOpcode::ImmAtomicAlloc: + value.id = m_module.opAtomicIAdd(typeId, ptrId, + scopeId, semanticsId, laneCount); + break; + + case DxbcOpcode::ImmAtomicConsume: + value.id = m_module.opAtomicISub(typeId, ptrId, + scopeId, semanticsId, laneCount); + value.id = m_module.opISub(typeId, value.id, laneCount); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // If we're using subgroup ops, we have to broadcast + // the result of the atomic op and compute the index + if (useSubgroupOps) { + m_module.opBranch(elect.labelEnd); + m_module.opLabel (elect.labelEnd); + + uint32_t undef = m_module.constUndef(typeId); + + std::array phiLabels = {{ + { value.id, elect.labelIf }, + { 
undef, baseBlockId }, + }}; + + value.id = m_module.opPhi(typeId, + phiLabels.size(), phiLabels.data()); + value.id = m_module.opGroupNonUniformBroadcastFirst(typeId, + m_module.constu32(spv::ScopeSubgroup), value.id); + value.id = m_module.opIAdd(typeId, value.id, laneIndex); + } + + // Store the result + emitRegisterStore(ins.dst[0], value); + } + + + void DxbcCompiler::emitBarrier(const DxbcShaderInstruction& ins) { + // sync takes no operands. Instead, the synchronization + // scope is defined by the operand control bits. + const DxbcSyncFlags flags = ins.controls.syncFlags(); + + uint32_t executionScope = spv::ScopeInvocation; + uint32_t memoryScope = spv::ScopeInvocation; + uint32_t memorySemantics = 0; + + if (flags.test(DxbcSyncFlag::ThreadsInGroup)) + executionScope = spv::ScopeWorkgroup; + + if (flags.test(DxbcSyncFlag::ThreadGroupSharedMemory)) { + memoryScope = spv::ScopeWorkgroup; + memorySemantics |= spv::MemorySemanticsWorkgroupMemoryMask + | spv::MemorySemanticsAcquireReleaseMask + | spv::MemorySemanticsMakeAvailableMask + | spv::MemorySemanticsMakeVisibleMask; + } + + if (flags.test(DxbcSyncFlag::UavMemoryGroup)) { + memoryScope = spv::ScopeWorkgroup; + memorySemantics |= spv::MemorySemanticsImageMemoryMask + | spv::MemorySemanticsUniformMemoryMask + | spv::MemorySemanticsAcquireReleaseMask + | spv::MemorySemanticsMakeAvailableMask + | spv::MemorySemanticsMakeVisibleMask; + } + + if (flags.test(DxbcSyncFlag::UavMemoryGlobal)) { + memoryScope = spv::ScopeQueueFamily; + + if (m_programInfo.type() == DxbcProgramType::ComputeShader && !m_hasGloballyCoherentUav) + memoryScope = spv::ScopeWorkgroup; + + memorySemantics |= spv::MemorySemanticsImageMemoryMask + | spv::MemorySemanticsUniformMemoryMask + | spv::MemorySemanticsAcquireReleaseMask + | spv::MemorySemanticsMakeAvailableMask + | spv::MemorySemanticsMakeVisibleMask; + } + + if (executionScope != spv::ScopeInvocation) { + m_module.opControlBarrier( + m_module.constu32(executionScope), + 
m_module.constu32(memoryScope), + m_module.constu32(memorySemantics)); + } else if (memoryScope != spv::ScopeInvocation) { + m_module.opMemoryBarrier( + m_module.constu32(memoryScope), + m_module.constu32(memorySemantics)); + } else { + Logger::warn("DxbcCompiler: sync instruction has no effect"); + } + } + + + void DxbcCompiler::emitBitExtract(const DxbcShaderInstruction& ins) { + // ibfe and ubfe take the following arguments: + // (dst0) The destination register + // (src0) Number of bits to extact + // (src1) Offset of the bits to extract + // (src2) Register to extract bits from + const bool isSigned = ins.op == DxbcOpcode::IBfe; + + DxbcRegisterValue bitCnt = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + DxbcRegisterValue bitOfs = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + + if (ins.src[0].type != DxbcOperandType::Imm32) + bitCnt = emitRegisterMaskBits(bitCnt, 0x1F); + + if (ins.src[1].type != DxbcOperandType::Imm32) + bitOfs = emitRegisterMaskBits(bitOfs, 0x1F); + + const DxbcRegisterValue src = emitRegisterLoad(ins.src[2], ins.dst[0].mask); + + const uint32_t componentCount = src.type.ccount; + std::array componentIds = {{ 0, 0, 0, 0 }}; + + for (uint32_t i = 0; i < componentCount; i++) { + const DxbcRegisterValue currBitCnt = emitRegisterExtract(bitCnt, DxbcRegMask::select(i)); + const DxbcRegisterValue currBitOfs = emitRegisterExtract(bitOfs, DxbcRegMask::select(i)); + const DxbcRegisterValue currSrc = emitRegisterExtract(src, DxbcRegMask::select(i)); + + const uint32_t typeId = getVectorTypeId(currSrc.type); + + componentIds[i] = isSigned + ? m_module.opBitFieldSExtract(typeId, currSrc.id, currBitOfs.id, currBitCnt.id) + : m_module.opBitFieldUExtract(typeId, currSrc.id, currBitOfs.id, currBitCnt.id); + } + + DxbcRegisterValue result; + result.type = src.type; + result.id = componentCount > 1 + ? 
m_module.opCompositeConstruct( + getVectorTypeId(result.type), + componentCount, componentIds.data()) + : componentIds[0]; + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitBitInsert(const DxbcShaderInstruction& ins) { + // ibfe and ubfe take the following arguments: + // (dst0) The destination register + // (src0) Number of bits to extact + // (src1) Offset of the bits to extract + // (src2) Register to take bits from + // (src3) Register to replace bits in + DxbcRegisterValue bitCnt = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + DxbcRegisterValue bitOfs = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + + if (ins.src[0].type != DxbcOperandType::Imm32) + bitCnt = emitRegisterMaskBits(bitCnt, 0x1F); + + if (ins.src[1].type != DxbcOperandType::Imm32) + bitOfs = emitRegisterMaskBits(bitOfs, 0x1F); + + const DxbcRegisterValue insert = emitRegisterLoad(ins.src[2], ins.dst[0].mask); + const DxbcRegisterValue base = emitRegisterLoad(ins.src[3], ins.dst[0].mask); + + const uint32_t componentCount = base.type.ccount; + std::array componentIds = {{ 0, 0, 0, 0 }}; + + for (uint32_t i = 0; i < componentCount; i++) { + const DxbcRegisterValue currBitCnt = emitRegisterExtract(bitCnt, DxbcRegMask::select(i)); + const DxbcRegisterValue currBitOfs = emitRegisterExtract(bitOfs, DxbcRegMask::select(i)); + const DxbcRegisterValue currInsert = emitRegisterExtract(insert, DxbcRegMask::select(i)); + const DxbcRegisterValue currBase = emitRegisterExtract(base, DxbcRegMask::select(i)); + + componentIds[i] = m_module.opBitFieldInsert( + getVectorTypeId(currBase.type), + currBase.id, currInsert.id, + currBitOfs.id, currBitCnt.id); + } + + DxbcRegisterValue result; + result.type = base.type; + result.id = componentCount > 1 + ? 
m_module.opCompositeConstruct( + getVectorTypeId(result.type), + componentCount, componentIds.data()) + : componentIds[0]; + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitBitScan(const DxbcShaderInstruction& ins) { + // firstbit(lo|hi|shi) have two operands: + // (dst0) The destination operant + // (src0) Source operand to scan + DxbcRegisterValue src = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + + DxbcRegisterValue dst; + dst.type.ctype = ins.dst[0].dataType; + dst.type.ccount = ins.dst[0].mask.popCount(); + + // Result type, should be an unsigned integer + const uint32_t typeId = getVectorTypeId(dst.type); + + switch (ins.op) { + case DxbcOpcode::FirstBitLo: dst.id = m_module.opFindILsb(typeId, src.id); break; + case DxbcOpcode::FirstBitHi: dst.id = m_module.opFindUMsb(typeId, src.id); break; + case DxbcOpcode::FirstBitShi: dst.id = m_module.opFindSMsb(typeId, src.id); break; + default: Logger::warn(str::format("DxbcCompiler: Unhandled instruction: ", ins.op)); return; + } + + // The 'Hi' variants are counted from the MSB in DXBC + // rather than the LSB, so we have to invert the number + if (ins.op == DxbcOpcode::FirstBitHi || ins.op == DxbcOpcode::FirstBitShi) { + uint32_t boolTypeId = m_module.defBoolType(); + + if (dst.type.ccount > 1) + boolTypeId = m_module.defVectorType(boolTypeId, dst.type.ccount); + + DxbcRegisterValue const31 = emitBuildConstVecu32(31u, 31u, 31u, 31u, ins.dst[0].mask); + DxbcRegisterValue constff = emitBuildConstVecu32(~0u, ~0u, ~0u, ~0u, ins.dst[0].mask); + + dst.id = m_module.opSelect(typeId, + m_module.opINotEqual(boolTypeId, dst.id, constff.id), + m_module.opISub(typeId, const31.id, dst.id), + constff.id); + } + + // No modifiers are supported + emitRegisterStore(ins.dst[0], dst); + } + + + void DxbcCompiler::emitBufferQuery(const DxbcShaderInstruction& ins) { + // bufinfo takes two arguments + // (dst0) The destination register + // (src0) The buffer register to query + const DxbcBufferInfo bufferInfo 
= getBufferInfo(ins.src[0]); + bool isSsbo = bufferInfo.isSsbo; + + // We'll store this as a scalar unsigned integer + DxbcRegisterValue result = isSsbo + ? emitQueryBufferSize(ins.src[0]) + : emitQueryTexelBufferSize(ins.src[0]); + + uint32_t typeId = getVectorTypeId(result.type); + + // Adjust returned size if this is a raw or structured + // buffer, as emitQueryTexelBufferSize only returns the + // number of typed elements in the buffer. + if (bufferInfo.type == DxbcResourceType::Raw) { + result.id = m_module.opIMul(typeId, + result.id, m_module.constu32(4)); + } else if (bufferInfo.type == DxbcResourceType::Structured) { + result.id = m_module.opUDiv(typeId, result.id, + m_module.constu32(bufferInfo.stride / 4)); + } + + // Store the result. The scalar will be extended to a + // vector if the write mask consists of more than one + // component, which is the desired behaviour. + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitBufferLoad(const DxbcShaderInstruction& ins) { + // ld_raw takes three arguments: + // (dst0) Destination register + // (src0) Byte offset + // (src1) Source register + // ld_structured takes four arguments: + // (dst0) Destination register + // (src0) Structure index + // (src1) Byte offset + // (src2) Source register + const bool isStructured = ins.op == DxbcOpcode::LdStructured + || ins.op == DxbcOpcode::LdStructuredS; + + // Source register. The exact way we access + // the data depends on the register type. + const DxbcRegister& dstReg = ins.dst[0]; + const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; + + // Retrieve common info about the buffer + const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg); + + // Compute element index + const DxbcRegisterValue elementIndex = isStructured + ? 
emitCalcBufferIndexStructured( + emitRegisterLoad(ins.src[0], DxbcRegMask(true, false, false, false)), + emitRegisterLoad(ins.src[1], DxbcRegMask(true, false, false, false)), + bufferInfo.stride) + : emitCalcBufferIndexRaw( + emitRegisterLoad(ins.src[0], DxbcRegMask(true, false, false, false))); + + uint32_t sparseFeedbackId = uint32_t(ins.dstCount == 2); + + emitRegisterStore(dstReg, emitRawBufferLoad(srcReg, + elementIndex, dstReg.mask, sparseFeedbackId)); + + if (sparseFeedbackId) + emitStoreSparseFeedback(ins.dst[1], sparseFeedbackId); + } + + + void DxbcCompiler::emitBufferStore(const DxbcShaderInstruction& ins) { + // store_raw takes three arguments: + // (dst0) Destination register + // (src0) Byte offset + // (src1) Source register + // store_structured takes four arguments: + // (dst0) Destination register + // (src0) Structure index + // (src1) Byte offset + // (src2) Source register + const bool isStructured = ins.op == DxbcOpcode::StoreStructured; + + // Source register. The exact way we access + // the data depends on the register type. + const DxbcRegister& dstReg = ins.dst[0]; + const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; + + // Retrieve common info about the buffer + const DxbcBufferInfo bufferInfo = getBufferInfo(dstReg); + + // Compute element index + const DxbcRegisterValue elementIndex = isStructured + ? 
emitCalcBufferIndexStructured( + emitRegisterLoad(ins.src[0], DxbcRegMask(true, false, false, false)), + emitRegisterLoad(ins.src[1], DxbcRegMask(true, false, false, false)), + bufferInfo.stride) + : emitCalcBufferIndexRaw( + emitRegisterLoad(ins.src[0], DxbcRegMask(true, false, false, false))); + + emitRawBufferStore(dstReg, elementIndex, + emitRegisterLoad(srcReg, dstReg.mask)); + } + + + void DxbcCompiler::emitConvertFloat16(const DxbcShaderInstruction& ins) { + // f32tof16 takes two operands: + // (dst0) Destination register as a uint32 vector + // (src0) Source register as a float32 vector + // f16tof32 takes two operands: + // (dst0) Destination register as a float32 vector + // (src0) Source register as a uint32 vector + const DxbcRegisterValue src = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + + // We handle both packing and unpacking here + const bool isPack = ins.op == DxbcOpcode::F32toF16; + + // The conversion instructions do not map very well to the + // SPIR-V pack instructions, which operate on 2D vectors. + std::array scalarIds = {{ 0, 0, 0, 0 }}; + + const uint32_t componentCount = src.type.ccount; + + // These types are used in both pack and unpack operations + const uint32_t t_u32 = getVectorTypeId({ DxbcScalarType::Uint32, 1 }); + const uint32_t t_f32 = getVectorTypeId({ DxbcScalarType::Float32, 1 }); + const uint32_t t_f32v2 = getVectorTypeId({ DxbcScalarType::Float32, 2 }); + + // Constant zero-bit pattern, used for packing + const uint32_t zerof32 = isPack ? 
m_module.constf32(0.0f) : 0; + + for (uint32_t i = 0; i < componentCount; i++) { + const DxbcRegisterValue componentValue + = emitRegisterExtract(src, DxbcRegMask::select(i)); + + if (isPack) { // f32tof16 + const std::array packIds = + {{ componentValue.id, zerof32 }}; + + scalarIds[i] = m_module.opPackHalf2x16(t_u32, + m_module.opCompositeConstruct(t_f32v2, packIds.size(), packIds.data())); + } else { // f16tof32 + const uint32_t zeroIndex = 0; + + scalarIds[i] = m_module.opCompositeExtract(t_f32, + m_module.opUnpackHalf2x16(t_f32v2, componentValue.id), + 1, &zeroIndex); + } + } + + DxbcRegisterValue result; + result.type.ctype = ins.dst[0].dataType; + result.type.ccount = componentCount; + + uint32_t typeId = getVectorTypeId(result.type); + result.id = componentCount > 1 + ? m_module.opCompositeConstruct(typeId, + componentCount, scalarIds.data()) + : scalarIds[0]; + + if (isPack) { + // Some drivers return infinity if the input value is above a certain + // threshold, but D3D wants us to return infinity only if the input is + // actually infinite. Fix this up to return the maximum representable + // 16-bit floating point number instead, but preserve input infinity. 
+ uint32_t t_bvec = getVectorTypeId({ DxbcScalarType::Bool, componentCount }); + uint32_t f16Infinity = m_module.constuReplicant(0x7C00, componentCount); + uint32_t f16Unsigned = m_module.constuReplicant(0x7FFF, componentCount); + + uint32_t isInputInf = m_module.opIsInf(t_bvec, src.id); + uint32_t isValueInf = m_module.opIEqual(t_bvec, f16Infinity, + m_module.opBitwiseAnd(typeId, result.id, f16Unsigned)); + + result.id = m_module.opSelect(getVectorTypeId(result.type), + m_module.opLogicalAnd(t_bvec, isValueInf, m_module.opLogicalNot(t_bvec, isInputInf)), + m_module.opISub(typeId, result.id, m_module.constuReplicant(1, componentCount)), + result.id); + } + + // Store result in the destination register + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitConvertFloat64(const DxbcShaderInstruction& ins) { + // ftod and dtof take the following operands: + // (dst0) Destination operand + // (src0) Number to convert + uint32_t dstBits = ins.dst[0].mask.popCount(); + + DxbcRegMask srcMask = isDoubleType(ins.dst[0].dataType) + ? 
DxbcRegMask(dstBits >= 2, dstBits >= 4, false, false) + : DxbcRegMask(dstBits >= 1, dstBits >= 1, dstBits >= 2, dstBits >= 2); + + // Perform actual conversion, destination modifiers are not applied + DxbcRegisterValue val = emitRegisterLoad(ins.src[0], srcMask); + + DxbcRegisterValue result; + result.type.ctype = ins.dst[0].dataType; + result.type.ccount = val.type.ccount; + + switch (ins.op) { + case DxbcOpcode::DtoF: + case DxbcOpcode::FtoD: + result.id = m_module.opFConvert( + getVectorTypeId(result.type), val.id); + break; + + case DxbcOpcode::DtoI: + result.id = m_module.opConvertFtoS( + getVectorTypeId(result.type), val.id); + break; + + case DxbcOpcode::DtoU: + result.id = m_module.opConvertFtoU( + getVectorTypeId(result.type), val.id); + break; + + case DxbcOpcode::ItoD: + result.id = m_module.opConvertStoF( + getVectorTypeId(result.type), val.id); + break; + + case DxbcOpcode::UtoD: + result.id = m_module.opConvertUtoF( + getVectorTypeId(result.type), val.id); + break; + + default: + Logger::warn(str::format("DxbcCompiler: Unhandled instruction: ", ins.op)); + return; + } + + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitHullShaderInstCnt(const DxbcShaderInstruction& ins) { + this->getCurrentHsForkJoinPhase()->instanceCount = ins.imm[0].u32; + } + + + void DxbcCompiler::emitHullShaderPhase(const DxbcShaderInstruction& ins) { + switch (ins.op) { + case DxbcOpcode::HsDecls: { + if (m_hs.currPhaseType != DxbcCompilerHsPhase::None) + Logger::err("DXBC: HsDecls not the first phase in hull shader"); + + m_hs.currPhaseType = DxbcCompilerHsPhase::Decl; + } break; + + case DxbcOpcode::HsControlPointPhase: { + m_hs.cpPhase = this->emitNewHullShaderControlPointPhase(); + + m_hs.currPhaseType = DxbcCompilerHsPhase::ControlPoint; + m_hs.currPhaseId = 0; + + m_module.setDebugName(m_hs.cpPhase.functionId, "hs_control_point"); + } break; + + case DxbcOpcode::HsForkPhase: { + auto phase = this->emitNewHullShaderForkJoinPhase(); + 
m_hs.forkPhases.push_back(phase); + + m_hs.currPhaseType = DxbcCompilerHsPhase::Fork; + m_hs.currPhaseId = m_hs.forkPhases.size() - 1; + + m_module.setDebugName(phase.functionId, + str::format("hs_fork_", m_hs.currPhaseId).c_str()); + } break; + + case DxbcOpcode::HsJoinPhase: { + auto phase = this->emitNewHullShaderForkJoinPhase(); + m_hs.joinPhases.push_back(phase); + + m_hs.currPhaseType = DxbcCompilerHsPhase::Join; + m_hs.currPhaseId = m_hs.joinPhases.size() - 1; + + m_module.setDebugName(phase.functionId, + str::format("hs_join_", m_hs.currPhaseId).c_str()); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + } + } + + + void DxbcCompiler::emitInterpolate(const DxbcShaderInstruction& ins) { + m_module.enableCapability(spv::CapabilityInterpolationFunction); + + // The SPIR-V instructions operate on input variable pointers, + // which are all declared as four-component float vectors. + uint32_t registerId = ins.src[0].idx[0].offset; + + DxbcRegisterValue result; + result.type = getInputRegType(registerId); + + switch (ins.op) { + case DxbcOpcode::EvalCentroid: { + result.id = m_module.opInterpolateAtCentroid( + getVectorTypeId(result.type), + m_vRegs.at(registerId).id); + } break; + + case DxbcOpcode::EvalSampleIndex: { + const DxbcRegisterValue sampleIndex = emitRegisterLoad( + ins.src[1], DxbcRegMask(true, false, false, false)); + + result.id = m_module.opInterpolateAtSample( + getVectorTypeId(result.type), + m_vRegs.at(registerId).id, + sampleIndex.id); + } break; + + case DxbcOpcode::EvalSnapped: { + const DxbcRegisterValue offset = emitRegisterLoad( + ins.src[1], DxbcRegMask(true, true, false, false)); + + result.id = m_module.opInterpolateAtOffset( + getVectorTypeId(result.type), + m_vRegs.at(registerId).id, + offset.id); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + result = emitRegisterSwizzle(result, + 
ins.src[0].swizzle, ins.dst[0].mask); + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitSparseCheckAccess( + const DxbcShaderInstruction& ins) { + // check_access_mapped has two operands: + // (dst0) The destination register + // (src0) The residency code + m_module.enableCapability(spv::CapabilitySparseResidency); + + DxbcRegisterValue srcValue = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + + uint32_t boolId = m_module.opImageSparseTexelsResident( + m_module.defBoolType(), srcValue.id); + + DxbcRegisterValue dstValue; + dstValue.type = { DxbcScalarType::Uint32, 1 }; + dstValue.id = m_module.opSelect(getScalarTypeId(DxbcScalarType::Uint32), + boolId, m_module.constu32(~0u), m_module.constu32(0)); + + emitRegisterStore(ins.dst[0], dstValue); + } + + + void DxbcCompiler::emitTextureQuery(const DxbcShaderInstruction& ins) { + // resinfo has three operands: + // (dst0) The destination register + // (src0) Resource LOD to query + // (src1) Resource to query + const DxbcBufferInfo resourceInfo = getBufferInfo(ins.src[1]); + const DxbcResinfoType resinfoType = ins.controls.resinfoType(); + + // Read the exact LOD for the image query + const DxbcRegisterValue mipLod = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + const DxbcScalarType returnType = resinfoType == DxbcResinfoType::Uint + ? DxbcScalarType::Uint32 : DxbcScalarType::Float32; + + // Query the size of the selected mip level, as well as the + // total number of mip levels. We will have to combine the + // result into a four-component vector later. + DxbcRegisterValue imageSize = emitQueryTextureSize(ins.src[1], mipLod); + DxbcRegisterValue imageLevels = emitQueryTextureLods(ins.src[1]); + + // If the mip level is out of bounds, D3D requires us to return + // zero before applying modifiers, whereas SPIR-V is undefined, + // so we need to fix it up manually here. 
+ imageSize.id = m_module.opSelect(getVectorTypeId(imageSize.type), + m_module.opULessThan(m_module.defBoolType(), mipLod.id, imageLevels.id), + imageSize.id, emitBuildZeroVector(imageSize.type).id); + + // Convert intermediates to the requested type + if (returnType == DxbcScalarType::Float32) { + imageSize.type.ctype = DxbcScalarType::Float32; + imageSize.id = m_module.opConvertUtoF( + getVectorTypeId(imageSize.type), + imageSize.id); + + imageLevels.type.ctype = DxbcScalarType::Float32; + imageLevels.id = m_module.opConvertUtoF( + getVectorTypeId(imageLevels.type), + imageLevels.id); + } + + // If the selected return type is rcpFloat, we need + // to compute the reciprocal of the image dimensions, + // but not the array size, so we need to separate it. + const uint32_t imageCoordDim = imageSize.type.ccount; + + DxbcRegisterValue imageLayers; + imageLayers.type = imageSize.type; + imageLayers.id = 0; + + if (resinfoType == DxbcResinfoType::RcpFloat && resourceInfo.image.array) { + imageLayers = emitRegisterExtract(imageSize, DxbcRegMask::select(imageCoordDim - 1)); + imageSize = emitRegisterExtract(imageSize, DxbcRegMask::firstN(imageCoordDim - 1)); + } + + if (resinfoType == DxbcResinfoType::RcpFloat) { + imageSize.id = m_module.opFDiv( + getVectorTypeId(imageSize.type), + emitBuildConstVecf32(1.0f, 1.0f, 1.0f, 1.0f, + DxbcRegMask::firstN(imageSize.type.ccount)).id, + imageSize.id); + } + + // Concatenate result vectors and scalars to form a + // 4D vector. Unused components will be set to zero. + std::array vectorIds = { imageSize.id, 0, 0, 0 }; + uint32_t numVectorIds = 1; + + if (imageLayers.id != 0) + vectorIds[numVectorIds++] = imageLayers.id; + + if (imageCoordDim < 3) { + const uint32_t zero = returnType == DxbcScalarType::Uint32 + ? 
m_module.constu32(0) + : m_module.constf32(0.0f); + + for (uint32_t i = imageCoordDim; i < 3; i++) + vectorIds[numVectorIds++] = zero; + } + + vectorIds[numVectorIds++] = imageLevels.id; + + // Create the actual result vector + DxbcRegisterValue result; + result.type.ctype = returnType; + result.type.ccount = 4; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + numVectorIds, vectorIds.data()); + + // Swizzle components using the resource swizzle + // and the destination operand's write mask + result = emitRegisterSwizzle(result, + ins.src[1].swizzle, ins.dst[0].mask); + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitTextureQueryLod(const DxbcShaderInstruction& ins) { + // All sample instructions have at least these operands: + // (dst0) The destination register + // (src0) Texture coordinates + // (src1) The texture itself + // (src2) The sampler object + const DxbcRegister& texCoordReg = ins.src[0]; + const DxbcRegister& textureReg = ins.src[1]; + const DxbcRegister& samplerReg = ins.src[2]; + + // Texture and sampler register IDs + const auto& texture = m_textures.at(textureReg.idx[0].offset); + const auto& sampler = m_samplers.at(samplerReg.idx[0].offset); + + // Load texture coordinates + const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, + DxbcRegMask::firstN(getTexLayerDim(texture.imageInfo))); + + // Query the LOD. The result is a two-dimensional float32 + // vector containing the mip level and virtual LOD numbers. + const uint32_t sampledImageId = emitLoadSampledImage(texture, sampler, false); + const uint32_t queriedLodId = m_module.opImageQueryLod( + getVectorTypeId({ DxbcScalarType::Float32, 2 }), + sampledImageId, coord.id); + + // Build the result array vector by filling up + // the remaining two components with zeroes. 
+ const uint32_t zero = m_module.constf32(0.0f); + const std::array resultIds + = {{ queriedLodId, zero, zero }}; + + DxbcRegisterValue result; + result.type = DxbcVectorType { DxbcScalarType::Float32, 4 }; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + resultIds.size(), resultIds.data()); + + result = emitRegisterSwizzle(result, ins.src[1].swizzle, ins.dst[0].mask); + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler::emitTextureQueryMs(const DxbcShaderInstruction& ins) { + // sampleinfo has two operands: + // (dst0) The destination register + // (src0) Resource to query + DxbcRegisterValue sampleCount = emitQueryTextureSamples(ins.src[0]); + + if (ins.controls.returnType() != DxbcInstructionReturnType::Uint) { + sampleCount.type = { DxbcScalarType::Float32, 1 }; + sampleCount.id = m_module.opConvertUtoF( + getVectorTypeId(sampleCount.type), + sampleCount.id); + } + + emitRegisterStore(ins.dst[0], sampleCount); + } + + + void DxbcCompiler::emitTextureQueryMsPos(const DxbcShaderInstruction& ins) { + // samplepos has three operands: + // (dst0) The destination register + // (src0) Resource to query + // (src1) Sample index + if (m_samplePositions == 0) + m_samplePositions = emitSamplePosArray(); + + // The lookup index is qual to the sample count plus the + // sample index, or 0 if the resource cannot be queried. 
+ DxbcRegisterValue sampleCount = emitQueryTextureSamples(ins.src[0]); + DxbcRegisterValue sampleIndex = emitRegisterLoad( + ins.src[1], DxbcRegMask(true, false, false, false)); + + uint32_t lookupIndex = m_module.opIAdd( + getVectorTypeId(sampleCount.type), + sampleCount.id, sampleIndex.id); + + // Validate the parameters + uint32_t sampleCountValid = m_module.opULessThanEqual( + m_module.defBoolType(), + sampleCount.id, + m_module.constu32(16)); + + uint32_t sampleIndexValid = m_module.opULessThan( + m_module.defBoolType(), + sampleIndex.id, + sampleCount.id); + + // If the lookup cannot be performed, set the lookup + // index to zero, which will return a zero vector. + lookupIndex = m_module.opSelect( + getVectorTypeId(sampleCount.type), + m_module.opLogicalAnd( + m_module.defBoolType(), + sampleCountValid, + sampleIndexValid), + lookupIndex, + m_module.constu32(0)); + + // Load sample pos vector and write the masked + // components to the destination register. + DxbcRegisterPointer samplePos; + samplePos.type.ctype = DxbcScalarType::Float32; + samplePos.type.ccount = 2; + samplePos.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(samplePos.type), + spv::StorageClassPrivate), + m_samplePositions, 1, &lookupIndex); + + // Expand to vec4 by appending zeroes + DxbcRegisterValue result = emitValueLoad(samplePos); + + DxbcRegisterValue zero; + zero.type.ctype = DxbcScalarType::Float32; + zero.type.ccount = 2; + zero.id = m_module.constvec2f32(0.0f, 0.0f); + + result = emitRegisterConcat(result, zero); + + emitRegisterStore(ins.dst[0], + emitRegisterSwizzle(result, + ins.src[0].swizzle, + ins.dst[0].mask)); + } + + + void DxbcCompiler::emitTextureFetch(const DxbcShaderInstruction& ins) { + // ld has three operands: + // (dst0) The destination register + // (src0) Source address + // (src1) Source texture + // ld2dms has four operands: + // (dst0) The destination register + // (src0) Source address + // (src1) Source texture + // (src2) Sample 
number + const auto& texture = m_textures.at(ins.src[1].idx[0].offset); + const uint32_t imageLayerDim = getTexLayerDim(texture.imageInfo); + + bool isMultisampled = ins.op == DxbcOpcode::LdMs + || ins.op == DxbcOpcode::LdMsS; + + // Load the texture coordinates. The last component + // contains the LOD if the resource is an image. + const DxbcRegisterValue address = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, true, true, true)); + + // Additional image operands. This will store + // the LOD and the address offset if present. + SpirvImageOperands imageOperands; + imageOperands.sparse = ins.dstCount == 2; + + if (ins.sampleControls.u != 0 || ins.sampleControls.v != 0 || ins.sampleControls.w != 0) { + const std::array offsetIds = { + imageLayerDim >= 1 ? m_module.consti32(ins.sampleControls.u) : 0, + imageLayerDim >= 2 ? m_module.consti32(ins.sampleControls.v) : 0, + imageLayerDim >= 3 ? m_module.consti32(ins.sampleControls.w) : 0, + }; + + imageOperands.flags |= spv::ImageOperandsConstOffsetMask; + imageOperands.sConstOffset = offsetIds[0]; + + if (imageLayerDim > 1) { + imageOperands.sConstOffset = m_module.constComposite( + getVectorTypeId({ DxbcScalarType::Sint32, imageLayerDim }), + imageLayerDim, offsetIds.data()); + } + } + + // The LOD is not present when reading from + // a buffer or from a multisample texture. 
+ if (texture.imageInfo.dim != spv::DimBuffer && texture.imageInfo.ms == 0) { + DxbcRegisterValue imageLod; + + if (!isMultisampled) { + imageLod = emitRegisterExtract( + address, DxbcRegMask(false, false, false, true)); + } else { + // If we force-disabled MSAA, fetch from LOD 0 + imageLod.type = { DxbcScalarType::Uint32, 1 }; + imageLod.id = m_module.constu32(0); + } + + imageOperands.flags |= spv::ImageOperandsLodMask; + imageOperands.sLod = imageLod.id; + } + + // The ld2dms instruction has a sample index, but we + // are only allowed to set it for multisample views + if (isMultisampled && texture.imageInfo.ms == 1) { + DxbcRegisterValue sampleId = emitRegisterLoad( + ins.src[2], DxbcRegMask(true, false, false, false)); + + imageOperands.flags |= spv::ImageOperandsSampleMask; + imageOperands.sSampleId = sampleId.id; + } + + // Extract coordinates from address + const DxbcRegisterValue coord = emitCalcTexCoord(address, texture.imageInfo); + + // Reading a typed image or buffer view + // always returns a four-component vector. + const uint32_t imageId = m_module.opLoad(texture.imageTypeId, texture.varId); + + DxbcVectorType texelType; + texelType.ctype = texture.sampledType; + texelType.ccount = 4; + + uint32_t texelTypeId = getVectorTypeId(texelType); + uint32_t resultTypeId = texelTypeId; + uint32_t resultId = 0; + + if (imageOperands.sparse) + resultTypeId = getSparseResultTypeId(texelTypeId); + + resultId = m_module.opImageFetch(resultTypeId, + imageId, coord.id, imageOperands); + + DxbcRegisterValue result; + result.type = texelType; + result.id = imageOperands.sparse + ? 
emitExtractSparseTexel(texelTypeId, resultId) + : resultId; + + // Swizzle components using the texture swizzle + // and the destination operand's write mask + result = emitRegisterSwizzle(result, + ins.src[1].swizzle, ins.dst[0].mask); + + emitRegisterStore(ins.dst[0], result); + + if (imageOperands.sparse) + emitStoreSparseFeedback(ins.dst[1], resultId); + } + + + void DxbcCompiler::emitTextureGather(const DxbcShaderInstruction& ins) { + // Gather4 takes the following operands: + // (dst0) The destination register + // (dst1) The residency code for sparse ops + // (src0) Texture coordinates + // (src1) The texture itself + // (src2) The sampler, with a component selector + // Gather4C takes the following additional operand: + // (src3) The depth reference value + // The Gather4Po variants take an additional operand + // which defines an extended constant offset. + // TODO reduce code duplication by moving some common code + // in both sample() and gather() into separate methods + const bool isExtendedGather = ins.op == DxbcOpcode::Gather4Po + || ins.op == DxbcOpcode::Gather4PoC + || ins.op == DxbcOpcode::Gather4PoS + || ins.op == DxbcOpcode::Gather4PoCS; + + const DxbcRegister& texCoordReg = ins.src[0]; + const DxbcRegister& textureReg = ins.src[1 + isExtendedGather]; + const DxbcRegister& samplerReg = ins.src[2 + isExtendedGather]; + + // Texture and sampler register IDs + const auto& texture = m_textures.at(textureReg.idx[0].offset); + const auto& sampler = m_samplers.at(samplerReg.idx[0].offset); + + // Image type, which stores the image dimensions etc. + const uint32_t imageLayerDim = getTexLayerDim(texture.imageInfo); + + // Load the texture coordinates. SPIR-V allows these + // to be float4 even if not all components are used. 
+ DxbcRegisterValue coord = emitLoadTexCoord(texCoordReg, texture.imageInfo); + + // Load reference value for depth-compare operations + const bool isDepthCompare = ins.op == DxbcOpcode::Gather4C + || ins.op == DxbcOpcode::Gather4PoC + || ins.op == DxbcOpcode::Gather4CS + || ins.op == DxbcOpcode::Gather4PoCS; + + const DxbcRegisterValue referenceValue = isDepthCompare + ? emitRegisterLoad(ins.src[3 + isExtendedGather], + DxbcRegMask(true, false, false, false)) + : DxbcRegisterValue(); + + // Accumulate additional image operands. + SpirvImageOperands imageOperands; + imageOperands.sparse = ins.dstCount == 2; + + if (isExtendedGather) { + m_module.enableCapability(spv::CapabilityImageGatherExtended); + + DxbcRegisterValue gatherOffset = emitRegisterLoad( + ins.src[1], DxbcRegMask::firstN(imageLayerDim)); + + imageOperands.flags |= spv::ImageOperandsOffsetMask; + imageOperands.gOffset = gatherOffset.id; + } else if (ins.sampleControls.u != 0 || ins.sampleControls.v != 0 || ins.sampleControls.w != 0) { + const std::array offsetIds = { + imageLayerDim >= 1 ? m_module.consti32(ins.sampleControls.u) : 0, + imageLayerDim >= 2 ? m_module.consti32(ins.sampleControls.v) : 0, + imageLayerDim >= 3 ? m_module.consti32(ins.sampleControls.w) : 0, + }; + + imageOperands.flags |= spv::ImageOperandsConstOffsetMask; + imageOperands.sConstOffset = offsetIds[0]; + + if (imageLayerDim > 1) { + imageOperands.sConstOffset = m_module.constComposite( + getVectorTypeId({ DxbcScalarType::Sint32, imageLayerDim }), + imageLayerDim, offsetIds.data()); + } + } + + // Gathering texels always returns a four-component + // vector, even for the depth-compare variants. 
+ uint32_t sampledImageId = emitLoadSampledImage(texture, sampler, isDepthCompare); + + DxbcVectorType texelType; + texelType.ctype = texture.sampledType; + texelType.ccount = 4; + + uint32_t texelTypeId = getVectorTypeId(texelType); + uint32_t resultTypeId = texelTypeId; + uint32_t resultId = 0; + + if (imageOperands.sparse) + resultTypeId = getSparseResultTypeId(texelTypeId); + + switch (ins.op) { + // Simple image gather operation + case DxbcOpcode::Gather4: + case DxbcOpcode::Gather4S: + case DxbcOpcode::Gather4Po: + case DxbcOpcode::Gather4PoS: { + resultId = m_module.opImageGather( + resultTypeId, sampledImageId, coord.id, + m_module.consti32(samplerReg.swizzle[0]), + imageOperands); + } break; + + // Depth-compare operation + case DxbcOpcode::Gather4C: + case DxbcOpcode::Gather4CS: + case DxbcOpcode::Gather4PoC: + case DxbcOpcode::Gather4PoCS: { + resultId = m_module.opImageDrefGather( + resultTypeId, sampledImageId, coord.id, + referenceValue.id, imageOperands); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // If necessary, deal with the sparse result + DxbcRegisterValue result; + result.type = texelType; + result.id = imageOperands.sparse + ? 
emitExtractSparseTexel(texelTypeId, resultId) + : resultId; + + // Swizzle components using the texture swizzle + // and the destination operand's write mask + result = emitRegisterSwizzle(result, + textureReg.swizzle, ins.dst[0].mask); + + emitRegisterStore(ins.dst[0], result); + + if (imageOperands.sparse) + emitStoreSparseFeedback(ins.dst[1], resultId); + } + + + void DxbcCompiler::emitTextureSample(const DxbcShaderInstruction& ins) { + // All sample instructions have at least these operands: + // (dst0) The destination register + // (src0) Texture coordinates + // (src1) The texture itself + // (src2) The sampler object + const DxbcRegister& texCoordReg = ins.src[0]; + const DxbcRegister& textureReg = ins.src[1]; + const DxbcRegister& samplerReg = ins.src[2]; + + // Texture and sampler register IDs + const auto& texture = m_textures.at(textureReg.idx[0].offset); + const auto& sampler = m_samplers.at(samplerReg.idx[0].offset); + const uint32_t imageLayerDim = getTexLayerDim(texture.imageInfo); + + // Load the texture coordinates. SPIR-V allows these + // to be float4 even if not all components are used. + DxbcRegisterValue coord = emitLoadTexCoord(texCoordReg, texture.imageInfo); + + // Load reference value for depth-compare operations + const bool isDepthCompare = ins.op == DxbcOpcode::SampleC + || ins.op == DxbcOpcode::SampleClz + || ins.op == DxbcOpcode::SampleCClampS + || ins.op == DxbcOpcode::SampleClzS; + + const DxbcRegisterValue referenceValue = isDepthCompare + ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false)) + : DxbcRegisterValue(); + + // Load explicit gradients for sample operations that require them + const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD + || ins.op == DxbcOpcode::SampleDClampS; + + const DxbcRegisterValue explicitGradientX = hasExplicitGradients + ? 
emitRegisterLoad(ins.src[3], DxbcRegMask::firstN(imageLayerDim)) + : DxbcRegisterValue(); + + const DxbcRegisterValue explicitGradientY = hasExplicitGradients + ? emitRegisterLoad(ins.src[4], DxbcRegMask::firstN(imageLayerDim)) + : DxbcRegisterValue(); + + // LOD for certain sample operations + const bool hasLod = ins.op == DxbcOpcode::SampleL + || ins.op == DxbcOpcode::SampleLS + || ins.op == DxbcOpcode::SampleB + || ins.op == DxbcOpcode::SampleBClampS; + + const DxbcRegisterValue lod = hasLod + ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false)) + : DxbcRegisterValue(); + + // Min LOD for certain sparse operations + const bool hasMinLod = ins.op == DxbcOpcode::SampleClampS + || ins.op == DxbcOpcode::SampleBClampS + || ins.op == DxbcOpcode::SampleDClampS + || ins.op == DxbcOpcode::SampleCClampS; + + const DxbcRegisterValue minLod = hasMinLod && ins.src[ins.srcCount - 1].type != DxbcOperandType::Null + ? emitRegisterLoad(ins.src[ins.srcCount - 1], DxbcRegMask(true, false, false, false)) + : DxbcRegisterValue(); + + // Accumulate additional image operands. These are + // not part of the actual operand token in SPIR-V. + SpirvImageOperands imageOperands; + imageOperands.sparse = ins.dstCount == 2; + + if (ins.sampleControls.u != 0 || ins.sampleControls.v != 0 || ins.sampleControls.w != 0) { + const std::array offsetIds = { + imageLayerDim >= 1 ? m_module.consti32(ins.sampleControls.u) : 0, + imageLayerDim >= 2 ? m_module.consti32(ins.sampleControls.v) : 0, + imageLayerDim >= 3 ? 
m_module.consti32(ins.sampleControls.w) : 0, + }; + + imageOperands.flags |= spv::ImageOperandsConstOffsetMask; + imageOperands.sConstOffset = offsetIds[0]; + + if (imageLayerDim > 1) { + imageOperands.sConstOffset = m_module.constComposite( + getVectorTypeId({ DxbcScalarType::Sint32, imageLayerDim }), + imageLayerDim, offsetIds.data()); + } + } + + if (hasMinLod) { + m_module.enableCapability(spv::CapabilityMinLod); + + imageOperands.flags |= spv::ImageOperandsMinLodMask; + imageOperands.sMinLod = minLod.id; + } + + // Combine the texture and the sampler into a sampled image + uint32_t sampledImageId = emitLoadSampledImage(texture, sampler, isDepthCompare); + + // Sampling an image always returns a four-component + // vector, whereas depth-compare ops return a scalar. + DxbcVectorType texelType; + texelType.ctype = texture.sampledType; + texelType.ccount = isDepthCompare ? 1 : 4; + + uint32_t texelTypeId = getVectorTypeId(texelType); + uint32_t resultTypeId = texelTypeId; + uint32_t resultId = 0; + + if (imageOperands.sparse) + resultTypeId = getSparseResultTypeId(texelTypeId); + + switch (ins.op) { + // Simple image sample operation + case DxbcOpcode::Sample: + case DxbcOpcode::SampleClampS: { + resultId = m_module.opImageSampleImplicitLod( + resultTypeId, sampledImageId, coord.id, + imageOperands); + } break; + + // Depth-compare operation + case DxbcOpcode::SampleC: + case DxbcOpcode::SampleCClampS: { + resultId = m_module.opImageSampleDrefImplicitLod( + resultTypeId, sampledImageId, coord.id, + referenceValue.id, imageOperands); + } break; + + // Depth-compare operation on mip level zero + case DxbcOpcode::SampleClz: + case DxbcOpcode::SampleClzS: { + imageOperands.flags |= spv::ImageOperandsLodMask; + imageOperands.sLod = m_module.constf32(0.0f); + + resultId = m_module.opImageSampleDrefExplicitLod( + resultTypeId, sampledImageId, coord.id, + referenceValue.id, imageOperands); + } break; + + // Sample operation with explicit gradients + case 
DxbcOpcode::SampleD: + case DxbcOpcode::SampleDClampS: { + imageOperands.flags |= spv::ImageOperandsGradMask; + imageOperands.sGradX = explicitGradientX.id; + imageOperands.sGradY = explicitGradientY.id; + + resultId = m_module.opImageSampleExplicitLod( + resultTypeId, sampledImageId, coord.id, + imageOperands); + } break; + + // Sample operation with explicit LOD + case DxbcOpcode::SampleL: + case DxbcOpcode::SampleLS: { + imageOperands.flags |= spv::ImageOperandsLodMask; + imageOperands.sLod = lod.id; + + resultId = m_module.opImageSampleExplicitLod( + resultTypeId, sampledImageId, coord.id, + imageOperands); + } break; + + // Sample operation with LOD bias + case DxbcOpcode::SampleB: + case DxbcOpcode::SampleBClampS: { + imageOperands.flags |= spv::ImageOperandsBiasMask; + imageOperands.sLodBias = lod.id; + + resultId = m_module.opImageSampleImplicitLod( + resultTypeId, sampledImageId, coord.id, + imageOperands); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + DxbcRegisterValue result; + result.type = texelType; + result.id = imageOperands.sparse + ? 
emitExtractSparseTexel(texelTypeId, resultId) + : resultId; + + // Swizzle components using the texture swizzle + // and the destination operand's write mask + if (result.type.ccount != 1) { + result = emitRegisterSwizzle(result, + textureReg.swizzle, ins.dst[0].mask); + } + + emitRegisterStore(ins.dst[0], result); + + if (imageOperands.sparse) + emitStoreSparseFeedback(ins.dst[1], resultId); + } + + + void DxbcCompiler::emitTypedUavLoad(const DxbcShaderInstruction& ins) { + // load_uav_typed has three operands: + // (dst0) The destination register + // (src0) The texture or buffer coordinates + // (src1) The UAV to load from + const uint32_t registerId = ins.src[1].idx[0].offset; + const DxbcUav uavInfo = m_uavs.at(registerId); + + // Load texture coordinates + DxbcRegisterValue texCoord = emitLoadTexCoord( + ins.src[0], uavInfo.imageInfo); + + SpirvImageOperands imageOperands; + imageOperands.sparse = ins.dstCount == 2; + + if (uavInfo.coherence) { + imageOperands.flags |= spv::ImageOperandsNonPrivateTexelMask + | spv::ImageOperandsMakeTexelVisibleMask; + imageOperands.makeVisible = m_module.constu32(uavInfo.coherence); + } + + DxbcVectorType texelType; + texelType.ctype = uavInfo.sampledType; + texelType.ccount = 4; + + uint32_t texelTypeId = getVectorTypeId(texelType); + uint32_t resultTypeId = texelTypeId; + uint32_t resultId = 0; + + if (imageOperands.sparse) + resultTypeId = getSparseResultTypeId(texelTypeId); + + // Load source value from the UAV + resultId = m_module.opImageRead(resultTypeId, + m_module.opLoad(uavInfo.imageTypeId, uavInfo.varId), + texCoord.id, imageOperands); + + // Apply component swizzle and mask + DxbcRegisterValue uavValue; + uavValue.type = texelType; + uavValue.id = imageOperands.sparse + ? 
emitExtractSparseTexel(texelTypeId, resultId) + : resultId; + + uavValue = emitRegisterSwizzle(uavValue, + ins.src[1].swizzle, ins.dst[0].mask); + + emitRegisterStore(ins.dst[0], uavValue); + + if (imageOperands.sparse) + emitStoreSparseFeedback(ins.dst[1], resultId); + } + + + void DxbcCompiler::emitTypedUavStore(const DxbcShaderInstruction& ins) { + // store_uav_typed has three operands: + // (dst0) The destination UAV + // (src0) The texture or buffer coordinates + // (src1) The value to store + const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]); + + // Set image operands for coherent access if necessary + SpirvImageOperands imageOperands; + + if (uavInfo.coherence) { + imageOperands.flags |= spv::ImageOperandsNonPrivateTexelMask + | spv::ImageOperandsMakeTexelAvailableMask; + imageOperands.makeAvailable = m_module.constu32(uavInfo.coherence); + } + + // Load texture coordinates + DxbcRegisterValue texCoord = emitLoadTexCoord(ins.src[0], uavInfo.image); + + // Load the value that will be written to the image. We'll + // have to cast it to the component type of the image. + const DxbcRegisterValue texValue = emitRegisterBitcast( + emitRegisterLoad(ins.src[1], DxbcRegMask(true, true, true, true)), + uavInfo.stype); + + // Write the given value to the image + m_module.opImageWrite( + m_module.opLoad(uavInfo.typeId, uavInfo.varId), + texCoord.id, texValue.id, imageOperands); + } + + + void DxbcCompiler::emitControlFlowIf(const DxbcShaderInstruction& ins) { + // Load the first component of the condition + // operand and perform a zero test on it. + const DxbcRegisterValue condition = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + // Declare the 'if' block. We do not know if there + // will be an 'else' block or not, so we'll assume + // that there is one and leave it empty otherwise. 
+ DxbcCfgBlock block; + block.type = DxbcCfgBlockType::If; + block.b_if.ztestId = emitRegisterZeroTest(condition, ins.controls.zeroTest()).id; + block.b_if.labelIf = m_module.allocateId(); + block.b_if.labelElse = 0; + block.b_if.labelEnd = m_module.allocateId(); + block.b_if.headerPtr = m_module.getInsertionPtr(); + m_controlFlowBlocks.push_back(block); + + // We'll insert the branch instruction when closing + // the block, since we don't know whether or not an + // else block is needed right now. + m_module.opLabel(block.b_if.labelIf); + } + + + void DxbcCompiler::emitControlFlowElse(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::If + || m_controlFlowBlocks.back().b_if.labelElse != 0) + throw DxvkError("DxbcCompiler: 'Else' without 'If' found"); + + // Set the 'Else' flag so that we do + // not insert a dummy block on 'EndIf' + DxbcCfgBlock& block = m_controlFlowBlocks.back(); + block.b_if.labelElse = m_module.allocateId(); + + // Close the 'If' block by branching to + // the merge block we declared earlier + m_module.opBranch(block.b_if.labelEnd); + m_module.opLabel (block.b_if.labelElse); + } + + + void DxbcCompiler::emitControlFlowEndIf(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::If) + throw DxvkError("DxbcCompiler: 'EndIf' without 'If' found"); + + // Remove the block from the stack, it's closed + DxbcCfgBlock block = m_controlFlowBlocks.back(); + m_controlFlowBlocks.pop_back(); + + // Write out the 'if' header + m_module.beginInsertion(block.b_if.headerPtr); + + m_module.opSelectionMerge( + block.b_if.labelEnd, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + block.b_if.ztestId, + block.b_if.labelIf, + block.b_if.labelElse != 0 + ? 
block.b_if.labelElse + : block.b_if.labelEnd); + + m_module.endInsertion(); + + // End the active 'if' or 'else' block + m_module.opBranch(block.b_if.labelEnd); + m_module.opLabel (block.b_if.labelEnd); + } + + + void DxbcCompiler::emitControlFlowSwitch(const DxbcShaderInstruction& ins) { + // Load the selector as a scalar unsigned integer + const DxbcRegisterValue selector = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + // Declare switch block. We cannot insert the switch + // instruction itself yet because the number of case + // statements and blocks is unknown at this point. + DxbcCfgBlock block; + block.type = DxbcCfgBlockType::Switch; + block.b_switch.insertPtr = m_module.getInsertionPtr(); + block.b_switch.selectorId = selector.id; + block.b_switch.labelBreak = m_module.allocateId(); + block.b_switch.labelCase = m_module.allocateId(); + block.b_switch.labelDefault = 0; + block.b_switch.labelCases = nullptr; + m_controlFlowBlocks.push_back(block); + + // Define the first 'case' label + m_module.opLabel(block.b_switch.labelCase); + } + + + void DxbcCompiler::emitControlFlowCase(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::Switch) + throw DxvkError("DxbcCompiler: 'Case' without 'Switch' found"); + + // The source operand must be a 32-bit immediate. + if (ins.src[0].type != DxbcOperandType::Imm32) + throw DxvkError("DxbcCompiler: Invalid operand type for 'Case'"); + + // Use the last label allocated for 'case'. 
+ DxbcCfgBlockSwitch* block = &m_controlFlowBlocks.back().b_switch; + + if (caseBlockIsFallthrough()) { + block->labelCase = m_module.allocateId(); + + m_module.opBranch(block->labelCase); + m_module.opLabel (block->labelCase); + } + + DxbcSwitchLabel label; + label.desc.literal = ins.src[0].imm.u32_1; + label.desc.labelId = block->labelCase; + label.next = block->labelCases; + block->labelCases = new DxbcSwitchLabel(label); + } + + + void DxbcCompiler::emitControlFlowDefault(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::Switch) + throw DxvkError("DxbcCompiler: 'Default' without 'Switch' found"); + + DxbcCfgBlockSwitch* block = &m_controlFlowBlocks.back().b_switch; + + if (caseBlockIsFallthrough()) { + block->labelCase = m_module.allocateId(); + + m_module.opBranch(block->labelCase); + m_module.opLabel (block->labelCase); + } + + // Set the last label allocated for 'case' as the default label. + block->labelDefault = block->labelCase; + } + + + void DxbcCompiler::emitControlFlowEndSwitch(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::Switch) + throw DxvkError("DxbcCompiler: 'EndSwitch' without 'Switch' found"); + + // Remove the block from the stack, it's closed + DxbcCfgBlock block = m_controlFlowBlocks.back(); + m_controlFlowBlocks.pop_back(); + + if (!block.b_switch.labelDefault) { + block.b_switch.labelDefault = caseBlockIsFallthrough() + ? block.b_switch.labelBreak + : block.b_switch.labelCase; + } + + // Close the current 'case' block + m_module.opBranch(block.b_switch.labelBreak); + + // Insert the 'switch' statement. For that, we need to + // gather all the literal-label pairs for the construct. 
+ m_module.beginInsertion(block.b_switch.insertPtr); + m_module.opSelectionMerge( + block.b_switch.labelBreak, + spv::SelectionControlMaskNone); + + // We'll restore the original order of the case labels here + std::vector jumpTargets; + for (auto i = block.b_switch.labelCases; i != nullptr; i = i->next) + jumpTargets.insert(jumpTargets.begin(), i->desc); + + m_module.opSwitch( + block.b_switch.selectorId, + block.b_switch.labelDefault, + jumpTargets.size(), + jumpTargets.data()); + m_module.endInsertion(); + + // Destroy the list of case labels + // FIXME we're leaking memory if compilation fails. + DxbcSwitchLabel* caseLabel = block.b_switch.labelCases; + + while (caseLabel != nullptr) + delete std::exchange(caseLabel, caseLabel->next); + + // Begin new block after switch blocks + m_module.opLabel(block.b_switch.labelBreak); + } + + + void DxbcCompiler::emitControlFlowLoop(const DxbcShaderInstruction& ins) { + // Declare the 'loop' block + DxbcCfgBlock block; + block.type = DxbcCfgBlockType::Loop; + block.b_loop.labelHeader = m_module.allocateId(); + block.b_loop.labelBegin = m_module.allocateId(); + block.b_loop.labelContinue = m_module.allocateId(); + block.b_loop.labelBreak = m_module.allocateId(); + m_controlFlowBlocks.push_back(block); + + m_module.opBranch(block.b_loop.labelHeader); + m_module.opLabel (block.b_loop.labelHeader); + + m_module.opLoopMerge( + block.b_loop.labelBreak, + block.b_loop.labelContinue, + spv::LoopControlMaskNone); + + m_module.opBranch(block.b_loop.labelBegin); + m_module.opLabel (block.b_loop.labelBegin); + } + + + void DxbcCompiler::emitControlFlowEndLoop(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() == 0 + || m_controlFlowBlocks.back().type != DxbcCfgBlockType::Loop) + throw DxvkError("DxbcCompiler: 'EndLoop' without 'Loop' found"); + + // Remove the block from the stack, it's closed + const DxbcCfgBlock block = m_controlFlowBlocks.back(); + m_controlFlowBlocks.pop_back(); + + // Declare the continue block 
+ m_module.opBranch(block.b_loop.labelContinue); + m_module.opLabel (block.b_loop.labelContinue); + + // Declare the merge block + m_module.opBranch(block.b_loop.labelHeader); + m_module.opLabel (block.b_loop.labelBreak); + } + + + void DxbcCompiler::emitControlFlowBreak(const DxbcShaderInstruction& ins) { + const bool isBreak = ins.op == DxbcOpcode::Break; + + DxbcCfgBlock* cfgBlock = isBreak + ? cfgFindBlock({ DxbcCfgBlockType::Loop, DxbcCfgBlockType::Switch }) + : cfgFindBlock({ DxbcCfgBlockType::Loop }); + + if (cfgBlock == nullptr) + throw DxvkError("DxbcCompiler: 'Break' or 'Continue' outside 'Loop' or 'Switch' found"); + + if (cfgBlock->type == DxbcCfgBlockType::Loop) { + m_module.opBranch(isBreak + ? cfgBlock->b_loop.labelBreak + : cfgBlock->b_loop.labelContinue); + } else /* if (cfgBlock->type == DxbcCfgBlockType::Switch) */ { + m_module.opBranch(cfgBlock->b_switch.labelBreak); + } + + // Subsequent instructions assume that there is an open block + const uint32_t labelId = m_module.allocateId(); + m_module.opLabel(labelId); + + // If this is on the same level as a switch-case construct, + // rather than being nested inside an 'if' statement, close + // the current 'case' block. + if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch) + cfgBlock->b_switch.labelCase = labelId; + } + + + void DxbcCompiler::emitControlFlowBreakc(const DxbcShaderInstruction& ins) { + const bool isBreak = ins.op == DxbcOpcode::Breakc; + + DxbcCfgBlock* cfgBlock = isBreak + ? 
cfgFindBlock({ DxbcCfgBlockType::Loop, DxbcCfgBlockType::Switch }) + : cfgFindBlock({ DxbcCfgBlockType::Loop }); + + if (cfgBlock == nullptr) + throw DxvkError("DxbcCompiler: 'Breakc' or 'Continuec' outside 'Loop' or 'Switch' found"); + + // Perform zero test on the first component of the condition + const DxbcRegisterValue condition = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + const DxbcRegisterValue zeroTest = emitRegisterZeroTest( + condition, ins.controls.zeroTest()); + + // We basically have to wrap this into an 'if' block + const uint32_t breakBlock = m_module.allocateId(); + const uint32_t mergeBlock = m_module.allocateId(); + + m_module.opSelectionMerge(mergeBlock, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + zeroTest.id, breakBlock, mergeBlock); + + m_module.opLabel(breakBlock); + + if (cfgBlock->type == DxbcCfgBlockType::Loop) { + m_module.opBranch(isBreak + ? cfgBlock->b_loop.labelBreak + : cfgBlock->b_loop.labelContinue); + } else /* if (cfgBlock->type == DxbcCfgBlockType::Switch) */ { + m_module.opBranch(cfgBlock->b_switch.labelBreak); + } + + m_module.opLabel(mergeBlock); + } + + + void DxbcCompiler::emitControlFlowRet(const DxbcShaderInstruction& ins) { + if (m_controlFlowBlocks.size() != 0) { + uint32_t labelId = m_module.allocateId(); + + m_module.opReturn(); + m_module.opLabel(labelId); + + // return can be used in place of break to terminate a case block + if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch) + m_controlFlowBlocks.back().b_switch.labelCase = labelId; + } else { + // Last instruction in the current function + this->emitFunctionEnd(); + } + } + + + void DxbcCompiler::emitControlFlowRetc(const DxbcShaderInstruction& ins) { + // Perform zero test on the first component of the condition + const DxbcRegisterValue condition = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + const DxbcRegisterValue zeroTest = emitRegisterZeroTest( + 
condition, ins.controls.zeroTest()); + + // We basically have to wrap this into an 'if' block + const uint32_t returnLabel = m_module.allocateId(); + const uint32_t continueLabel = m_module.allocateId(); + + m_module.opSelectionMerge(continueLabel, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + zeroTest.id, returnLabel, continueLabel); + + m_module.opLabel(returnLabel); + m_module.opReturn(); + + m_module.opLabel(continueLabel); + } + + + void DxbcCompiler::emitControlFlowDiscard(const DxbcShaderInstruction& ins) { + // Discard actually has an operand that determines + // whether or not the fragment should be discarded + const DxbcRegisterValue condition = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + const DxbcRegisterValue zeroTest = emitRegisterZeroTest( + condition, ins.controls.zeroTest()); + + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(zeroTest.id, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + m_module.opDemoteToHelperInvocation(); + m_module.opBranch(cond.labelEnd); + + m_module.opLabel(cond.labelEnd); + + m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation); + } + + + void DxbcCompiler::emitControlFlowLabel(const DxbcShaderInstruction& ins) { + uint32_t functionNr = ins.dst[0].idx[0].offset; + uint32_t functionId = getFunctionId(functionNr); + + this->emitFunctionBegin( + functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + + m_module.opLabel(m_module.allocateId()); + m_module.setDebugName(functionId, str::format("label", functionNr).c_str()); + + m_insideFunction = true; + } + + + void DxbcCompiler::emitControlFlowCall(const DxbcShaderInstruction& ins) { + uint32_t functionNr = ins.src[0].idx[0].offset; + uint32_t functionId = 
getFunctionId(functionNr); + + m_module.opFunctionCall( + m_module.defVoidType(), + functionId, 0, nullptr); + } + + + void DxbcCompiler::emitControlFlowCallc(const DxbcShaderInstruction& ins) { + uint32_t functionNr = ins.src[1].idx[0].offset; + uint32_t functionId = getFunctionId(functionNr); + + // Perform zero test on the first component of the condition + const DxbcRegisterValue condition = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, false, false, false)); + + const DxbcRegisterValue zeroTest = emitRegisterZeroTest( + condition, ins.controls.zeroTest()); + + // We basically have to wrap this into an 'if' block + const uint32_t callLabel = m_module.allocateId(); + const uint32_t skipLabel = m_module.allocateId(); + + m_module.opSelectionMerge(skipLabel, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + zeroTest.id, callLabel, skipLabel); + + m_module.opLabel(callLabel); + m_module.opFunctionCall( + m_module.defVoidType(), + functionId, 0, nullptr); + + m_module.opBranch(skipLabel); + m_module.opLabel(skipLabel); + } + + + void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) { + switch (ins.op) { + case DxbcOpcode::If: + return this->emitControlFlowIf(ins); + + case DxbcOpcode::Else: + return this->emitControlFlowElse(ins); + + case DxbcOpcode::EndIf: + return this->emitControlFlowEndIf(ins); + + case DxbcOpcode::Switch: + return this->emitControlFlowSwitch(ins); + + case DxbcOpcode::Case: + return this->emitControlFlowCase(ins); + + case DxbcOpcode::Default: + return this->emitControlFlowDefault(ins); + + case DxbcOpcode::EndSwitch: + return this->emitControlFlowEndSwitch(ins); + + case DxbcOpcode::Loop: + return this->emitControlFlowLoop(ins); + + case DxbcOpcode::EndLoop: + return this->emitControlFlowEndLoop(ins); + + case DxbcOpcode::Break: + case DxbcOpcode::Continue: + return this->emitControlFlowBreak(ins); + + case DxbcOpcode::Breakc: + case DxbcOpcode::Continuec: + return this->emitControlFlowBreakc(ins); + + 
case DxbcOpcode::Ret: + return this->emitControlFlowRet(ins); + + case DxbcOpcode::Retc: + return this->emitControlFlowRetc(ins); + + case DxbcOpcode::Discard: + return this->emitControlFlowDiscard(ins); + + case DxbcOpcode::Label: + return this->emitControlFlowLabel(ins); + + case DxbcOpcode::Call: + return this->emitControlFlowCall(ins); + + case DxbcOpcode::Callc: + return this->emitControlFlowCallc(ins); + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + } + } + + + DxbcRegisterValue DxbcCompiler::emitBuildConstVecf32( + float x, + float y, + float z, + float w, + const DxbcRegMask& writeMask) { + // TODO refactor these functions into one single template + std::array ids = { 0, 0, 0, 0 }; + uint32_t componentIndex = 0; + + if (writeMask[0]) ids[componentIndex++] = m_module.constf32(x); + if (writeMask[1]) ids[componentIndex++] = m_module.constf32(y); + if (writeMask[2]) ids[componentIndex++] = m_module.constf32(z); + if (writeMask[3]) ids[componentIndex++] = m_module.constf32(w); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = componentIndex; + result.id = componentIndex > 1 + ? m_module.constComposite( + getVectorTypeId(result.type), + componentIndex, ids.data()) + : ids[0]; + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitBuildConstVecu32( + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w, + const DxbcRegMask& writeMask) { + std::array ids = { 0, 0, 0, 0 }; + uint32_t componentIndex = 0; + + if (writeMask[0]) ids[componentIndex++] = m_module.constu32(x); + if (writeMask[1]) ids[componentIndex++] = m_module.constu32(y); + if (writeMask[2]) ids[componentIndex++] = m_module.constu32(z); + if (writeMask[3]) ids[componentIndex++] = m_module.constu32(w); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = componentIndex; + result.id = componentIndex > 1 + ? 
m_module.constComposite( + getVectorTypeId(result.type), + componentIndex, ids.data()) + : ids[0]; + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitBuildConstVeci32( + int32_t x, + int32_t y, + int32_t z, + int32_t w, + const DxbcRegMask& writeMask) { + std::array ids = { 0, 0, 0, 0 }; + uint32_t componentIndex = 0; + + if (writeMask[0]) ids[componentIndex++] = m_module.consti32(x); + if (writeMask[1]) ids[componentIndex++] = m_module.consti32(y); + if (writeMask[2]) ids[componentIndex++] = m_module.consti32(z); + if (writeMask[3]) ids[componentIndex++] = m_module.consti32(w); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Sint32; + result.type.ccount = componentIndex; + result.id = componentIndex > 1 + ? m_module.constComposite( + getVectorTypeId(result.type), + componentIndex, ids.data()) + : ids[0]; + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitBuildConstVecf64( + double xy, + double zw, + const DxbcRegMask& writeMask) { + std::array ids = { 0, 0 }; + uint32_t componentIndex = 0; + + if (writeMask[0] && writeMask[1]) ids[componentIndex++] = m_module.constf64(xy); + if (writeMask[2] && writeMask[3]) ids[componentIndex++] = m_module.constf64(zw); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Float64; + result.type.ccount = componentIndex; + result.id = componentIndex > 1 + ? 
m_module.constComposite( + getVectorTypeId(result.type), + componentIndex, ids.data()) + : ids[0]; + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitBuildVector( + DxbcRegisterValue scalar, + uint32_t count) { + if (count == 1) + return scalar; + + std::array scalarIds = + { scalar.id, scalar.id, scalar.id, scalar.id }; + + DxbcRegisterValue result; + result.type.ctype = scalar.type.ctype; + result.type.ccount = count; + result.id = m_module.constComposite( + getVectorTypeId(result.type), + count, scalarIds.data()); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitBuildZeroVector( + DxbcVectorType type) { + DxbcRegisterValue result; + result.type.ctype = type.ctype; + result.type.ccount = 1; + + switch (type.ctype) { + case DxbcScalarType::Float32: result.id = m_module.constf32(0.0f); break; + case DxbcScalarType::Uint32: result.id = m_module.constu32(0u); break; + case DxbcScalarType::Sint32: result.id = m_module.consti32(0); break; + default: throw DxvkError("DxbcCompiler: Invalid scalar type"); + } + + return emitBuildVector(result, type.ccount); + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterBitcast( + DxbcRegisterValue srcValue, + DxbcScalarType dstType) { + DxbcScalarType srcType = srcValue.type.ctype; + + if (srcType == dstType) + return srcValue; + + DxbcRegisterValue result; + result.type.ctype = dstType; + result.type.ccount = srcValue.type.ccount; + + if (isDoubleType(srcType)) result.type.ccount *= 2; + if (isDoubleType(dstType)) result.type.ccount /= 2; + + result.id = m_module.opBitcast( + getVectorTypeId(result.type), + srcValue.id); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterSwizzle( + DxbcRegisterValue value, + DxbcRegSwizzle swizzle, + DxbcRegMask writeMask) { + if (value.type.ccount == 1) + return emitRegisterExtend(value, writeMask.popCount()); + + std::array indices; + + uint32_t dstIndex = 0; + + for (uint32_t i = 0; i < 4; i++) { + if (writeMask[i]) + indices[dstIndex++] = 
swizzle[i]; + } + + // If the swizzle combined with the mask can be reduced + // to a no-op, we don't need to insert any instructions. + bool isIdentitySwizzle = dstIndex == value.type.ccount; + + for (uint32_t i = 0; i < dstIndex && isIdentitySwizzle; i++) + isIdentitySwizzle &= indices[i] == i; + + if (isIdentitySwizzle) + return value; + + // Use OpCompositeExtract if the resulting vector contains + // only one component, and OpVectorShuffle if it is a vector. + DxbcRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = dstIndex; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (dstIndex == 1) { + result.id = m_module.opCompositeExtract( + typeId, value.id, 1, indices.data()); + } else { + result.id = m_module.opVectorShuffle( + typeId, value.id, value.id, + dstIndex, indices.data()); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterExtract( + DxbcRegisterValue value, + DxbcRegMask mask) { + return emitRegisterSwizzle(value, + DxbcRegSwizzle(0, 1, 2, 3), mask); + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterInsert( + DxbcRegisterValue dstValue, + DxbcRegisterValue srcValue, + DxbcRegMask srcMask) { + DxbcRegisterValue result; + result.type = dstValue.type; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (srcMask.popCount() == 0) { + // Nothing to do if the insertion mask is empty + result.id = dstValue.id; + } else if (dstValue.type.ccount == 1) { + // Both values are scalar, so the first component + // of the write mask decides which one to take. + result.id = srcMask[0] ? srcValue.id : dstValue.id; + } else if (srcValue.type.ccount == 1) { + // The source value is scalar. Since OpVectorShuffle + // requires both arguments to be vectors, we have to + // use OpCompositeInsert to modify the vector instead. 
+ const uint32_t componentId = srcMask.firstSet(); + + result.id = m_module.opCompositeInsert(typeId, + srcValue.id, dstValue.id, 1, &componentId); + } else { + // Both arguments are vectors. We can determine which + // components to take from which vector and use the + // OpVectorShuffle instruction. + std::array components; + uint32_t srcComponentId = dstValue.type.ccount; + + for (uint32_t i = 0; i < dstValue.type.ccount; i++) + components.at(i) = srcMask[i] ? srcComponentId++ : i; + + result.id = m_module.opVectorShuffle( + typeId, dstValue.id, srcValue.id, + dstValue.type.ccount, components.data()); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterConcat( + DxbcRegisterValue value1, + DxbcRegisterValue value2) { + std::array ids = + {{ value1.id, value2.id }}; + + DxbcRegisterValue result; + result.type.ctype = value1.type.ctype; + result.type.ccount = value1.type.ccount + value2.type.ccount; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + ids.size(), ids.data()); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterExtend( + DxbcRegisterValue value, + uint32_t size) { + if (size == 1) + return value; + + std::array ids = {{ + value.id, value.id, + value.id, value.id, + }}; + + DxbcRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = size; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + size, ids.data()); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterAbsolute( + DxbcRegisterValue value) { + const uint32_t typeId = getVectorTypeId(value.type); + + switch (value.type.ctype) { + case DxbcScalarType::Float32: value.id = m_module.opFAbs(typeId, value.id); break; + case DxbcScalarType::Float64: value.id = m_module.opFAbs(typeId, value.id); break; + case DxbcScalarType::Sint32: value.id = m_module.opSAbs(typeId, value.id); break; + case DxbcScalarType::Sint64: value.id = m_module.opSAbs(typeId, value.id); 
break; + default: Logger::warn("DxbcCompiler: Cannot get absolute value for given type"); + } + + return value; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterNegate( + DxbcRegisterValue value) { + const uint32_t typeId = getVectorTypeId(value.type); + + switch (value.type.ctype) { + case DxbcScalarType::Float32: value.id = m_module.opFNegate(typeId, value.id); break; + case DxbcScalarType::Float64: value.id = m_module.opFNegate(typeId, value.id); break; + case DxbcScalarType::Sint32: value.id = m_module.opSNegate(typeId, value.id); break; + case DxbcScalarType::Sint64: value.id = m_module.opSNegate(typeId, value.id); break; + default: Logger::warn("DxbcCompiler: Cannot negate given type"); + } + + return value; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterZeroTest( + DxbcRegisterValue value, + DxbcZeroTest test) { + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Bool; + result.type.ccount = 1; + + const uint32_t zeroId = m_module.constu32(0u); + const uint32_t typeId = getVectorTypeId(result.type); + + result.id = test == DxbcZeroTest::TestZ + ? 
m_module.opIEqual (typeId, value.id, zeroId) + : m_module.opINotEqual(typeId, value.id, zeroId); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterMaskBits( + DxbcRegisterValue value, + uint32_t mask) { + DxbcRegisterValue maskVector = emitBuildConstVecu32( + mask, mask, mask, mask, DxbcRegMask::firstN(value.type.ccount)); + + DxbcRegisterValue result; + result.type = value.type; + result.id = m_module.opBitwiseAnd( + getVectorTypeId(result.type), + value.id, maskVector.id); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitSrcOperandModifiers( + DxbcRegisterValue value, + DxbcRegModifiers modifiers) { + if (modifiers.test(DxbcRegModifier::Abs)) + value = emitRegisterAbsolute(value); + + if (modifiers.test(DxbcRegModifier::Neg)) + value = emitRegisterNegate(value); + return value; + } + + + uint32_t DxbcCompiler::emitExtractSparseTexel( + uint32_t texelTypeId, + uint32_t resultId) { + uint32_t index = 1; + + return m_module.opCompositeExtract( + texelTypeId, resultId, 1, &index); + } + + + void DxbcCompiler::emitStoreSparseFeedback( + const DxbcRegister& feedbackRegister, + uint32_t resultId) { + if (feedbackRegister.type != DxbcOperandType::Null) { + uint32_t index = 0; + + DxbcRegisterValue result; + result.type = { DxbcScalarType::Uint32, 1 }; + result.id = m_module.opCompositeExtract( + getScalarTypeId(DxbcScalarType::Uint32), + resultId, 1, &index); + + emitRegisterStore(feedbackRegister, result); + } + } + + + DxbcRegisterValue DxbcCompiler::emitDstOperandModifiers( + DxbcRegisterValue value, + DxbcOpModifiers modifiers) { + const uint32_t typeId = getVectorTypeId(value.type); + + if (modifiers.saturate) { + DxbcRegMask mask; + DxbcRegisterValue vec0, vec1; + + if (value.type.ctype == DxbcScalarType::Float32) { + mask = DxbcRegMask::firstN(value.type.ccount); + vec0 = emitBuildConstVecf32(0.0f, 0.0f, 0.0f, 0.0f, mask); + vec1 = emitBuildConstVecf32(1.0f, 1.0f, 1.0f, 1.0f, mask); + } else if (value.type.ctype == 
DxbcScalarType::Float64) { + mask = DxbcRegMask::firstN(value.type.ccount * 2); + vec0 = emitBuildConstVecf64(0.0, 0.0, mask); + vec1 = emitBuildConstVecf64(1.0, 1.0, mask); + } + + if (mask) + value.id = m_module.opNClamp(typeId, value.id, vec0.id, vec1.id); + } + + return value; + } + + + DxbcRegisterPointer DxbcCompiler::emitArrayAccess( + DxbcRegisterPointer pointer, + spv::StorageClass sclass, + uint32_t index) { + uint32_t ptrTypeId = m_module.defPointerType( + getVectorTypeId(pointer.type), sclass); + + DxbcRegisterPointer result; + result.type = pointer.type; + result.id = m_module.opAccessChain( + ptrTypeId, pointer.id, 1, &index); + return result; + } + + + uint32_t DxbcCompiler::emitLoadSampledImage( + const DxbcShaderResource& textureResource, + const DxbcSampler& samplerResource, + bool isDepthCompare) { + const uint32_t sampledImageType = isDepthCompare + ? m_module.defSampledImageType(textureResource.depthTypeId) + : m_module.defSampledImageType(textureResource.colorTypeId); + + return m_module.opSampledImage(sampledImageType, + m_module.opLoad(textureResource.imageTypeId, textureResource.varId), + m_module.opLoad(samplerResource.typeId, samplerResource.varId)); + } + + + DxbcRegisterPointer DxbcCompiler::emitGetTempPtr( + const DxbcRegister& operand) { + // r# regs are indexed as follows: + // (0) register index (immediate) + uint32_t regIdx = operand.idx[0].offset; + + if (regIdx >= m_rRegs.size()) + m_rRegs.resize(regIdx + 1, 0u); + + if (!m_rRegs.at(regIdx)) { + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.type.alength = 0; + info.sclass = spv::StorageClassPrivate; + + uint32_t varId = emitNewVariable(info); + m_rRegs.at(regIdx) = varId; + + m_module.setDebugName(varId, + str::format("r", regIdx).c_str()); + } + + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + result.id = m_rRegs.at(regIdx); + return result; + } + + + DxbcRegisterPointer 
DxbcCompiler::emitGetIndexableTempPtr( + const DxbcRegister& operand) { + return getIndexableTempPtr(operand, emitIndexLoad(operand.idx[1])); + } + + + DxbcRegisterPointer DxbcCompiler::emitGetInputPtr( + const DxbcRegister& operand) { + // In the vertex and pixel stages, + // v# regs are indexed as follows: + // (0) register index (relative) + // + // In the tessellation and geometry + // stages, the index has two dimensions: + // (0) vertex index (relative) + // (1) register index (relative) + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + + std::array indices = {{ 0, 0 }}; + + for (uint32_t i = 0; i < operand.idxDim; i++) + indices.at(i) = emitIndexLoad(operand.idx[i]).id; + + // Pick the input array depending on + // the program type and operand type + struct InputArray { + uint32_t id; + spv::StorageClass sclass; + }; + + const InputArray array = [&] () -> InputArray { + switch (operand.type) { + case DxbcOperandType::InputControlPoint: + return m_programInfo.type() == DxbcProgramType::HullShader + ? InputArray { m_vArray, spv::StorageClassPrivate } + : InputArray { m_ds.inputPerVertex, spv::StorageClassInput }; + case DxbcOperandType::InputPatchConstant: + return m_programInfo.type() == DxbcProgramType::HullShader + ? 
InputArray { m_hs.outputPerPatch, spv::StorageClassPrivate } + : InputArray { m_ds.inputPerPatch, spv::StorageClassInput }; + case DxbcOperandType::OutputControlPoint: + return InputArray { m_hs.outputPerVertex, spv::StorageClassOutput }; + default: + return { m_vArray, spv::StorageClassPrivate }; + } + }(); + + DxbcRegisterInfo info; + info.type.ctype = result.type.ctype; + info.type.ccount = result.type.ccount; + info.type.alength = 0; + info.sclass = array.sclass; + + result.id = m_module.opAccessChain( + getPointerTypeId(info), array.id, + operand.idxDim, indices.data()); + + return result; + } + + + DxbcRegisterPointer DxbcCompiler::emitGetOutputPtr( + const DxbcRegister& operand) { + if (m_programInfo.type() == DxbcProgramType::HullShader) { + // Hull shaders are special in that they have two sets of + // output registers, one for per-patch values and one for + // per-vertex values. + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + + uint32_t registerId = emitIndexLoad(operand.idx[0]).id; + + if (m_hs.currPhaseType == DxbcCompilerHsPhase::ControlPoint) { + std::array indices = {{ + m_module.opLoad(m_module.defIntType(32, 0), m_hs.builtinInvocationId), + registerId, + }}; + + uint32_t ptrTypeId = m_module.defPointerType( + getVectorTypeId(result.type), + spv::StorageClassOutput); + + result.id = m_module.opAccessChain( + ptrTypeId, m_hs.outputPerVertex, + indices.size(), indices.data()); + } else { + uint32_t ptrTypeId = m_module.defPointerType( + getVectorTypeId(result.type), + spv::StorageClassPrivate); + + result.id = m_module.opAccessChain( + ptrTypeId, m_hs.outputPerPatch, + 1, ®isterId); + } + + return result; + } else { + // Regular shaders have their output + // registers set up at declaration time + return m_oRegs.at(operand.idx[0].offset); + } + } + + + DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr( + const DxbcRegister& operand) { + const DxbcRegisterValue constId + = 
emitIndexLoad(operand.idx[0]); + + if (m_immConstBuf != 0) { + DxbcRegisterInfo ptrInfo; + ptrInfo.type.ctype = DxbcScalarType::Uint32; + ptrInfo.type.ccount = 4; + ptrInfo.type.alength = 0; + ptrInfo.sclass = spv::StorageClassPrivate; + + DxbcRegisterPointer result; + result.type.ctype = ptrInfo.type.ctype; + result.type.ccount = ptrInfo.type.ccount; + result.id = m_module.opAccessChain( + getPointerTypeId(ptrInfo), + m_immConstBuf, 1, &constId.id); + return result; + } else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) { + const std::array indices = + {{ m_module.consti32(0), constId.id }}; + + DxbcRegisterInfo ptrInfo; + ptrInfo.type.ctype = DxbcScalarType::Float32; + ptrInfo.type.ccount = 4; + ptrInfo.type.alength = 0; + ptrInfo.sclass = spv::StorageClassUniform; + + DxbcRegisterPointer result; + result.type.ctype = ptrInfo.type.ctype; + result.type.ccount = ptrInfo.type.ccount; + result.id = m_module.opAccessChain( + getPointerTypeId(ptrInfo), + m_constantBuffers.at(Icb_BindingSlotId).varId, + indices.size(), indices.data()); + return result; + } else { + throw DxvkError("DxbcCompiler: Immediate constant buffer not defined"); + } + } + + + DxbcRegisterPointer DxbcCompiler::emitGetOperandPtr( + const DxbcRegister& operand) { + switch (operand.type) { + case DxbcOperandType::Temp: + return emitGetTempPtr(operand); + + case DxbcOperandType::IndexableTemp: + return emitGetIndexableTempPtr(operand); + + case DxbcOperandType::Input: + case DxbcOperandType::InputControlPoint: + case DxbcOperandType::InputPatchConstant: + case DxbcOperandType::OutputControlPoint: + return emitGetInputPtr(operand); + + case DxbcOperandType::Output: + return emitGetOutputPtr(operand); + + case DxbcOperandType::ImmediateConstantBuffer: + return emitGetImmConstBufPtr(operand); + + case DxbcOperandType::InputThreadId: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 3 }, + m_cs.builtinGlobalInvocationId }; + + case DxbcOperandType::InputThreadGroupId: + return 
DxbcRegisterPointer { + { DxbcScalarType::Uint32, 3 }, + m_cs.builtinWorkgroupId }; + + case DxbcOperandType::InputThreadIdInGroup: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 3 }, + m_cs.builtinLocalInvocationId }; + + case DxbcOperandType::InputThreadIndexInGroup: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + m_cs.builtinLocalInvocationIndex }; + + case DxbcOperandType::InputCoverageMask: { + const std::array indices + = {{ m_module.constu32(0) }}; + + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(result.type), + spv::StorageClassInput), + m_ps.builtinSampleMaskIn, + indices.size(), indices.data()); + return result; + } + + case DxbcOperandType::OutputCoverageMask: { + const std::array indices + = {{ m_module.constu32(0) }}; + + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(result.type), + spv::StorageClassOutput), + m_ps.builtinSampleMaskOut, + indices.size(), indices.data()); + return result; + } + + case DxbcOperandType::OutputDepth: + case DxbcOperandType::OutputDepthGe: + case DxbcOperandType::OutputDepthLe: + return DxbcRegisterPointer { + { DxbcScalarType::Float32, 1 }, + m_ps.builtinDepth }; + + case DxbcOperandType::OutputStencilRef: + return DxbcRegisterPointer { + { DxbcScalarType::Sint32, 1 }, + m_ps.builtinStencilRef }; + + case DxbcOperandType::InputPrimitiveId: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + m_primitiveIdIn }; + + case DxbcOperandType::InputDomainPoint: + return DxbcRegisterPointer { + { DxbcScalarType::Float32, 3 }, + m_ds.builtinTessCoord }; + + case DxbcOperandType::OutputControlPointId: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + m_hs.builtinInvocationId }; + + case 
DxbcOperandType::InputForkInstanceId: + case DxbcOperandType::InputJoinInstanceId: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + getCurrentHsForkJoinPhase()->instanceIdPtr }; + + case DxbcOperandType::InputGsInstanceId: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + m_gs.builtinInvocationId }; + + case DxbcOperandType::InputInnerCoverage: + return DxbcRegisterPointer { + { DxbcScalarType::Uint32, 1 }, + m_ps.builtinInnerCoverageId }; + + default: + throw DxvkError(str::format( + "DxbcCompiler: Unhandled operand type: ", + operand.type)); + } + } + + + DxbcRegisterPointer DxbcCompiler::emitGetAtomicPointer( + const DxbcRegister& operand, + const DxbcRegister& address) { + // Query information about the resource itself + const uint32_t registerId = operand.idx[0].offset; + const DxbcBufferInfo resourceInfo = getBufferInfo(operand); + + // For UAVs and shared memory, different methods + // of obtaining the final pointer are used. + bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory; + bool isSsbo = resourceInfo.isSsbo; + + // Compute the actual address into the resource + const DxbcRegisterValue addressValue = [&] { + switch (resourceInfo.type) { + case DxbcResourceType::Raw: + return emitCalcBufferIndexRaw(emitRegisterLoad( + address, DxbcRegMask(true, false, false, false))); + + case DxbcResourceType::Structured: { + const DxbcRegisterValue addressComponents = emitRegisterLoad( + address, DxbcRegMask(true, true, false, false)); + + return emitCalcBufferIndexStructured( + emitRegisterExtract(addressComponents, DxbcRegMask(true, false, false, false)), + emitRegisterExtract(addressComponents, DxbcRegMask(false, true, false, false)), + resourceInfo.stride); + }; + + case DxbcResourceType::Typed: { + if (isTgsm) + throw DxvkError("DxbcCompiler: TGSM cannot be typed"); + + return emitLoadTexCoord(address, + m_uavs.at(registerId).imageInfo); + } + + default: + throw DxvkError("DxbcCompiler: Unhandled resource 
type"); + } + }(); + + // Compute the actual pointer + DxbcRegisterPointer result; + result.type.ctype = resourceInfo.stype; + result.type.ccount = 1; + + if (isTgsm) { + result.id = m_module.opAccessChain(resourceInfo.typeId, + resourceInfo.varId, 1, &addressValue.id); + } else if (isSsbo) { + uint32_t indices[2] = { m_module.constu32(0), addressValue.id }; + result.id = m_module.opAccessChain(resourceInfo.typeId, + resourceInfo.varId, 2, indices); + } else { + result.id = m_module.opImageTexelPointer( + m_module.defPointerType(getVectorTypeId(result.type), spv::StorageClassImage), + resourceInfo.varId, addressValue.id, m_module.constu32(0)); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRawBufferLoad( + const DxbcRegister& operand, + DxbcRegisterValue elementIndex, + DxbcRegMask writeMask, + uint32_t& sparseFeedbackId) { + const DxbcBufferInfo bufferInfo = getBufferInfo(operand); + + // Shared memory is the only type of buffer that + // is not accessed through a texel buffer view + bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory; + bool isSsbo = bufferInfo.isSsbo; + + // Common types and IDs used while loading the data + uint32_t bufferId = isTgsm || isSsbo ? 0 : m_module.opLoad(bufferInfo.typeId, bufferInfo.varId); + + uint32_t vectorTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 4 }); + uint32_t scalarTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 1 }); + + // Since all data is represented as a sequence of 32-bit + // integers, we have to load each component individually. + std::array ccomps = { 0, 0, 0, 0 }; + std::array scomps = { 0, 0, 0, 0 }; + uint32_t scount = 0; + + // The sparse feedback ID will be non-zero for sparse + // instructions on input. We need to reset it to 0. 
+ SpirvMemoryOperands memoryOperands; + SpirvImageOperands imageOperands; + imageOperands.sparse = sparseFeedbackId != 0; + + uint32_t coherence = bufferInfo.coherence; + + if (isTgsm && m_moduleInfo.options.forceVolatileTgsmAccess) { + memoryOperands.flags |= spv::MemoryAccessVolatileMask; + coherence = spv::ScopeWorkgroup; + } + + if (coherence) { + memoryOperands.flags |= spv::MemoryAccessNonPrivatePointerMask; + + if (coherence != spv::ScopeInvocation) { + memoryOperands.flags |= spv::MemoryAccessMakePointerVisibleMask; + memoryOperands.makeVisible = m_module.constu32(coherence); + + imageOperands.flags = spv::ImageOperandsNonPrivateTexelMask + | spv::ImageOperandsMakeTexelVisibleMask; + imageOperands.makeVisible = m_module.constu32(coherence); + } + } + + sparseFeedbackId = 0; + + for (uint32_t i = 0; i < 4; i++) { + uint32_t sindex = operand.swizzle[i]; + + if (!writeMask[i]) + continue; + + if (ccomps[sindex] == 0) { + uint32_t elementIndexAdjusted = m_module.opIAdd( + getVectorTypeId(elementIndex.type), elementIndex.id, + m_module.consti32(sindex)); + + // Load requested component from the buffer + uint32_t zero = 0; + + if (isTgsm) { + ccomps[sindex] = m_module.opLoad(scalarTypeId, + m_module.opAccessChain(bufferInfo.typeId, + bufferInfo.varId, 1, &elementIndexAdjusted), + memoryOperands); + } else if (isSsbo) { + uint32_t indices[2] = { m_module.constu32(0), elementIndexAdjusted }; + ccomps[sindex] = m_module.opLoad(scalarTypeId, + m_module.opAccessChain(bufferInfo.typeId, + bufferInfo.varId, 2, indices), + memoryOperands); + } else { + uint32_t resultTypeId = vectorTypeId; + uint32_t resultId = 0; + + if (imageOperands.sparse) + resultTypeId = getSparseResultTypeId(vectorTypeId); + + if (operand.type == DxbcOperandType::Resource) { + resultId = m_module.opImageFetch(resultTypeId, + bufferId, elementIndexAdjusted, imageOperands); + } else if (operand.type == DxbcOperandType::UnorderedAccessView) { + resultId = m_module.opImageRead(resultTypeId, + 
bufferId, elementIndexAdjusted, imageOperands); + } else { + throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw load"); + } + + // Only read sparse feedback once. This may be somewhat inaccurate + // for reads that straddle pages, but we can't easily emulate this. + if (imageOperands.sparse) { + imageOperands.sparse = false; + sparseFeedbackId = resultId; + + resultId = emitExtractSparseTexel(vectorTypeId, resultId); + } + + ccomps[sindex] = m_module.opCompositeExtract(scalarTypeId, resultId, 1, &zero); + } + } + } + + for (uint32_t i = 0; i < 4; i++) { + uint32_t sindex = operand.swizzle[i]; + + if (writeMask[i]) + scomps[scount++] = ccomps[sindex]; + } + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = scount; + result.id = scomps[0]; + + if (scount > 1) { + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + scount, scomps.data()); + } + + return result; + } + + + void DxbcCompiler::emitRawBufferStore( + const DxbcRegister& operand, + DxbcRegisterValue elementIndex, + DxbcRegisterValue value) { + const DxbcBufferInfo bufferInfo = getBufferInfo(operand); + + // Cast source value to the expected data type + value = emitRegisterBitcast(value, DxbcScalarType::Uint32); + + // Thread Group Shared Memory is not accessed through a texel buffer view + bool isTgsm = operand.type == DxbcOperandType::ThreadGroupSharedMemory; + bool isSsbo = bufferInfo.isSsbo; + + // Perform the actual write operation + uint32_t bufferId = isTgsm || isSsbo ? 
0 : m_module.opLoad(bufferInfo.typeId, bufferInfo.varId); + + uint32_t scalarTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 1 }); + uint32_t vectorTypeId = getVectorTypeId({ DxbcScalarType::Uint32, 4 }); + + uint32_t srcComponentIndex = 0; + + // Set memory operands according to resource properties + SpirvMemoryOperands memoryOperands; + SpirvImageOperands imageOperands; + + uint32_t coherence = bufferInfo.coherence; + + if (isTgsm && m_moduleInfo.options.forceVolatileTgsmAccess) { + memoryOperands.flags |= spv::MemoryAccessVolatileMask; + coherence = spv::ScopeWorkgroup; + } + + if (coherence) { + memoryOperands.flags |= spv::MemoryAccessNonPrivatePointerMask; + + if (coherence != spv::ScopeInvocation) { + memoryOperands.flags |= spv::MemoryAccessMakePointerAvailableMask; + memoryOperands.makeAvailable = m_module.constu32(coherence); + + imageOperands.flags = spv::ImageOperandsNonPrivateTexelMask + | spv::ImageOperandsMakeTexelAvailableMask; + imageOperands.makeAvailable = m_module.constu32(coherence); + } + } + + for (uint32_t i = 0; i < 4; i++) { + if (operand.mask[i]) { + uint32_t srcComponentId = value.type.ccount > 1 + ? m_module.opCompositeExtract(scalarTypeId, + value.id, 1, &srcComponentIndex) + : value.id; + + // Add the component offset to the element index + uint32_t elementIndexAdjusted = i != 0 + ? 
m_module.opIAdd(getVectorTypeId(elementIndex.type), + elementIndex.id, m_module.consti32(i)) + : elementIndex.id; + + if (isTgsm) { + m_module.opStore( + m_module.opAccessChain(bufferInfo.typeId, + bufferInfo.varId, 1, &elementIndexAdjusted), + srcComponentId, memoryOperands); + } else if (isSsbo) { + uint32_t indices[2] = { m_module.constu32(0), elementIndexAdjusted }; + m_module.opStore( + m_module.opAccessChain(bufferInfo.typeId, + bufferInfo.varId, 2, indices), + srcComponentId, memoryOperands); + } else if (operand.type == DxbcOperandType::UnorderedAccessView) { + const std::array srcVectorIds = { + srcComponentId, srcComponentId, + srcComponentId, srcComponentId, + }; + + m_module.opImageWrite( + bufferId, elementIndexAdjusted, + m_module.opCompositeConstruct(vectorTypeId, + 4, srcVectorIds.data()), + imageOperands); + } else { + throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw store"); + } + + // Write next component + srcComponentIndex += 1; + } + } + } + + + DxbcRegisterValue DxbcCompiler::emitQueryBufferSize( + const DxbcRegister& resource) { + const DxbcBufferInfo bufferInfo = getBufferInfo(resource); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opArrayLength( + getVectorTypeId(result.type), + bufferInfo.varId, 0); + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitQueryTexelBufferSize( + const DxbcRegister& resource) { + // Load the texel buffer object. This cannot be used with + // constant buffers or any other type of resource. 
+ const DxbcBufferInfo bufferInfo = getBufferInfo(resource); + + const uint32_t bufferId = m_module.opLoad( + bufferInfo.typeId, bufferInfo.varId); + + // We'll store this as a scalar unsigned integer + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opImageQuerySize( + getVectorTypeId(result.type), bufferId); + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitQueryTextureLods( + const DxbcRegister& resource) { + const DxbcBufferInfo info = getBufferInfo(resource); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + + if (info.image.sampled == 1) { + result.id = m_module.opImageQueryLevels( + getVectorTypeId(result.type), + m_module.opLoad(info.typeId, info.varId)); + } else { + // Report one LOD in case of UAVs + result.id = m_module.constu32(1); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitQueryTextureSamples( + const DxbcRegister& resource) { + if (resource.type == DxbcOperandType::Rasterizer) { + // SPIR-V has no gl_NumSamples equivalent, so we + // have to work around it using a push constant + if (!m_ps.pushConstantId) + m_ps.pushConstantId = emitPushConstants(); + + uint32_t uintTypeId = m_module.defIntType(32, 0); + uint32_t ptrTypeId = m_module.defPointerType(uintTypeId, spv::StorageClassPushConstant); + uint32_t index = m_module.constu32(0); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opLoad(uintTypeId, + m_module.opAccessChain(ptrTypeId, m_ps.pushConstantId, 1, &index)); + return result; + } else { + DxbcBufferInfo info = getBufferInfo(resource); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + + if (info.image.ms) { + result.id = m_module.opImageQuerySamples( + getVectorTypeId(result.type), + m_module.opLoad(info.typeId, info.varId)); + } else { + // 
OpImageQuerySamples requires MSAA images + result.id = m_module.constu32(1); + } + + return result; + } + } + + + DxbcRegisterValue DxbcCompiler::emitQueryTextureSize( + const DxbcRegister& resource, + DxbcRegisterValue lod) { + const DxbcBufferInfo info = getBufferInfo(resource); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = getTexSizeDim(info.image); + + if (info.image.ms == 0 && info.image.sampled == 1) { + result.id = m_module.opImageQuerySizeLod( + getVectorTypeId(result.type), + m_module.opLoad(info.typeId, info.varId), + lod.id); + } else { + result.id = m_module.opImageQuerySize( + getVectorTypeId(result.type), + m_module.opLoad(info.typeId, info.varId)); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitCalcBufferIndexStructured( + DxbcRegisterValue structId, + DxbcRegisterValue structOffset, + uint32_t structStride) { + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Sint32; + result.type.ccount = 1; + + uint32_t typeId = getVectorTypeId(result.type); + uint32_t offset = m_module.opShiftRightLogical(typeId, structOffset.id, m_module.consti32(2)); + + result.id = m_module.opIAdd(typeId, + m_module.opIMul(typeId, structId.id, m_module.consti32(structStride / 4)), + offset); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitCalcBufferIndexRaw( + DxbcRegisterValue byteOffset) { + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Sint32; + result.type.ccount = 1; + + uint32_t typeId = getVectorTypeId(result.type); + result.id = m_module.opShiftRightLogical(typeId, byteOffset.id, m_module.consti32(2)); + return result; + } + + + DxbcRegisterValue DxbcCompiler::emitCalcTexCoord( + DxbcRegisterValue coordVector, + const DxbcImageInfo& imageInfo) { + const uint32_t dim = getTexCoordDim(imageInfo); + + if (dim != coordVector.type.ccount) { + coordVector = emitRegisterExtract( + coordVector, DxbcRegMask::firstN(dim)); + } + + return coordVector; + } + + 
+ DxbcRegisterValue DxbcCompiler::emitLoadTexCoord( + const DxbcRegister& coordReg, + const DxbcImageInfo& imageInfo) { + return emitCalcTexCoord(emitRegisterLoad(coordReg, + DxbcRegMask(true, true, true, true)), imageInfo); + } + + + DxbcRegisterValue DxbcCompiler::emitIndexLoad( + DxbcRegIndex index) { + if (index.relReg != nullptr) { + DxbcRegisterValue result = emitRegisterLoad( + *index.relReg, DxbcRegMask(true, false, false, false)); + + if (index.offset != 0) { + result.id = m_module.opIAdd( + getVectorTypeId(result.type), result.id, + m_module.consti32(index.offset)); + } + + return result; + } else { + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Sint32; + result.type.ccount = 1; + result.id = m_module.consti32(index.offset); + return result; + } + } + + + DxbcRegisterValue DxbcCompiler::emitValueLoad( + DxbcRegisterPointer ptr) { + DxbcRegisterValue result; + result.type = ptr.type; + result.id = m_module.opLoad( + getVectorTypeId(result.type), + ptr.id); + return result; + } + + + void DxbcCompiler::emitValueStore( + DxbcRegisterPointer ptr, + DxbcRegisterValue value, + DxbcRegMask writeMask) { + // If the component types are not compatible, + // we need to bit-cast the source variable. + if (value.type.ctype != ptr.type.ctype) + value = emitRegisterBitcast(value, ptr.type.ctype); + + // If the source value consists of only one component, + // it is stored in all components of the destination. 
+ if (value.type.ccount == 1) + value = emitRegisterExtend(value, writeMask.popCount()); + + if (ptr.type.ccount == writeMask.popCount()) { + // Simple case: We write to the entire register + m_module.opStore(ptr.id, value.id); + } else { + // We only write to part of the destination + // register, so we need to load and modify it + DxbcRegisterValue tmp = emitValueLoad(ptr); + tmp = emitRegisterInsert(tmp, value, writeMask); + + m_module.opStore(ptr.id, tmp.id); + } + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw( + const DxbcRegister& reg) { + if (reg.type == DxbcOperandType::IndexableTemp) { + bool doBoundsCheck = reg.idx[1].relReg != nullptr; + DxbcRegisterValue vectorId = emitIndexLoad(reg.idx[1]); + + if (doBoundsCheck) { + uint32_t boundsCheck = m_module.opULessThan( + m_module.defBoolType(), vectorId.id, + m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength)); + + // Kind of ugly to have an empty else block here but there's no + // way for us to know the current block ID for the phi below + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelElse = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(boundsCheck, cond.labelIf, cond.labelElse); + + m_module.opLabel(cond.labelIf); + + DxbcRegisterValue returnValue = emitValueLoad(emitGetOperandPtr(reg)); + + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelElse); + + DxbcRegisterValue zeroValue = emitBuildZeroVector(returnValue.type); + + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); + + std::array phiLabels = {{ + { returnValue.id, cond.labelIf }, + { zeroValue.id, cond.labelElse }, + }}; + + returnValue.id = m_module.opPhi( + getVectorTypeId(returnValue.type), + phiLabels.size(), phiLabels.data()); + return returnValue; + } + } + + return emitValueLoad(emitGetOperandPtr(reg)); + } + + + DxbcRegisterValue 
DxbcCompiler::emitConstantBufferLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask) { + // Constant buffers take a two-dimensional index: + // (0) register index (immediate) + // (1) constant offset (relative) + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.type.alength = 0; + info.sclass = spv::StorageClassUniform; + + uint32_t regId = reg.idx[0].offset; + DxbcRegisterValue constId = emitIndexLoad(reg.idx[1]); + + uint32_t ptrTypeId = getPointerTypeId(info); + + const std::array indices = + {{ m_module.consti32(0), constId.id }}; + + DxbcRegisterPointer ptr; + ptr.type.ctype = info.type.ctype; + ptr.type.ccount = info.type.ccount; + ptr.id = m_module.opAccessChain(ptrTypeId, + m_constantBuffers.at(regId).varId, + indices.size(), indices.data()); + + // Load individual components from buffer + std::array ccomps = { 0, 0, 0, 0 }; + std::array scomps = { 0, 0, 0, 0 }; + uint32_t scount = 0; + + for (uint32_t i = 0; i < 4; i++) { + uint32_t sindex = reg.swizzle[i]; + + if (!writeMask[i] || ccomps[sindex]) + continue; + + uint32_t componentId = m_module.constu32(sindex); + uint32_t componentPtr = m_module.opAccessChain( + m_module.defPointerType( + getScalarTypeId(DxbcScalarType::Float32), + spv::StorageClassUniform), + ptr.id, 1, &componentId); + + ccomps[sindex] = m_module.opLoad( + getScalarTypeId(DxbcScalarType::Float32), + componentPtr); + } + + for (uint32_t i = 0; i < 4; i++) { + uint32_t sindex = reg.swizzle[i]; + + if (writeMask[i]) + scomps[scount++] = ccomps[sindex]; + } + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = scount; + result.id = scomps[0]; + + if (scount > 1) { + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + scount, scomps.data()); + } + + // Apply any post-processing that might be necessary + result = emitRegisterBitcast(result, reg.dataType); + result = emitSrcOperandModifiers(result, reg.modifiers); + 
return result; + } + + + DxbcRegisterValue DxbcCompiler::emitRegisterLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask) { + if (reg.type == DxbcOperandType::Imm32 + || reg.type == DxbcOperandType::Imm64) { + DxbcRegisterValue result; + + if (reg.componentCount == DxbcComponentCount::Component1) { + // Create one single u32 constant + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.constu32(reg.imm.u32_1); + + result = emitRegisterExtend(result, writeMask.popCount()); + } else if (reg.componentCount == DxbcComponentCount::Component4) { + // Create a u32 vector with as many components as needed + std::array indices = { }; + uint32_t indexId = 0; + + for (uint32_t i = 0; i < indices.size(); i++) { + if (writeMask[i]) { + indices.at(indexId++) = + m_module.constu32(reg.imm.u32_4[i]); + } + } + + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = writeMask.popCount(); + result.id = indices.at(0); + + if (indexId > 1) { + result.id = m_module.constComposite( + getVectorTypeId(result.type), + result.type.ccount, indices.data()); + } + + } else { + // Something went horribly wrong in the decoder or the shader is broken + throw DxvkError("DxbcCompiler: Invalid component count for immediate operand"); + } + + // Cast constants to the requested type + return emitRegisterBitcast(result, reg.dataType); + } else if (reg.type == DxbcOperandType::ConstantBuffer) { + return emitConstantBufferLoad(reg, writeMask); + } else { + // Load operand from the operand pointer + DxbcRegisterValue result = emitRegisterLoadRaw(reg); + + // Apply operand swizzle to the operand value + result = emitRegisterSwizzle(result, reg.swizzle, writeMask); + + // Cast it to the requested type. We need to do + // this after the swizzling for 64-bit types. 
+ result = emitRegisterBitcast(result, reg.dataType); + + // Apply operand modifiers + result = emitSrcOperandModifiers(result, reg.modifiers); + return result; + } + } + + + void DxbcCompiler::emitRegisterStore( + const DxbcRegister& reg, + DxbcRegisterValue value) { + if (reg.type == DxbcOperandType::IndexableTemp) { + bool doBoundsCheck = reg.idx[1].relReg != nullptr; + DxbcRegisterValue vectorId = emitIndexLoad(reg.idx[1]); + + if (doBoundsCheck) { + uint32_t boundsCheck = m_module.opULessThan( + m_module.defBoolType(), vectorId.id, + m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength)); + + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(boundsCheck, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + + emitValueStore(getIndexableTempPtr(reg, vectorId), value, reg.mask); + + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); + } else { + emitValueStore(getIndexableTempPtr(reg, vectorId), value, reg.mask); + } + } else { + emitValueStore(emitGetOperandPtr(reg), value, reg.mask); + } + } + + + void DxbcCompiler::emitInputSetup() { + m_module.setLateConst(m_vArrayLengthId, &m_vArrayLength); + + // Copy all defined v# registers into the input array + const uint32_t vecTypeId = m_module.defVectorType(m_module.defFloatType(32), 4); + const uint32_t ptrTypeId = m_module.defPointerType(vecTypeId, spv::StorageClassPrivate); + + for (uint32_t i = 0; i < m_vRegs.size(); i++) { + if (m_vRegs.at(i).id != 0) { + const uint32_t registerId = m_module.consti32(i); + + DxbcRegisterPointer srcPtr = m_vRegs.at(i); + DxbcRegisterValue srcValue = emitRegisterBitcast( + emitValueLoad(srcPtr), DxbcScalarType::Float32); + + DxbcRegisterPointer dstPtr; + dstPtr.type = { DxbcScalarType::Float32, 4 }; + dstPtr.id = m_module.opAccessChain( + ptrTypeId, m_vArray, 1, ®isterId); + + 
emitValueStore(dstPtr, srcValue, DxbcRegMask::firstN(srcValue.type.ccount)); + } + } + + // Copy all system value registers into the array, + // preserving any previously written contents. + for (const DxbcSvMapping& map : m_vMappings) { + const uint32_t registerId = m_module.consti32(map.regId); + + const DxbcRegisterValue value = [&] { + switch (m_programInfo.type()) { + case DxbcProgramType::VertexShader: return emitVsSystemValueLoad(map.sv, map.regMask); + case DxbcProgramType::PixelShader: return emitPsSystemValueLoad(map.sv, map.regMask); + default: throw DxvkError(str::format("DxbcCompiler: Unexpected stage: ", m_programInfo.type())); + } + }(); + + DxbcRegisterPointer inputReg; + inputReg.type.ctype = DxbcScalarType::Float32; + inputReg.type.ccount = 4; + inputReg.id = m_module.opAccessChain( + ptrTypeId, m_vArray, 1, ®isterId); + emitValueStore(inputReg, value, map.regMask); + } + } + + + void DxbcCompiler::emitInputSetup(uint32_t vertexCount) { + m_module.setLateConst(m_vArrayLengthId, &m_vArrayLength); + + // Copy all defined v# registers into the input array. Note + // that the outer index of the array is the vertex index. 
+ const uint32_t vecTypeId = m_module.defVectorType(m_module.defFloatType(32), 4); + const uint32_t dstPtrTypeId = m_module.defPointerType(vecTypeId, spv::StorageClassPrivate); + + for (uint32_t i = 0; i < m_vRegs.size(); i++) { + if (m_vRegs.at(i).id != 0) { + const uint32_t registerId = m_module.consti32(i); + + for (uint32_t v = 0; v < vertexCount; v++) { + std::array indices + = {{ m_module.consti32(v), registerId }}; + + DxbcRegisterPointer srcPtr; + srcPtr.type = m_vRegs.at(i).type; + srcPtr.id = m_module.opAccessChain( + m_module.defPointerType(getVectorTypeId(srcPtr.type), spv::StorageClassInput), + m_vRegs.at(i).id, 1, indices.data()); + + DxbcRegisterValue srcValue = emitRegisterBitcast( + emitValueLoad(srcPtr), DxbcScalarType::Float32); + + DxbcRegisterPointer dstPtr; + dstPtr.type = { DxbcScalarType::Float32, 4 }; + dstPtr.id = m_module.opAccessChain( + dstPtrTypeId, m_vArray, 2, indices.data()); + + emitValueStore(dstPtr, srcValue, DxbcRegMask::firstN(srcValue.type.ccount)); + } + } + } + + // Copy all system value registers into the array, + // preserving any previously written contents. 
+ for (const DxbcSvMapping& map : m_vMappings) { + const uint32_t registerId = m_module.consti32(map.regId); + + for (uint32_t v = 0; v < vertexCount; v++) { + const DxbcRegisterValue value = [&] { + switch (m_programInfo.type()) { + case DxbcProgramType::GeometryShader: return emitGsSystemValueLoad(map.sv, map.regMask, v); + default: throw DxvkError(str::format("DxbcCompiler: Unexpected stage: ", m_programInfo.type())); + } + }(); + + std::array indices = { + m_module.consti32(v), registerId, + }; + + DxbcRegisterPointer inputReg; + inputReg.type.ctype = DxbcScalarType::Float32; + inputReg.type.ccount = 4; + inputReg.id = m_module.opAccessChain(dstPtrTypeId, + m_vArray, indices.size(), indices.data()); + emitValueStore(inputReg, value, map.regMask); + } + } + } + + + void DxbcCompiler::emitOutputSetup() { + for (const DxbcSvMapping& svMapping : m_oMappings) { + DxbcRegisterPointer outputReg = m_oRegs.at(svMapping.regId); + + if (m_programInfo.type() == DxbcProgramType::HullShader) { + uint32_t registerIndex = m_module.constu32(svMapping.regId); + + outputReg.type = { DxbcScalarType::Float32, 4 }; + outputReg.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(outputReg.type), + spv::StorageClassPrivate), + m_hs.outputPerPatch, + 1, ®isterIndex); + } + + auto sv = svMapping.sv; + auto mask = svMapping.regMask; + auto value = emitValueLoad(outputReg); + + switch (m_programInfo.type()) { + case DxbcProgramType::VertexShader: emitVsSystemValueStore(sv, mask, value); break; + case DxbcProgramType::GeometryShader: emitGsSystemValueStore(sv, mask, value); break; + case DxbcProgramType::HullShader: emitHsSystemValueStore(sv, mask, value); break; + case DxbcProgramType::DomainShader: emitDsSystemValueStore(sv, mask, value); break; + case DxbcProgramType::PixelShader: emitPsSystemValueStore(sv, mask, value); break; + case DxbcProgramType::ComputeShader: break; + } + } + } + + + void DxbcCompiler::emitOutputDepthClamp() { + // HACK: Some drivers do not 
clamp FragDepth to [minDepth..maxDepth] + // before writing to the depth attachment, but we do not have acccess + // to those. Clamp to [0..1] instead. + if (m_ps.builtinDepth) { + DxbcRegisterPointer ptr; + ptr.type = { DxbcScalarType::Float32, 1 }; + ptr.id = m_ps.builtinDepth; + + DxbcRegisterValue value = emitValueLoad(ptr); + + value.id = m_module.opFClamp( + getVectorTypeId(ptr.type), + value.id, + m_module.constf32(0.0f), + m_module.constf32(1.0f)); + + emitValueStore(ptr, value, + DxbcRegMask::firstN(1)); + } + } + + + void DxbcCompiler::emitInitWorkgroupMemory() { + bool hasTgsm = false; + + SpirvMemoryOperands memoryOperands; + memoryOperands.flags = spv::MemoryAccessNonPrivatePointerMask; + + for (uint32_t i = 0; i < m_gRegs.size(); i++) { + if (!m_gRegs[i].varId) + continue; + + if (!m_cs.builtinLocalInvocationIndex) { + m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInLocalInvocationIndex, + "vThreadIndexInGroup"); + } + + uint32_t intTypeId = getScalarTypeId(DxbcScalarType::Uint32); + uint32_t ptrTypeId = m_module.defPointerType( + intTypeId, spv::StorageClassWorkgroup); + + uint32_t numElements = m_gRegs[i].type == DxbcResourceType::Structured + ? 
m_gRegs[i].elementCount * m_gRegs[i].elementStride / 4 + : m_gRegs[i].elementCount / 4; + + uint32_t numThreads = m_cs.workgroupSizeX * + m_cs.workgroupSizeY * m_cs.workgroupSizeZ; + + uint32_t numElementsPerThread = numElements / numThreads; + uint32_t numElementsRemaining = numElements % numThreads; + + uint32_t threadId = m_module.opLoad( + intTypeId, m_cs.builtinLocalInvocationIndex); + + uint32_t strideId = m_module.constu32(numThreads); + uint32_t zeroId = m_module.constu32(0); + + for (uint32_t e = 0; e < numElementsPerThread; e++) { + uint32_t ofsId = m_module.opIAdd(intTypeId, threadId, + m_module.opIMul(intTypeId, strideId, m_module.constu32(e))); + + uint32_t ptrId = m_module.opAccessChain( + ptrTypeId, m_gRegs[i].varId, 1, &ofsId); + + m_module.opStore(ptrId, zeroId, memoryOperands); + } + + if (numElementsRemaining) { + uint32_t condition = m_module.opULessThan( + m_module.defBoolType(), threadId, + m_module.constu32(numElementsRemaining)); + + DxbcConditional cond; + cond.labelIf = m_module.allocateId(); + cond.labelEnd = m_module.allocateId(); + + m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone); + m_module.opBranchConditional(condition, cond.labelIf, cond.labelEnd); + + m_module.opLabel(cond.labelIf); + + uint32_t ofsId = m_module.opIAdd(intTypeId, + m_module.constu32(numThreads * numElementsPerThread), + threadId); + + uint32_t ptrId = m_module.opAccessChain( + ptrTypeId, m_gRegs[i].varId, 1, &ofsId); + + m_module.opStore(ptrId, zeroId, memoryOperands); + + m_module.opBranch(cond.labelEnd); + m_module.opLabel (cond.labelEnd); + } + + hasTgsm = true; + } + + if (hasTgsm) { + m_module.opControlBarrier( + m_module.constu32(spv::ScopeWorkgroup), + m_module.constu32(spv::ScopeWorkgroup), + m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask + | spv::MemorySemanticsAcquireReleaseMask + | spv::MemorySemanticsMakeAvailableMask + | spv::MemorySemanticsMakeVisibleMask)); + } + } + + + DxbcRegisterValue 
DxbcCompiler::emitVsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask) { + switch (sv) { + case DxbcSystemValue::VertexId: { + const uint32_t typeId = getScalarTypeId(DxbcScalarType::Uint32); + + if (m_vs.builtinVertexId == 0) { + m_vs.builtinVertexId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInVertexIndex, + "vs_vertex_index"); + } + + if (m_vs.builtinBaseVertex == 0) { + m_vs.builtinBaseVertex = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInBaseVertex, + "vs_base_vertex"); + } + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opISub(typeId, + m_module.opLoad(typeId, m_vs.builtinVertexId), + m_module.opLoad(typeId, m_vs.builtinBaseVertex)); + return result; + } break; + + case DxbcSystemValue::InstanceId: { + const uint32_t typeId = getScalarTypeId(DxbcScalarType::Uint32); + + if (m_vs.builtinInstanceId == 0) { + m_vs.builtinInstanceId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInInstanceIndex, + "vs_instance_index"); + } + + if (m_vs.builtinBaseInstance == 0) { + m_vs.builtinBaseInstance = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInBaseInstance, + "vs_base_instance"); + } + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opISub(typeId, + m_module.opLoad(typeId, m_vs.builtinInstanceId), + m_module.opLoad(typeId, m_vs.builtinBaseInstance)); + return result; + } break; + + default: + throw DxvkError(str::format( + "DxbcCompiler: Unhandled VS SV input: ", sv)); + } + } + + + DxbcRegisterValue DxbcCompiler::emitGsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask, + uint32_t vertexId) { + switch (sv) { + case DxbcSystemValue::Position: { + uint32_t arrayIndex = 
m_module.consti32(vertexId); + + if (!m_positionIn) { + m_positionIn = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 4, primitiveVertexCount(m_gs.inputPrimitive) }, + spv::StorageClassInput }, + spv::BuiltInPosition, + "in_position"); + } + + DxbcRegisterPointer ptrIn; + ptrIn.type.ctype = DxbcScalarType::Float32; + ptrIn.type.ccount = 4; + ptrIn.id = m_module.opAccessChain( + m_module.defPointerType(getVectorTypeId(ptrIn.type), spv::StorageClassInput), + m_positionIn, 1, &arrayIndex); + + return emitRegisterExtract(emitValueLoad(ptrIn), mask); + } break; + + default: + throw DxvkError(str::format( + "DxbcCompiler: Unhandled GS SV input: ", sv)); + } + } + + + DxbcRegisterValue DxbcCompiler::emitPsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask) { + switch (sv) { + case DxbcSystemValue::Position: { + if (m_ps.builtinFragCoord == 0) { + m_ps.builtinFragCoord = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 4, 0 }, + spv::StorageClassInput }, + spv::BuiltInFragCoord, + "ps_frag_coord"); + } + + DxbcRegisterPointer ptrIn; + ptrIn.type = { DxbcScalarType::Float32, 4 }; + ptrIn.id = m_ps.builtinFragCoord; + + // The X, Y and Z components of the SV_POSITION semantic + // are identical to Vulkan's FragCoord builtin, but we + // need to compute the reciprocal of the W component. 
+ DxbcRegisterValue fragCoord = emitValueLoad(ptrIn); + + uint32_t componentIndex = 3; + uint32_t t_f32 = m_module.defFloatType(32); + uint32_t v_wComp = m_module.opCompositeExtract(t_f32, fragCoord.id, 1, &componentIndex); + v_wComp = m_module.opFDiv(t_f32, m_module.constf32(1.0f), v_wComp); + + fragCoord.id = m_module.opCompositeInsert( + getVectorTypeId(fragCoord.type), + v_wComp, fragCoord.id, + 1, &componentIndex); + + return emitRegisterExtract(fragCoord, mask); + } break; + + case DxbcSystemValue::IsFrontFace: { + if (m_ps.builtinIsFrontFace == 0) { + m_ps.builtinIsFrontFace = emitNewBuiltinVariable({ + { DxbcScalarType::Bool, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInFrontFacing, + "ps_is_front_face"); + } + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.opSelect( + getVectorTypeId(result.type), + m_module.opLoad( + m_module.defBoolType(), + m_ps.builtinIsFrontFace), + m_module.constu32(0xFFFFFFFF), + m_module.constu32(0x00000000)); + return result; + } break; + + case DxbcSystemValue::PrimitiveId: { + if (m_primitiveIdIn == 0) { + m_module.enableCapability(spv::CapabilityGeometry); + + m_primitiveIdIn = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInPrimitiveId, + "ps_primitive_id"); + } + + DxbcRegisterPointer ptrIn; + ptrIn.type = { DxbcScalarType::Uint32, 1 }; + ptrIn.id = m_primitiveIdIn; + + return emitValueLoad(ptrIn); + } break; + + case DxbcSystemValue::SampleIndex: { + if (m_ps.builtinSampleId == 0) { + m_module.enableCapability(spv::CapabilitySampleRateShading); + + m_ps.builtinSampleId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInSampleId, + "ps_sample_id"); + } + + DxbcRegisterPointer ptrIn; + ptrIn.type.ctype = DxbcScalarType::Uint32; + ptrIn.type.ccount = 1; + ptrIn.id = m_ps.builtinSampleId; + + return emitValueLoad(ptrIn); + } break; + + case 
DxbcSystemValue::RenderTargetId: { + if (m_ps.builtinLayer == 0) { + m_module.enableCapability(spv::CapabilityGeometry); + + m_ps.builtinLayer = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInLayer, + "v_layer"); + } + + DxbcRegisterPointer ptr; + ptr.type.ctype = DxbcScalarType::Uint32; + ptr.type.ccount = 1; + ptr.id = m_ps.builtinLayer; + + return emitValueLoad(ptr); + } break; + + case DxbcSystemValue::ViewportId: { + if (m_ps.builtinViewportId == 0) { + m_module.enableCapability(spv::CapabilityMultiViewport); + + m_ps.builtinViewportId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInViewportIndex, + "v_viewport"); + } + + DxbcRegisterPointer ptr; + ptr.type.ctype = DxbcScalarType::Uint32; + ptr.type.ccount = 1; + ptr.id = m_ps.builtinViewportId; + + return emitValueLoad(ptr); + } break; + + default: + throw DxvkError(str::format( + "DxbcCompiler: Unhandled PS SV input: ", sv)); + } + } + + + void DxbcCompiler::emitVsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value) { + switch (sv) { + case DxbcSystemValue::Position: { + if (!m_positionOut) { + m_positionOut = emitNewBuiltinVariable({ + { DxbcScalarType::Float32, 4, 0 }, + spv::StorageClassOutput }, + spv::BuiltInPosition, + "out_position"); + } + + DxbcRegisterPointer ptr; + ptr.type.ctype = DxbcScalarType::Float32; + ptr.type.ccount = 4; + ptr.id = m_positionOut; + + emitValueStore(ptr, value, mask); + } break; + + case DxbcSystemValue::RenderTargetId: { + if (m_programInfo.type() != DxbcProgramType::GeometryShader) + m_module.enableCapability(spv::CapabilityShaderLayer); + + if (m_gs.builtinLayer == 0) { + m_module.enableCapability(spv::CapabilityGeometry); + + m_gs.builtinLayer = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInLayer, + "o_layer"); + } + + DxbcRegisterPointer ptr; + ptr.type = { 
DxbcScalarType::Uint32, 1 }; + ptr.id = m_gs.builtinLayer; + + emitValueStore( + ptr, emitRegisterExtract(value, mask), + DxbcRegMask(true, false, false, false)); + } break; + + case DxbcSystemValue::ViewportId: { + if (m_programInfo.type() != DxbcProgramType::GeometryShader) + m_module.enableCapability(spv::CapabilityShaderViewportIndex); + + if (m_gs.builtinViewportId == 0) { + m_module.enableCapability(spv::CapabilityMultiViewport); + + m_gs.builtinViewportId = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInViewportIndex, + "o_viewport"); + } + + DxbcRegisterPointer ptr; + ptr.type = { DxbcScalarType::Uint32, 1}; + ptr.id = m_gs.builtinViewportId; + + emitValueStore( + ptr, emitRegisterExtract(value, mask), + DxbcRegMask(true, false, false, false)); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled VS SV output: ", sv)); + } + } + + + void DxbcCompiler::emitHsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value) { + if (sv >= DxbcSystemValue::FinalQuadUeq0EdgeTessFactor + && sv <= DxbcSystemValue::FinalLineDensityTessFactor) { + struct TessFactor { + uint32_t array = 0; + uint32_t index = 0; + }; + + static const std::array s_tessFactors = {{ + { m_hs.builtinTessLevelOuter, 0 }, // FinalQuadUeq0EdgeTessFactor + { m_hs.builtinTessLevelOuter, 1 }, // FinalQuadVeq0EdgeTessFactor + { m_hs.builtinTessLevelOuter, 2 }, // FinalQuadUeq1EdgeTessFactor + { m_hs.builtinTessLevelOuter, 3 }, // FinalQuadVeq1EdgeTessFactor + { m_hs.builtinTessLevelInner, 0 }, // FinalQuadUInsideTessFactor + { m_hs.builtinTessLevelInner, 1 }, // FinalQuadVInsideTessFactor + { m_hs.builtinTessLevelOuter, 0 }, // FinalTriUeq0EdgeTessFactor + { m_hs.builtinTessLevelOuter, 1 }, // FinalTriVeq0EdgeTessFactor + { m_hs.builtinTessLevelOuter, 2 }, // FinalTriWeq0EdgeTessFactor + { m_hs.builtinTessLevelInner, 0 }, // FinalTriInsideTessFactor + { m_hs.builtinTessLevelOuter, 0 }, 
// FinalLineDensityTessFactor + { m_hs.builtinTessLevelOuter, 1 }, // FinalLineDetailTessFactor + }}; + + const TessFactor tessFactor = s_tessFactors.at(uint32_t(sv) + - uint32_t(DxbcSystemValue::FinalQuadUeq0EdgeTessFactor)); + + const uint32_t tessFactorArrayIndex + = m_module.constu32(tessFactor.index); + + // Apply global tess factor limit + float maxTessFactor = m_hs.maxTessFactor; + + if (m_moduleInfo.tess != nullptr) { + if (m_moduleInfo.tess->maxTessFactor < maxTessFactor) + maxTessFactor = m_moduleInfo.tess->maxTessFactor; + } + + DxbcRegisterValue tessValue = emitRegisterExtract(value, mask); + tessValue.id = m_module.opFClamp(getVectorTypeId(tessValue.type), + tessValue.id, m_module.constf32(0.0f), + m_module.constf32(maxTessFactor)); + + DxbcRegisterPointer ptr; + ptr.type.ctype = DxbcScalarType::Float32; + ptr.type.ccount = 1; + ptr.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(ptr.type), + spv::StorageClassOutput), + tessFactor.array, 1, + &tessFactorArrayIndex); + + emitValueStore(ptr, tessValue, + DxbcRegMask(true, false, false, false)); + } else { + Logger::warn(str::format( + "DxbcCompiler: Unhandled HS SV output: ", sv)); + } + } + + + void DxbcCompiler::emitGsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value) { + switch (sv) { + case DxbcSystemValue::Position: + case DxbcSystemValue::CullDistance: + case DxbcSystemValue::ClipDistance: + case DxbcSystemValue::RenderTargetId: + case DxbcSystemValue::ViewportId: + emitVsSystemValueStore(sv, mask, value); + break; + + case DxbcSystemValue::PrimitiveId: { + if (m_primitiveIdOut == 0) { + m_primitiveIdOut = emitNewBuiltinVariable({ + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInPrimitiveId, + "gs_primitive_id"); + } + + DxbcRegisterPointer ptr; + ptr.type = { DxbcScalarType::Uint32, 1}; + ptr.id = m_primitiveIdOut; + + emitValueStore( + ptr, emitRegisterExtract(value, mask), + DxbcRegMask(true, 
false, false, false)); + } break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled GS SV output: ", sv)); + } + } + + + void DxbcCompiler::emitPsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value) { + Logger::warn(str::format( + "DxbcCompiler: Unhandled PS SV output: ", sv)); + } + + + void DxbcCompiler::emitDsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value) { + switch (sv) { + case DxbcSystemValue::Position: + case DxbcSystemValue::CullDistance: + case DxbcSystemValue::ClipDistance: + case DxbcSystemValue::RenderTargetId: + case DxbcSystemValue::ViewportId: + emitVsSystemValueStore(sv, mask, value); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled DS SV output: ", sv)); + } + } + + + void DxbcCompiler::emitClipCullStore( + DxbcSystemValue sv, + uint32_t dstArray) { + uint32_t offset = 0; + + if (dstArray == 0) + return; + + for (auto e = m_osgn->begin(); e != m_osgn->end(); e++) { + if (e->systemValue == sv) { + DxbcRegisterPointer srcPtr = m_oRegs.at(e->registerId); + DxbcRegisterValue srcValue = emitValueLoad(srcPtr); + + for (uint32_t i = 0; i < 4; i++) { + if (e->componentMask[i]) { + uint32_t offsetId = m_module.consti32(offset++); + + DxbcRegisterValue component = emitRegisterExtract( + srcValue, DxbcRegMask::select(i)); + + DxbcRegisterPointer dstPtr; + dstPtr.type = { DxbcScalarType::Float32, 1 }; + dstPtr.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(dstPtr.type), + spv::StorageClassOutput), + dstArray, 1, &offsetId); + + emitValueStore(dstPtr, component, + DxbcRegMask(true, false, false, false)); + } + } + } + } + } + + + void DxbcCompiler::emitClipCullLoad( + DxbcSystemValue sv, + uint32_t srcArray) { + uint32_t offset = 0; + + if (srcArray == 0) + return; + + for (auto e = m_isgn->begin(); e != m_isgn->end(); e++) { + if (e->systemValue == sv) { + // Load individual components from the source 
array + uint32_t componentIndex = 0; + std::array componentIds = {{ 0, 0, 0, 0 }}; + + for (uint32_t i = 0; i < 4; i++) { + if (e->componentMask[i]) { + uint32_t offsetId = m_module.consti32(offset++); + + DxbcRegisterPointer srcPtr; + srcPtr.type = { DxbcScalarType::Float32, 1 }; + srcPtr.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(srcPtr.type), + spv::StorageClassInput), + srcArray, 1, &offsetId); + + componentIds[componentIndex++] + = emitValueLoad(srcPtr).id; + } + } + + // Put everything into one vector + DxbcRegisterValue dstValue; + dstValue.type = { DxbcScalarType::Float32, componentIndex }; + dstValue.id = componentIds[0]; + + if (componentIndex > 1) { + dstValue.id = m_module.opCompositeConstruct( + getVectorTypeId(dstValue.type), + componentIndex, componentIds.data()); + } + + // Store vector to the input array + uint32_t registerId = m_module.consti32(e->registerId); + + DxbcRegisterPointer dstInput; + dstInput.type = { DxbcScalarType::Float32, 4 }; + dstInput.id = m_module.opAccessChain( + m_module.defPointerType( + getVectorTypeId(dstInput.type), + spv::StorageClassPrivate), + m_vArray, 1, ®isterId); + + emitValueStore(dstInput, dstValue, e->componentMask); + } + } + } + + + void DxbcCompiler::emitPointSizeStore() { + if (!m_pointSizeOut) { + m_pointSizeOut = emitNewBuiltinVariable(DxbcRegisterInfo { + { DxbcScalarType::Float32, 1, 0 }, + spv::StorageClassOutput }, + spv::BuiltInPointSize, + "point_size"); + } + + m_module.opStore(m_pointSizeOut, m_module.constf32(1.0f)); + } + + + void DxbcCompiler::emitInit() { + // Set up common capabilities for all shaders + m_module.enableCapability(spv::CapabilityShader); + m_module.enableCapability(spv::CapabilityImageQuery); + + // Initialize the shader module with capabilities + // etc. Each shader type has its own peculiarities. 
+ switch (m_programInfo.type()) { + case DxbcProgramType::VertexShader: emitVsInit(); break; + case DxbcProgramType::HullShader: emitHsInit(); break; + case DxbcProgramType::DomainShader: emitDsInit(); break; + case DxbcProgramType::GeometryShader: emitGsInit(); break; + case DxbcProgramType::PixelShader: emitPsInit(); break; + case DxbcProgramType::ComputeShader: emitCsInit(); break; + } + } + + + void DxbcCompiler::emitFunctionBegin( + uint32_t entryPoint, + uint32_t returnType, + uint32_t funcType) { + this->emitFunctionEnd(); + + m_module.functionBegin( + returnType, entryPoint, funcType, + spv::FunctionControlMaskNone); + + m_insideFunction = true; + } + + + void DxbcCompiler::emitFunctionEnd() { + if (m_insideFunction) { + m_module.opReturn(); + m_module.functionEnd(); + } + + m_insideFunction = false; + } + + + void DxbcCompiler::emitFunctionLabel() { + m_module.opLabel(m_module.allocateId()); + } + + + void DxbcCompiler::emitMainFunctionBegin() { + this->emitFunctionBegin( + m_entryPointId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxbcCompiler::emitVsInit() { + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityCullDistance); + m_module.enableCapability(spv::CapabilityDrawParameters); + + // Standard input array + emitDclInputArray(0); + + // Cull/clip distances as outputs + m_clipDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numClipPlanes, + spv::BuiltInClipDistance, + spv::StorageClassOutput); + + m_cullDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numCullPlanes, + spv::BuiltInCullDistance, + spv::StorageClassOutput); + + // Main function of the vertex shader + m_vs.functionId = m_module.allocateId(); + m_module.setDebugName(m_vs.functionId, "vs_main"); + + this->emitFunctionBegin( + m_vs.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + 
m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxbcCompiler::emitHsInit() { + m_module.enableCapability(spv::CapabilityTessellation); + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityCullDistance); + + m_hs.builtinInvocationId = emitNewBuiltinVariable( + DxbcRegisterInfo { + { DxbcScalarType::Uint32, 1, 0 }, + spv::StorageClassInput }, + spv::BuiltInInvocationId, + "vOutputControlPointId"); + + m_hs.builtinTessLevelOuter = emitBuiltinTessLevelOuter(spv::StorageClassOutput); + m_hs.builtinTessLevelInner = emitBuiltinTessLevelInner(spv::StorageClassOutput); + } + + + void DxbcCompiler::emitDsInit() { + m_module.enableCapability(spv::CapabilityTessellation); + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityCullDistance); + + m_ds.builtinTessLevelOuter = emitBuiltinTessLevelOuter(spv::StorageClassInput); + m_ds.builtinTessLevelInner = emitBuiltinTessLevelInner(spv::StorageClassInput); + + // Cull/clip distances as outputs + m_clipDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numClipPlanes, + spv::BuiltInClipDistance, + spv::StorageClassOutput); + + m_cullDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numCullPlanes, + spv::BuiltInCullDistance, + spv::StorageClassOutput); + + // Main function of the domain shader + m_ds.functionId = m_module.allocateId(); + m_module.setDebugName(m_ds.functionId, "ds_main"); + + this->emitFunctionBegin( + m_ds.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxbcCompiler::emitGsInit() { + m_module.enableCapability(spv::CapabilityGeometry); + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityCullDistance); + + // Enable capabilities for xfb mode if necessary + if (m_moduleInfo.xfb) { + 
m_module.enableCapability(spv::CapabilityGeometryStreams); + m_module.enableCapability(spv::CapabilityTransformFeedback); + + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeXfb); + } + + // We only need outputs if rasterization is enabled + m_gs.needsOutputSetup = !m_moduleInfo.xfb + || m_moduleInfo.xfb->rasterizedStream >= 0; + + // Cull/clip distances as outputs + m_clipDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numClipPlanes, + spv::BuiltInClipDistance, + spv::StorageClassOutput); + + m_cullDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullOut.numCullPlanes, + spv::BuiltInCullDistance, + spv::StorageClassOutput); + + // Emit Xfb variables if necessary + if (m_moduleInfo.xfb) + emitXfbOutputDeclarations(); + + // Main function of the vertex shader + m_gs.functionId = m_module.allocateId(); + m_module.setDebugName(m_gs.functionId, "gs_main"); + + this->emitFunctionBegin( + m_gs.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxbcCompiler::emitPsInit() { + m_module.enableCapability(spv::CapabilityDerivativeControl); + + m_module.setExecutionMode(m_entryPointId, + spv::ExecutionModeOriginUpperLeft); + + // Standard input array + emitDclInputArray(0); + + // Cull/clip distances as inputs + m_clipDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullIn.numClipPlanes, + spv::BuiltInClipDistance, + spv::StorageClassInput); + + m_cullDistances = emitDclClipCullDistanceArray( + m_analysis->clipCullIn.numCullPlanes, + spv::BuiltInCullDistance, + spv::StorageClassInput); + + // Main function of the pixel shader + m_ps.functionId = m_module.allocateId(); + m_module.setDebugName(m_ps.functionId, "ps_main"); + + this->emitFunctionBegin( + m_ps.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void 
DxbcCompiler::emitCsInit() { + // Main function of the compute shader + m_cs.functionId = m_module.allocateId(); + m_module.setDebugName(m_cs.functionId, "cs_main"); + + this->emitFunctionBegin( + m_cs.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + } + + + void DxbcCompiler::emitVsFinalize() { + this->emitMainFunctionBegin(); + this->emitInputSetup(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_vs.functionId, 0, nullptr); + this->emitOutputSetup(); + this->emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances); + this->emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances); + this->emitPointSizeStore(); + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitHsFinalize() { + if (m_hs.cpPhase.functionId == 0) + m_hs.cpPhase = this->emitNewHullShaderPassthroughPhase(); + + // Control point phase + this->emitMainFunctionBegin(); + this->emitInputSetup(m_hs.vertexCountIn); + this->emitHsControlPointPhase(m_hs.cpPhase); + this->emitHsPhaseBarrier(); + + // Fork-join phases and output setup + this->emitHsInvocationBlockBegin(1); + + for (const auto& phase : m_hs.forkPhases) + this->emitHsForkJoinPhase(phase); + + for (const auto& phase : m_hs.joinPhases) + this->emitHsForkJoinPhase(phase); + + this->emitOutputSetup(); + this->emitHsOutputSetup(); + this->emitHsInvocationBlockEnd(); + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitDsFinalize() { + this->emitMainFunctionBegin(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_ds.functionId, 0, nullptr); + this->emitOutputSetup(); + this->emitClipCullStore(DxbcSystemValue::ClipDistance, m_clipDistances); + this->emitClipCullStore(DxbcSystemValue::CullDistance, m_cullDistances); + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitGsFinalize() { + if (!m_gs.invocationCount) + m_module.setInvocations(m_entryPointId, 1); + + this->emitMainFunctionBegin(); + 
this->emitInputSetup( + primitiveVertexCount(m_gs.inputPrimitive)); + m_module.opFunctionCall( + m_module.defVoidType(), + m_gs.functionId, 0, nullptr); + // No output setup at this point as that was + // already done during the EmitVertex step + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitPsFinalize() { + this->emitMainFunctionBegin(); + this->emitInputSetup(); + this->emitClipCullLoad(DxbcSystemValue::ClipDistance, m_clipDistances); + this->emitClipCullLoad(DxbcSystemValue::CullDistance, m_cullDistances); + + if (m_hasRasterizerOrderedUav) { + // For simplicity, just lock the entire fragment shader + // if there are any rasterizer ordered views. + m_module.enableExtension("SPV_EXT_fragment_shader_interlock"); + + if (m_module.hasCapability(spv::CapabilitySampleRateShading) + && m_moduleInfo.options.enableSampleShadingInterlock) { + m_module.enableCapability(spv::CapabilityFragmentShaderSampleInterlockEXT); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeSampleInterlockOrderedEXT); + } else { + m_module.enableCapability(spv::CapabilityFragmentShaderPixelInterlockEXT); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModePixelInterlockOrderedEXT); + } + + m_module.opBeginInvocationInterlock(); + } + + m_module.opFunctionCall( + m_module.defVoidType(), + m_ps.functionId, 0, nullptr); + + if (m_hasRasterizerOrderedUav) + m_module.opEndInvocationInterlock(); + + this->emitOutputSetup(); + + if (m_moduleInfo.options.useDepthClipWorkaround) + this->emitOutputDepthClamp(); + + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitCsFinalize() { + this->emitMainFunctionBegin(); + + if (m_moduleInfo.options.zeroInitWorkgroupMemory) + this->emitInitWorkgroupMemory(); + + m_module.opFunctionCall( + m_module.defVoidType(), + m_cs.functionId, 0, nullptr); + + this->emitFunctionEnd(); + } + + + void DxbcCompiler::emitXfbOutputDeclarations() { + for (uint32_t i = 0; i < m_moduleInfo.xfb->entryCount; i++) { + const DxbcXfbEntry* xfbEntry = 
m_moduleInfo.xfb->entries + i; + const DxbcSgnEntry* sigEntry = m_osgn->find( + xfbEntry->semanticName, + xfbEntry->semanticIndex, + xfbEntry->streamId); + + if (sigEntry == nullptr) + continue; + + DxbcRegisterInfo varInfo; + varInfo.type.ctype = DxbcScalarType::Float32; + varInfo.type.ccount = xfbEntry->componentCount; + varInfo.type.alength = 0; + varInfo.sclass = spv::StorageClassOutput; + + uint32_t dstComponentMask = (1 << xfbEntry->componentCount) - 1; + uint32_t srcComponentMask = dstComponentMask + << sigEntry->componentMask.firstSet() + << xfbEntry->componentIndex; + + DxbcXfbVar xfbVar; + xfbVar.varId = emitNewVariable(varInfo); + xfbVar.streamId = xfbEntry->streamId; + xfbVar.outputId = sigEntry->registerId; + xfbVar.srcMask = DxbcRegMask(srcComponentMask); + xfbVar.dstMask = DxbcRegMask(dstComponentMask); + m_xfbVars.push_back(xfbVar); + + m_module.setDebugName(xfbVar.varId, + str::format("xfb", i).c_str()); + + m_module.decorateXfb(xfbVar.varId, + xfbEntry->streamId, xfbEntry->bufferId, xfbEntry->offset, + m_moduleInfo.xfb->strides[xfbEntry->bufferId]); + } + + // TODO Compact location/component assignment + for (uint32_t i = 0; i < m_xfbVars.size(); i++) { + m_xfbVars[i].location = i; + m_xfbVars[i].component = 0; + } + + for (uint32_t i = 0; i < m_xfbVars.size(); i++) { + const DxbcXfbVar* var = &m_xfbVars[i]; + + m_module.decorateLocation (var->varId, var->location); + m_module.decorateComponent(var->varId, var->component); + } + } + + + void DxbcCompiler::emitXfbOutputSetup( + uint32_t streamId, + bool passthrough) { + for (size_t i = 0; i < m_xfbVars.size(); i++) { + if (m_xfbVars[i].streamId == streamId) { + DxbcRegisterPointer srcPtr = passthrough + ? 
m_vRegs[m_xfbVars[i].outputId] + : m_oRegs[m_xfbVars[i].outputId]; + + if (passthrough) { + srcPtr = emitArrayAccess(srcPtr, + spv::StorageClassInput, + m_module.constu32(0)); + } + + DxbcRegisterPointer dstPtr; + dstPtr.type.ctype = DxbcScalarType::Float32; + dstPtr.type.ccount = m_xfbVars[i].dstMask.popCount(); + dstPtr.id = m_xfbVars[i].varId; + + DxbcRegisterValue value = emitRegisterExtract( + emitValueLoad(srcPtr), m_xfbVars[i].srcMask); + emitValueStore(dstPtr, value, m_xfbVars[i].dstMask); + } + } + } + + + void DxbcCompiler::emitHsControlPointPhase( + const DxbcCompilerHsControlPointPhase& phase) { + m_module.opFunctionCall( + m_module.defVoidType(), + phase.functionId, 0, nullptr); + } + + + void DxbcCompiler::emitHsForkJoinPhase( + const DxbcCompilerHsForkJoinPhase& phase) { + for (uint32_t i = 0; i < phase.instanceCount; i++) { + uint32_t invocationId = m_module.constu32(i); + + m_module.opFunctionCall( + m_module.defVoidType(), + phase.functionId, 1, + &invocationId); + } + } + + + void DxbcCompiler::emitDclInputArray(uint32_t vertexCount) { + DxbcVectorType info; + info.ctype = DxbcScalarType::Float32; + info.ccount = 4; + + // Define the array type. This will be two-dimensional + // in some shaders, with the outer index representing + // the vertex ID within an invocation. + m_vArrayLength = m_isgn != nullptr ? std::max(1u, m_isgn->maxRegisterCount()) : 1; + m_vArrayLengthId = m_module.lateConst32(getScalarTypeId(DxbcScalarType::Uint32)); + + uint32_t vectorTypeId = getVectorTypeId(info); + uint32_t arrayTypeId = m_module.defArrayType(vectorTypeId, m_vArrayLengthId); + + if (vertexCount != 0) { + arrayTypeId = m_module.defArrayType( + arrayTypeId, m_module.constu32(vertexCount)); + } + + // Define the actual variable. Note that this is private + // because we will copy input registers and some system + // variables to the array during the setup phase. 
+ const uint32_t ptrTypeId = m_module.defPointerType( + arrayTypeId, spv::StorageClassPrivate); + + const uint32_t varId = m_module.newVar( + ptrTypeId, spv::StorageClassPrivate); + + m_module.setDebugName(varId, "shader_in"); + m_vArray = varId; + } + + + uint32_t DxbcCompiler::emitDclClipCullDistanceArray( + uint32_t length, + spv::BuiltIn builtIn, + spv::StorageClass storageClass) { + if (length == 0) + return 0; + + uint32_t t_f32 = m_module.defFloatType(32); + uint32_t t_arr = m_module.defArrayType(t_f32, m_module.constu32(length)); + uint32_t t_ptr = m_module.defPointerType(t_arr, storageClass); + uint32_t varId = m_module.newVar(t_ptr, storageClass); + + m_module.decorateBuiltIn(varId, builtIn); + m_module.setDebugName(varId, + builtIn == spv::BuiltInClipDistance + ? "clip_distances" + : "cull_distances"); + + return varId; + } + + + DxbcCompilerHsControlPointPhase DxbcCompiler::emitNewHullShaderControlPointPhase() { + uint32_t funTypeId = m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr); + + uint32_t funId = m_module.allocateId(); + + this->emitFunctionBegin(funId, + m_module.defVoidType(), + funTypeId); + this->emitFunctionLabel(); + + DxbcCompilerHsControlPointPhase result; + result.functionId = funId; + return result; + } + + + DxbcCompilerHsControlPointPhase DxbcCompiler::emitNewHullShaderPassthroughPhase() { + uint32_t funTypeId = m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr); + + // Begin passthrough function + uint32_t funId = m_module.allocateId(); + m_module.setDebugName(funId, "hs_passthrough"); + + this->emitFunctionBegin(funId, + m_module.defVoidType(), + funTypeId); + this->emitFunctionLabel(); + + // We'll basically copy each input variable to the corresponding + // output, using the shader's invocation ID as the array index. 
+ const uint32_t invocationId = m_module.opLoad( + getScalarTypeId(DxbcScalarType::Uint32), + m_hs.builtinInvocationId); + + for (auto i = m_isgn->begin(); i != m_isgn->end(); i++) { + this->emitDclInput( + i->registerId, m_hs.vertexCountIn, + i->componentMask, + DxbcSystemValue::None, + DxbcInterpolationMode::Undefined); + + // Vector type index + const std::array dstIndices + = {{ invocationId, m_module.constu32(i->registerId) }}; + + DxbcRegisterPointer srcPtr; + srcPtr.type = m_vRegs.at(i->registerId).type; + srcPtr.id = m_module.opAccessChain( + m_module.defPointerType(getVectorTypeId(srcPtr.type), spv::StorageClassInput), + m_vRegs.at(i->registerId).id, 1, &invocationId); + + DxbcRegisterValue srcValue = emitRegisterBitcast( + emitValueLoad(srcPtr), DxbcScalarType::Float32); + + DxbcRegisterPointer dstPtr; + dstPtr.type = { DxbcScalarType::Float32, 4 }; + dstPtr.id = m_module.opAccessChain( + m_module.defPointerType(getVectorTypeId(dstPtr.type), spv::StorageClassOutput), + m_hs.outputPerVertex, dstIndices.size(), dstIndices.data()); + + emitValueStore(dstPtr, srcValue, DxbcRegMask::firstN(srcValue.type.ccount)); + } + + // End function + this->emitFunctionEnd(); + + DxbcCompilerHsControlPointPhase result; + result.functionId = funId; + return result; + } + + + DxbcCompilerHsForkJoinPhase DxbcCompiler::emitNewHullShaderForkJoinPhase() { + uint32_t argTypeId = m_module.defIntType(32, 0); + uint32_t funTypeId = m_module.defFunctionType( + m_module.defVoidType(), 1, &argTypeId); + + uint32_t funId = m_module.allocateId(); + + this->emitFunctionBegin(funId, + m_module.defVoidType(), + funTypeId); + + uint32_t argId = m_module.functionParameter(argTypeId); + this->emitFunctionLabel(); + + DxbcCompilerHsForkJoinPhase result; + result.functionId = funId; + result.instanceId = argId; + return result; + } + + + void DxbcCompiler::emitHsPhaseBarrier() { + uint32_t exeScopeId = m_module.constu32(spv::ScopeWorkgroup); + uint32_t memScopeId = 
m_module.constu32(spv::ScopeWorkgroup); + uint32_t semanticId = m_module.constu32( + spv::MemorySemanticsOutputMemoryMask | + spv::MemorySemanticsAcquireReleaseMask | + spv::MemorySemanticsMakeAvailableMask | + spv::MemorySemanticsMakeVisibleMask); + + m_module.opControlBarrier(exeScopeId, memScopeId, semanticId); + } + + + void DxbcCompiler::emitHsInvocationBlockBegin(uint32_t count) { + uint32_t invocationId = m_module.opLoad( + getScalarTypeId(DxbcScalarType::Uint32), + m_hs.builtinInvocationId); + + uint32_t condition = m_module.opULessThan( + m_module.defBoolType(), invocationId, + m_module.constu32(count)); + + m_hs.invocationBlockBegin = m_module.allocateId(); + m_hs.invocationBlockEnd = m_module.allocateId(); + + m_module.opSelectionMerge( + m_hs.invocationBlockEnd, + spv::SelectionControlMaskNone); + + m_module.opBranchConditional( + condition, + m_hs.invocationBlockBegin, + m_hs.invocationBlockEnd); + + m_module.opLabel( + m_hs.invocationBlockBegin); + } + + + void DxbcCompiler::emitHsInvocationBlockEnd() { + m_module.opBranch (m_hs.invocationBlockEnd); + m_module.opLabel (m_hs.invocationBlockEnd); + + m_hs.invocationBlockBegin = 0; + m_hs.invocationBlockEnd = 0; + } + + + void DxbcCompiler::emitHsOutputSetup() { + uint32_t outputPerPatch = emitTessInterfacePerPatch(spv::StorageClassOutput); + + if (!outputPerPatch) + return; + + uint32_t vecType = getVectorTypeId({ DxbcScalarType::Float32, 4 }); + + uint32_t srcPtrType = m_module.defPointerType(vecType, spv::StorageClassPrivate); + uint32_t dstPtrType = m_module.defPointerType(vecType, spv::StorageClassOutput); + + for (uint32_t i = 0; i < 32; i++) { + if (m_hs.outputPerPatchMask & (1 << i)) { + uint32_t index = m_module.constu32(i); + + uint32_t srcPtr = m_module.opAccessChain(srcPtrType, m_hs.outputPerPatch, 1, &index); + uint32_t dstPtr = m_module.opAccessChain(dstPtrType, outputPerPatch, 1, &index); + + m_module.opStore(dstPtr, m_module.opLoad(vecType, srcPtr)); + } + } + } + + + uint32_t 
DxbcCompiler::emitTessInterfacePerPatch(spv::StorageClass storageClass) { + const char* name = "vPatch"; + + if (storageClass == spv::StorageClassPrivate) + name = "rPatch"; + if (storageClass == spv::StorageClassOutput) + name = "oPatch"; + + uint32_t arrLen = m_psgn != nullptr ? m_psgn->maxRegisterCount() : 0; + + if (!arrLen) + return 0; + + uint32_t vecType = m_module.defVectorType (m_module.defFloatType(32), 4); + uint32_t arrType = m_module.defArrayType (vecType, m_module.constu32(arrLen)); + uint32_t ptrType = m_module.defPointerType(arrType, storageClass); + uint32_t varId = m_module.newVar (ptrType, storageClass); + + m_module.setDebugName (varId, name); + + if (storageClass != spv::StorageClassPrivate) { + m_module.decorate (varId, spv::DecorationPatch); + m_module.decorateLocation (varId, 0); + } + + return varId; + } + + + uint32_t DxbcCompiler::emitTessInterfacePerVertex(spv::StorageClass storageClass, uint32_t vertexCount) { + const bool isInput = storageClass == spv::StorageClassInput; + + uint32_t arrLen = isInput + ? (m_isgn != nullptr ? m_isgn->maxRegisterCount() : 0) + : (m_osgn != nullptr ? m_osgn->maxRegisterCount() : 0); + + if (!arrLen) + return 0; + + uint32_t locIdx = m_psgn != nullptr + ? m_psgn->maxRegisterCount() + : 0; + + uint32_t vecType = m_module.defVectorType (m_module.defFloatType(32), 4); + uint32_t arrTypeInner = m_module.defArrayType (vecType, m_module.constu32(arrLen)); + uint32_t arrTypeOuter = m_module.defArrayType (arrTypeInner, m_module.constu32(vertexCount)); + uint32_t ptrType = m_module.defPointerType(arrTypeOuter, storageClass); + uint32_t varId = m_module.newVar (ptrType, storageClass); + + m_module.setDebugName (varId, isInput ? 
"vVertex" : "oVertex"); + m_module.decorateLocation (varId, locIdx); + return varId; + } + + + uint32_t DxbcCompiler::emitSamplePosArray() { + const std::array samplePosVectors = {{ + // Invalid sample count / unbound resource + m_module.constvec2f32( 0.0f, 0.0f), + // VK_SAMPLE_COUNT_1_BIT + m_module.constvec2f32( 0.0f, 0.0f), + // VK_SAMPLE_COUNT_2_BIT + m_module.constvec2f32( 0.25f, 0.25f), + m_module.constvec2f32(-0.25f,-0.25f), + // VK_SAMPLE_COUNT_4_BIT + m_module.constvec2f32(-0.125f,-0.375f), + m_module.constvec2f32( 0.375f,-0.125f), + m_module.constvec2f32(-0.375f, 0.125f), + m_module.constvec2f32( 0.125f, 0.375f), + // VK_SAMPLE_COUNT_8_BIT + m_module.constvec2f32( 0.0625f,-0.1875f), + m_module.constvec2f32(-0.0625f, 0.1875f), + m_module.constvec2f32( 0.3125f, 0.0625f), + m_module.constvec2f32(-0.1875f,-0.3125f), + m_module.constvec2f32(-0.3125f, 0.3125f), + m_module.constvec2f32(-0.4375f,-0.0625f), + m_module.constvec2f32( 0.1875f, 0.4375f), + m_module.constvec2f32( 0.4375f,-0.4375f), + // VK_SAMPLE_COUNT_16_BIT + m_module.constvec2f32( 0.0625f, 0.0625f), + m_module.constvec2f32(-0.0625f,-0.1875f), + m_module.constvec2f32(-0.1875f, 0.1250f), + m_module.constvec2f32( 0.2500f,-0.0625f), + m_module.constvec2f32(-0.3125f,-0.1250f), + m_module.constvec2f32( 0.1250f, 0.3125f), + m_module.constvec2f32( 0.3125f, 0.1875f), + m_module.constvec2f32( 0.1875f,-0.3125f), + m_module.constvec2f32(-0.1250f, 0.3750f), + m_module.constvec2f32( 0.0000f,-0.4375f), + m_module.constvec2f32(-0.2500f,-0.3750f), + m_module.constvec2f32(-0.3750f, 0.2500f), + m_module.constvec2f32(-0.5000f, 0.0000f), + m_module.constvec2f32( 0.4375f,-0.2500f), + m_module.constvec2f32( 0.3750f, 0.4375f), + m_module.constvec2f32(-0.4375f,-0.5000f), + }}; + + uint32_t arrayTypeId = getArrayTypeId({ + DxbcScalarType::Float32, 2, + static_cast(samplePosVectors.size()) }); + + uint32_t samplePosArray = m_module.constComposite( + arrayTypeId, + samplePosVectors.size(), + samplePosVectors.data()); + + 
uint32_t varId = m_module.newVarInit( + m_module.defPointerType(arrayTypeId, spv::StorageClassPrivate), + spv::StorageClassPrivate, samplePosArray); + + m_module.setDebugName(varId, "g_sample_pos"); + m_module.decorate(varId, spv::DecorationNonWritable); + return varId; + } + + + void DxbcCompiler::emitFloatControl() { + DxbcFloatControlFlags flags = m_moduleInfo.options.floatControl; + + if (flags.isClear()) + return; + + const uint32_t width32 = 32; + const uint32_t width64 = 64; + + if (flags.test(DxbcFloatControlFlag::DenormFlushToZero32)) { + m_module.enableCapability(spv::CapabilityDenormFlushToZero); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDenormFlushToZero, 1, &width32); + } + + if (flags.test(DxbcFloatControlFlag::PreserveNan32)) { + m_module.enableCapability(spv::CapabilitySignedZeroInfNanPreserve); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeSignedZeroInfNanPreserve, 1, &width32); + } + + if (m_module.hasCapability(spv::CapabilityFloat64)) { + if (flags.test(DxbcFloatControlFlag::DenormPreserve64)) { + m_module.enableCapability(spv::CapabilityDenormPreserve); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeDenormPreserve, 1, &width64); + } + + if (flags.test(DxbcFloatControlFlag::PreserveNan64)) { + m_module.enableCapability(spv::CapabilitySignedZeroInfNanPreserve); + m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeSignedZeroInfNanPreserve, 1, &width64); + } + } + } + + + uint32_t DxbcCompiler::emitNewVariable(const DxbcRegisterInfo& info) { + const uint32_t ptrTypeId = this->getPointerTypeId(info); + return m_module.newVar(ptrTypeId, info.sclass); + } + + + uint32_t DxbcCompiler::emitNewBuiltinVariable( + const DxbcRegisterInfo& info, + spv::BuiltIn builtIn, + const char* name) { + const uint32_t varId = emitNewVariable(info); + + if (name) + m_module.setDebugName(varId, name); + + m_module.decorateBuiltIn(varId, builtIn); + + if (m_programInfo.type() == DxbcProgramType::PixelShader + && 
info.type.ctype != DxbcScalarType::Float32 + && info.type.ctype != DxbcScalarType::Bool + && info.sclass == spv::StorageClassInput) + m_module.decorate(varId, spv::DecorationFlat); + + return varId; + } + + + uint32_t DxbcCompiler::emitBuiltinTessLevelOuter(spv::StorageClass storageClass) { + uint32_t id = emitNewBuiltinVariable( + DxbcRegisterInfo { + { DxbcScalarType::Float32, 0, 4 }, + storageClass }, + spv::BuiltInTessLevelOuter, + "bTessLevelOuter"); + + m_module.decorate(id, spv::DecorationPatch); + return id; + } + + + uint32_t DxbcCompiler::emitBuiltinTessLevelInner(spv::StorageClass storageClass) { + uint32_t id = emitNewBuiltinVariable( + DxbcRegisterInfo { + { DxbcScalarType::Float32, 0, 2 }, + storageClass }, + spv::BuiltInTessLevelInner, + "bTessLevelInner"); + + m_module.decorate(id, spv::DecorationPatch); + return id; + } + + + uint32_t DxbcCompiler::emitPushConstants() { + uint32_t uintTypeId = m_module.defIntType(32, 0); + uint32_t structTypeId = m_module.defStructTypeUnique(1, &uintTypeId); + + m_module.setDebugName(structTypeId, "pc_t"); + m_module.setDebugMemberName(structTypeId, 0, "RasterizerSampleCount"); + m_module.memberDecorateOffset(structTypeId, 0, 0); + + uint32_t ptrTypeId = m_module.defPointerType(structTypeId, spv::StorageClassPushConstant); + uint32_t varId = m_module.newVar(ptrTypeId, spv::StorageClassPushConstant); + + m_module.setDebugName(varId, "pc"); + return varId; + } + + + DxbcCfgBlock* DxbcCompiler::cfgFindBlock( + const std::initializer_list& types) { + for (auto cur = m_controlFlowBlocks.rbegin(); + cur != m_controlFlowBlocks.rend(); cur++) { + for (auto type : types) { + if (cur->type == type) + return &(*cur); + } + } + + return nullptr; + } + + + DxbcBufferInfo DxbcCompiler::getBufferInfo(const DxbcRegister& reg) { + const uint32_t registerId = reg.idx[0].offset; + + switch (reg.type) { + case DxbcOperandType::Resource: { + const auto& texture = m_textures.at(registerId); + + DxbcBufferInfo result; + result.image = 
texture.imageInfo; + result.stype = texture.sampledType; + result.type = texture.type; + result.typeId = texture.imageTypeId; + result.varId = texture.varId; + result.stride = texture.structStride; + result.coherence = 0; + result.isSsbo = texture.isRawSsbo; + return result; + } break; + + case DxbcOperandType::UnorderedAccessView: { + const auto& uav = m_uavs.at(registerId); + + DxbcBufferInfo result; + result.image = uav.imageInfo; + result.stype = uav.sampledType; + result.type = uav.type; + result.typeId = uav.imageTypeId; + result.varId = uav.varId; + result.stride = uav.structStride; + result.coherence = uav.coherence; + result.isSsbo = uav.isRawSsbo; + return result; + } break; + + case DxbcOperandType::ThreadGroupSharedMemory: { + DxbcBufferInfo result; + result.image = { spv::DimBuffer, 0, 0, 0 }; + result.stype = DxbcScalarType::Uint32; + result.type = m_gRegs.at(registerId).type; + result.typeId = m_module.defPointerType( + getScalarTypeId(DxbcScalarType::Uint32), + spv::StorageClassWorkgroup); + result.varId = m_gRegs.at(registerId).varId; + result.stride = m_gRegs.at(registerId).elementStride; + result.coherence = spv::ScopeInvocation; + result.isSsbo = false; + return result; + } break; + + default: + throw DxvkError(str::format("DxbcCompiler: Invalid operand type for buffer: ", reg.type)); + } + } + + + uint32_t DxbcCompiler::getTexSizeDim(const DxbcImageInfo& imageType) const { + switch (imageType.dim) { + case spv::DimBuffer: return 1 + imageType.array; + case spv::Dim1D: return 1 + imageType.array; + case spv::Dim2D: return 2 + imageType.array; + case spv::Dim3D: return 3 + imageType.array; + case spv::DimCube: return 2 + imageType.array; + default: throw DxvkError("DxbcCompiler: getTexLayerDim: Unsupported image dimension"); + } + } + + + uint32_t DxbcCompiler::getTexLayerDim(const DxbcImageInfo& imageType) const { + switch (imageType.dim) { + case spv::DimBuffer: return 1; + case spv::Dim1D: return 1; + case spv::Dim2D: return 2; + case 
spv::Dim3D: return 3; + case spv::DimCube: return 3; + default: throw DxvkError("DxbcCompiler: getTexLayerDim: Unsupported image dimension"); + } + } + + + uint32_t DxbcCompiler::getTexCoordDim(const DxbcImageInfo& imageType) const { + return getTexLayerDim(imageType) + imageType.array; + } + + + DxbcRegMask DxbcCompiler::getTexCoordMask(const DxbcImageInfo& imageType) const { + return DxbcRegMask::firstN(getTexCoordDim(imageType)); + } + + + DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const { + switch (m_programInfo.type()) { + case DxbcProgramType::VertexShader: { + const DxbcSgnEntry* entry = m_isgn->findByRegister(regIdx); + + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + + if (entry != nullptr) { + result.ctype = entry->componentType; + result.ccount = entry->componentMask.popCount(); + } + + return result; + } + + case DxbcProgramType::DomainShader: { + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + return result; + } + + default: { + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + + if (m_isgn->findByRegister(regIdx)) + result.ccount = m_isgn->regMask(regIdx).minComponents(); + return result; + } + } + } + + + DxbcVectorType DxbcCompiler::getOutputRegType(uint32_t regIdx) const { + switch (m_programInfo.type()) { + case DxbcProgramType::PixelShader: { + const DxbcSgnEntry* entry = m_osgn->findByRegister(regIdx); + + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + + if (entry != nullptr) { + result.ctype = entry->componentType; + result.ccount = entry->componentMask.popCount(); + } + + return result; + } + + case DxbcProgramType::HullShader: { + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + return result; + } + + default: { + DxbcVectorType result; + result.ctype = DxbcScalarType::Float32; + result.ccount = 4; + + if 
(m_osgn->findByRegister(regIdx)) + result.ccount = m_osgn->regMask(regIdx).minComponents(); + return result; + } + } + } + + + DxbcImageInfo DxbcCompiler::getResourceType( + DxbcResourceDim resourceType, + bool isUav) const { + uint32_t ms = m_moduleInfo.options.disableMsaa ? 0 : 1; + + switch (resourceType) { + case DxbcResourceDim::Buffer: return { spv::DimBuffer, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_MAX_ENUM }; + case DxbcResourceDim::Texture1D: return { spv::Dim1D, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_1D }; + case DxbcResourceDim::Texture1DArr: return { spv::Dim1D, 1, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_1D_ARRAY }; + case DxbcResourceDim::Texture2D: return { spv::Dim2D, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_2D }; + case DxbcResourceDim::Texture2DArr: return { spv::Dim2D, 1, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_2D_ARRAY }; + case DxbcResourceDim::Texture2DMs: return { spv::Dim2D, 0, ms,isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_2D }; + case DxbcResourceDim::Texture2DMsArr: return { spv::Dim2D, 1, ms,isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_2D_ARRAY }; + case DxbcResourceDim::Texture3D: return { spv::Dim3D, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_3D }; + case DxbcResourceDim::TextureCube: return { spv::DimCube, 0, 0, isUav ? 2u : 1u, VK_IMAGE_VIEW_TYPE_CUBE }; + case DxbcResourceDim::TextureCubeArr: return { spv::DimCube, 1, 0, isUav ? 
2u : 1u, VK_IMAGE_VIEW_TYPE_CUBE_ARRAY }; + default: throw DxvkError(str::format("DxbcCompiler: Unsupported resource type: ", resourceType)); + } + } + + + spv::ImageFormat DxbcCompiler::getScalarImageFormat(DxbcScalarType type) const { + switch (type) { + case DxbcScalarType::Float32: return spv::ImageFormatR32f; + case DxbcScalarType::Sint32: return spv::ImageFormatR32i; + case DxbcScalarType::Uint32: return spv::ImageFormatR32ui; + default: throw DxvkError("DxbcCompiler: Unhandled scalar resource type"); + } + } + + + bool DxbcCompiler::isDoubleType(DxbcScalarType type) const { + return type == DxbcScalarType::Sint64 + || type == DxbcScalarType::Uint64 + || type == DxbcScalarType::Float64; + } + + DxbcRegisterPointer DxbcCompiler::getIndexableTempPtr( + const DxbcRegister& operand, + DxbcRegisterValue vectorId) { + // x# regs are indexed as follows: + // (0) register index (immediate) + // (1) element index (relative) + const uint32_t regId = operand.idx[0].offset; + + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = m_xRegs[regId].ccount; + info.type.alength = 0; + info.sclass = spv::StorageClassPrivate; + + DxbcRegisterPointer result; + result.type.ctype = info.type.ctype; + result.type.ccount = info.type.ccount; + result.id = m_module.opAccessChain( + getPointerTypeId(info), + m_xRegs.at(regId).varId, + 1, &vectorId.id); + + return result; + } + + bool DxbcCompiler::caseBlockIsFallthrough() const { + return m_lastOp != DxbcOpcode::Case + && m_lastOp != DxbcOpcode::Default + && m_lastOp != DxbcOpcode::Break + && m_lastOp != DxbcOpcode::Ret; + } + + + uint32_t DxbcCompiler::getUavCoherence(uint32_t registerId, DxbcUavFlags flags) { + // For any ROV with write access, we must ensure that + // availability operations happen within the locked scope. 
+ if (flags.test(DxbcUavFlag::RasterizerOrdered) + && (m_analysis->uavInfos[registerId].accessFlags & VK_ACCESS_SHADER_WRITE_BIT)) { + m_hasGloballyCoherentUav = true; + m_hasRasterizerOrderedUav = true; + return spv::ScopeQueueFamily; + } + + // Ignore any resources that can't both be read and written in + // the current shader, explicit availability/visibility operands + // are not useful in that case. + if (m_analysis->uavInfos[registerId].accessFlags != (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT)) + return 0; + + // If the globally coherent flag is set, the resource must be + // coherent across multiple workgroups of the same dispatch + if (flags.test(DxbcUavFlag::GloballyCoherent)) { + m_hasGloballyCoherentUav = true; + return spv::ScopeQueueFamily; + } + + // In compute shaders, UAVs are implicitly workgroup coherent, + // but we can rely on memory barrier instructions to make any + // access available and visible to the entire workgroup. + if (m_programInfo.type() == DxbcProgramType::ComputeShader) + return spv::ScopeInvocation; + + return 0; + } + + + uint32_t DxbcCompiler::getScalarTypeId(DxbcScalarType type) { + if (type == DxbcScalarType::Float64) + m_module.enableCapability(spv::CapabilityFloat64); + + if (type == DxbcScalarType::Sint64 || type == DxbcScalarType::Uint64) + m_module.enableCapability(spv::CapabilityInt64); + + switch (type) { + case DxbcScalarType::Uint32: return m_module.defIntType(32, 0); + case DxbcScalarType::Uint64: return m_module.defIntType(64, 0); + case DxbcScalarType::Sint32: return m_module.defIntType(32, 1); + case DxbcScalarType::Sint64: return m_module.defIntType(64, 1); + case DxbcScalarType::Float32: return m_module.defFloatType(32); + case DxbcScalarType::Float64: return m_module.defFloatType(64); + case DxbcScalarType::Bool: return m_module.defBoolType(); + } + + throw DxvkError("DxbcCompiler: Invalid scalar type"); + } + + + uint32_t DxbcCompiler::getVectorTypeId(const DxbcVectorType& type) { + uint32_t 
typeId = this->getScalarTypeId(type.ctype); + + if (type.ccount > 1) + typeId = m_module.defVectorType(typeId, type.ccount); + + return typeId; + } + + + uint32_t DxbcCompiler::getArrayTypeId(const DxbcArrayType& type) { + DxbcVectorType vtype; + vtype.ctype = type.ctype; + vtype.ccount = type.ccount; + + uint32_t typeId = this->getVectorTypeId(vtype); + + if (type.alength != 0) { + typeId = m_module.defArrayType(typeId, + m_module.constu32(type.alength)); + } + + return typeId; + } + + + uint32_t DxbcCompiler::getPointerTypeId(const DxbcRegisterInfo& type) { + return m_module.defPointerType( + this->getArrayTypeId(type.type), + type.sclass); + } + + + uint32_t DxbcCompiler::getSparseResultTypeId(uint32_t baseType) { + m_module.enableCapability(spv::CapabilitySparseResidency); + + uint32_t uintType = getScalarTypeId(DxbcScalarType::Uint32); + std::array typeIds = { uintType, baseType }; + return m_module.defStructType(typeIds.size(), typeIds.data()); + } + + + uint32_t DxbcCompiler::getFunctionId( + uint32_t functionNr) { + auto entry = m_subroutines.find(functionNr); + if (entry != m_subroutines.end()) + return entry->second; + + uint32_t functionId = m_module.allocateId(); + m_subroutines.insert({ functionNr, functionId }); + return functionId; + } + + + DxbcCompilerHsForkJoinPhase* DxbcCompiler::getCurrentHsForkJoinPhase() { + switch (m_hs.currPhaseType) { + case DxbcCompilerHsPhase::Fork: return &m_hs.forkPhases.at(m_hs.currPhaseId); + case DxbcCompilerHsPhase::Join: return &m_hs.joinPhases.at(m_hs.currPhaseId); + default: return nullptr; + } + } + +} diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h new file mode 100644 index 0000000..1dbf561 --- /dev/null +++ b/src/dxbc/dxbc_compiler.h @@ -0,0 +1,1276 @@ +#pragma once + +#include +#include + +#include "../spirv/spirv_module.h" + +#include "dxbc_analysis.h" +#include "dxbc_chunk_isgn.h" +#include "dxbc_decoder.h" +#include "dxbc_defs.h" +#include "dxbc_modinfo.h" +#include "dxbc_names.h" 
+#include "dxbc_util.h" + +namespace dxvk { + + /** + * \brief Vector type + * + * Convenience struct that stores a scalar + * type and a component count. The compiler + * can use this to generate SPIR-V types. + */ + struct DxbcVectorType { + DxbcScalarType ctype; + uint32_t ccount; + }; + + + /** + * \brief Array type + * + * Convenience struct that stores a scalar type, a + * component count and an array size. An array of + * length 0 will be evaluated to a vector type. The + * compiler can use this to generate SPIR-V types. + */ + struct DxbcArrayType { + DxbcScalarType ctype; + uint32_t ccount; + uint32_t alength; + }; + + + /** + * \brief Register info + * + * Stores the array type of a register and + * its storage class. The compiler can use + * this to generate SPIR-V pointer types. + */ + struct DxbcRegisterInfo { + DxbcArrayType type; + spv::StorageClass sclass; + }; + + + /** + * \brief Register value + * + * Stores a vector type and a SPIR-V ID that + * represents an intermediate value. This is + * used to track the type of such values. + */ + struct DxbcRegisterValue { + DxbcVectorType type; + uint32_t id; + }; + + + /** + * \brief Register pointer + * + * Stores a vector type and a SPIR-V ID that + * represents a pointer to such a vector. This + * can be used to load registers conveniently. + */ + struct DxbcRegisterPointer { + DxbcVectorType type; + uint32_t id; + }; + + + struct DxbcXreg { + uint32_t ccount = 0; + uint32_t alength = 0; + uint32_t varId = 0; + }; + + + struct DxbcGreg { + DxbcResourceType type = DxbcResourceType::Raw; + uint32_t elementStride = 0; + uint32_t elementCount = 0; + uint32_t varId = 0; + }; + + + /** + * \brief Specialization constant properties + * + * Stores the name, data type and initial + * value of a specialization constant. 
+ */ + struct DxbcSpecConstant { + DxbcScalarType ctype; + uint32_t ccount; + uint32_t value; + const char* name; + }; + + + /** + * \brief Helper struct for conditional execution + * + * Stores a set of labels required to implement either + * an if-then block or an if-then-else block. This is + * not used to implement control flow instructions. + */ + struct DxbcConditional { + uint32_t labelIf = 0; + uint32_t labelElse = 0; + uint32_t labelEnd = 0; + }; + + + struct DxbcXfbVar { + uint32_t varId = 0; + uint32_t streamId = 0; + uint32_t outputId = 0; + DxbcRegMask srcMask = 0; + DxbcRegMask dstMask = 0; + uint32_t location = 0; + uint32_t component = 0; + }; + + + /** + * \brief Vertex shader-specific structure + */ + struct DxbcCompilerVsPart { + uint32_t functionId = 0; + + uint32_t builtinVertexId = 0; + uint32_t builtinInstanceId = 0; + uint32_t builtinBaseVertex = 0; + uint32_t builtinBaseInstance = 0; + }; + + + /** + * \brief Geometry shader-specific structure + */ + struct DxbcCompilerGsPart { + DxbcPrimitive inputPrimitive = DxbcPrimitive::Undefined; + DxbcPrimitiveTopology outputTopology = DxbcPrimitiveTopology::Undefined; + uint32_t outputVertexCount = 0; + uint32_t functionId = 0; + + uint32_t builtinLayer = 0; + uint32_t builtinViewportId = 0; + uint32_t builtinInvocationId = 0; + uint32_t invocationCount = 0; + + bool needsOutputSetup = false; + }; + + + /** + * \brief Pixel shader-specific structure + */ + struct DxbcCompilerPsPart { + uint32_t functionId = 0; + + uint32_t builtinFragCoord = 0; + uint32_t builtinDepth = 0; + uint32_t builtinStencilRef = 0; + uint32_t builtinIsFrontFace = 0; + uint32_t builtinSampleId = 0; + uint32_t builtinSampleMaskIn = 0; + uint32_t builtinSampleMaskOut = 0; + uint32_t builtinLayer = 0; + uint32_t builtinViewportId = 0; + uint32_t builtinInnerCoverageId = 0; + + uint32_t pushConstantId = 0; + }; + + + /** + * \brief Compute shader-specific structure + */ + struct DxbcCompilerCsPart { + uint32_t functionId = 0; + + 
uint32_t workgroupSizeX = 0; + uint32_t workgroupSizeY = 0; + uint32_t workgroupSizeZ = 0; + + uint32_t builtinGlobalInvocationId = 0; + uint32_t builtinLocalInvocationId = 0; + uint32_t builtinLocalInvocationIndex = 0; + uint32_t builtinWorkgroupId = 0; + }; + + + /** + * \brief Hull shader fork/join phase + * + * Defines a function and built-in variables + * for a single fork or join phase sub-program. + */ + struct DxbcCompilerHsForkJoinPhase { + uint32_t functionId = 0; + uint32_t instanceCount = 1; + + uint32_t instanceId = 0; + uint32_t instanceIdPtr = 0; + }; + + + /** + * \brief Hull shader control point phase + * + * Defines the function for the control + * point phase program of a hull shader. + */ + struct DxbcCompilerHsControlPointPhase { + uint32_t functionId = 0; + }; + + + /** + * \brief Hull shader phase + * + * Used to identify the current + * phase and function ID. + */ + enum class DxbcCompilerHsPhase : uint32_t { + None, ///< No active phase + Decl, ///< \c hs_decls + ControlPoint, ///< \c hs_control_point_phase + Fork, ///< \c hs_fork_phase + Join, ///< \c hs_join_phase + }; + + + /** + * \brief Hull shader-specific structure + */ + struct DxbcCompilerHsPart { + DxbcCompilerHsPhase currPhaseType = DxbcCompilerHsPhase::None; + size_t currPhaseId = 0; + + float maxTessFactor = 64.0f; + + uint32_t vertexCountIn = 0; + uint32_t vertexCountOut = 0; + + uint32_t builtinInvocationId = 0; + uint32_t builtinTessLevelOuter = 0; + uint32_t builtinTessLevelInner = 0; + + uint32_t outputPerPatch = 0; + uint32_t outputPerVertex = 0; + + uint32_t invocationBlockBegin = 0; + uint32_t invocationBlockEnd = 0; + + uint32_t outputPerPatchMask = 0; + + DxbcCompilerHsControlPointPhase cpPhase; + std::vector forkPhases; + std::vector joinPhases; + }; + + + /** + * \brief Domain shader-specific structure + */ + struct DxbcCompilerDsPart { + uint32_t functionId = 0; + + uint32_t builtinTessCoord = 0; + uint32_t builtinTessLevelOuter = 0; + uint32_t 
builtinTessLevelInner = 0; + + uint32_t vertexCountIn = 0; + + uint32_t inputPerPatch = 0; + uint32_t inputPerVertex = 0; + }; + + + enum class DxbcCfgBlockType : uint32_t { + If, Loop, Switch, + }; + + + struct DxbcCfgBlockIf { + uint32_t ztestId; + uint32_t labelIf; + uint32_t labelElse; + uint32_t labelEnd; + size_t headerPtr; + }; + + + struct DxbcCfgBlockLoop { + uint32_t labelHeader; + uint32_t labelBegin; + uint32_t labelContinue; + uint32_t labelBreak; + }; + + + struct DxbcSwitchLabel { + SpirvSwitchCaseLabel desc; + DxbcSwitchLabel* next; + }; + + + struct DxbcCfgBlockSwitch { + size_t insertPtr; + uint32_t selectorId; + uint32_t labelBreak; + uint32_t labelCase; + uint32_t labelDefault; + DxbcSwitchLabel* labelCases; + }; + + + struct DxbcCfgBlock { + DxbcCfgBlockType type; + + union { + DxbcCfgBlockIf b_if; + DxbcCfgBlockLoop b_loop; + DxbcCfgBlockSwitch b_switch; + }; + }; + + + struct DxbcBufferInfo { + DxbcImageInfo image; + DxbcScalarType stype; + DxbcResourceType type; + uint32_t typeId; + uint32_t varId; + uint32_t stride; + uint32_t coherence; + bool isSsbo; + }; + + + /** + * \brief DXBC to SPIR-V shader compiler + * + * Processes instructions from a DXBC shader and creates + * a DXVK shader object, which contains the SPIR-V module + * and information about the shader resource bindings. + */ + class DxbcCompiler { + + public: + + DxbcCompiler( + const std::string& fileName, + const DxbcModuleInfo& moduleInfo, + const DxbcProgramInfo& programInfo, + const Rc& isgn, + const Rc& osgn, + const Rc& psgn, + const DxbcAnalysisInfo& analysis); + ~DxbcCompiler(); + + /** + * \brief Processes a single instruction + * \param [in] ins The instruction + */ + void processInstruction( + const DxbcShaderInstruction& ins); + + /** + * \brief Emits transform feedback passthrough + * + * Writes all captured input variables to the + * corresponding xfb outputs, and sets up the + * geometry shader for point-to-point mode. 
+ */ + void processXfbPassthrough(); + + struct ShaderCreateInfo { + + VkShaderStageFlagBits stage; + uint32_t inputMask; + uint32_t outputMask; + unsigned long uniformSize; + char *uniformData; + VkPrimitiveTopology outputTopology; + uint32_t patchVertexCount; + size_t pushConstSize; + int32_t xfbRasterizedStream; + int xfbStrides[4]; + SpirvCodeBuffer code; + }; + + /** + * \brief Finalizes the shader + * \returns The final shader object + */ + ShaderCreateInfo finalize(); + + private: + + DxbcModuleInfo m_moduleInfo; + DxbcProgramInfo m_programInfo; + SpirvModule m_module; + + Rc m_isgn; + Rc m_osgn; + Rc m_psgn; + + const DxbcAnalysisInfo* m_analysis; + + /////////////////////////////////////////////////////// + // Resource slot description for the shader. This will + // be used to map D3D11 bindings to DXVK bindings. + //std::vector m_bindings; + + //////////////////////////////////////////////// + // Temporary r# vector registers with immediate + // indexing, and x# vector array registers. + std::vector m_rRegs; + std::vector m_xRegs; + + ///////////////////////////////////////////// + // Thread group shared memory (g#) registers + std::vector m_gRegs; + + /////////////////////////////////////////////////////////// + // v# registers as defined by the shader. The type of each + // of these inputs is either float4 or an array of float4. + std::array< + DxbcRegisterPointer, + DxbcMaxInterfaceRegs> m_vRegs; + std::vector m_vMappings; + + ////////////////////////////////////////////////////////// + // o# registers as defined by the shader. In the fragment + // shader stage, these registers are typed by the signature, + // in all other stages, they are float4 registers or arrays. 
+ std::array< + DxbcRegisterPointer, + DxbcMaxInterfaceRegs> m_oRegs; + std::vector m_oMappings; + + ///////////////////////////////////////////// + // xfb output registers for geometry shaders + std::vector m_xfbVars; + + ////////////////////////////////////////////////////// + // Shader resource variables. These provide access to + // constant buffers, samplers, textures, and UAVs. + std::array m_constantBuffers; + std::array m_samplers; + std::array m_textures; + std::array m_uavs; + + bool m_hasGloballyCoherentUav = false; + bool m_hasRasterizerOrderedUav = false; + + /////////////////////////////////////////////// + // Control flow information. Stores labels for + // currently active if-else blocks and loops. + std::vector m_controlFlowBlocks; + + ////////////////////////////////////////////// + // Function state tracking. Required in order + // to properly end functions in some cases. + bool m_insideFunction = false; + + /////////////////////////////////////////////////////////// + // Array of input values. Since v# registers are indexable + // in DXBC, we need to copy them into an array first. + uint32_t m_vArrayLength = 0; + uint32_t m_vArrayLengthId = 0; + + uint32_t m_vArray = 0; + + //////////////////////////////////////////////////// + // Per-vertex input and output blocks. Depending on + // the shader stage, these may be declared as arrays. + uint32_t m_positionIn = 0; + uint32_t m_positionOut = 0; + + uint32_t m_clipDistances = 0; + uint32_t m_cullDistances = 0; + + uint32_t m_primitiveIdIn = 0; + uint32_t m_primitiveIdOut = 0; + + uint32_t m_pointSizeOut = 0; + + ////////////////////////////////////////////////// + // Immediate constant buffer. If defined, this is + // an array of four-component uint32 vectors. + uint32_t m_immConstBuf = 0; + std::vector m_immConstData; + + /////////////////////////////////////////////////// + // Sample pos array. If defined, this is an array + // of 32 four-component float vectors. 
+ uint32_t m_samplePositions = 0; + + //////////////////////////////////////////// + // Struct type used for UAV counter buffers + uint32_t m_uavCtrStructType = 0; + uint32_t m_uavCtrPointerType = 0; + + //////////////////////////////// + // Function IDs for subroutines + std::unordered_map m_subroutines; + + /////////////////////////////////////////////////// + // Entry point description - we'll need to declare + // the function ID and all input/output variables. + uint32_t m_entryPointId = 0; + + //////////////////////////////////////////// + // Inter-stage shader interface slots. Also + // covers vertex input and fragment output. + uint32_t m_inputMask = 0u; + uint32_t m_outputMask = 0u; + + /////////////////////////////////// + // Shader-specific data structures + DxbcCompilerVsPart m_vs; + DxbcCompilerHsPart m_hs; + DxbcCompilerDsPart m_ds; + DxbcCompilerGsPart m_gs; + DxbcCompilerPsPart m_ps; + DxbcCompilerCsPart m_cs; + + ////////////////////// + // Global state stuff + bool m_precise = true; + + DxbcOpcode m_lastOp = DxbcOpcode::Nop; + DxbcOpcode m_currOp = DxbcOpcode::Nop; + + VkPrimitiveTopology m_outputTopology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; + + ///////////////////////////////////////////////////// + // Shader interface and metadata declaration methods + void emitDcl( + const DxbcShaderInstruction& ins); + + void emitDclGlobalFlags( + const DxbcShaderInstruction& ins); + + void emitDclTemps( + const DxbcShaderInstruction& ins); + + void emitDclIndexableTemp( + const DxbcShaderInstruction& ins); + + void emitDclInterfaceReg( + const DxbcShaderInstruction& ins); + + void emitDclInput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im); + + void emitDclOutput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im); + + void emitDclConstantBuffer( + const DxbcShaderInstruction& ins); + + void emitDclConstantBufferVar( + uint32_t regIdx, + 
uint32_t numConstants, + const char* name); + + void emitDclSampler( + const DxbcShaderInstruction& ins); + + void emitDclStream( + const DxbcShaderInstruction& ins); + + void emitDclResourceTyped( + const DxbcShaderInstruction& ins); + + void emitDclResourceRawStructured( + const DxbcShaderInstruction& ins); + + void emitDclThreadGroupSharedMemory( + const DxbcShaderInstruction& ins); + + void emitDclGsInputPrimitive( + const DxbcShaderInstruction& ins); + + void emitDclGsOutputTopology( + const DxbcShaderInstruction& ins); + + void emitDclMaxOutputVertexCount( + const DxbcShaderInstruction& ins); + + void emitDclInputControlPointCount( + const DxbcShaderInstruction& ins); + + void emitDclOutputControlPointCount( + const DxbcShaderInstruction& ins); + + void emitDclHsMaxTessFactor( + const DxbcShaderInstruction& ins); + + void emitDclTessDomain( + const DxbcShaderInstruction& ins); + + void emitDclTessPartitioning( + const DxbcShaderInstruction& ins); + + void emitDclTessOutputPrimitive( + const DxbcShaderInstruction& ins); + + void emitDclThreadGroup( + const DxbcShaderInstruction& ins); + + void emitDclGsInstanceCount( + const DxbcShaderInstruction& ins); + + uint32_t emitDclUavCounter( + uint32_t regId); + + //////////////////////// + // Custom data handlers + void emitDclImmediateConstantBuffer( + const DxbcShaderInstruction& ins); + + void emitDclImmediateConstantBufferBaked( + uint32_t dwordCount, + const uint32_t* dwordArray); + + void emitDclImmediateConstantBufferUbo( + uint32_t dwordCount, + const uint32_t* dwordArray); + + void emitCustomData( + const DxbcShaderInstruction& ins); + + ////////////////////////////// + // Instruction class handlers + void emitVectorAlu( + const DxbcShaderInstruction& ins); + + void emitVectorCmov( + const DxbcShaderInstruction& ins); + + void emitVectorCmp( + const DxbcShaderInstruction& ins); + + void emitVectorDeriv( + const DxbcShaderInstruction& ins); + + void emitVectorDot( + const DxbcShaderInstruction& ins); + + 
void emitVectorIdiv( + const DxbcShaderInstruction& ins); + + void emitVectorImul( + const DxbcShaderInstruction& ins); + + void emitVectorMsad( + const DxbcShaderInstruction& ins); + + void emitVectorShift( + const DxbcShaderInstruction& ins); + + void emitVectorSinCos( + const DxbcShaderInstruction& ins); + + void emitGeometryEmit( + const DxbcShaderInstruction& ins); + + void emitAtomic( + const DxbcShaderInstruction& ins); + + void emitAtomicCounter( + const DxbcShaderInstruction& ins); + + void emitBarrier( + const DxbcShaderInstruction& ins); + + void emitBitExtract( + const DxbcShaderInstruction& ins); + + void emitBitInsert( + const DxbcShaderInstruction& ins); + + void emitBitScan( + const DxbcShaderInstruction& ins); + + void emitBufferQuery( + const DxbcShaderInstruction& ins); + + void emitBufferLoad( + const DxbcShaderInstruction& ins); + + void emitBufferStore( + const DxbcShaderInstruction& ins); + + void emitConvertFloat16( + const DxbcShaderInstruction& ins); + + void emitConvertFloat64( + const DxbcShaderInstruction& ins); + + void emitHullShaderPhase( + const DxbcShaderInstruction& ins); + + void emitHullShaderInstCnt( + const DxbcShaderInstruction& ins); + + void emitInterpolate( + const DxbcShaderInstruction& ins); + + void emitSparseCheckAccess( + const DxbcShaderInstruction& ins); + + void emitTextureQuery( + const DxbcShaderInstruction& ins); + + void emitTextureQueryLod( + const DxbcShaderInstruction& ins); + + void emitTextureQueryMs( + const DxbcShaderInstruction& ins); + + void emitTextureQueryMsPos( + const DxbcShaderInstruction& ins); + + void emitTextureFetch( + const DxbcShaderInstruction& ins); + + void emitTextureGather( + const DxbcShaderInstruction& ins); + + void emitTextureSample( + const DxbcShaderInstruction& ins); + + void emitTypedUavLoad( + const DxbcShaderInstruction& ins); + + void emitTypedUavStore( + const DxbcShaderInstruction& ins); + + ///////////////////////////////////// + // Control flow instruction handlers + 
void emitControlFlowIf( + const DxbcShaderInstruction& ins); + + void emitControlFlowElse( + const DxbcShaderInstruction& ins); + + void emitControlFlowEndIf( + const DxbcShaderInstruction& ins); + + void emitControlFlowSwitch( + const DxbcShaderInstruction& ins); + + void emitControlFlowCase( + const DxbcShaderInstruction& ins); + + void emitControlFlowDefault( + const DxbcShaderInstruction& ins); + + void emitControlFlowEndSwitch( + const DxbcShaderInstruction& ins); + + void emitControlFlowLoop( + const DxbcShaderInstruction& ins); + + void emitControlFlowEndLoop( + const DxbcShaderInstruction& ins); + + void emitControlFlowBreak( + const DxbcShaderInstruction& ins); + + void emitControlFlowBreakc( + const DxbcShaderInstruction& ins); + + void emitControlFlowRet( + const DxbcShaderInstruction& ins); + + void emitControlFlowRetc( + const DxbcShaderInstruction& ins); + + void emitControlFlowDiscard( + const DxbcShaderInstruction& ins); + + void emitControlFlowLabel( + const DxbcShaderInstruction& ins); + + void emitControlFlowCall( + const DxbcShaderInstruction& ins); + + void emitControlFlowCallc( + const DxbcShaderInstruction& ins); + + void emitControlFlow( + const DxbcShaderInstruction& ins); + + //////////////////////////////////////////////// + // Constant building methods. These are used to + // generate constant vectors that store the same + // value in each component. 
+ DxbcRegisterValue emitBuildConstVecf32( + float x, + float y, + float z, + float w, + const DxbcRegMask& writeMask); + + DxbcRegisterValue emitBuildConstVecu32( + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w, + const DxbcRegMask& writeMask); + + DxbcRegisterValue emitBuildConstVeci32( + int32_t x, + int32_t y, + int32_t z, + int32_t w, + const DxbcRegMask& writeMask); + + DxbcRegisterValue emitBuildConstVecf64( + double xy, + double zw, + const DxbcRegMask& writeMask); + + DxbcRegisterValue emitBuildVector( + DxbcRegisterValue scalar, + uint32_t count); + + DxbcRegisterValue emitBuildZeroVector( + DxbcVectorType type); + + ///////////////////////////////////////// + // Generic register manipulation methods + DxbcRegisterValue emitRegisterBitcast( + DxbcRegisterValue srcValue, + DxbcScalarType dstType); + + DxbcRegisterValue emitRegisterSwizzle( + DxbcRegisterValue value, + DxbcRegSwizzle swizzle, + DxbcRegMask writeMask); + + DxbcRegisterValue emitRegisterExtract( + DxbcRegisterValue value, + DxbcRegMask mask); + + DxbcRegisterValue emitRegisterInsert( + DxbcRegisterValue dstValue, + DxbcRegisterValue srcValue, + DxbcRegMask srcMask); + + DxbcRegisterValue emitRegisterConcat( + DxbcRegisterValue value1, + DxbcRegisterValue value2); + + DxbcRegisterValue emitRegisterExtend( + DxbcRegisterValue value, + uint32_t size); + + DxbcRegisterValue emitRegisterAbsolute( + DxbcRegisterValue value); + + DxbcRegisterValue emitRegisterNegate( + DxbcRegisterValue value); + + DxbcRegisterValue emitRegisterZeroTest( + DxbcRegisterValue value, + DxbcZeroTest test); + + DxbcRegisterValue emitRegisterMaskBits( + DxbcRegisterValue value, + uint32_t mask); + + DxbcRegisterValue emitSrcOperandModifiers( + DxbcRegisterValue value, + DxbcRegModifiers modifiers); + + DxbcRegisterValue emitDstOperandModifiers( + DxbcRegisterValue value, + DxbcOpModifiers modifiers); + + /////////////////////////// + // Sparse feedback methods + uint32_t emitExtractSparseTexel( + uint32_t 
texelTypeId, + uint32_t resultId); + + void emitStoreSparseFeedback( + const DxbcRegister& feedbackRegister, + uint32_t resultId); + + //////////////////////////////// + // Pointer manipulation methods + DxbcRegisterPointer emitArrayAccess( + DxbcRegisterPointer pointer, + spv::StorageClass sclass, + uint32_t index); + + /////////////////////////////////////// + // Image register manipulation methods + uint32_t emitLoadSampledImage( + const DxbcShaderResource& textureResource, + const DxbcSampler& samplerResource, + bool isDepthCompare); + + //////////////////////// + // Address load methods + DxbcRegisterPointer emitGetTempPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetIndexableTempPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetInputPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetOutputPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetConstBufPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetImmConstBufPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetOperandPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetAtomicPointer( + const DxbcRegister& operand, + const DxbcRegister& address); + + /////////////////////////////// + // Resource load/store methods + DxbcRegisterValue emitRawBufferLoad( + const DxbcRegister& operand, + DxbcRegisterValue elementIndex, + DxbcRegMask writeMask, + uint32_t& sparseFeedbackId); + + void emitRawBufferStore( + const DxbcRegister& operand, + DxbcRegisterValue elementIndex, + DxbcRegisterValue value); + + ////////////////////////// + // Resource query methods + DxbcRegisterValue emitQueryBufferSize( + const DxbcRegister& resource); + + DxbcRegisterValue emitQueryTexelBufferSize( + const DxbcRegister& resource); + + DxbcRegisterValue emitQueryTextureLods( + const DxbcRegister& resource); + + DxbcRegisterValue emitQueryTextureSamples( + const DxbcRegister& resource); + + DxbcRegisterValue 
emitQueryTextureSize( + const DxbcRegister& resource, + DxbcRegisterValue lod); + + //////////////////////////////////// + // Buffer index calculation methods + DxbcRegisterValue emitCalcBufferIndexStructured( + DxbcRegisterValue structId, + DxbcRegisterValue structOffset, + uint32_t structStride); + + DxbcRegisterValue emitCalcBufferIndexRaw( + DxbcRegisterValue byteOffset); + + DxbcRegisterValue emitCalcTexCoord( + DxbcRegisterValue coordVector, + const DxbcImageInfo& imageInfo); + + DxbcRegisterValue emitLoadTexCoord( + const DxbcRegister& coordReg, + const DxbcImageInfo& imageInfo); + + ////////////////////////////// + // Operand load/store methods + DxbcRegisterValue emitIndexLoad( + DxbcRegIndex index); + + DxbcRegisterValue emitValueLoad( + DxbcRegisterPointer ptr); + + void emitValueStore( + DxbcRegisterPointer ptr, + DxbcRegisterValue value, + DxbcRegMask writeMask); + + DxbcRegisterValue emitRegisterLoadRaw( + const DxbcRegister& reg); + + DxbcRegisterValue emitConstantBufferLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask); + + DxbcRegisterValue emitRegisterLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask); + + void emitRegisterStore( + const DxbcRegister& reg, + DxbcRegisterValue value); + + //////////////////////////// + // Input/output preparation + void emitInputSetup(); + void emitInputSetup(uint32_t vertexCount); + + void emitOutputSetup(); + void emitOutputDepthClamp(); + + void emitInitWorkgroupMemory(); + + ////////////////////////////////////////// + // System value load methods (per shader) + DxbcRegisterValue emitVsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask); + + DxbcRegisterValue emitGsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask, + uint32_t vertexId); + + DxbcRegisterValue emitPsSystemValueLoad( + DxbcSystemValue sv, + DxbcRegMask mask); + + /////////////////////////////////////////// + // System value store methods (per shader) + void emitVsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask 
mask, + const DxbcRegisterValue& value); + + void emitHsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value); + + void emitDsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value); + + void emitGsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value); + + void emitPsSystemValueStore( + DxbcSystemValue sv, + DxbcRegMask mask, + const DxbcRegisterValue& value); + + /////////////////////////////// + // Special system value stores + void emitClipCullStore( + DxbcSystemValue sv, + uint32_t dstArray); + + void emitClipCullLoad( + DxbcSystemValue sv, + uint32_t srcArray); + + void emitPointSizeStore(); + + ////////////////////////////////////// + // Common function definition methods + void emitInit(); + + void emitFunctionBegin( + uint32_t entryPoint, + uint32_t returnType, + uint32_t funcType); + + void emitFunctionEnd(); + + void emitFunctionLabel(); + + void emitMainFunctionBegin(); + + ///////////////////////////////// + // Shader initialization methods + void emitVsInit(); + void emitHsInit(); + void emitDsInit(); + void emitGsInit(); + void emitPsInit(); + void emitCsInit(); + + /////////////////////////////// + // Shader finalization methods + void emitVsFinalize(); + void emitHsFinalize(); + void emitDsFinalize(); + void emitGsFinalize(); + void emitPsFinalize(); + void emitCsFinalize(); + + /////////////////////// + // Xfb related methods + void emitXfbOutputDeclarations(); + + void emitXfbOutputSetup( + uint32_t streamId, + bool passthrough); + + /////////////////////////////// + // Hull shader phase methods + void emitHsControlPointPhase( + const DxbcCompilerHsControlPointPhase& phase); + + void emitHsForkJoinPhase( + const DxbcCompilerHsForkJoinPhase& phase); + + void emitHsPhaseBarrier(); + + void emitHsInvocationBlockBegin( + uint32_t count); + + void emitHsInvocationBlockEnd(); + + void emitHsOutputSetup(); + + uint32_t 
emitTessInterfacePerPatch( + spv::StorageClass storageClass); + + uint32_t emitTessInterfacePerVertex( + spv::StorageClass storageClass, + uint32_t vertexCount); + + ////////////// + // Misc stuff + void emitDclInputArray( + uint32_t vertexCount); + + uint32_t emitDclClipCullDistanceArray( + uint32_t length, + spv::BuiltIn builtIn, + spv::StorageClass storageClass); + + DxbcCompilerHsControlPointPhase emitNewHullShaderControlPointPhase(); + + DxbcCompilerHsControlPointPhase emitNewHullShaderPassthroughPhase(); + + DxbcCompilerHsForkJoinPhase emitNewHullShaderForkJoinPhase(); + + uint32_t emitSamplePosArray(); + + void emitFloatControl(); + + /////////////////////////////// + // Variable definition methods + uint32_t emitNewVariable( + const DxbcRegisterInfo& info); + + uint32_t emitNewBuiltinVariable( + const DxbcRegisterInfo& info, + spv::BuiltIn builtIn, + const char* name); + + uint32_t emitBuiltinTessLevelOuter( + spv::StorageClass storageClass); + + uint32_t emitBuiltinTessLevelInner( + spv::StorageClass storageClass); + + uint32_t emitPushConstants(); + + //////////////// + // Misc methods + DxbcCfgBlock* cfgFindBlock( + const std::initializer_list& types); + + DxbcBufferInfo getBufferInfo( + const DxbcRegister& reg); + + uint32_t getTexSizeDim( + const DxbcImageInfo& imageType) const; + + uint32_t getTexLayerDim( + const DxbcImageInfo& imageType) const; + + uint32_t getTexCoordDim( + const DxbcImageInfo& imageType) const; + + DxbcRegMask getTexCoordMask( + const DxbcImageInfo& imageType) const; + + DxbcVectorType getInputRegType( + uint32_t regIdx) const; + + DxbcVectorType getOutputRegType( + uint32_t regIdx) const; + + DxbcImageInfo getResourceType( + DxbcResourceDim resourceType, + bool isUav) const; + + spv::ImageFormat getScalarImageFormat( + DxbcScalarType type) const; + + bool isDoubleType( + DxbcScalarType type) const; + + DxbcRegisterPointer getIndexableTempPtr( + const DxbcRegister& operand, + DxbcRegisterValue vectorId); + + bool 
caseBlockIsFallthrough() const; + + uint32_t getUavCoherence( + uint32_t registerId, + DxbcUavFlags flags); + + /////////////////////////// + // Type definition methods + uint32_t getScalarTypeId( + DxbcScalarType type); + + uint32_t getVectorTypeId( + const DxbcVectorType& type); + + uint32_t getArrayTypeId( + const DxbcArrayType& type); + + uint32_t getPointerTypeId( + const DxbcRegisterInfo& type); + + uint32_t getSparseResultTypeId( + uint32_t baseType); + + uint32_t getFunctionId( + uint32_t functionNr); + + DxbcCompilerHsForkJoinPhase* getCurrentHsForkJoinPhase(); + + }; + +} diff --git a/src/dxbc/dxbc_decoder.cpp b/src/dxbc/dxbc_decoder.cpp new file mode 100644 index 0000000..0de7dda --- /dev/null +++ b/src/dxbc/dxbc_decoder.cpp @@ -0,0 +1,360 @@ +#include "dxbc_decoder.h" + +namespace dxvk { + + const uint32_t* DxbcCodeSlice::ptrAt(uint32_t id) const { + if (m_ptr + id >= m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return m_ptr + id; + } + + + uint32_t DxbcCodeSlice::at(uint32_t id) const { + if (m_ptr + id >= m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return m_ptr[id]; + } + + + uint32_t DxbcCodeSlice::read() { + if (m_ptr >= m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return *(m_ptr++); + } + + + DxbcCodeSlice DxbcCodeSlice::take(uint32_t n) const { + if (m_ptr + n > m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return DxbcCodeSlice(m_ptr, m_ptr + n); + } + + + DxbcCodeSlice DxbcCodeSlice::skip(uint32_t n) const { + if (m_ptr + n > m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return DxbcCodeSlice(m_ptr + n, m_end); + } + + + + void DxbcDecodeContext::decodeInstruction(DxbcCodeSlice& code) { + const uint32_t token0 = code.at(0); + + // Initialize the instruction structure. Some of these values + // may not get written otherwise while decoding the instruction. 
+ m_instruction.op = static_cast(bit::extract(token0, 0, 10)); + m_instruction.opClass = DxbcInstClass::Undefined; + m_instruction.sampleControls = { 0, 0, 0 }; + m_instruction.dstCount = 0; + m_instruction.srcCount = 0; + m_instruction.immCount = 0; + m_instruction.dst = m_dstOperands.data(); + m_instruction.src = m_srcOperands.data(); + m_instruction.imm = m_immOperands.data(); + m_instruction.customDataType = DxbcCustomDataClass::Comment; + m_instruction.customDataSize = 0; + m_instruction.customData = nullptr; + + // Reset the index pointer, which may still contain + // a non-zero value from the previous iteration + m_indexId = 0; + + // Instruction length, in DWORDs. This includes the token + // itself and any other prefix that an instruction may have. + uint32_t length = 0; + + if (m_instruction.op == DxbcOpcode::CustomData) { + length = code.at(1); + this->decodeCustomData(code.take(length)); + } else { + length = bit::extract(token0, 24, 30); + this->decodeOperation(code.take(length)); + } + + // Advance the caller's slice to the next token so that + // they can make consecutive calls to decodeInstruction() + code = code.skip(length); + } + + + void DxbcDecodeContext::decodeCustomData(DxbcCodeSlice code) { + const uint32_t blockLength = code.at(1); + + if (blockLength < 2) { + Logger::err("DxbcDecodeContext: Invalid custom data block"); + return; + } + + // Custom data blocks have their own instruction class + m_instruction.op = DxbcOpcode::CustomData; + m_instruction.opClass = DxbcInstClass::CustomData; + + // We'll point into the code buffer rather than making a copy + m_instruction.customDataType = static_cast( + bit::extract(code.at(0), 11, 31)); + m_instruction.customDataSize = blockLength - 2; + m_instruction.customData = code.ptrAt(2); + } + + + void DxbcDecodeContext::decodeOperation(DxbcCodeSlice code) { + uint32_t token = code.read(); + + // Result modifiers, which are applied to common ALU ops + m_instruction.modifiers.saturate = 
!!bit::extract(token, 13, 13); + m_instruction.modifiers.precise = !!bit::extract(token, 19, 22); + + // Opcode controls. It will depend on the + // opcode itself which ones are valid. + m_instruction.controls = DxbcShaderOpcodeControls(token); + + // Process extended opcode tokens + while (bit::extract(token, 31, 31)) { + token = code.read(); + + const DxbcExtOpcode extOpcode + = static_cast(bit::extract(token, 0, 5)); + + switch (extOpcode) { + case DxbcExtOpcode::SampleControls: { + struct { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + aoffimmi.u = bit::extract(token, 9, 12); + aoffimmi.v = bit::extract(token, 13, 16); + aoffimmi.w = bit::extract(token, 17, 20); + + // Four-bit signed numbers, sign-extend them + m_instruction.sampleControls.u = aoffimmi.u; + m_instruction.sampleControls.v = aoffimmi.v; + m_instruction.sampleControls.w = aoffimmi.w; + } break; + + case DxbcExtOpcode::ResourceDim: + case DxbcExtOpcode::ResourceReturnType: + break; // part of resource description + + default: + Logger::warn(str::format( + "DxbcDecodeContext: Unhandled extended opcode: ", + extOpcode)); + } + } + + // Retrieve the instruction format in order to parse the + // operands. Doing this mostly automatically means that + // the compiler can rely on the operands being valid. + const DxbcInstFormat format = dxbcInstructionFormat(m_instruction.op); + m_instruction.opClass = format.instructionClass; + + for (uint32_t i = 0; i < format.operandCount; i++) + this->decodeOperand(code, format.operands[i]); + } + + + void DxbcDecodeContext::decodeComponentSelection(DxbcRegister& reg, uint32_t token) { + // Pick the correct component selection mode based on the + // component count. We'll simplify this here so that the + // compiler can assume that everything is a 4D vector. + reg.componentCount = static_cast(bit::extract(token, 0, 1)); + + switch (reg.componentCount) { + // No components - used for samplers etc. 
+ case DxbcComponentCount::Component0: + reg.mask = DxbcRegMask(false, false, false, false); + reg.swizzle = DxbcRegSwizzle(0, 0, 0, 0); + break; + + // One component - used for immediates + // and a few built-in registers. + case DxbcComponentCount::Component1: + reg.mask = DxbcRegMask(true, false, false, false); + reg.swizzle = DxbcRegSwizzle(0, 0, 0, 0); + break; + + // Four components - everything else. This requires us + // to actually parse the component selection mode. + case DxbcComponentCount::Component4: { + const DxbcRegMode componentMode = + static_cast(bit::extract(token, 2, 3)); + + switch (componentMode) { + // Write mask for destination operands + case DxbcRegMode::Mask: + reg.mask = bit::extract(token, 4, 7); + reg.swizzle = DxbcRegSwizzle(0, 1, 2, 3); + break; + + // Swizzle for source operands (including resources) + case DxbcRegMode::Swizzle: + reg.mask = DxbcRegMask(true, true, true, true); + reg.swizzle = DxbcRegSwizzle( + bit::extract(token, 4, 5), + bit::extract(token, 6, 7), + bit::extract(token, 8, 9), + bit::extract(token, 10, 11)); + break; + + // Selection of one component. We can generate both a + // mask and a swizzle for this so that the compiler + // won't have to deal with this case specifically. 
+ case DxbcRegMode::Select1: { + const uint32_t n = bit::extract(token, 4, 5); + reg.mask = DxbcRegMask(n == 0, n == 1, n == 2, n == 3); + reg.swizzle = DxbcRegSwizzle(n, n, n, n); + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component selection mode"); + } + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component count"); + } + } + + + void DxbcDecodeContext::decodeOperandExtensions(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token) { + while (bit::extract(token, 31, 31)) { + token = code.read(); + + // Type of the extended operand token + const DxbcOperandExt extTokenType = + static_cast(bit::extract(token, 0, 5)); + + switch (extTokenType) { + // Operand modifiers, which are used to manipulate the + // value of a source operand during the load operation + case DxbcOperandExt::OperandModifier: + reg.modifiers = bit::extract(token, 6, 13); + break; + + default: + Logger::warn(str::format( + "DxbcDecodeContext: Unhandled extended operand token: ", + extTokenType)); + } + } + } + + + void DxbcDecodeContext::decodeOperandImmediates(DxbcCodeSlice& code, DxbcRegister& reg) { + if (reg.type == DxbcOperandType::Imm32 + || reg.type == DxbcOperandType::Imm64) { + switch (reg.componentCount) { + // This is commonly used if only one vector + // component is involved in an operation + case DxbcComponentCount::Component1: { + reg.imm.u32_1 = code.read(); + } break; + + // Typical four-component vector + case DxbcComponentCount::Component4: { + reg.imm.u32_4[0] = code.read(); + reg.imm.u32_4[1] = code.read(); + reg.imm.u32_4[2] = code.read(); + reg.imm.u32_4[3] = code.read(); + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component count for immediate operand"); + } + } + } + + + void DxbcDecodeContext::decodeOperandIndex(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token) { + reg.idxDim = bit::extract(token, 20, 21); + + for (uint32_t i = 0; i < reg.idxDim; i++) { + // An index can be encoded in various 
different ways + const DxbcOperandIndexRepresentation repr = + static_cast( + bit::extract(token, 22 + 3 * i, 24 + 3 * i)); + + switch (repr) { + case DxbcOperandIndexRepresentation::Imm32: + reg.idx[i].offset = static_cast(code.read()); + reg.idx[i].relReg = nullptr; + break; + + case DxbcOperandIndexRepresentation::Relative: + reg.idx[i].offset = 0; + reg.idx[i].relReg = &m_indices.at(m_indexId); + + this->decodeRegister(code, + m_indices.at(m_indexId++), + DxbcScalarType::Sint32); + break; + + case DxbcOperandIndexRepresentation::Imm32Relative: + reg.idx[i].offset = static_cast(code.read()); + reg.idx[i].relReg = &m_indices.at(m_indexId); + + this->decodeRegister(code, + m_indices.at(m_indexId++), + DxbcScalarType::Sint32); + break; + + default: + Logger::warn(str::format( + "DxbcDecodeContext: Unhandled index representation: ", + repr)); + } + } + } + + + void DxbcDecodeContext::decodeRegister(DxbcCodeSlice& code, DxbcRegister& reg, DxbcScalarType type) { + const uint32_t token = code.read(); + + reg.type = static_cast(bit::extract(token, 12, 19)); + reg.dataType = type; + reg.modifiers = 0; + reg.idxDim = 0; + + for (uint32_t i = 0; i < DxbcMaxRegIndexDim; i++) { + reg.idx[i].relReg = nullptr; + reg.idx[i].offset = 0; + } + + this->decodeComponentSelection(reg, token); + this->decodeOperandExtensions(code, reg, token); + this->decodeOperandImmediates(code, reg); + this->decodeOperandIndex(code, reg, token); + } + + + void DxbcDecodeContext::decodeImm32(DxbcCodeSlice& code, DxbcImmediate& imm, DxbcScalarType type) { + imm.u32 = code.read(); + } + + + void DxbcDecodeContext::decodeOperand(DxbcCodeSlice& code, const DxbcInstOperandFormat& format) { + switch (format.kind) { + case DxbcOperandKind::DstReg: { + const uint32_t operandId = m_instruction.dstCount++; + this->decodeRegister(code, m_dstOperands.at(operandId), format.type); + } break; + + case DxbcOperandKind::SrcReg: { + const uint32_t operandId = m_instruction.srcCount++; + this->decodeRegister(code, 
m_srcOperands.at(operandId), format.type); + } break; + + case DxbcOperandKind::Imm32: { + const uint32_t operandId = m_instruction.immCount++; + this->decodeImm32(code, m_immOperands.at(operandId), format.type); + } break; + + default: + throw DxvkError("DxbcDecodeContext: Invalid operand format"); + } + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_decoder.h b/src/dxbc/dxbc_decoder.h new file mode 100644 index 0000000..326e1aa --- /dev/null +++ b/src/dxbc/dxbc_decoder.h @@ -0,0 +1,505 @@ +#pragma once + +#include + +#include "dxbc_common.h" +#include "dxbc_decoder.h" +#include "dxbc_defs.h" +#include "dxbc_enums.h" +#include "dxbc_names.h" + +namespace dxvk { + + constexpr size_t DxbcMaxRegIndexDim = 3; + + struct DxbcRegister; + + /** + * \brief Source operand modifiers + * + * These are applied after loading + * an operand register. + */ + enum class DxbcRegModifier : uint32_t { + Neg = 0, + Abs = 1, + }; + + using DxbcRegModifiers = Flags; + + + /** + * \brief Constant buffer binding + * + * Stores information required to + * access a constant buffer. + */ + struct DxbcConstantBuffer { + uint32_t varId = 0; + uint32_t size = 0; + }; + + /** + * \brief Sampler binding + * + * Stores a sampler variable that can be + * used together with a texture resource. + */ + struct DxbcSampler { + uint32_t varId = 0; + uint32_t typeId = 0; + }; + + + /** + * \brief Image type information + */ + struct DxbcImageInfo { + spv::Dim dim = spv::Dim1D; + uint32_t array = 0; + uint32_t ms = 0; + uint32_t sampled = 0; + VkImageViewType vtype = VK_IMAGE_VIEW_TYPE_MAX_ENUM; + }; + + + /** + * \brief Shader resource binding + * + * Stores a resource variable + * and associated type IDs. 
+ */ + struct DxbcShaderResource { + DxbcResourceType type = DxbcResourceType::Typed; + DxbcImageInfo imageInfo; + uint32_t varId = 0; + uint32_t specId = 0; + DxbcScalarType sampledType = DxbcScalarType::Float32; + uint32_t sampledTypeId = 0; + uint32_t imageTypeId = 0; + uint32_t colorTypeId = 0; + uint32_t depthTypeId = 0; + uint32_t structStride = 0; + bool isRawSsbo = false; + }; + + + /** + * \brief Unordered access binding + * + * Stores a resource variable that is provided + * by a UAV, as well as associated type IDs. + */ + struct DxbcUav { + DxbcResourceType type = DxbcResourceType::Typed; + DxbcImageInfo imageInfo; + uint32_t varId = 0; + uint32_t ctrId = 0; + uint32_t specId = 0; + DxbcScalarType sampledType = DxbcScalarType::Float32; + uint32_t sampledTypeId = 0; + uint32_t imageTypeId = 0; + uint32_t structStride = 0; + uint32_t coherence = 0; + bool isRawSsbo = false; + }; + + + /** + * \brief Component swizzle + * + * Maps vector components to + * other vector components. + */ + class DxbcRegSwizzle { + + public: + + DxbcRegSwizzle() { } + DxbcRegSwizzle(uint32_t x, uint32_t y, uint32_t z, uint32_t w) + : m_mask((x << 0) | (y << 2) | (z << 4) | (w << 6)) { } + + uint32_t operator [] (uint32_t id) const { + return (m_mask >> (id + id)) & 0x3; + } + + bool operator == (const DxbcRegSwizzle& other) const { return m_mask == other.m_mask; } + bool operator != (const DxbcRegSwizzle& other) const { return m_mask != other.m_mask; } + + private: + + uint8_t m_mask = 0; + + }; + + + /** + * \brief Component mask + * + * Enables access to a certain + * subset of vector components. + */ + class DxbcRegMask { + + public: + + DxbcRegMask() { } + DxbcRegMask(uint32_t mask) : m_mask(mask) { } + DxbcRegMask(bool x, bool y, bool z, bool w) + : m_mask((x ? 0x1 : 0) | (y ? 0x2 : 0) + | (z ? 0x4 : 0) | (w ? 
0x8 : 0)) { } + + bool operator [] (uint32_t id) const { + return (m_mask >> id) & 1; + } + + uint32_t popCount() const { + const uint8_t n[16] = { 0, 1, 1, 2, 1, 2, 2, 3, + 1, 2, 2, 3, 2, 3, 3, 4 }; + return n[m_mask & 0xF]; + } + + uint32_t firstSet() const { + const uint8_t n[16] = { 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0 }; + return n[m_mask & 0xF]; + } + + uint32_t minComponents() const { + const uint8_t n[16] = { 0, 1, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4 }; + return n[m_mask & 0xF]; + } + + bool operator == (const DxbcRegMask& other) const { return m_mask == other.m_mask; } + bool operator != (const DxbcRegMask& other) const { return m_mask != other.m_mask; } + + DxbcRegMask& operator |= (const DxbcRegMask& other) { + m_mask |= other.m_mask; + return *this; + } + + static DxbcRegMask firstN(uint32_t n) { + return DxbcRegMask(n >= 1, n >= 2, n >= 3, n >= 4); + } + + static DxbcRegMask select(uint32_t n) { + return DxbcRegMask(n == 0, n == 1, n == 2, n == 3); + } + + std::string maskString() const { + std::string out = ""; + out += (m_mask & 0x1) ? "x" : ""; + out += (m_mask & 0x2) ? "y" : ""; + out += (m_mask & 0x4) ? "z" : ""; + out += (m_mask & 0x8) ? "w" : ""; + return out; + } + + operator bool () const { + return m_mask != 0; + } + + private: + + uint8_t m_mask = 0; + + }; + + + /** + * \brief System value mapping + * + * Maps a system value to a given set of + * components of an input or output register. 
+ */ + struct DxbcSvMapping { + uint32_t regId; + DxbcRegMask regMask; + DxbcSystemValue sv; + }; + + + struct DxbcRegIndex { + DxbcRegister* relReg; + int32_t offset; + }; + + + /** + * \brief Instruction operand + */ + struct DxbcRegister { + DxbcOperandType type; + DxbcScalarType dataType; + DxbcComponentCount componentCount; + + uint32_t idxDim; + DxbcRegIndex idx[DxbcMaxRegIndexDim]; + + DxbcRegMask mask; + DxbcRegSwizzle swizzle; + DxbcRegModifiers modifiers; + + union { + uint32_t u32_4[4]; + uint32_t u32_1; + } imm; + }; + + + /** + * \brief Instruction result modifiers + * + * Modifiers that are applied + * to all destination operands. + */ + struct DxbcOpModifiers { + bool saturate; + bool precise; + }; + + + /** + * \brief Opcode controls + * + * Instruction-specific controls. Usually, + * only one of the members will be valid. + */ + class DxbcShaderOpcodeControls { + + public: + + DxbcShaderOpcodeControls() + : m_bits(0) { } + + DxbcShaderOpcodeControls(uint32_t bits) + : m_bits(bits) { } + + DxbcInstructionReturnType returnType() const { + return DxbcInstructionReturnType(bit::extract(m_bits, 11, 11)); + } + + DxbcGlobalFlags globalFlags() const { + return DxbcGlobalFlags(bit::extract(m_bits, 11, 14)); + } + + DxbcZeroTest zeroTest() const { + return DxbcZeroTest(bit::extract(m_bits, 18, 18)); + } + + DxbcSyncFlags syncFlags() const { + return DxbcSyncFlags(bit::extract(m_bits, 11, 14)); + } + + DxbcResourceDim resourceDim() const { + return DxbcResourceDim(bit::extract(m_bits, 11, 15)); + } + + DxbcResinfoType resinfoType() const { + return DxbcResinfoType(bit::extract(m_bits, 11, 12)); + } + + DxbcInterpolationMode interpolation() const { + return DxbcInterpolationMode(bit::extract(m_bits, 11, 14)); + } + + DxbcSamplerMode samplerMode() const { + return DxbcSamplerMode(bit::extract(m_bits, 11, 14)); + } + + DxbcPrimitiveTopology primitiveTopology() const { + return DxbcPrimitiveTopology(bit::extract(m_bits, 11, 17)); + } + + DxbcPrimitive 
primitive() const { + return DxbcPrimitive(bit::extract(m_bits, 11, 16)); + } + + DxbcTessDomain tessDomain() const { + return DxbcTessDomain(bit::extract(m_bits, 11, 12)); + } + + DxbcTessOutputPrimitive tessOutputPrimitive() const { + return DxbcTessOutputPrimitive(bit::extract(m_bits, 11, 13)); + } + + DxbcTessPartitioning tessPartitioning() const { + return DxbcTessPartitioning(bit::extract(m_bits, 11, 13)); + } + + DxbcUavFlags uavFlags() const { + return DxbcUavFlags(bit::extract(m_bits, 16, 17)); + } + + DxbcConstantBufferAccessType accessType() const { + return DxbcConstantBufferAccessType(bit::extract(m_bits, 11, 11)); + } + + uint32_t controlPointCount() const { + return bit::extract(m_bits, 11, 16); + } + + bool precise() const { + return bit::extract(m_bits, 19, 22) != 0; + } + + private: + + uint32_t m_bits; + + }; + + + /** + * \brief Sample controls + * + * Constant texel offset with + * values ranging from -8 to 7. + */ + struct DxbcShaderSampleControls { + int u, v, w; + }; + + + /** + * \brief Immediate value + * + * Immediate argument represented either + * as a 32-bit or 64-bit unsigned integer, + * or a 32-bit or 64-bit floating point number. + */ + union DxbcImmediate { + float f32; + double f64; + uint32_t u32; + uint64_t u64; + }; + + + /** + * \brief Shader instruction + * + * Note that this structure may store pointers to + * external structures, such as the original code + * buffer. This is safe to use if and only if: + * - The \ref DxbcDecodeContext that created it + * still exists and was not moved + * - The code buffer that was being decoded + * still exists and was not moved. 
+ */ + struct DxbcShaderInstruction { + DxbcOpcode op; + DxbcInstClass opClass; + DxbcOpModifiers modifiers; + DxbcShaderOpcodeControls controls; + DxbcShaderSampleControls sampleControls; + + uint32_t dstCount; + uint32_t srcCount; + uint32_t immCount; + + const DxbcRegister* dst; + const DxbcRegister* src; + const DxbcImmediate* imm; + + DxbcCustomDataClass customDataType; + uint32_t customDataSize; + const uint32_t* customData; + }; + + + /** + * \brief DXBC code slice + * + * Convenient pointer pair that allows + * reading the code word stream safely. + */ + class DxbcCodeSlice { + + public: + + DxbcCodeSlice( + const uint32_t* ptr, + const uint32_t* end) + : m_ptr(ptr), m_end(end) { } + + const uint32_t* ptrAt(uint32_t id) const; + + uint32_t at(uint32_t id) const; + uint32_t read(); + + DxbcCodeSlice take(uint32_t n) const; + DxbcCodeSlice skip(uint32_t n) const; + + bool atEnd() const { + return m_ptr == m_end; + } + + private: + + const uint32_t* m_ptr = nullptr; + const uint32_t* m_end = nullptr; + + }; + + + /** + * \brief Decode context + * + * Stores data that is required to decode a single + * instruction. This data is not persistent, so it + * should be forwarded to the compiler right away. + */ + class DxbcDecodeContext { + + public: + + /** + * \brief Retrieves current instruction + * + * This is only valid after a call to \ref decodeInstruction. + * \returns Reference to last decoded instruction + */ + const DxbcShaderInstruction& getInstruction() const { + return m_instruction; + } + + /** + * \brief Decodes an instruction + * + * This also advances the given code slice by the + * number of dwords consumed by the instruction. + * \param [in] code Code slice + */ + void decodeInstruction(DxbcCodeSlice& code); + + private: + + DxbcShaderInstruction m_instruction; + + std::array m_dstOperands; + std::array m_srcOperands; + std::array m_immOperands; + std::array m_indices; + + // Index into the indices array. 
Used when decoding + // instruction operands with relative indexing. + uint32_t m_indexId = 0; + + void decodeCustomData(DxbcCodeSlice code); + void decodeOperation(DxbcCodeSlice code); + + void decodeComponentSelection(DxbcRegister& reg, uint32_t token); + void decodeOperandExtensions(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token); + void decodeOperandImmediates(DxbcCodeSlice& code, DxbcRegister& reg); + void decodeOperandIndex(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token); + + void decodeRegister(DxbcCodeSlice& code, DxbcRegister& reg, DxbcScalarType type); + void decodeImm32(DxbcCodeSlice& code, DxbcImmediate& imm, DxbcScalarType type); + + void decodeOperand(DxbcCodeSlice& code, const DxbcInstOperandFormat& format); + + }; + +} diff --git a/src/dxbc/dxbc_defs.cpp b/src/dxbc/dxbc_defs.cpp new file mode 100644 index 0000000..d8763ea --- /dev/null +++ b/src/dxbc/dxbc_defs.cpp @@ -0,0 +1,1255 @@ +#include "dxbc_defs.h" + +namespace dxvk { + + const std::array g_instructionFormats = {{ + /* Add */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* And */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Break */ + { 0, DxbcInstClass::ControlFlow }, + /* Breakc */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Call */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Callc */ + { 2, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Case */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, 
DxbcScalarType::Uint32 }, + } }, + /* Continue */ + { 0, DxbcInstClass::ControlFlow }, + /* Continuec */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Cut */ + { 0, DxbcInstClass::GeometryEmit }, + /* Default */ + { 0, DxbcInstClass::ControlFlow }, + /* DerivRtx */ + { 2, DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DerivRty */ + { 2, DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Discard */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Div */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Dp2 */ + { 3, DxbcInstClass::VectorDot, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Dp3 */ + { 3, DxbcInstClass::VectorDot, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Dp4 */ + { 3, DxbcInstClass::VectorDot, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Else */ + { 0, DxbcInstClass::ControlFlow }, + /* Emit */ + { 0, DxbcInstClass::GeometryEmit }, + /* EmitThenCut */ + { 0, DxbcInstClass::GeometryEmit }, + /* EndIf */ + { 0, DxbcInstClass::ControlFlow }, + /* EndLoop */ + { 0, DxbcInstClass::ControlFlow }, + /* EndSwitch */ + { 0, DxbcInstClass::ControlFlow }, + /* Eq */ + { 3, 
DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Exp */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Frc */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* FtoI */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* FtoU */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Ge */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* IAdd */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* If */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* IEq */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IGe */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* ILt */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, 
DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IMad */ + { 4, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IMax */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IMin */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IMul */ + { 4, DxbcInstClass::VectorImul, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* INe */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* INeg */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* IShl */ + { 3, DxbcInstClass::VectorShift, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* IShr */ + { 3, DxbcInstClass::VectorShift, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ItoF */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, 
DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* Label */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* Ld */ + { 3, DxbcInstClass::TextureFetch, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* LdMs */ + { 4, DxbcInstClass::TextureFetch, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* Log */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Loop */ + { 0, DxbcInstClass::ControlFlow }, + /* Lt */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Mad */ + { 4, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Min */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Max */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* CustomData */ + { 0, DxbcInstClass::CustomData }, + /* Mov */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, 
DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Movc */ + { 4, DxbcInstClass::VectorCmov, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Mul */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Ne */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Nop */ + { 0, DxbcInstClass::NoOperation }, + /* Not */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Or */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ResInfo */ + { 3, DxbcInstClass::TextureQuery, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Ret */ + { 0, DxbcInstClass::ControlFlow }, + /* Retc */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* RoundNe */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* RoundNi */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* RoundPi */ + { 2, 
DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* RoundZ */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Rsq */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Sample */ + { 4, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleC */ + { 5, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleClz */ + { 5, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleL */ + { 5, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleD */ + { 6, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, 
DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleB */ + { 5, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Sqrt */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Switch */ + { 1, DxbcInstClass::ControlFlow, { + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* SinCos */ + { 3, DxbcInstClass::VectorSinCos, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* UDiv */ + { 4, DxbcInstClass::VectorIdiv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ULt */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UGe */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UMul */ + { 4, DxbcInstClass::VectorImul, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { 
DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UMad */ + { 4, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UMax */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UMin */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UShr */ + { 3, DxbcInstClass::VectorShift, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UtoF */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Xor */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* DclResource */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclConstantBuffer */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclSampler */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclIndexRange */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* 
DclGsOutputPrimitiveTopology */ + { 0, DxbcInstClass::Declaration }, + /* DclGsInputPrimitive */ + { 0, DxbcInstClass::Declaration }, + /* DclMaxOutputVertexCount */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclInput */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclInputSgv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclInputSiv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclInputPs */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclInputPsSgv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclInputPsSiv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclOutput */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclOutputSgv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclOutputSiv */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclTemps */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclIndexableTemp */ + { 3, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + { DxbcOperandKind::Imm32, 
DxbcScalarType::Uint32 }, + } }, + /* DclGlobalFlags */ + { 0, DxbcInstClass::Declaration }, + /* Reserved0 */ + { 0, DxbcInstClass::Undefined }, + /* Lod */ + { 4, DxbcInstClass::TextureQueryLod, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4 */ + { 4, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SamplePos */ + { 3, DxbcInstClass::TextureQueryMsPos, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* SampleInfo */ + { 2, DxbcInstClass::TextureQueryMs, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Reserved1 */ + { }, + /* HsDecls */ + { 0, DxbcInstClass::HullShaderPhase }, + /* HsControlPointPhase */ + { 0, DxbcInstClass::HullShaderPhase }, + /* HsForkPhase */ + { 0, DxbcInstClass::HullShaderPhase }, + /* HsJoinPhase */ + { 0, DxbcInstClass::HullShaderPhase }, + /* EmitStream */ + { 1, DxbcInstClass::GeometryEmit, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* CutStream */ + { 1, DxbcInstClass::GeometryEmit, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* EmitThenCutStream */ + { 1, DxbcInstClass::GeometryEmit, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* InterfaceCall */ + { }, + /* BufInfo */ + { 2, DxbcInstClass::BufferQuery, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* DerivRtxCoarse */ + { 2, 
DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DerivRtxFine */ + { 2, DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DerivRtyCoarse */ + { 2, DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DerivRtyFine */ + { 2, DxbcInstClass::VectorDeriv, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4C */ + { 5, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4Po */ + { 5, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4PoC */ + { 6, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Rcp */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* F32toF16 */ + { 2, DxbcInstClass::ConvertFloat16, { + { 
DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* F16toF32 */ + { 2, DxbcInstClass::ConvertFloat16, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UAddc */ + { }, + /* USubb */ + { }, + /* CountBits */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* FirstBitHi */ + { 2, DxbcInstClass::BitScan, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* FirstBitLo */ + { 2, DxbcInstClass::BitScan, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* FirstBitShi */ + { 2, DxbcInstClass::BitScan, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* UBfe */ + { 4, DxbcInstClass::BitExtract, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* IBfe */ + { 4, DxbcInstClass::BitExtract, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* Bfi */ + { 5, DxbcInstClass::BitInsert, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* BfRev */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { 
DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Swapc */ + { 5, DxbcInstClass::VectorCmov, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DclStream */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* DclFunctionBody */ + { }, + /* DclFunctionTable */ + { }, + /* DclInterface */ + { }, + /* DclInputControlPointCount */ + { 0, DxbcInstClass::Declaration }, + /* DclOutputControlPointCount */ + { 0, DxbcInstClass::Declaration }, + /* DclTessDomain */ + { 0, DxbcInstClass::Declaration }, + /* DclTessPartitioning */ + { 0, DxbcInstClass::Declaration }, + /* DclTessOutputPrimitive */ + { 0, DxbcInstClass::Declaration }, + /* DclHsMaxTessFactor */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Float32 }, + } }, + /* DclHsForkPhaseInstanceCount */ + { 1, DxbcInstClass::HullShaderInstCnt, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclHsJoinPhaseInstanceCount */ + { 1, DxbcInstClass::HullShaderInstCnt, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclThreadGroup */ + { 3, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclUavTyped */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclUavRaw */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclUavStructured */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { 
DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclThreadGroupSharedMemoryRaw */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclThreadGroupSharedMemoryStructured */ + { 3, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* DclResourceRaw */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + } }, + /* DclResourceStructured */ + { 2, DxbcInstClass::Declaration, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* LdUavTyped */ + { 3, DxbcInstClass::TypedUavLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* StoreUavTyped */ + { 3, DxbcInstClass::TypedUavStore, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* LdRaw */ + { 3, DxbcInstClass::BufferLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* StoreRaw */ + { 3, DxbcInstClass::BufferStore, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* LdStructured */ + { 4, DxbcInstClass::BufferLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + 
} }, + /* StoreStructured */ + { 4, DxbcInstClass::BufferStore, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicAnd */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicOr */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicXor */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicCmpStore */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicIAdd */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicIMax */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* AtomicIMin */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* AtomicUMax */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, 
DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* AtomicUMin */ + { 3, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicAlloc */ + { 2, DxbcInstClass::AtomicCounter, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicConsume */ + { 2, DxbcInstClass::AtomicCounter, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicIAdd */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicAnd */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicOr */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicXor */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicExch */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, 
DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicCmpExch */ + { 5, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicIMax */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* ImmAtomicIMin */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* ImmAtomicUMax */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ImmAtomicUMin */ + { 4, DxbcInstClass::Atomic, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* Sync */ + { 0, DxbcInstClass::Barrier }, + /* DAdd */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DMax */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { 
DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DMin */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DMul */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DEq */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DGe */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DLt */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DNe */ + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DMov */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DMovc */ + { 4, DxbcInstClass::VectorCmov, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DtoF */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 
}, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* FtoD */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* EvalSnapped */ + { 3, DxbcInstClass::Interpolate, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* EvalSampleIndex */ + { 3, DxbcInstClass::Interpolate, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* EvalCentroid */ + { 2, DxbcInstClass::Interpolate, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* DclGsInstanceCount */ + { 1, DxbcInstClass::Declaration, { + { DxbcOperandKind::Imm32, DxbcScalarType::Uint32 }, + } }, + /* Abort */ + { }, + /* DebugBreak */ + { }, + /* ReservedBegin11_1 */ + { }, + /* DDiv */ + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DFma */ + { 4, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DRcp */ + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* Msad */ + { 4, DxbcInstClass::VectorMsad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, 
DxbcScalarType::Uint32 }, + } }, + /* DtoI */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* DtoU */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 }, + } }, + /* ItoD */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* UtoD */ + { 2, DxbcInstClass::ConvertFloat64, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float64 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* ReservedBegin11_2 */ + { }, + /* Gather4S */ + { 5, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4CS */ + { 6, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4PoS */ + { 6, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* Gather4PoCS */ + { 7, DxbcInstClass::TextureGather, { + { DxbcOperandKind::DstReg, 
DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* LdS */ + { 4, DxbcInstClass::TextureFetch, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* LdMsS */ + { 5, DxbcInstClass::TextureFetch, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, + /* LdUavTypedS */ + { 4, DxbcInstClass::TypedUavLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* LdRawS */ + { 4, DxbcInstClass::BufferLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* LdStructuredS */ + { 5, DxbcInstClass::BufferLoad, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + /* SampleLS */ + { 6, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, 
+ { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleClzS */ + { 6, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleClampS */ + { 6, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleBClampS */ + { 7, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleDClampS */ + { 8, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, 
+ { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* SampleCClampS */ + { 7, DxbcInstClass::TextureSample, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, + /* CheckAccessFullyMapped */ + { 2, DxbcInstClass::SparseCheckAccess, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, + }}; + + + DxbcInstFormat dxbcInstructionFormat(DxbcOpcode opcode) { + const uint32_t idx = static_cast(opcode); + + return (idx < g_instructionFormats.size()) + ? g_instructionFormats.at(idx) + : DxbcInstFormat(); + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_defs.h b/src/dxbc/dxbc_defs.h new file mode 100644 index 0000000..8782481 --- /dev/null +++ b/src/dxbc/dxbc_defs.h @@ -0,0 +1,104 @@ +#pragma once + +#include "dxbc_enums.h" + +namespace dxvk { + + constexpr size_t DxbcMaxInterfaceRegs = 32; + constexpr size_t DxbcMaxOperandCount = 8; + + /** + * \brief Operand kind + * + * In the instruction format definition, this specified + * whether an operand uses an actual operand token, or + * whether it is stored as an immediate value. + */ + enum class DxbcOperandKind { + DstReg, ///< Destination register + SrcReg, ///< Source register + Imm32, ///< Constant number + }; + + /** + * \brief Instruction class + * + * Instructions with a similar format are grouped into + * instruction classes in order to make implementing + * new instructions easier. 
+ */ + enum class DxbcInstClass { + Declaration, ///< Interface or resource declaration + CustomData, ///< Immediate constant buffer + ControlFlow, ///< Control flow instructions + GeometryEmit, ///< Special geometry shader instructions + Atomic, ///< Atomic operations + AtomicCounter, ///< Atomic counter operations + Barrier, ///< Execution or memory barrier + BitExtract, ///< Bit field extract operations + BitInsert, ///< Bit field insert operations + BitScan, ///< Bit scan operations + BufferQuery, ///< Buffer query instruction + BufferLoad, ///< Structured or raw buffer load + BufferStore, ///< Structured or raw buffer store + ConvertFloat16, ///< 16-bit float packing/unpacking + ConvertFloat64, ///< 64-bit float conversion + HullShaderPhase, ///< Hull shader phase declaration + HullShaderInstCnt, ///< Hull shader phase instance count + Interpolate, ///< Input attribute interpolation + NoOperation, ///< The most useful instruction class + SparseCheckAccess, ///< Verifies sparse resource access + TextureQuery, ///< Texture query instruction + TextureQueryLod, ///< Texture LOD query instruction + TextureQueryMs, ///< Multisample texture query + TextureQueryMsPos, ///< Sample position query + TextureFetch, ///< Texture fetch instruction + TextureGather, ///< Texture gather instruction + TextureSample, ///< Texture sampling instruction + TypedUavLoad, ///< Typed UAV load + TypedUavStore, ///< Typed UAV store + VectorAlu, ///< Component-wise vector instructions + VectorCmov, ///< Component-wise conditional move + VectorCmp, ///< Component-wise vector comparison + VectorDeriv, ///< Vector derivatives + VectorDot, ///< Dot product instruction + VectorIdiv, ///< Component-wise integer division + VectorImul, ///< Component-wise integer multiplication + VectorMsad, ///< Component-wise sum of absolute difference + VectorShift, ///< Bit shift operations on vectors + VectorSinCos, ///< Sine and Cosine instruction + Undefined, ///< Instruction code not defined + }; + + /** + 
* \brief Instruction operand format + * + * Stores the kind and the expected data type + * of an operand. Used when parsing instructions. + */ + struct DxbcInstOperandFormat { + DxbcOperandKind kind; ///< Register token (dst/src) or immediate value + DxbcScalarType type; ///< Expected scalar data type of the operand + }; + + /** + * \brief Instruction format + * + * Defines the instruction class as well as + * the format of the instruction operands. + */ + struct DxbcInstFormat { + uint32_t operandCount = 0; ///< Number of operand entries used; zero unless set + DxbcInstClass instructionClass = DxbcInstClass::Undefined; ///< Instruction class; Undefined unless set + DxbcInstOperandFormat operands[DxbcMaxOperandCount]; ///< Per-operand kind and type, at most DxbcMaxOperandCount entries + }; + + /** + * \brief Retrieves instruction format info + * + * \param [in] opcode The opcode to retrieve + * \returns Instruction format info + */ + DxbcInstFormat dxbcInstructionFormat(DxbcOpcode opcode); + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_enums.h b/src/dxbc/dxbc_enums.h new file mode 100644 index 0000000..b09a0cc --- /dev/null +++ b/src/dxbc/dxbc_enums.h @@ -0,0 +1,655 @@ +#pragma once + +#include "dxbc_include.h" +#include "util_flags.h" + +namespace dxvk { + + /** + * \brief Instruction code listing + */ + enum class DxbcOpcode : uint32_t { + Add = 0, + And = 1, + Break = 2, + Breakc = 3, + Call = 4, + Callc = 5, + Case = 6, + Continue = 7, + Continuec = 8, + Cut = 9, + Default = 10, + DerivRtx = 11, + DerivRty = 12, + Discard = 13, + Div = 14, + Dp2 = 15, + Dp3 = 16, + Dp4 = 17, + Else = 18, + Emit = 19, + EmitThenCut = 20, + EndIf = 21, + EndLoop = 22, + EndSwitch = 23, + Eq = 24, + Exp = 25, + Frc = 26, + FtoI = 27, + FtoU = 28, + Ge = 29, + IAdd = 30, + If = 31, + IEq = 32, + IGe = 33, + ILt = 34, + IMad = 35, + IMax = 36, + IMin = 37, + IMul = 38, + INe = 39, + INeg = 40, + IShl = 41, + IShr = 42, + ItoF = 43, + Label = 44, + Ld = 45, + LdMs = 46, + Log = 47, + Loop = 48, + Lt = 49, + Mad = 50, + Min = 51, + Max = 52, + CustomData = 53, + Mov = 54, + Movc = 55, + Mul = 56, + Ne = 57, + Nop = 58, + Not = 59, + Or = 60, + ResInfo = 61, + Ret = 62, + Retc = 63, + RoundNe = 64, + RoundNi = 65, + RoundPi =
66, + RoundZ = 67, + Rsq = 68, + Sample = 69, + SampleC = 70, + SampleClz = 71, + SampleL = 72, + SampleD = 73, + SampleB = 74, + Sqrt = 75, + Switch = 76, + SinCos = 77, + UDiv = 78, + ULt = 79, + UGe = 80, + UMul = 81, + UMad = 82, + UMax = 83, + UMin = 84, + UShr = 85, + UtoF = 86, + Xor = 87, + DclResource = 88, + DclConstantBuffer = 89, + DclSampler = 90, + DclIndexRange = 91, + DclGsOutputPrimitiveTopology = 92, + DclGsInputPrimitive = 93, + DclMaxOutputVertexCount = 94, + DclInput = 95, + DclInputSgv = 96, + DclInputSiv = 97, + DclInputPs = 98, + DclInputPsSgv = 99, + DclInputPsSiv = 100, + DclOutput = 101, + DclOutputSgv = 102, + DclOutputSiv = 103, + DclTemps = 104, + DclIndexableTemp = 105, + DclGlobalFlags = 106, + Reserved0 = 107, + Lod = 108, + Gather4 = 109, + SamplePos = 110, + SampleInfo = 111, + Reserved1 = 112, + HsDecls = 113, + HsControlPointPhase = 114, + HsForkPhase = 115, + HsJoinPhase = 116, + EmitStream = 117, + CutStream = 118, + EmitThenCutStream = 119, + InterfaceCall = 120, + BufInfo = 121, + DerivRtxCoarse = 122, + DerivRtxFine = 123, + DerivRtyCoarse = 124, + DerivRtyFine = 125, + Gather4C = 126, + Gather4Po = 127, + Gather4PoC = 128, + Rcp = 129, + F32toF16 = 130, + F16toF32 = 131, + UAddc = 132, + USubb = 133, + CountBits = 134, + FirstBitHi = 135, + FirstBitLo = 136, + FirstBitShi = 137, + UBfe = 138, + IBfe = 139, + Bfi = 140, + BfRev = 141, + Swapc = 142, + DclStream = 143, + DclFunctionBody = 144, + DclFunctionTable = 145, + DclInterface = 146, + DclInputControlPointCount = 147, + DclOutputControlPointCount = 148, + DclTessDomain = 149, + DclTessPartitioning = 150, + DclTessOutputPrimitive = 151, + DclHsMaxTessFactor = 152, + DclHsForkPhaseInstanceCount = 153, + DclHsJoinPhaseInstanceCount = 154, + DclThreadGroup = 155, + DclUavTyped = 156, + DclUavRaw = 157, + DclUavStructured = 158, + DclThreadGroupSharedMemoryRaw = 159, + DclThreadGroupSharedMemoryStructured = 160, + DclResourceRaw = 161, + DclResourceStructured = 162, + 
LdUavTyped = 163, + StoreUavTyped = 164, + LdRaw = 165, + StoreRaw = 166, + LdStructured = 167, + StoreStructured = 168, + AtomicAnd = 169, + AtomicOr = 170, + AtomicXor = 171, + AtomicCmpStore = 172, + AtomicIAdd = 173, + AtomicIMax = 174, + AtomicIMin = 175, + AtomicUMax = 176, + AtomicUMin = 177, + ImmAtomicAlloc = 178, + ImmAtomicConsume = 179, + ImmAtomicIAdd = 180, + ImmAtomicAnd = 181, + ImmAtomicOr = 182, + ImmAtomicXor = 183, + ImmAtomicExch = 184, + ImmAtomicCmpExch = 185, + ImmAtomicIMax = 186, + ImmAtomicIMin = 187, + ImmAtomicUMax = 188, + ImmAtomicUMin = 189, + Sync = 190, + DAdd = 191, + DMax = 192, + DMin = 193, + DMul = 194, + DEq = 195, + DGe = 196, + DLt = 197, + DNe = 198, + DMov = 199, + DMovc = 200, + DtoF = 201, + FtoD = 202, + EvalSnapped = 203, + EvalSampleIndex = 204, + EvalCentroid = 205, + DclGsInstanceCount = 206, + Abort = 207, + DebugBreak = 208, + ReservedBegin11_1 = 209, + DDiv = 210, + DFma = 211, + DRcp = 212, + Msad = 213, + DtoI = 214, + DtoU = 215, + ItoD = 216, + UtoD = 217, + ReservedBegin11_2 = 218, + Gather4S = 219, + Gather4CS = 220, + Gather4PoS = 221, + Gather4PoCS = 222, + LdS = 223, + LdMsS = 224, + LdUavTypedS = 225, + LdRawS = 226, + LdStructuredS = 227, + SampleLS = 228, + SampleClzS = 229, + SampleClampS = 230, + SampleBClampS = 231, + SampleDClampS = 232, + SampleCClampS = 233, + CheckAccessFullyMapped = 234, + }; + + + /** + * \brief Extended opcode + */ + enum class DxbcExtOpcode : uint32_t { + Empty = 0, + SampleControls = 1, + ResourceDim = 2, + ResourceReturnType = 3, + }; + + + /** + * \brief Operand type + * + * Selects the 'register file' from which + * to retrieve an operand's value. 
+ */ + enum class DxbcOperandType : uint32_t { + Temp = 0, + Input = 1, + Output = 2, + IndexableTemp = 3, + Imm32 = 4, + Imm64 = 5, + Sampler = 6, + Resource = 7, + ConstantBuffer = 8, + ImmediateConstantBuffer = 9, + Label = 10, + InputPrimitiveId = 11, + OutputDepth = 12, + Null = 13, + Rasterizer = 14, + OutputCoverageMask = 15, + Stream = 16, + FunctionBody = 17, + FunctionTable = 18, + Interface = 19, + FunctionInput = 20, + FunctionOutput = 21, + OutputControlPointId = 22, + InputForkInstanceId = 23, + InputJoinInstanceId = 24, + InputControlPoint = 25, + OutputControlPoint = 26, + InputPatchConstant = 27, + InputDomainPoint = 28, + ThisPointer = 29, + UnorderedAccessView = 30, + ThreadGroupSharedMemory = 31, + InputThreadId = 32, + InputThreadGroupId = 33, + InputThreadIdInGroup = 34, + InputCoverageMask = 35, + InputThreadIndexInGroup = 36, + InputGsInstanceId = 37, + OutputDepthGe = 38, + OutputDepthLe = 39, + CycleCounter = 40, + OutputStencilRef = 41, + InputInnerCoverage = 42, + }; + + + /** + * \brief Number of components + * + * Used by operands to determine whether the + * operand has one, four or zero components. + */ + enum class DxbcComponentCount : uint32_t { + Component0 = 0, + Component1 = 1, + Component4 = 2, + }; + + + /** + * \brief Component selection mode + * + * When an operand has four components, the + * component selection mode determines which + * components are used for the operation. + */ + enum class DxbcRegMode : uint32_t { + Mask = 0, + Swizzle = 1, + Select1 = 2, + }; + + + /** + * \brief Index representation + * + * Determines how an operand + * register index is stored. + */ + enum class DxbcOperandIndexRepresentation : uint32_t { + Imm32 = 0, + Imm64 = 1, + Relative = 2, + Imm32Relative = 3, + Imm64Relative = 4, + }; + + + /** + * \brief Extended operand type + */ + enum class DxbcOperandExt : uint32_t { + OperandModifier = 1, + }; + + + /** + * \brief Resource dimension + * The type of a resource.
+ */ + enum class DxbcResourceDim : uint32_t { + Unknown = 0, + Buffer = 1, + Texture1D = 2, + Texture2D = 3, + Texture2DMs = 4, + Texture3D = 5, + TextureCube = 6, + Texture1DArr = 7, + Texture2DArr = 8, + Texture2DMsArr = 9, + TextureCubeArr = 10, + RawBuffer = 11, + StructuredBuffer = 12, + }; + + + /** + * \brief Resource return type + * Data type for resource read ops. + */ + enum class DxbcResourceReturnType : uint32_t { + Unorm = 1, + Snorm = 2, + Sint = 3, + Uint = 4, + Float = 5, + Mixed = 6, /// ? + Double = 7, + Continued = 8, /// ? + Unused = 9, /// ? + }; + + + /** + * \brief Register component type + * Data type of a register component. + */ + enum class DxbcRegisterComponentType : uint32_t { + Unknown = 0, + Uint32 = 1, + Sint32 = 2, + Float32 = 3, + }; + + + /** + * \brief Instruction return type + */ + enum class DxbcInstructionReturnType : uint32_t { + Float = 0, + Uint = 1, + }; + + + enum class DxbcSystemValue : uint32_t { + None = 0, + Position = 1, + ClipDistance = 2, + CullDistance = 3, + RenderTargetId = 4, + ViewportId = 5, + VertexId = 6, + PrimitiveId = 7, + InstanceId = 8, + IsFrontFace = 9, + SampleIndex = 10, + FinalQuadUeq0EdgeTessFactor = 11, + FinalQuadVeq0EdgeTessFactor = 12, + FinalQuadUeq1EdgeTessFactor = 13, + FinalQuadVeq1EdgeTessFactor = 14, + FinalQuadUInsideTessFactor = 15, + FinalQuadVInsideTessFactor = 16, + FinalTriUeq0EdgeTessFactor = 17, + FinalTriVeq0EdgeTessFactor = 18, + FinalTriWeq0EdgeTessFactor = 19, + FinalTriInsideTessFactor = 20, + FinalLineDetailTessFactor = 21, + FinalLineDensityTessFactor = 22, + Target = 64, + Depth = 65, + Coverage = 66, + DepthGe = 67, + DepthLe = 68 + }; + + + enum class DxbcInterpolationMode : uint32_t { + Undefined = 0, + Constant = 1, + Linear = 2, + LinearCentroid = 3, + LinearNoPerspective = 4, + LinearNoPerspectiveCentroid = 5, + LinearSample = 6, + LinearNoPerspectiveSample = 7, + }; + + + enum class DxbcGlobalFlag : uint32_t { + RefactoringAllowed = 0, + DoublePrecision = 1, + 
EarlyFragmentTests = 2, + RawStructuredBuffers = 3, + }; + + using DxbcGlobalFlags = Flags; + + enum class DxbcZeroTest : uint32_t { + TestZ = 0, + TestNz = 1, + }; + + enum class DxbcResinfoType : uint32_t { + Float = 0, + RcpFloat = 1, + Uint = 2, + }; + + enum class DxbcSyncFlag : uint32_t { + ThreadsInGroup = 0, + ThreadGroupSharedMemory = 1, + UavMemoryGroup = 2, + UavMemoryGlobal = 3, + }; + + using DxbcSyncFlags = Flags; + + + /** + * \brief Geometry shader input primitive + */ + enum class DxbcPrimitive : uint32_t { + Undefined = 0, + Point = 1, + Line = 2, + Triangle = 3, + LineAdj = 6, + TriangleAdj = 7, + Patch1 = 8, + Patch2 = 9, + Patch3 = 10, + Patch4 = 11, + Patch5 = 12, + Patch6 = 13, + Patch7 = 14, + Patch8 = 15, + Patch9 = 16, + Patch10 = 17, + Patch11 = 18, + Patch12 = 19, + Patch13 = 20, + Patch14 = 21, + Patch15 = 22, + Patch16 = 23, + Patch17 = 24, + Patch18 = 25, + Patch19 = 26, + Patch20 = 27, + Patch21 = 28, + Patch22 = 29, + Patch23 = 30, + Patch24 = 31, + Patch25 = 32, + Patch26 = 33, + Patch27 = 34, + Patch28 = 35, + Patch29 = 36, + Patch30 = 37, + Patch31 = 38, + Patch32 = 39, + }; + + + /** + * \brief Geometry shader output topology + */ + enum class DxbcPrimitiveTopology : uint32_t { + Undefined = 0, + PointList = 1, + LineList = 2, + LineStrip = 3, + TriangleList = 4, + TriangleStrip = 5, + LineListAdj = 10, + LineStripAdj = 11, + TriangleListAdj = 12, + TriangleStripAdj = 13, + }; + + + /** + * \brief Sampler operation mode + */ + enum class DxbcSamplerMode : uint32_t { + Default = 0, + Comparison = 1, + Mono = 2, + }; + + + /** + * \brief Scalar value type + * + * Enumerates possible register component + * types. Scalar types are represented as + * a one-component vector type. 
+ */ + enum class DxbcScalarType : uint32_t { + Uint32 = 0, + Uint64 = 1, + Sint32 = 2, + Sint64 = 3, + Float32 = 4, + Float64 = 5, + Bool = 6, + }; + + + /** + * \brief Tessellator domain + */ + enum class DxbcTessDomain : uint32_t { + Undefined = 0, + Isolines = 1, + Triangles = 2, + Quads = 3, + }; + + /** + * \brief Tessellator partitioning + */ + enum class DxbcTessPartitioning : uint32_t { + Undefined = 0, + Integer = 1, + Pow2 = 2, + FractOdd = 3, + FractEven = 4, + }; + + /** + * \brief UAV definition flags + */ + enum class DxbcUavFlag : uint32_t { + GloballyCoherent = 0, + RasterizerOrdered = 1, + }; + + using DxbcUavFlags = Flags; + + /** + * \brief Tessellator output primitive + */ + enum class DxbcTessOutputPrimitive : uint32_t { + Undefined = 0, + Point = 1, + Line = 2, + TriangleCw = 3, + TriangleCcw = 4, + }; + + /** + * \brief Custom data class + * + * Stores which type of custom data is + * referenced by the instruction. + */ + enum class DxbcCustomDataClass : uint32_t { + Comment = 0, + DebugInfo = 1, + Opaque = 2, + ImmConstBuf = 3, + }; + + + enum class DxbcResourceType : uint32_t { + Typed = 0, + Raw = 1, + Structured = 2, + }; + + + enum class DxbcConstantBufferAccessType : uint32_t { + StaticallyIndexed = 0, + DynamicallyIndexed = 1, + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_header.cpp b/src/dxbc/dxbc_header.cpp new file mode 100644 index 0000000..9b5f698 --- /dev/null +++ b/src/dxbc/dxbc_header.cpp @@ -0,0 +1,30 @@ +#include "dxbc_header.h" + +namespace dxvk { + + DxbcHeader::DxbcHeader(DxbcReader& reader) { + // FourCC at the start of the file, must be 'DXBC' + DxbcTag fourcc = reader.readTag(); + + if (fourcc != "DXBC") + throw DxvkError("DxbcHeader::DxbcHeader: Invalid fourcc, expected 'DXBC'"); + + // Stuff we don't actually need to store + reader.skip(4 * sizeof(uint32_t)); // Check sum + reader.skip(1 * sizeof(uint32_t)); // Constant 1 + reader.skip(1 * sizeof(uint32_t)); // Bytecode length + + // Number of 
chunks in the file + uint32_t chunkCount = reader.readu32(); + + // Chunk offsets are stored immediately after + for (uint32_t i = 0; i < chunkCount; i++) + m_chunkOffsets.push_back(reader.readu32()); + } + + + DxbcHeader::~DxbcHeader() { + + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_header.h b/src/dxbc/dxbc_header.h new file mode 100644 index 0000000..8eca0d4 --- /dev/null +++ b/src/dxbc/dxbc_header.h @@ -0,0 +1,48 @@ +#pragma once + +#include <vector> + +#include "dxbc_reader.h" + +namespace dxvk { + + /** + * \brief DXBC header + * + * Stores information about the shader file itself + * and the data chunks stored inside the file. + */ + class DxbcHeader { + + public: + + DxbcHeader(DxbcReader& reader); + ~DxbcHeader(); + + /** + * \brief Number of chunks + * \returns Chunk count + */ + uint32_t numChunks() const { + return static_cast<uint32_t>(m_chunkOffsets.size()); + } + + /** + * \brief Chunk offset + * + * Retrieves the offset of a chunk, in + * bytes, from the start of the file. + * \param [in] chunkId Chunk index + * \returns Byte offset of that chunk + */ + uint32_t chunkOffset(uint32_t chunkId) const { + return m_chunkOffsets.at(chunkId); + } + + private: + + std::vector<uint32_t> m_chunkOffsets; + + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_include.h b/src/dxbc/dxbc_include.h new file mode 100644 index 0000000..0bd1aef --- /dev/null +++ b/src/dxbc/dxbc_include.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "util/com/com_guid.h" +#include "util/com/com_object.h" +#include "util/com/com_pointer.h" + +#include "util/log/log.h" +#include "util/log/log_debug.h" + +#include "util/rc/util_rc.h" +#include "util/rc/util_rc_ptr.h" + +#include "util/util_bit.h" +#include "util/util_enum.h" +#include "util/util_error.h" +#include "util/util_string.h" diff --git a/src/dxbc/dxbc_modinfo.h b/src/dxbc/dxbc_modinfo.h new file mode 100644 index 0000000..fe4e107 --- /dev/null +++ b/src/dxbc/dxbc_modinfo.h @@ -0,0 +1,59 @@ +#pragma once + +#include
"dxbc_options.h" + +namespace dxvk { + + /** + * \brief Tessellation info + * + * Stores the maximum tessellation factor + * to export from tessellation shaders. + */ + struct DxbcTessInfo { + float maxTessFactor; + }; + + /** + * \brief Xfb capture entry + * + * Stores an output variable to capture, + * as well as the buffer to write it to. + */ + struct DxbcXfbEntry { + const char* semanticName; + uint32_t semanticIndex; + uint32_t componentIndex; + uint32_t componentCount; + uint32_t streamId; + uint32_t bufferId; + uint32_t offset; + }; + + /** + * \brief Xfb info + * + * Stores capture entries and output buffer + * strides. This structure must only be + * defined if \c entryCount is non-zero. + */ + struct DxbcXfbInfo { + uint32_t entryCount; + DxbcXfbEntry entries[128]; + uint32_t strides[4]; + int32_t rasterizedStream; + }; + + /** + * \brief Shader module info + * + * Stores information which may affect shader compilation. + * This data can be supplied by the client API implementation. + */ + struct DxbcModuleInfo { + DxbcOptions options; + DxbcTessInfo* tess = nullptr; + DxbcXfbInfo* xfb = nullptr; + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_module.cpp b/src/dxbc/dxbc_module.cpp new file mode 100644 index 0000000..57bcda4 --- /dev/null +++ b/src/dxbc/dxbc_module.cpp @@ -0,0 +1,116 @@ +#include "dxbc_analysis.h" +#include "dxbc_compiler.h" +#include "dxbc_module.h" + +namespace dxvk { + + DxbcModule::DxbcModule(DxbcReader& reader) + : m_header(reader) { + for (uint32_t i = 0; i < m_header.numChunks(); i++) { + + // The chunk tag is stored at the beginning of each chunk + auto chunkReader = reader.clone(m_header.chunkOffset(i)); + auto tag = chunkReader.readTag(); + + // The chunk size follows right after the four-character + // code. This does not include the eight bytes that are + // consumed by the FourCC and chunk length entry. 
+ auto chunkLength = chunkReader.readu32(); + + chunkReader = chunkReader.clone(8); + chunkReader = chunkReader.resize(chunkLength); + + if ((tag == "SHDR") || (tag == "SHEX")) + m_shexChunk = new DxbcShex(chunkReader); + + if ((tag == "ISGN") || (tag == "ISG1")) + m_isgnChunk = new DxbcIsgn(chunkReader, tag); + + if ((tag == "OSGN") || (tag == "OSG5") || (tag == "OSG1")) + m_osgnChunk = new DxbcIsgn(chunkReader, tag); + + if ((tag == "PCSG") || (tag == "PSG1")) + m_psgnChunk = new DxbcIsgn(chunkReader, tag); + } + } + + + DxbcModule::~DxbcModule() { + + } + + + DxbcCompiler::ShaderCreateInfo DxbcModule::compile( + const DxbcModuleInfo& moduleInfo, + const std::string& fileName) const { + if (m_shexChunk == nullptr) + throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk"); + + DxbcAnalysisInfo analysisInfo; + + DxbcAnalyzer analyzer(moduleInfo, + m_shexChunk->programInfo(), + m_isgnChunk, m_osgnChunk, + m_psgnChunk, analysisInfo); + + this->runAnalyzer(analyzer, m_shexChunk->slice()); + + DxbcCompiler compiler( + fileName, moduleInfo, + m_shexChunk->programInfo(), + m_isgnChunk, m_osgnChunk, + m_psgnChunk, analysisInfo); + + this->runCompiler(compiler, m_shexChunk->slice()); + + return compiler.finalize(); + } + + + DxbcCompiler::ShaderCreateInfo DxbcModule::compilePassthroughShader( + const DxbcModuleInfo& moduleInfo, + const std::string& fileName) const { + if (m_shexChunk == nullptr) + throw DxvkError("DxbcModule::compilePassthroughShader: No SHDR/SHEX chunk"); + + DxbcAnalysisInfo analysisInfo; + + DxbcCompiler compiler( + fileName, moduleInfo, + DxbcProgramType::GeometryShader, + m_osgnChunk, m_osgnChunk, + m_psgnChunk, analysisInfo); + + compiler.processXfbPassthrough(); + return compiler.finalize(); + } + + + void DxbcModule::runAnalyzer( + DxbcAnalyzer& analyzer, + DxbcCodeSlice slice) const { + DxbcDecodeContext decoder; + + while (!slice.atEnd()) { + decoder.decodeInstruction(slice); + + analyzer.processInstruction( + decoder.getInstruction()); + } + } + + + void
DxbcModule::runCompiler( + DxbcCompiler& compiler, + DxbcCodeSlice slice) const { + DxbcDecodeContext decoder; + + while (!slice.atEnd()) { + decoder.decodeInstruction(slice); + + compiler.processInstruction( + decoder.getInstruction()); + } + } + +} diff --git a/src/dxbc/dxbc_module.h b/src/dxbc/dxbc_module.h new file mode 100644 index 0000000..b250543 --- /dev/null +++ b/src/dxbc/dxbc_module.h @@ -0,0 +1,102 @@ +#pragma once + +//#include "../dxvk/dxvk_shader.h" + +#include <optional> + +#include "dxbc_chunk_isgn.h" +#include "dxbc_chunk_shex.h" +#include "dxbc_header.h" +#include "dxbc_modinfo.h" +#include "dxbc_reader.h" +#include "dxbc_compiler.h" + +// References used for figuring out DXBC: +// - https://github.com/tgjones/slimshader-cpp +// - Wine + +namespace dxvk { + + class DxbcAnalyzer; + class DxbcCompiler; + + /** + * \brief DXBC shader module + * + * Reads the DXBC byte code and extracts information + * about the resource bindings and the instruction + * stream. A module can then be compiled to SPIR-V. + */ + class DxbcModule { + + public: + + DxbcModule(DxbcReader& reader); + ~DxbcModule(); + + /** + * \brief Shader type + * \returns Program info, or \c std::nullopt if the module has no SHDR/SHEX chunk + */ + std::optional<DxbcProgramInfo> programInfo() const { + if (m_shexChunk == nullptr) + return std::nullopt; + + return m_shexChunk->programInfo(); + } + + /** + * \brief Input and output signature chunks + * + * Parts of the D3D11 API need access to the + * input or output signature of the shader. + */ + Rc<DxbcIsgn> isgn() const { return m_isgnChunk; } + Rc<DxbcIsgn> osgn() const { return m_osgnChunk; } + + /** + * \brief Compiles DXBC shader to SPIR-V module + * + * \param [in] moduleInfo DXBC module info + * \param [in] fileName File name, will be added to + * the compiled SPIR-V for debugging purposes.
+ * \returns The compiled shader object + */ + DxbcCompiler::ShaderCreateInfo compile( + const DxbcModuleInfo& moduleInfo, + const std::string& fileName) const; + + /** + * \brief Compiles a pass-through geometry shader + * + * Applications can pass a vertex shader to create + * a geometry shader with stream output. In this + * case, we have to create a passthrough geometry + * shader, which operates in point to point mode. + * \param [in] moduleInfo DXBC module info + * \param [in] fileName SPIR-V shader name + */ + DxbcCompiler::ShaderCreateInfo compilePassthroughShader( + const DxbcModuleInfo& moduleInfo, + const std::string& fileName) const; + + private: + + DxbcHeader m_header; + + Rc<DxbcIsgn> m_isgnChunk; + Rc<DxbcIsgn> m_osgnChunk; + Rc<DxbcIsgn> m_psgnChunk; + Rc<DxbcShex> m_shexChunk; + + void runAnalyzer( + DxbcAnalyzer& analyzer, + DxbcCodeSlice slice) const; + + void runCompiler( + DxbcCompiler& compiler, + DxbcCodeSlice slice) const; + + }; + +} diff --git a/src/dxbc/dxbc_names.cpp b/src/dxbc/dxbc_names.cpp new file mode 100644 index 0000000..c6a00ec --- /dev/null +++ b/src/dxbc/dxbc_names.cpp @@ -0,0 +1,445 @@ +#include "dxbc_names.h" + +namespace dxvk { + + std::ostream& operator << (std::ostream& os, DxbcOpcode e) { + switch (e) { + ENUM_NAME(DxbcOpcode::Add); + ENUM_NAME(DxbcOpcode::And); + ENUM_NAME(DxbcOpcode::Break); + ENUM_NAME(DxbcOpcode::Breakc); + ENUM_NAME(DxbcOpcode::Call); + ENUM_NAME(DxbcOpcode::Callc); + ENUM_NAME(DxbcOpcode::Case); + ENUM_NAME(DxbcOpcode::Continue); + ENUM_NAME(DxbcOpcode::Continuec); + ENUM_NAME(DxbcOpcode::Cut); + ENUM_NAME(DxbcOpcode::Default); + ENUM_NAME(DxbcOpcode::DerivRtx); + ENUM_NAME(DxbcOpcode::DerivRty); + ENUM_NAME(DxbcOpcode::Discard); + ENUM_NAME(DxbcOpcode::Div); + ENUM_NAME(DxbcOpcode::Dp2); + ENUM_NAME(DxbcOpcode::Dp3); + ENUM_NAME(DxbcOpcode::Dp4); + ENUM_NAME(DxbcOpcode::Else); + ENUM_NAME(DxbcOpcode::Emit); + ENUM_NAME(DxbcOpcode::EmitThenCut); + ENUM_NAME(DxbcOpcode::EndIf); + ENUM_NAME(DxbcOpcode::EndLoop); +
ENUM_NAME(DxbcOpcode::EndSwitch); + ENUM_NAME(DxbcOpcode::Eq); + ENUM_NAME(DxbcOpcode::Exp); + ENUM_NAME(DxbcOpcode::Frc); + ENUM_NAME(DxbcOpcode::FtoI); + ENUM_NAME(DxbcOpcode::FtoU); + ENUM_NAME(DxbcOpcode::Ge); + ENUM_NAME(DxbcOpcode::IAdd); + ENUM_NAME(DxbcOpcode::If); + ENUM_NAME(DxbcOpcode::IEq); + ENUM_NAME(DxbcOpcode::IGe); + ENUM_NAME(DxbcOpcode::ILt); + ENUM_NAME(DxbcOpcode::IMad); + ENUM_NAME(DxbcOpcode::IMax); + ENUM_NAME(DxbcOpcode::IMin); + ENUM_NAME(DxbcOpcode::IMul); + ENUM_NAME(DxbcOpcode::INe); + ENUM_NAME(DxbcOpcode::INeg); + ENUM_NAME(DxbcOpcode::IShl); + ENUM_NAME(DxbcOpcode::IShr); + ENUM_NAME(DxbcOpcode::ItoF); + ENUM_NAME(DxbcOpcode::Label); + ENUM_NAME(DxbcOpcode::Ld); + ENUM_NAME(DxbcOpcode::LdMs); + ENUM_NAME(DxbcOpcode::Log); + ENUM_NAME(DxbcOpcode::Loop); + ENUM_NAME(DxbcOpcode::Lt); + ENUM_NAME(DxbcOpcode::Mad); + ENUM_NAME(DxbcOpcode::Min); + ENUM_NAME(DxbcOpcode::Max); + ENUM_NAME(DxbcOpcode::CustomData); + ENUM_NAME(DxbcOpcode::Mov); + ENUM_NAME(DxbcOpcode::Movc); + ENUM_NAME(DxbcOpcode::Mul); + ENUM_NAME(DxbcOpcode::Ne); + ENUM_NAME(DxbcOpcode::Nop); + ENUM_NAME(DxbcOpcode::Not); + ENUM_NAME(DxbcOpcode::Or); + ENUM_NAME(DxbcOpcode::ResInfo); + ENUM_NAME(DxbcOpcode::Ret); + ENUM_NAME(DxbcOpcode::Retc); + ENUM_NAME(DxbcOpcode::RoundNe); + ENUM_NAME(DxbcOpcode::RoundNi); + ENUM_NAME(DxbcOpcode::RoundPi); + ENUM_NAME(DxbcOpcode::RoundZ); + ENUM_NAME(DxbcOpcode::Rsq); + ENUM_NAME(DxbcOpcode::Sample); + ENUM_NAME(DxbcOpcode::SampleC); + ENUM_NAME(DxbcOpcode::SampleClz); + ENUM_NAME(DxbcOpcode::SampleL); + ENUM_NAME(DxbcOpcode::SampleD); + ENUM_NAME(DxbcOpcode::SampleB); + ENUM_NAME(DxbcOpcode::Sqrt); + ENUM_NAME(DxbcOpcode::Switch); + ENUM_NAME(DxbcOpcode::SinCos); + ENUM_NAME(DxbcOpcode::UDiv); + ENUM_NAME(DxbcOpcode::ULt); + ENUM_NAME(DxbcOpcode::UGe); + ENUM_NAME(DxbcOpcode::UMul); + ENUM_NAME(DxbcOpcode::UMad); + ENUM_NAME(DxbcOpcode::UMax); + ENUM_NAME(DxbcOpcode::UMin); + ENUM_NAME(DxbcOpcode::UShr); + ENUM_NAME(DxbcOpcode::UtoF); 
+ ENUM_NAME(DxbcOpcode::Xor); + ENUM_NAME(DxbcOpcode::DclResource); + ENUM_NAME(DxbcOpcode::DclConstantBuffer); + ENUM_NAME(DxbcOpcode::DclSampler); + ENUM_NAME(DxbcOpcode::DclIndexRange); + ENUM_NAME(DxbcOpcode::DclGsOutputPrimitiveTopology); + ENUM_NAME(DxbcOpcode::DclGsInputPrimitive); + ENUM_NAME(DxbcOpcode::DclMaxOutputVertexCount); + ENUM_NAME(DxbcOpcode::DclInput); + ENUM_NAME(DxbcOpcode::DclInputSgv); + ENUM_NAME(DxbcOpcode::DclInputSiv); + ENUM_NAME(DxbcOpcode::DclInputPs); + ENUM_NAME(DxbcOpcode::DclInputPsSgv); + ENUM_NAME(DxbcOpcode::DclInputPsSiv); + ENUM_NAME(DxbcOpcode::DclOutput); + ENUM_NAME(DxbcOpcode::DclOutputSgv); + ENUM_NAME(DxbcOpcode::DclOutputSiv); + ENUM_NAME(DxbcOpcode::DclTemps); + ENUM_NAME(DxbcOpcode::DclIndexableTemp); + ENUM_NAME(DxbcOpcode::DclGlobalFlags); + ENUM_NAME(DxbcOpcode::Reserved0); + ENUM_NAME(DxbcOpcode::Lod); + ENUM_NAME(DxbcOpcode::Gather4); + ENUM_NAME(DxbcOpcode::SamplePos); + ENUM_NAME(DxbcOpcode::SampleInfo); + ENUM_NAME(DxbcOpcode::Reserved1); + ENUM_NAME(DxbcOpcode::HsDecls); + ENUM_NAME(DxbcOpcode::HsControlPointPhase); + ENUM_NAME(DxbcOpcode::HsForkPhase); + ENUM_NAME(DxbcOpcode::HsJoinPhase); + ENUM_NAME(DxbcOpcode::EmitStream); + ENUM_NAME(DxbcOpcode::CutStream); + ENUM_NAME(DxbcOpcode::EmitThenCutStream); + ENUM_NAME(DxbcOpcode::InterfaceCall); + ENUM_NAME(DxbcOpcode::BufInfo); + ENUM_NAME(DxbcOpcode::DerivRtxCoarse); + ENUM_NAME(DxbcOpcode::DerivRtxFine); + ENUM_NAME(DxbcOpcode::DerivRtyCoarse); + ENUM_NAME(DxbcOpcode::DerivRtyFine); + ENUM_NAME(DxbcOpcode::Gather4C); + ENUM_NAME(DxbcOpcode::Gather4Po); + ENUM_NAME(DxbcOpcode::Gather4PoC); + ENUM_NAME(DxbcOpcode::Rcp); + ENUM_NAME(DxbcOpcode::F32toF16); + ENUM_NAME(DxbcOpcode::F16toF32); + ENUM_NAME(DxbcOpcode::UAddc); + ENUM_NAME(DxbcOpcode::USubb); + ENUM_NAME(DxbcOpcode::CountBits); + ENUM_NAME(DxbcOpcode::FirstBitHi); + ENUM_NAME(DxbcOpcode::FirstBitLo); + ENUM_NAME(DxbcOpcode::FirstBitShi); + ENUM_NAME(DxbcOpcode::UBfe); + ENUM_NAME(DxbcOpcode::IBfe); 
+ ENUM_NAME(DxbcOpcode::Bfi); + ENUM_NAME(DxbcOpcode::BfRev); + ENUM_NAME(DxbcOpcode::Swapc); + ENUM_NAME(DxbcOpcode::DclStream); + ENUM_NAME(DxbcOpcode::DclFunctionBody); + ENUM_NAME(DxbcOpcode::DclFunctionTable); + ENUM_NAME(DxbcOpcode::DclInterface); + ENUM_NAME(DxbcOpcode::DclInputControlPointCount); + ENUM_NAME(DxbcOpcode::DclOutputControlPointCount); + ENUM_NAME(DxbcOpcode::DclTessDomain); + ENUM_NAME(DxbcOpcode::DclTessPartitioning); + ENUM_NAME(DxbcOpcode::DclTessOutputPrimitive); + ENUM_NAME(DxbcOpcode::DclHsMaxTessFactor); + ENUM_NAME(DxbcOpcode::DclHsForkPhaseInstanceCount); + ENUM_NAME(DxbcOpcode::DclHsJoinPhaseInstanceCount); + ENUM_NAME(DxbcOpcode::DclThreadGroup); + ENUM_NAME(DxbcOpcode::DclUavTyped); + ENUM_NAME(DxbcOpcode::DclUavRaw); + ENUM_NAME(DxbcOpcode::DclUavStructured); + ENUM_NAME(DxbcOpcode::DclThreadGroupSharedMemoryRaw); + ENUM_NAME(DxbcOpcode::DclThreadGroupSharedMemoryStructured); + ENUM_NAME(DxbcOpcode::DclResourceRaw); + ENUM_NAME(DxbcOpcode::DclResourceStructured); + ENUM_NAME(DxbcOpcode::LdUavTyped); + ENUM_NAME(DxbcOpcode::StoreUavTyped); + ENUM_NAME(DxbcOpcode::LdRaw); + ENUM_NAME(DxbcOpcode::StoreRaw); + ENUM_NAME(DxbcOpcode::LdStructured); + ENUM_NAME(DxbcOpcode::StoreStructured); + ENUM_NAME(DxbcOpcode::AtomicAnd); + ENUM_NAME(DxbcOpcode::AtomicOr); + ENUM_NAME(DxbcOpcode::AtomicXor); + ENUM_NAME(DxbcOpcode::AtomicCmpStore); + ENUM_NAME(DxbcOpcode::AtomicIAdd); + ENUM_NAME(DxbcOpcode::AtomicIMax); + ENUM_NAME(DxbcOpcode::AtomicIMin); + ENUM_NAME(DxbcOpcode::AtomicUMax); + ENUM_NAME(DxbcOpcode::AtomicUMin); + ENUM_NAME(DxbcOpcode::ImmAtomicAlloc); + ENUM_NAME(DxbcOpcode::ImmAtomicConsume); + ENUM_NAME(DxbcOpcode::ImmAtomicIAdd); + ENUM_NAME(DxbcOpcode::ImmAtomicAnd); + ENUM_NAME(DxbcOpcode::ImmAtomicOr); + ENUM_NAME(DxbcOpcode::ImmAtomicXor); + ENUM_NAME(DxbcOpcode::ImmAtomicExch); + ENUM_NAME(DxbcOpcode::ImmAtomicCmpExch); + ENUM_NAME(DxbcOpcode::ImmAtomicIMax); + ENUM_NAME(DxbcOpcode::ImmAtomicIMin); + 
ENUM_NAME(DxbcOpcode::ImmAtomicUMax); + ENUM_NAME(DxbcOpcode::ImmAtomicUMin); + ENUM_NAME(DxbcOpcode::Sync); + ENUM_NAME(DxbcOpcode::DAdd); + ENUM_NAME(DxbcOpcode::DMax); + ENUM_NAME(DxbcOpcode::DMin); + ENUM_NAME(DxbcOpcode::DMul); + ENUM_NAME(DxbcOpcode::DEq); + ENUM_NAME(DxbcOpcode::DGe); + ENUM_NAME(DxbcOpcode::DLt); + ENUM_NAME(DxbcOpcode::DNe); + ENUM_NAME(DxbcOpcode::DMov); + ENUM_NAME(DxbcOpcode::DMovc); + ENUM_NAME(DxbcOpcode::DtoF); + ENUM_NAME(DxbcOpcode::FtoD); + ENUM_NAME(DxbcOpcode::EvalSnapped); + ENUM_NAME(DxbcOpcode::EvalSampleIndex); + ENUM_NAME(DxbcOpcode::EvalCentroid); + ENUM_NAME(DxbcOpcode::DclGsInstanceCount); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcExtOpcode e) { + switch (e) { + ENUM_NAME(DxbcExtOpcode::Empty); + ENUM_NAME(DxbcExtOpcode::SampleControls); + ENUM_NAME(DxbcExtOpcode::ResourceDim); + ENUM_NAME(DxbcExtOpcode::ResourceReturnType); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcOperandType e) { + switch (e) { + ENUM_NAME(DxbcOperandType::Temp); + ENUM_NAME(DxbcOperandType::Input); + ENUM_NAME(DxbcOperandType::Output); + ENUM_NAME(DxbcOperandType::IndexableTemp); + ENUM_NAME(DxbcOperandType::Imm32); + ENUM_NAME(DxbcOperandType::Imm64); + ENUM_NAME(DxbcOperandType::Sampler); + ENUM_NAME(DxbcOperandType::Resource); + ENUM_NAME(DxbcOperandType::ConstantBuffer); + ENUM_NAME(DxbcOperandType::ImmediateConstantBuffer); + ENUM_NAME(DxbcOperandType::Label); + ENUM_NAME(DxbcOperandType::InputPrimitiveId); + ENUM_NAME(DxbcOperandType::OutputDepth); + ENUM_NAME(DxbcOperandType::Null); + ENUM_NAME(DxbcOperandType::Rasterizer); + ENUM_NAME(DxbcOperandType::OutputCoverageMask); + ENUM_NAME(DxbcOperandType::Stream); + ENUM_NAME(DxbcOperandType::FunctionBody); + ENUM_NAME(DxbcOperandType::FunctionTable); + ENUM_NAME(DxbcOperandType::Interface); + ENUM_NAME(DxbcOperandType::FunctionInput); + ENUM_NAME(DxbcOperandType::FunctionOutput); + 
ENUM_NAME(DxbcOperandType::OutputControlPointId); + ENUM_NAME(DxbcOperandType::InputForkInstanceId); + ENUM_NAME(DxbcOperandType::InputJoinInstanceId); + ENUM_NAME(DxbcOperandType::InputControlPoint); + ENUM_NAME(DxbcOperandType::OutputControlPoint); + ENUM_NAME(DxbcOperandType::InputPatchConstant); + ENUM_NAME(DxbcOperandType::InputDomainPoint); + ENUM_NAME(DxbcOperandType::ThisPointer); + ENUM_NAME(DxbcOperandType::UnorderedAccessView); + ENUM_NAME(DxbcOperandType::ThreadGroupSharedMemory); + ENUM_NAME(DxbcOperandType::InputThreadId); + ENUM_NAME(DxbcOperandType::InputThreadGroupId); + ENUM_NAME(DxbcOperandType::InputThreadIdInGroup); + ENUM_NAME(DxbcOperandType::InputCoverageMask); + ENUM_NAME(DxbcOperandType::InputThreadIndexInGroup); + ENUM_NAME(DxbcOperandType::InputGsInstanceId); + ENUM_NAME(DxbcOperandType::OutputDepthGe); + ENUM_NAME(DxbcOperandType::OutputDepthLe); + ENUM_NAME(DxbcOperandType::CycleCounter); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, dxvk::DxbcOperandExt e) { + switch (e) { + ENUM_NAME(DxbcOperandExt::OperandModifier); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcComponentCount e) { + switch (e) { + ENUM_NAME(DxbcComponentCount::Component0); + ENUM_NAME(DxbcComponentCount::Component1); + ENUM_NAME(DxbcComponentCount::Component4); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcRegMode e) { + switch (e) { + ENUM_NAME(DxbcRegMode::Mask); + ENUM_NAME(DxbcRegMode::Swizzle); + ENUM_NAME(DxbcRegMode::Select1); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcOperandIndexRepresentation e) { + switch (e) { + ENUM_NAME(DxbcOperandIndexRepresentation::Imm32); + ENUM_NAME(DxbcOperandIndexRepresentation::Imm64); + ENUM_NAME(DxbcOperandIndexRepresentation::Relative); + ENUM_NAME(DxbcOperandIndexRepresentation::Imm32Relative); + ENUM_NAME(DxbcOperandIndexRepresentation::Imm64Relative); + ENUM_DEFAULT(e); + } + } + + 
+ std::ostream& operator << (std::ostream& os, DxbcResourceDim e) { + switch (e) { + ENUM_NAME(DxbcResourceDim::Unknown); + ENUM_NAME(DxbcResourceDim::Buffer); + ENUM_NAME(DxbcResourceDim::Texture1D); + ENUM_NAME(DxbcResourceDim::Texture2D); + ENUM_NAME(DxbcResourceDim::Texture2DMs); + ENUM_NAME(DxbcResourceDim::Texture3D); + ENUM_NAME(DxbcResourceDim::TextureCube); + ENUM_NAME(DxbcResourceDim::Texture1DArr); + ENUM_NAME(DxbcResourceDim::Texture2DArr); + ENUM_NAME(DxbcResourceDim::Texture2DMsArr); + ENUM_NAME(DxbcResourceDim::TextureCubeArr); + ENUM_NAME(DxbcResourceDim::RawBuffer); + ENUM_NAME(DxbcResourceDim::StructuredBuffer); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcResourceReturnType e) { + switch (e) { + ENUM_NAME(DxbcResourceReturnType::Unorm); + ENUM_NAME(DxbcResourceReturnType::Snorm); + ENUM_NAME(DxbcResourceReturnType::Sint); + ENUM_NAME(DxbcResourceReturnType::Uint); + ENUM_NAME(DxbcResourceReturnType::Float); + ENUM_NAME(DxbcResourceReturnType::Mixed); + ENUM_NAME(DxbcResourceReturnType::Double); + ENUM_NAME(DxbcResourceReturnType::Continued); + ENUM_NAME(DxbcResourceReturnType::Unused); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcRegisterComponentType e) { + switch (e) { + ENUM_NAME(DxbcRegisterComponentType::Unknown); + ENUM_NAME(DxbcRegisterComponentType::Uint32); + ENUM_NAME(DxbcRegisterComponentType::Sint32); + ENUM_NAME(DxbcRegisterComponentType::Float32); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcInstructionReturnType e) { + switch (e) { + ENUM_NAME(DxbcInstructionReturnType::Float); + ENUM_NAME(DxbcInstructionReturnType::Uint); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, DxbcSystemValue e) { + switch (e) { + ENUM_NAME(DxbcSystemValue::None); + ENUM_NAME(DxbcSystemValue::Position); + ENUM_NAME(DxbcSystemValue::ClipDistance); + ENUM_NAME(DxbcSystemValue::CullDistance); + 
ENUM_NAME(DxbcSystemValue::RenderTargetId); + ENUM_NAME(DxbcSystemValue::ViewportId); + ENUM_NAME(DxbcSystemValue::VertexId); + ENUM_NAME(DxbcSystemValue::PrimitiveId); + ENUM_NAME(DxbcSystemValue::InstanceId); + ENUM_NAME(DxbcSystemValue::IsFrontFace); + ENUM_NAME(DxbcSystemValue::SampleIndex); + ENUM_NAME(DxbcSystemValue::FinalQuadUeq0EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalQuadVeq0EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalQuadUeq1EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalQuadVeq1EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalQuadUInsideTessFactor); + ENUM_NAME(DxbcSystemValue::FinalQuadVInsideTessFactor); + ENUM_NAME(DxbcSystemValue::FinalTriUeq0EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalTriVeq0EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalTriWeq0EdgeTessFactor); + ENUM_NAME(DxbcSystemValue::FinalTriInsideTessFactor); + ENUM_NAME(DxbcSystemValue::FinalLineDetailTessFactor); + ENUM_NAME(DxbcSystemValue::FinalLineDensityTessFactor); + ENUM_NAME(DxbcSystemValue::Target); + ENUM_NAME(DxbcSystemValue::Depth); + ENUM_NAME(DxbcSystemValue::Coverage); + ENUM_NAME(DxbcSystemValue::DepthGe); + ENUM_NAME(DxbcSystemValue::DepthLe); + ENUM_DEFAULT(e); + } + } + + + std::ostream& operator << (std::ostream& os, dxvk::DxbcProgramType e) { + switch (e) { + ENUM_NAME(DxbcProgramType::PixelShader); + ENUM_NAME(DxbcProgramType::VertexShader); + ENUM_NAME(DxbcProgramType::GeometryShader); + ENUM_NAME(DxbcProgramType::HullShader); + ENUM_NAME(DxbcProgramType::DomainShader); + ENUM_NAME(DxbcProgramType::ComputeShader); + ENUM_DEFAULT(e); + } + } + + std::ostream& operator << (std::ostream& os, dxvk::DxbcCustomDataClass e) { + switch (e) { + ENUM_NAME(DxbcCustomDataClass::Comment); + ENUM_NAME(DxbcCustomDataClass::DebugInfo); + ENUM_NAME(DxbcCustomDataClass::Opaque); + ENUM_NAME(DxbcCustomDataClass::ImmConstBuf); + ENUM_DEFAULT(e); + } + } + + std::ostream& operator << (std::ostream& os, dxvk::DxbcScalarType e) { + switch (e) { + 
  /**
   * \brief Creates shader options from D3D11 options
   *
   * The whole body is currently commented out, so \c options is
   * not read and every member keeps the in-class default declared
   * in \c DxbcOptions. The disabled code is kept for reference: it
   * derives the option values from a device object, which this
   * constructor no longer receives as a parameter.
   * \param [in] options D3D11 options (currently unused)
   */
  DxbcOptions::DxbcOptions(/*const Rc& device, */const D3D11Options& options) {
    /*const Rc adapter = device->adapter();

    const DxvkDeviceFeatures& devFeatures = device->features();
    const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();

    useDepthClipWorkaround
      = !devFeatures.extDepthClipEnable.depthClipEnable;
    useSubgroupOpsForAtomicCounters
      = (devInfo.vk11.subgroupSupportedStages & VK_SHADER_STAGE_COMPUTE_BIT)
     && (devInfo.vk11.subgroupSupportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);

    VkFormatFeatureFlags2 r32Features
      = device->getFormatFeatures(VK_FORMAT_R32_SFLOAT).optimal
      & device->getFormatFeatures(VK_FORMAT_R32_UINT).optimal
      & device->getFormatFeatures(VK_FORMAT_R32_SINT).optimal;

    supportsTypedUavLoadR32 = (r32Features & VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT);

    switch (device->config().useRawSsbo) {
      case Tristate::Auto:  minSsboAlignment = devInfo.core.properties.limits.minStorageBufferOffsetAlignment; break;
      case Tristate::True:  minSsboAlignment = 4u; break;
      case Tristate::False: minSsboAlignment = ~0u; break;
    }

    invariantPosition = options.invariantPosition;
    zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory;
    forceVolatileTgsmAccess = options.forceVolatileTgsmAccess;
    disableMsaa = options.disableMsaa;
    forceSampleRateShading = options.forceSampleRateShading;
    enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;

    // Figure out float control flags to match D3D11 rules
    if (options.floatControls) {
      if (devInfo.vk12.shaderSignedZeroInfNanPreserveFloat32)
        floatControl.set(DxbcFloatControlFlag::PreserveNan32);
      if (devInfo.vk12.shaderSignedZeroInfNanPreserveFloat64)
        floatControl.set(DxbcFloatControlFlag::PreserveNan64);

      if (devInfo.vk12.denormBehaviorIndependence != VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE) {
        if (devInfo.vk12.shaderDenormFlushToZeroFloat32)
          floatControl.set(DxbcFloatControlFlag::DenormFlushToZero32);
        if (devInfo.vk12.shaderDenormPreserveFloat64)
          floatControl.set(DxbcFloatControlFlag::DenormPreserve64);
      }
    }*/
  }
+ bool useSubgroupOpsForAtomicCounters = false; + + /// Clear thread-group shared memory to zero + bool zeroInitWorkgroupMemory = false; + + /// Declare vertex positions as invariant + bool invariantPosition = false; + + /// Insert memory barriers after TGSM stoes + bool forceVolatileTgsmAccess = false; + + /// Replace ld_ms with ld + bool disableMsaa = false; + + /// Force sample rate shading by using sample + /// interpolation for fragment shader inputs + bool forceSampleRateShading = false; + + // Enable per-sample interlock if supported + bool enableSampleShadingInterlock = false; + + /// Float control flags + DxbcFloatControlFlags floatControl; + + /// Minimum storage buffer alignment + VkDeviceSize minSsboAlignment = 0; + }; + +} diff --git a/src/dxbc/dxbc_reader.cpp b/src/dxbc/dxbc_reader.cpp new file mode 100644 index 0000000..9b9a340 --- /dev/null +++ b/src/dxbc/dxbc_reader.cpp @@ -0,0 +1,58 @@ +#include + +#include "dxbc_reader.h" + +namespace dxvk { + + DxbcTag DxbcReader::readTag() { + DxbcTag tag; + this->read(&tag, 4); + return tag; + } + + + std::string DxbcReader::readString() { + std::string result; + + while (m_data[m_pos] != '\0') + result.push_back(m_data[m_pos++]); + + m_pos++; + return result; + } + + + void DxbcReader::read(void* dst, size_t n) { + if (m_pos + n > m_size) + throw DxvkError("DxbcReader::read: Unexpected end of file"); + std::memcpy(dst, m_data + m_pos, n); + m_pos += n; + } + + + void DxbcReader::skip(size_t n) { + if (m_pos + n > m_size) + throw DxvkError("DxbcReader::skip: Unexpected end of file"); + m_pos += n; + } + + + DxbcReader DxbcReader::clone(size_t pos) const { + if (pos > m_size) + throw DxvkError("DxbcReader::clone: Invalid offset"); + return DxbcReader(m_data + pos, m_size - pos); + } + + + DxbcReader DxbcReader::resize(size_t size) const { + if (size > m_size) + throw DxvkError("DxbcReader::resize: Invalid size"); + return DxbcReader(m_data, size, m_pos); + } + + + void DxbcReader::store(std::ostream&& stream) 
const { + stream.write(m_data, m_size); + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_reader.h b/src/dxbc/dxbc_reader.h new file mode 100644 index 0000000..a160001 --- /dev/null +++ b/src/dxbc/dxbc_reader.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include + +#include "dxbc_tag.h" + +namespace dxvk { + + /** + * \brief DXBC bytecode reader + * + * Holds references to the shader byte code and + * provides methods to read + */ + class DxbcReader { + + public: + + DxbcReader(const char* data, size_t size) + : DxbcReader(data, size, 0) { } + + auto readu8 () { return this->readNum (); } + auto readu16() { return this->readNum(); } + auto readu32() { return this->readNum(); } + auto readu64() { return this->readNum(); } + + auto readi8 () { return this->readNum (); } + auto readi16() { return this->readNum (); } + auto readi32() { return this->readNum (); } + auto readi64() { return this->readNum (); } + + auto readf32() { return this->readNum (); } + auto readf64() { return this->readNum (); } + + template + auto readEnum() { + using Tx = std::underlying_type_t; + return static_cast(this->readNum()); + } + + DxbcTag readTag(); + + std::string readString(); + + void read(void* dst, size_t n); + + void skip(size_t n); + + DxbcReader clone(size_t pos) const; + + DxbcReader resize(size_t size) const; + + bool eof() const { + return m_pos >= m_size; + } + + void store(std::ostream&& stream) const; + + private: + + DxbcReader(const char* data, size_t size, size_t pos) + : m_data(data), m_size(size), m_pos(pos) { } + + const char* m_data = nullptr; + size_t m_size = 0; + size_t m_pos = 0; + + template + T readNum() { + T result; + this->read(&result, sizeof(result)); + return result; + } + + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_tag.h b/src/dxbc/dxbc_tag.h new file mode 100644 index 0000000..2ba1750 --- /dev/null +++ b/src/dxbc/dxbc_tag.h @@ -0,0 +1,47 @@ +#pragma once + +#include "dxbc_include.h" + +namespace dxvk { + + /** + * 
\brief Four-character tag + * + * Used to identify chunks in the + * compiled DXBC file by name. + */ + class DxbcTag { + + public: + + DxbcTag() { + for (size_t i = 0; i < 4; i++) + m_chars[i] = '\0'; + } + + DxbcTag(const char* tag) { + for (size_t i = 0; i < 4; i++) + m_chars[i] = tag[i]; + } + + bool operator == (const DxbcTag& other) const { + bool result = true; + for (size_t i = 0; i < 4; i++) + result &= m_chars[i] == other.m_chars[i]; + return result; + } + + bool operator != (const DxbcTag& other) const { + return !this->operator == (other); + } + + const char* operator & () const { return m_chars; } + char* operator & () { return m_chars; } + + private: + + char m_chars[4]; + + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_util.cpp b/src/dxbc/dxbc_util.cpp new file mode 100644 index 0000000..3d885fc --- /dev/null +++ b/src/dxbc/dxbc_util.cpp @@ -0,0 +1,26 @@ +#include "dxbc_util.h" + +namespace dxvk { + + uint32_t primitiveVertexCount(DxbcPrimitive primitive) { + static const std::array s_vertexCounts = { + 0, // Undefined + 1, // Point + 2, // Line + 3, // Triangle + 0, // Undefined + 0, // Undefined + 4, // Line with adjacency + 6, // Triangle with adjacency + }; + + if (primitive >= DxbcPrimitive::Patch1) { + return uint32_t(primitive) + - uint32_t(DxbcPrimitive::Patch1) + + 1u; + } else { + return s_vertexCounts.at(uint32_t(primitive)); + } + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_util.h b/src/dxbc/dxbc_util.h new file mode 100644 index 0000000..04bec75 --- /dev/null +++ b/src/dxbc/dxbc_util.h @@ -0,0 +1,127 @@ +#pragma once + +#include "dxbc_common.h" +#include "dxbc_enums.h" + +namespace dxvk { + + /** + * \brief Push constant struct + */ + struct DxbcPushConstants { + uint32_t rasterizerSampleCount; + }; + + + /** + * \brief Binding numbers and properties + */ + enum DxbcBindingProperties : uint32_t { + DxbcConstBufBindingIndex = 0, + DxbcConstBufBindingCount = 16, + DxbcSamplerBindingIndex = 
DxbcConstBufBindingIndex + + DxbcConstBufBindingCount, + DxbcSamplerBindingCount = 16, + DxbcResourceBindingIndex = DxbcSamplerBindingIndex + + DxbcSamplerBindingCount, + DxbcResourceBindingCount = 128, + DxbcStageBindingCount = DxbcConstBufBindingCount + + DxbcSamplerBindingCount + + DxbcResourceBindingCount, + DxbcUavBindingIndex = DxbcStageBindingCount * 6, + DxbcUavBindingCount = 64, + }; + + + /** + * \brief Computes first binding index for a given stage + * + * \param [in] stage The shader stage + * \returns Index of first binding + */ + inline uint32_t computeStageBindingOffset(DxbcProgramType stage) { + return DxbcStageBindingCount * uint32_t(stage); + } + + + /** + * \brief Computes first UAV binding index offset for a given stage + * + * \param [in] stage The shader stage + * \returns Index of first UAV binding + */ + inline uint32_t computeStageUavBindingOffset(DxbcProgramType stage) { + return DxbcUavBindingIndex + + DxbcUavBindingCount * (stage == DxbcProgramType::ComputeShader ? 
2 : 0); + } + + + /** + * \brief Computes constant buffer binding index + * + * \param [in] stage Shader stage + * \param [in] index Constant buffer index + * \returns Binding index + */ + inline uint32_t computeConstantBufferBinding(DxbcProgramType stage, uint32_t index) { + return computeStageBindingOffset(stage) + DxbcConstBufBindingIndex + index; + } + + + /** + * \brief Computes sampler binding index + * + * \param [in] stage Shader stage + * \param [in] index Sampler index + * \returns Binding index + */ + inline uint32_t computeSamplerBinding(DxbcProgramType stage, uint32_t index) { + return computeStageBindingOffset(stage) + DxbcSamplerBindingIndex + index; + } + + + /** + * \brief Computes resource binding index + * + * \param [in] stage Shader stage + * \param [in] index Resource index + * \returns Binding index + */ + inline uint32_t computeSrvBinding(DxbcProgramType stage, uint32_t index) { + return computeStageBindingOffset(stage) + DxbcResourceBindingIndex + index; + } + + + /** + * \brief Computes UAV binding offset + * + * \param [in] stage Shader stage + * \param [in] index UAV index + * \returns Binding index + */ + inline uint32_t computeUavBinding(DxbcProgramType stage, uint32_t index) { + return computeStageUavBindingOffset(stage) + index; + } + + + /** + * \brief Computes UAV counter binding offset + * + * \param [in] stage Shader stage + * \param [in] index UAV index + * \returns Binding index + */ + inline uint32_t computeUavCounterBinding(DxbcProgramType stage, uint32_t index) { + return computeStageUavBindingOffset(stage) + DxbcUavBindingCount + index; + } + + /** + * \brief Primitive vertex count + * + * Calculates the number of vertices + * for a given primitive type. 
  /**
   * \brief Shader info
   *
   * Plain description of a shader that is passed to the
   * \c DxvkShader constructor. All members except \c stage
   * have default initializers; \c stage has none and must
   * be assigned before the structure is used.
   */
  struct DxvkShaderCreateInfo {
    /// Shader stage (no default value)
    VkShaderStageFlagBits stage;
    /// Descriptor info: number of entries in \c bindings
    /// and pointer to the first entry
    uint32_t bindingCount = 0;
    const DxvkBindingInfo* bindings = nullptr;
    /// Input and output register mask
    uint32_t inputMask = 0;
    uint32_t outputMask = 0;
    /// Flat shading input mask
    uint32_t flatShadingInputs = 0;
    /// Push constant range
    uint32_t pushConstOffset = 0;
    uint32_t pushConstSize = 0;
    /// Uniform buffer data: size and pointer to the data
    // NOTE(review): the unit of uniformSize is presumably bytes,
    // since uniformData is a char pointer - confirm
    uint32_t uniformSize = 0;
    const char* uniformData = nullptr;
    /// Rasterized stream, or -1. Note that the default is 0.
    int32_t xfbRasterizedStream = 0;
    /// Tess control patch vertex count
    uint32_t patchVertexCount = 0;
    /// Transform feedback vertex strides, one per buffer
    uint32_t xfbStrides[MaxNumXfbBuffers] = { };
    /// Output primitive topology. The default value
    /// \c VK_PRIMITIVE_TOPOLOGY_MAX_ENUM is not a real topology.
    VkPrimitiveTopology outputTopology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM;
  };
DxvkShaderModuleCreateInfo { + bool fsDualSrcBlend = false; + bool fsFlatShading = false; + uint32_t undefinedInputs = 0; + + std::array rtSwizzles = { }; + + bool eq(const DxvkShaderModuleCreateInfo& other) const; + + size_t hash() const; + }; + + + /** + * \brief Shader object + * + * Stores a SPIR-V shader and information on the + * bindings that the shader uses. In order to use + * the shader with a pipeline, a shader module + * needs to be created from he shader object. + */ + class DxvkShader : public RcObject { + + public: + + DxvkShader( + const DxvkShaderCreateInfo& info, + SpirvCodeBuffer&& spirv); + + ~DxvkShader(); + + /** + * \brief Shader info + * \returns Shader info + */ + const DxvkShaderCreateInfo& info() const { + return m_info; + } + + /** + * \brief Retrieves shader flags + * \returns Shader flags + */ + DxvkShaderFlags flags() const { + return m_flags; + } + + /** + * \brief Retrieves binding layout + * \returns Binding layout + */ + const DxvkBindingLayout& getBindings() const { + return m_bindings; + } + + /** + * \brief Retrieves spec constant mask + * \returns Bit mask of used spec constants + */ + uint32_t getSpecConstantMask() const { + return m_specConstantMask; + } + + /** + * \brief Tests whether this shader needs to be compiled + * + * If pipeline libraries are supported, this will return + * \c false once the pipeline library is being compiled. + * \returns \c true if compilation is still needed + */ + bool needsLibraryCompile() const { + return m_needsLibraryCompile.load(); + } + + /** + * \brief Notifies library compile + * + * Called automatically when pipeline compilation begins. + * Subsequent calls to \ref needsLibraryCompile will return + * \c false. 
+ */ + void notifyLibraryCompile() { + m_needsLibraryCompile.store(false); + } + + /** + * \brief Gets raw code without modification + */ + SpirvCodeBuffer getRawCode() const { + return m_code.decompress(); + } + + /** + * \brief Patches code using given info + * + * Rewrites binding IDs and potentially fixes up other + * parts of the code depending on pipeline state. + * \param [in] layout Biding layout + * \param [in] state Pipeline state info + * \returns Uncompressed SPIR-V code buffer + */ + SpirvCodeBuffer getCode( + const DxvkBindingLayoutObjects* layout, + const DxvkShaderModuleCreateInfo& state) const; + + /** + * \brief Tests whether this shader supports pipeline libraries + * + * This is true for any vertex, fragment, or compute shader that does not + * require additional pipeline state to be compiled into something useful. + * \param [in] standalone Set to \c true to evaluate this in the context + * of a single-shader pipeline library, or \c false for a pre-raster + * shader library consisting of multiple shader stages. + * \returns \c true if this shader can be used with pipeline libraries + */ + bool canUsePipelineLibrary(bool standalone) const; + + /** + * \brief Dumps SPIR-V shader + * + * Can be used to store the SPIR-V code in a file. + * \param [in] outputStream Stream to write to + */ + void dump(std::ostream& outputStream) const; + + /** + * \brief Sets the shader key + * \param [in] key Unique key + */ + void setShaderKey(const DxvkShaderKey& key) { + m_key = key; + m_hash = key.hash(); + } + + /** + * \brief Retrieves shader key + * \returns The unique shader key + */ + DxvkShaderKey getShaderKey() const { + return m_key; + } + + /** + * \brief Get lookup hash + * + * Retrieves a non-unique hash value derived from the + * shader key which can be used to perform lookups. + * This is better than relying on the pointer value. 
+ * \returns Hash value for map lookups + */ + size_t getHash() const { + return m_hash; + } + + /** + * \brief Retrieves debug name + * \returns The shader's name + */ + std::string debugName() const { + return m_key.toString(); + } + + /** + * \brief Get lookup hash for a shader + * + * Convenience method that returns \c 0 for a null + * pointer, and the shader's lookup hash otherwise. + * \param [in] shader The shader + * \returns The shader's lookup hash, or 0 + */ + static size_t getHash(const Rc& shader) { + return shader != nullptr ? shader->getHash() : 0; + } + + private: + + struct BindingOffsets { + uint32_t bindingId; + uint32_t bindingOffset; + uint32_t setOffset; + }; + + DxvkShaderCreateInfo m_info; + SpirvCompressedBuffer m_code; + + DxvkShaderFlags m_flags; + DxvkShaderKey m_key; + size_t m_hash = 0; + + size_t m_o1IdxOffset = 0; + size_t m_o1LocOffset = 0; + + uint32_t m_specConstantMask = 0; + std::atomic m_needsLibraryCompile = { true }; + + std::vector m_uniformData; + std::vector m_bindingOffsets; + + DxvkBindingLayout m_bindings; + + static void eliminateInput( + SpirvCodeBuffer& code, + uint32_t location); + + static void emitOutputSwizzles( + SpirvCodeBuffer& code, + uint32_t outputMask, + const VkComponentMapping* swizzles); + + static void emitFlatShadingDeclarations( + SpirvCodeBuffer& code, + uint32_t inputMask); + + }; + + + /** + * \brief Shader module object + * + * Manages a Vulkan shader module. This will not + * perform any shader compilation. Instead, the + * context will create pipeline objects on the + * fly when executing draw calls. 
  /**
   * \brief Shader stage info
   *
   * Collects \c VkPipelineShaderStageCreateInfo entries
   * together with the data that they refer to. Stages
   * are added one at a time through \ref addStage, and
   * the resulting array is queried with \ref getStageInfos.
   * Move construction and move assignment are deleted.
   */
  class DxvkShaderStageInfo {

  public:

    DxvkShaderStageInfo(const DxvkDevice* device);

    DxvkShaderStageInfo             (DxvkShaderStageInfo&& other) = delete;
    DxvkShaderStageInfo& operator = (DxvkShaderStageInfo&& other) = delete;

    ~DxvkShaderStageInfo();

    /**
     * \brief Counts shader stages
     * \returns Shader stage count
     */
    uint32_t getStageCount() const {
      return m_stageCount;
    }

    /**
     * \brief Queries shader stage infos
     * \returns Pointer to shader stage infos
     */
    const VkPipelineShaderStageCreateInfo* getStageInfos() const {
      return m_stageInfos.data();
    }

    /**
     * \brief Adds a shader stage with specialization info
     *
     * \param [in] stage Shader stage
     * \param [in] code SPIR-V code
     * \param [in] specInfo Specialization info
     */
    void addStage(
            VkShaderStageFlagBits   stage,
            SpirvCodeBuffer&&       code,
      const VkSpecializationInfo*   specInfo);

    /**
     * \brief Adds stage using a module identifier
     *
     * \param [in] stage Shader stage
     * \param [in] identifier Shader module identifier
     * \param [in] specInfo Specialization info
     */
    void addStage(
            VkShaderStageFlagBits   stage,
      const VkShaderModuleIdentifierEXT& identifier,
      const VkSpecializationInfo*   specInfo);

  private:

    const DxvkDevice* m_device;

    // NOTE(review): the template arguments of every std::array
    // below appear to have been lost (element type and size are
    // missing), so these declarations cannot compile as written.
    // Restore them from the original header.
    struct ShaderModuleIdentifier {
      VkPipelineShaderStageModuleIdentifierCreateInfoEXT createInfo;
      std::array data;
    };

    // A stage is described either by a module identifier
    // or by a regular shader module create info.
    union ShaderModuleInfo {
      ShaderModuleIdentifier    moduleIdentifier;
      VkShaderModuleCreateInfo  moduleInfo;
    };

    std::array m_codeBuffers;
    std::array m_moduleInfos = { };
    std::array m_stageInfos  = { };
    uint32_t   m_stageCount  = 0;

  };
DxvkShaderPipelineLibraryCompileArgs& other) const { + return !this->operator == (other); + } + + size_t hash() const { + return size_t(depthClipEnable); + } + }; + + + /** + * \brief Shader set + * + * Stores a set of shader pointers + * for use in a pipeline library. + */ + struct DxvkShaderSet { + DxvkShader* vs = nullptr; + DxvkShader* tcs = nullptr; + DxvkShader* tes = nullptr; + DxvkShader* gs = nullptr; + DxvkShader* fs = nullptr; + DxvkShader* cs = nullptr; + }; + + + /** + * \brief Shader identifer set + * + * Stores a set of shader module identifiers + * for use in a pipeline library. + */ + struct DxvkShaderIdentifierSet { + VkShaderModuleIdentifierEXT vs = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + VkShaderModuleIdentifierEXT tcs = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + VkShaderModuleIdentifierEXT tes = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + VkShaderModuleIdentifierEXT gs = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + VkShaderModuleIdentifierEXT fs = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + VkShaderModuleIdentifierEXT cs = { VK_STRUCTURE_TYPE_SHADER_MODULE_IDENTIFIER_EXT }; + }; + + + /** + * \brief Shader pipeline library key + */ + class DxvkShaderPipelineLibraryKey { + + public: + + DxvkShaderPipelineLibraryKey(); + + ~DxvkShaderPipelineLibraryKey(); + + /** + * \brief Creates shader set from key + * \returns Shader set + */ + DxvkShaderSet getShaderSet() const; + + /** + * \brief Generates merged binding layout + * \returns Binding layout + */ + DxvkBindingLayout getBindings() const; + + /** + * \brief Adds a shader to the key + * + * Shaders must be added in stage order. 
+ * \param [in] shader Shader to add + */ + void addShader( + const Rc& shader); + + /** + * \brief Checks wether a pipeline library can be created + * \returns \c true if all added shaders are compatible + */ + bool canUsePipelineLibrary() const; + + /** + * \brief Checks for equality + * + * \param [in] other Key to compare to + * \returns \c true if the keys are equal + */ + bool eq( + const DxvkShaderPipelineLibraryKey& other) const; + + /** + * \brief Computes key hash + * \returns Key hash + */ + size_t hash() const; + + private: + + uint32_t m_shaderCount = 0; + VkShaderStageFlags m_shaderStages = 0; + std::array, 4> m_shaders; + + }; + + + /** + * \brief Shader pipeline library + * + * Stores a pipeline object for either a complete compute + * pipeline, a pre-rasterization pipeline library consisting + * of a single vertex shader, or a fragment shader pipeline + * library. All state unknown at shader compile time will + * be made dynamic. + */ + class DxvkShaderPipelineLibrary { + + public: + + DxvkShaderPipelineLibrary( + const DxvkDevice* device, + DxvkPipelineManager* manager, + const DxvkShaderPipelineLibraryKey& key, + const DxvkBindingLayoutObjects* layout); + + ~DxvkShaderPipelineLibrary(); + + /** + * \brief Queries shader module identifier + * + * Can be used to compile an optimized pipeline using the same + * shader code, but without having to wait for the pipeline + * library for this shader shader to compile first. + * \param [in] stage Shader stage to query + * \returns Shader module identifier + */ + VkShaderModuleIdentifierEXT getModuleIdentifier( + VkShaderStageFlagBits stage); + + /** + * \brief Acquires pipeline handle for the given set of arguments + * + * Either returns an already compiled pipeline library object, or + * performs the compilation step if that has not happened yet. + * Increments the use count by one. 
+ * \param [in] args Compile arguments + * \returns Vulkan pipeline handle + */ + VkPipeline acquirePipelineHandle( + const DxvkShaderPipelineLibraryCompileArgs& args); + + /** + * \brief Releases pipeline + * + * Decrements the use count by 1. If the use count reaches 0, + * any previously compiled pipeline library object may be + * destroyed in order to save memory. + */ + void releasePipelineHandle(); + + /** + * \brief Compiles the pipeline with default arguments + * + * This is meant to be called from a worker thread in + * order to reduce the amount of work done on the app's + * main thread. + */ + void compilePipeline(); + + private: + + const DxvkDevice* m_device; + DxvkPipelineStats* m_stats; + DxvkShaderSet m_shaders; + const DxvkBindingLayoutObjects* m_layout; + + dxvk::mutex m_mutex; + VkPipeline m_pipeline = VK_NULL_HANDLE; + VkPipeline m_pipelineNoDepthClip = VK_NULL_HANDLE; + uint32_t m_useCount = 0u; + bool m_compiledOnce = false; + + dxvk::mutex m_identifierMutex; + DxvkShaderIdentifierSet m_identifiers; + + void destroyShaderPipelinesLocked(); + + VkPipeline compileShaderPipelineLocked( + const DxvkShaderPipelineLibraryCompileArgs& args); + + VkPipeline compileShaderPipeline( + const DxvkShaderPipelineLibraryCompileArgs& args, + VkPipelineCreateFlags flags); + + VkPipeline compileVertexShaderPipeline( + const DxvkShaderPipelineLibraryCompileArgs& args, + const DxvkShaderStageInfo& stageInfo, + VkPipelineCreateFlags flags); + + VkPipeline compileFragmentShaderPipeline( + const DxvkShaderStageInfo& stageInfo, + VkPipelineCreateFlags flags); + + VkPipeline compileComputeShaderPipeline( + const DxvkShaderStageInfo& stageInfo, + VkPipelineCreateFlags flags); + + SpirvCodeBuffer getShaderCode( + VkShaderStageFlagBits stage) const; + + void generateModuleIdentifierLocked( + VkShaderModuleIdentifierEXT* identifier, + const SpirvCodeBuffer& spirvCode); + + VkShaderStageFlags getShaderStages() const; + + DxvkShader* getShader( + VkShaderStageFlagBits stage) 
const; + + VkShaderModuleIdentifierEXT* getShaderIdentifier( + VkShaderStageFlagBits stage); + + void notifyLibraryCompile() const; + + bool canUsePipelineCacheControl() const; + + }; + +} \ No newline at end of file diff --git a/src/dxvk/dxvk_hash.h b/src/dxvk/dxvk_hash.h new file mode 100644 index 0000000..a0fe561 --- /dev/null +++ b/src/dxvk/dxvk_hash.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +namespace dxvk { + + struct DxvkEq { + template + size_t operator () (const T& a, const T& b) const { + return a.eq(b); + } + }; + + struct DxvkHash { + template + size_t operator () (const T& object) const { + return object.hash(); + } + }; + + class DxvkHashState { + + public: + + void add(size_t hash) { + m_value ^= hash + 0x9e3779b9 + + (m_value << 6) + + (m_value >> 2); + } + + operator size_t () const { + return m_value; + } + + private: + + size_t m_value = 0; + + }; + +} \ No newline at end of file diff --git a/src/spirv/CMakeLists.txt b/src/spirv/CMakeLists.txt new file mode 100644 index 0000000..598764b --- /dev/null +++ b/src/spirv/CMakeLists.txt @@ -0,0 +1,13 @@ +add_library(dxbc-spirv STATIC) +target_sources(dxbc-spirv PRIVATE + spirv_code_buffer.cpp + spirv_code_buffer.h + spirv_compression.cpp + spirv_compression.h + spirv_include.h + spirv_instruction.h + spirv_module.cpp + spirv_module.h +) +target_include_directories(dxbc-spirv PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(dxbc-spirv PUBLIC windows-headers) diff --git a/src/spirv/spirv_code_buffer.cpp b/src/spirv/spirv_code_buffer.cpp new file mode 100644 index 0000000..74d8dd9 --- /dev/null +++ b/src/spirv/spirv_code_buffer.cpp @@ -0,0 +1,154 @@ +#include +#include + +#include "spirv_code_buffer.h" + +namespace dxvk { + + SpirvCodeBuffer:: SpirvCodeBuffer() { } + SpirvCodeBuffer::~SpirvCodeBuffer() { } + + + SpirvCodeBuffer::SpirvCodeBuffer(uint32_t size) + : m_ptr(size) { + m_code.resize(size); + } + + + SpirvCodeBuffer::SpirvCodeBuffer(uint32_t size, const uint32_t* data) + : 
m_ptr(size) { + m_code.resize(size); + std::memcpy(m_code.data(), data, size * sizeof(uint32_t)); + } + + + SpirvCodeBuffer::SpirvCodeBuffer(std::istream& stream) { + stream.ignore(std::numeric_limits::max()); + std::streamsize length = stream.gcount(); + stream.clear(); + stream.seekg(0, std::ios_base::beg); + + std::vector buffer(length); + stream.read(buffer.data(), length); + buffer.resize(stream.gcount()); + + m_code.resize(buffer.size() / sizeof(uint32_t)); + std::memcpy(reinterpret_cast(m_code.data()), + buffer.data(), m_code.size() * sizeof(uint32_t)); + + m_ptr = m_code.size(); + } + + + uint32_t SpirvCodeBuffer::allocId() { + constexpr size_t BoundIdsOffset = 3; + + if (m_code.size() <= BoundIdsOffset) + return 0; + + return m_code[BoundIdsOffset]++; + } + + + void SpirvCodeBuffer::append(const SpirvCodeBuffer& other) { + if (other.size() != 0) { + const size_t size = m_code.size(); + m_code.resize(size + other.m_code.size()); + + uint32_t* dst = this->m_code.data(); + const uint32_t* src = other.m_code.data(); + + std::memcpy(dst + size, src, other.size()); + m_ptr += other.m_code.size(); + } + } + + + void SpirvCodeBuffer::putWord(uint32_t word) { + m_code.insert(m_code.begin() + m_ptr, word); + m_ptr += 1; + } + + + void SpirvCodeBuffer::putIns(spv::Op opCode, uint16_t wordCount) { + this->putWord( + (static_cast(opCode) << 0) + | (static_cast(wordCount) << 16)); + } + + + void SpirvCodeBuffer::putInt32(uint32_t word) { + this->putWord(word); + } + + + void SpirvCodeBuffer::putInt64(uint64_t value) { + this->putWord(value >> 0); + this->putWord(value >> 32); + } + + + void SpirvCodeBuffer::putFloat32(float value) { + uint32_t tmp; + static_assert(sizeof(tmp) == sizeof(value)); + std::memcpy(&tmp, &value, sizeof(value)); + this->putInt32(tmp); + } + + + void SpirvCodeBuffer::putFloat64(double value) { + uint64_t tmp; + static_assert(sizeof(tmp) == sizeof(value)); + std::memcpy(&tmp, &value, sizeof(value)); + this->putInt64(tmp); + } + + + void 
SpirvCodeBuffer::putStr(const char* str) { + uint32_t word = 0; + uint32_t nbit = 0; + + for (uint32_t i = 0; str[i] != '\0'; str++) { + word |= (static_cast(str[i]) & 0xFF) << nbit; + + if ((nbit += 8) == 32) { + this->putWord(word); + word = 0; + nbit = 0; + } + } + + // Commit current word + this->putWord(word); + } + + + void SpirvCodeBuffer::putHeader(uint32_t version, uint32_t boundIds) { + this->putWord(spv::MagicNumber); + this->putWord(version); + this->putWord(0); // Generator + this->putWord(boundIds); + this->putWord(0); // Schema + } + + + void SpirvCodeBuffer::erase(size_t size) { + m_code.erase( + m_code.begin() + m_ptr, + m_code.begin() + m_ptr + size); + } + + + uint32_t SpirvCodeBuffer::strLen(const char* str) { + // Null-termination plus padding + return (std::strlen(str) + 4) / 4; + } + + + void SpirvCodeBuffer::store(std::ostream& stream) const { + stream.write( + reinterpret_cast(m_code.data()), + sizeof(uint32_t) * m_code.size()); + } + +} \ No newline at end of file diff --git a/src/spirv/spirv_code_buffer.h b/src/spirv/spirv_code_buffer.h new file mode 100644 index 0000000..65c5bff --- /dev/null +++ b/src/spirv/spirv_code_buffer.h @@ -0,0 +1,228 @@ +#pragma once + +#include +#include +#include + +#include "spirv_instruction.h" + +namespace dxvk { + + /** + * \brief SPIR-V code buffer + * + * Helper class for generating SPIR-V shaders. + * Stores arbitrary SPIR-V instructions in a + * format that can be read by Vulkan drivers. 
+ */ + class SpirvCodeBuffer { + + public: + + SpirvCodeBuffer(); + explicit SpirvCodeBuffer(uint32_t size); + SpirvCodeBuffer(const SpirvCodeBuffer &) = default; + SpirvCodeBuffer(SpirvCodeBuffer &&) = default; + SpirvCodeBuffer(uint32_t size, const uint32_t* data); + SpirvCodeBuffer(std::istream& stream); + + template + SpirvCodeBuffer(const uint32_t (&data)[N]) + : SpirvCodeBuffer(N, data) { } + + ~SpirvCodeBuffer(); + + SpirvCodeBuffer &operator=(const SpirvCodeBuffer &) = default; + SpirvCodeBuffer &operator=(SpirvCodeBuffer &&) = default; + + /** + * \brief Code data + * \returns Code data + */ + const uint32_t* data() const { return m_code.data(); } + uint32_t* data() { return m_code.data(); } + + /** + * \brief Code size, in dwords + * \returns Code size, in dwords + */ + uint32_t dwords() const { + return m_code.size(); + } + + /** + * \brief Code size, in bytes + * \returns Code size, in bytes + */ + size_t size() const { + return m_code.size() * sizeof(uint32_t); + } + + /** + * \brief Begin instruction iterator + * + * Points to the first instruction in the instruction + * block. The header, if any, will be skipped over. + * \returns Instruction iterator + */ + SpirvInstructionIterator begin() { + return SpirvInstructionIterator( + m_code.data(), 0, m_code.size()); + } + + /** + * \brief End instruction iterator + * + * Points to the end of the instruction block. + * \returns Instruction iterator + */ + SpirvInstructionIterator end() { + return SpirvInstructionIterator(nullptr, 0, 0); + } + + /** + * \brief Allocates a new ID + * + * Returns a new valid ID and increments the + * maximum ID count stored in the header. + * \returns The new SPIR-V ID + */ + uint32_t allocId(); + + /** + * \brief Merges two code buffers + * + * This is useful to generate declarations or + * the SPIR-V header at the same time as the + * code when doing so in advance is impossible. 
+ * \param [in] other Code buffer to append + */ + void append(const SpirvCodeBuffer& other); + + /** + * \brief Appends an 32-bit word to the buffer + * \param [in] word The word to append + */ + void putWord(uint32_t word); + + /** + * \brief Appends an instruction word to the buffer + * + * Adds a single word containing both the word count + * and the op code number for a single instruction. + * \param [in] opCode Operand code + * \param [in] wordCount Number of words + */ + void putIns(spv::Op opCode, uint16_t wordCount); + + /** + * \brief Appends a 32-bit integer to the buffer + * \param [in] value The number to add + */ + void putInt32(uint32_t word); + + /** + * \brief Appends a 64-bit integer to the buffer + * + * A 64-bit integer will take up two 32-bit words. + * \param [in] value 64-bit value to add + */ + void putInt64(uint64_t value); + + /** + * \brief Appends a 32-bit float to the buffer + * \param [in] value The number to add + */ + void putFloat32(float value); + + /** + * \brief Appends a 64-bit float to the buffer + * \param [in] value The number to add + */ + void putFloat64(double value); + + /** + * \brief Appends a literal string to the buffer + * \param [in] str String to append to the buffer + */ + void putStr(const char* str); + + /** + * \brief Adds the header to the buffer + * + * \param [in] version SPIR-V version + * \param [in] boundIds Number of bound IDs + */ + void putHeader(uint32_t version, uint32_t boundIds); + + /** + * \brief Erases given number of dwords + * + * Removes data from the code buffer, starting + * at the current insertion offset. 
+ * \param [in] size Number of words to remove + */ + void erase(size_t size); + + /** + * \brief Computes length of a literal string + * + * \param [in] str The string to check + * \returns Number of words consumed by a string + */ + uint32_t strLen(const char* str); + + /** + * \brief Stores the SPIR-V module to a stream + * + * The ability to save modules to a file + * exists mostly for debugging purposes. + * \param [in] stream Output stream + */ + void store(std::ostream& stream) const; + + /** + * \brief Retrieves current insertion pointer + * + * Sometimes it may be necessary to insert code into the + * middle of the stream rather than appending it. This + * retrieves the current insertion pointer. Note that the + * pointer will become invalid if any code is inserted + * before the current pointer location. + * \returns Current insertion pointer + */ + size_t getInsertionPtr() const { + return m_ptr; + } + + /** + * \brief Sets insertion pointer to a specific value + * + * Sets the insertion pointer to a value that was + * previously retrieved by \ref getInsertionPtr. + * \param [in] ptr Insertion pointer to restore + */ + void beginInsertion(size_t ptr) { + m_ptr = ptr; + } + + /** + * \brief Sets insertion pointer to the end + * + * After this call, new instructions will be + * appended to the stream. In other words, + * this will restore default behaviour. 
+ * \returns Previous instruction pointer + */ + size_t endInsertion() { + return std::exchange(m_ptr, m_code.size()); + } + + private: + + std::vector m_code; + size_t m_ptr = 0; + + }; + +} diff --git a/src/spirv/spirv_compression.cpp b/src/spirv/spirv_compression.cpp new file mode 100644 index 0000000..1051620 --- /dev/null +++ b/src/spirv/spirv_compression.cpp @@ -0,0 +1,123 @@ +#include "spirv_compression.h" + +namespace dxvk { + + SpirvCompressedBuffer::SpirvCompressedBuffer() + : m_size(0) { + + } + + + SpirvCompressedBuffer::SpirvCompressedBuffer(SpirvCodeBuffer& code) + : m_size(code.dwords()) { + // The compression (detailed below) achieves roughly 55% of the + // original size on average and is very consistent, so an initial + // estimate of roughly 58% will be accurate most of the time. + const uint32_t* data = code.data(); + m_code.reserve((m_size * 75) / 128); + + std::array block; + uint32_t blockMask = 0; + uint32_t blockOffset = 0; + + // The algorithm used is a simple variable-to-fixed compression that + // encodes up to two consecutive SPIR-V tokens into one DWORD using + // a small number of different encodings. While not achieving great + // compression ratios, the main goal is to allow decompression code + // to be fast, with short dependency chains. + // Compressed tokens are stored in blocks of 16 DWORDs, each preceded + // by a single DWORD which stores the layout for each DWORD, two bits + // each. The supported layouts are as follows: + // 0x0: 1x 32-bit; 0x1: 1x 20-bit + 1x 12-bit + // 0x2: 2x 16-bit; 0x3: 1x 12-bit + 1x 20-bit + // These layouts are chosen to allow reasonably efficient encoding of + // opcode tokens, which usually fit into 20 bits, followed by type IDs, + // which tend to be low as well since most types are defined early. 
+ for (size_t i = 0; i < m_size; ) { + if (likely(i + 1 < m_size)) { + uint32_t a = data[i]; + uint32_t b = data[i + 1]; + uint32_t schema; + uint32_t encode; + + if (std::max(a, b) < (1u << 16)) { + schema = 0x2; + encode = a | (b << 16); + } else if (a < (1u << 20) && b < (1u << 12)) { + schema = 0x1; + encode = a | (b << 20); + } else if (a < (1u << 12) && b < (1u << 20)) { + schema = 0x3; + encode = a | (b << 12); + } else { + schema = 0x0; + encode = a; + } + + block[blockOffset] = encode; + blockMask |= schema << (blockOffset << 1); + blockOffset += 1; + + i += schema ? 2 : 1; + } else { + block[blockOffset] = data[i++]; + blockOffset += 1; + } + + if (unlikely(blockOffset == 16) || unlikely(i == m_size)) { + m_code.insert(m_code.end(), blockMask); + m_code.insert(m_code.end(), block.begin(), block.begin() + blockOffset); + + blockMask = 0; + blockOffset = 0; + } + } + + // Only shrink the array if we have lots of overhead for some reason. + // This should only happen on shaders where our initial estimate was + // too small. In general, we want to avoid reallocation here. 
+ if (m_code.capacity() > (m_code.size() * 10) / 9) + m_code.shrink_to_fit(); + } + + + SpirvCompressedBuffer::~SpirvCompressedBuffer() { + + } + + + SpirvCodeBuffer SpirvCompressedBuffer::decompress() const { + SpirvCodeBuffer code(m_size); + uint32_t* data = code.data(); + + uint32_t srcOffset = 0; + uint32_t dstOffset = 0; + + constexpr uint32_t shiftAmounts = 0x0c101420; + + while (dstOffset < m_size) { + uint32_t blockMask = m_code[srcOffset]; + + for (uint32_t i = 0; i < 16 && dstOffset < m_size; i++) { + // Use 64-bit integers for some of the operands so we can + // shift by 32 bits and not handle it as a special case + uint32_t schema = (blockMask >> (i << 1)) & 0x3; + uint32_t shift = (shiftAmounts >> (schema << 3)) & 0xff; + uint64_t mask = ~(~0ull << shift); + uint64_t encode = m_code[srcOffset + i + 1]; + + data[dstOffset] = encode & mask; + + if (likely(schema)) + data[dstOffset + 1] = encode >> shift; + + dstOffset += schema ? 2 : 1; + } + + srcOffset += 17; + } + + return code; + } + +} \ No newline at end of file diff --git a/src/spirv/spirv_compression.h b/src/spirv/spirv_compression.h new file mode 100644 index 0000000..e48f398 --- /dev/null +++ b/src/spirv/spirv_compression.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include "spirv_code_buffer.h" + +namespace dxvk { + + /** + * \brief Compressed SPIR-V code buffer + * + * Implements a fast in-memory compression + * to keep memory footprint low. 
+ */ + class SpirvCompressedBuffer { + + public: + + SpirvCompressedBuffer(); + + SpirvCompressedBuffer(SpirvCodeBuffer& code); + + ~SpirvCompressedBuffer(); + + SpirvCodeBuffer decompress() const; + + private: + + size_t m_size; + std::vector m_code; + + void encodeDword(uint32_t dw); + + uint32_t decodeDword(size_t& offset) const; + + }; + +} \ No newline at end of file diff --git a/src/spirv/spirv_include.h b/src/spirv/spirv_include.h new file mode 100644 index 0000000..9d281d8 --- /dev/null +++ b/src/spirv/spirv_include.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +#include "../util/log/log.h" +#include "../util/log/log_debug.h" + +#include "../util/util_error.h" +#include "../util/util_flags.h" +#include "../util/util_likely.h" +#include "../util/util_string.h" + +#include "../util/rc/util_rc.h" +#include "../util/rc/util_rc_ptr.h" diff --git a/src/spirv/spirv_instruction.h b/src/spirv/spirv_instruction.h new file mode 100644 index 0000000..1f93141 --- /dev/null +++ b/src/spirv/spirv_instruction.h @@ -0,0 +1,158 @@ +#pragma once + +#include "spirv_include.h" + +namespace dxvk { + + /** + * \brief SPIR-V instruction + * + * Helps parsing a single instruction, providing + * access to the op code, instruction length and + * instruction arguments. + */ + class SpirvInstruction { + + public: + + SpirvInstruction() { } + SpirvInstruction(uint32_t* code, uint32_t offset, uint32_t length) + : m_code(code), m_offset(offset), m_length(length) { } + + /** + * \brief SPIR-V Op code + * \returns The op code + */ + spv::Op opCode() const { + return static_cast( + this->arg(0) & spv::OpCodeMask); + } + + /** + * \brief Instruction length + * \returns Number of DWORDs + */ + uint32_t length() const { + return this->arg(0) >> spv::WordCountShift; + } + + /** + * \brief Instruction offset + * \returns Offset in DWORDs + */ + uint32_t offset() const { + return m_offset; + } + + /** + * \brief Argument value + * + * Retrieves an argument DWORD. 
Note that some instructions + * take 64-bit arguments which require more than one DWORD. + * Arguments start at index 1. Calling this method with an + * argument ID of 0 will return the opcode token. + * \param [in] idx Argument index, starting at 1 + * \returns The argument value + */ + uint32_t arg(uint32_t idx) const { + const uint32_t index = m_offset + idx; + return index < m_length ? m_code[index] : 0; + } + + /** + * \brief Argument string + * + * Retrieves a pointer to a UTF-8-encoded string. + * \param [in] idx Argument index, starting at 1 + * \returns Pointer to the literal string + */ + const char* chr(uint32_t idx) const { + const uint32_t index = m_offset + idx; + return index < m_length ? reinterpret_cast(&m_code[index]) : nullptr; + } + + /** + * \brief Changes the value of an argument + * + * \param [in] idx Argument index, starting at 1 + * \param [in] word New argument word + */ + void setArg(uint32_t idx, uint32_t word) const { + if (m_offset + idx < m_length) + m_code[m_offset + idx] = word; + } + + private: + + uint32_t* m_code = nullptr; + uint32_t m_offset = 0; + uint32_t m_length = 0; + + }; + + + /** + * \brief SPIR-V instruction iterator + * + * Convenient iterator that can be used + * to process raw SPIR-V shader code. + */ + class SpirvInstructionIterator { + + public: + + SpirvInstructionIterator() { } + SpirvInstructionIterator(uint32_t* code, uint32_t offset, uint32_t length) + : m_code (length != 0 ? code : nullptr), + m_offset(length != 0 ? 
offset : 0), + m_length(length) { + if ((length >= 5) && (offset == 0) && (m_code[0] == spv::MagicNumber)) + this->advance(5); + } + + SpirvInstructionIterator& operator ++ () { + this->advance(SpirvInstruction(m_code, m_offset, m_length).length()); + return *this; + } + + SpirvInstructionIterator operator ++ (int) { + SpirvInstructionIterator result = *this; + this->advance(SpirvInstruction(m_code, m_offset, m_length).length()); + return result; + } + + SpirvInstruction operator * () const { + return SpirvInstruction(m_code, m_offset, m_length); + } + + bool operator == (const SpirvInstructionIterator& other) const { + return this->m_code == other.m_code + && this->m_offset == other.m_offset + && this->m_length == other.m_length; + } + + bool operator != (const SpirvInstructionIterator& other) const { + return this->m_code != other.m_code + || this->m_offset != other.m_offset + || this->m_length != other.m_length; + } + + private: + + uint32_t* m_code = nullptr; + uint32_t m_offset = 0; + uint32_t m_length = 0; + + void advance(uint32_t n) { + if (m_offset + n < m_length) { + m_offset += n; + } else { + m_code = nullptr; + m_offset = 0; + m_length = 0; + } + } + + }; + +} \ No newline at end of file diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp new file mode 100644 index 0000000..09240f2 --- /dev/null +++ b/src/spirv/spirv_module.cpp @@ -0,0 +1,3877 @@ +#include + +#include "spirv_module.h" + +namespace dxvk { + + SpirvModule::SpirvModule(uint32_t version) + : m_version(version) { + this->instImportGlsl450(); + } + + + SpirvModule::~SpirvModule() { + + } + + + SpirvCodeBuffer SpirvModule::compile() const { + SpirvCodeBuffer result; + result.putHeader(m_version, m_id); + result.append(m_capabilities); + result.append(m_extensions); + result.append(m_instExt); + result.append(m_memoryModel); + result.append(m_entryPoints); + result.append(m_execModeInfo); + result.append(m_debugNames); + result.append(m_annotations); + 
result.append(m_typeConstDefs); + result.append(m_variables); + result.append(m_code); + return result; + } + + + uint32_t SpirvModule::allocateId() { + return m_id++; + } + + + bool SpirvModule::hasCapability( + spv::Capability capability) { + for (auto ins : m_capabilities) { + if (ins.opCode() == spv::OpCapability && ins.arg(1) == capability) + return true; + } + + return false; + } + + void SpirvModule::enableCapability( + spv::Capability capability) { + // Scan the generated instructions to check + // whether we already enabled the capability. + if (!hasCapability(capability)) { + m_capabilities.putIns (spv::OpCapability, 2); + m_capabilities.putWord(capability); + } + } + + + void SpirvModule::enableExtension( + const char* extensionName) { + m_extensions.putIns (spv::OpExtension, 1 + m_extensions.strLen(extensionName)); + m_extensions.putStr (extensionName); + } + + + void SpirvModule::addEntryPoint( + uint32_t entryPointId, + spv::ExecutionModel executionModel, + const char* name) { + m_entryPoints.putIns (spv::OpEntryPoint, 3 + m_entryPoints.strLen(name) + m_interfaceVars.size()); + m_entryPoints.putWord (executionModel); + m_entryPoints.putWord (entryPointId); + m_entryPoints.putStr (name); + + for (uint32_t varId : m_interfaceVars) + m_entryPoints.putWord(varId); + } + + + void SpirvModule::setMemoryModel( + spv::AddressingModel addressModel, + spv::MemoryModel memoryModel) { + m_memoryModel.putIns (spv::OpMemoryModel, 3); + m_memoryModel.putWord (addressModel); + m_memoryModel.putWord (memoryModel); + } + + + void SpirvModule::setExecutionMode( + uint32_t entryPointId, + spv::ExecutionMode executionMode) { + m_execModeInfo.putIns (spv::OpExecutionMode, 3); + m_execModeInfo.putWord(entryPointId); + m_execModeInfo.putWord(executionMode); + } + + + void SpirvModule::setExecutionMode( + uint32_t entryPointId, + spv::ExecutionMode executionMode, + uint32_t argCount, + const uint32_t* args) { + m_execModeInfo.putIns (spv::OpExecutionMode, 3 + argCount); + 
m_execModeInfo.putWord(entryPointId); + m_execModeInfo.putWord(executionMode); + + for (uint32_t i = 0; i < argCount; i++) + m_execModeInfo.putWord(args[i]); + } + + + void SpirvModule::setInvocations( + uint32_t entryPointId, + uint32_t invocations) { + m_execModeInfo.putIns (spv::OpExecutionMode, 4); + m_execModeInfo.putWord (entryPointId); + m_execModeInfo.putWord (spv::ExecutionModeInvocations); + m_execModeInfo.putInt32(invocations); + } + + + void SpirvModule::setLocalSize( + uint32_t entryPointId, + uint32_t x, + uint32_t y, + uint32_t z) { + m_execModeInfo.putIns (spv::OpExecutionMode, 6); + m_execModeInfo.putWord (entryPointId); + m_execModeInfo.putWord (spv::ExecutionModeLocalSize); + m_execModeInfo.putInt32(x); + m_execModeInfo.putInt32(y); + m_execModeInfo.putInt32(z); + } + + + void SpirvModule::setOutputVertices( + uint32_t entryPointId, + uint32_t vertexCount) { + m_execModeInfo.putIns (spv::OpExecutionMode, 4); + m_execModeInfo.putWord(entryPointId); + m_execModeInfo.putWord(spv::ExecutionModeOutputVertices); + m_execModeInfo.putWord(vertexCount); + } + + + uint32_t SpirvModule::addDebugString( + const char* string) { + uint32_t resultId = this->allocateId(); + + m_debugNames.putIns (spv::OpString, + 2 + m_debugNames.strLen(string)); + m_debugNames.putWord(resultId); + m_debugNames.putStr (string); + return resultId; + } + + + void SpirvModule::setDebugSource( + spv::SourceLanguage language, + uint32_t version, + uint32_t file, + const char* source) { + uint32_t strLen = source != nullptr + ? 
m_debugNames.strLen(source) : 0; + + m_debugNames.putIns (spv::OpSource, 4 + strLen); + m_debugNames.putWord(language); + m_debugNames.putWord(version); + m_debugNames.putWord(file); + + if (source != nullptr) + m_debugNames.putStr(source); + } + + void SpirvModule::setDebugName( + uint32_t expressionId, + const char* debugName) { + m_debugNames.putIns (spv::OpName, 2 + m_debugNames.strLen(debugName)); + m_debugNames.putWord(expressionId); + m_debugNames.putStr (debugName); + } + + + void SpirvModule::setDebugMemberName( + uint32_t structId, + uint32_t memberId, + const char* debugName) { + m_debugNames.putIns (spv::OpMemberName, 3 + m_debugNames.strLen(debugName)); + m_debugNames.putWord(structId); + m_debugNames.putWord(memberId); + m_debugNames.putStr (debugName); + } + + + uint32_t SpirvModule::constBool( + bool v) { + return this->defConst(v + ? spv::OpConstantTrue + : spv::OpConstantFalse, + this->defBoolType(), + 0, nullptr); + } + + + uint32_t SpirvModule::consti32( + int32_t v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + spv::OpConstant, + this->defIntType(32, 1), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::consti64( + int64_t v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + spv::OpConstant, + this->defIntType(64, 1), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::constu32( + uint32_t v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + spv::OpConstant, + this->defIntType(32, 0), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::constu64( + uint64_t v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + spv::OpConstant, + this->defIntType(64, 0), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::constf32( + float v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + 
spv::OpConstant, + this->defFloatType(32), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::constf64( + double v) { + std::array data; + std::memcpy(data.data(), &v, sizeof(v)); + + return this->defConst( + spv::OpConstant, + this->defFloatType(64), + data.size(), + data.data()); + } + + + uint32_t SpirvModule::constvec4i32( + int32_t x, + int32_t y, + int32_t z, + int32_t w) { + std::array args = {{ + this->consti32(x), this->consti32(y), + this->consti32(z), this->consti32(w), + }}; + + uint32_t scalarTypeId = this->defIntType(32, 1); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 4); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constvec4b32( + bool x, + bool y, + bool z, + bool w) { + std::array args = {{ + this->constBool(x), this->constBool(y), + this->constBool(z), this->constBool(w), + }}; + + uint32_t scalarTypeId = this->defBoolType(); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 4); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constvec4u32( + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w) { + std::array args = {{ + this->constu32(x), this->constu32(y), + this->constu32(z), this->constu32(w), + }}; + + uint32_t scalarTypeId = this->defIntType(32, 0); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 4); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constvec2f32( + float x, + float y) { + std::array args = {{ + this->constf32(x), this->constf32(y), + }}; + + uint32_t scalarTypeId = this->defFloatType(32); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 2); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constvec3f32( + float x, + float y, + float z) { + std::array args = {{ + this->constf32(x), this->constf32(y), + this->constf32(z), + }}; + + uint32_t 
scalarTypeId = this->defFloatType(32); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 3); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constvec4f32( + float x, + float y, + float z, + float w) { + std::array args = {{ + this->constf32(x), this->constf32(y), + this->constf32(z), this->constf32(w), + }}; + + uint32_t scalarTypeId = this->defFloatType(32); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, 4); + + return this->constComposite(vectorTypeId, args.size(), args.data()); + } + + + uint32_t SpirvModule::constfReplicant( + float replicant, + uint32_t count) { + uint32_t value = this->constf32(replicant); + + std::array args = { value, value, value, value }; + + // Can't make a scalar composite. + if (count == 1) + return args[0]; + + uint32_t scalarTypeId = this->defFloatType(32); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, count); + + return this->constComposite(vectorTypeId, count, args.data()); + } + + + uint32_t SpirvModule::constbReplicant( + bool replicant, + uint32_t count) { + uint32_t value = this->constBool(replicant); + + std::array args = { value, value, value, value }; + + // Can't make a scalar composite. + if (count == 1) + return args[0]; + + uint32_t scalarTypeId = this->defBoolType(); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, count); + + return this->constComposite(vectorTypeId, count, args.data()); + } + + + uint32_t SpirvModule::constiReplicant( + int32_t replicant, + uint32_t count) { + uint32_t value = this->consti32(replicant); + + std::array args = { value, value, value, value }; + + // Can't make a scalar composite. 
+ if (count == 1) + return args[0]; + + uint32_t scalarTypeId = this->defIntType(32, 1); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, count); + + return this->constComposite(vectorTypeId, count, args.data()); + } + + + uint32_t SpirvModule::constuReplicant( + int32_t replicant, + uint32_t count) { + uint32_t value = this->constu32(replicant); + + std::array args = { value, value, value, value }; + + // Can't make a scalar composite. + if (count == 1) + return args[0]; + + uint32_t scalarTypeId = this->defIntType(32, 0); + uint32_t vectorTypeId = this->defVectorType(scalarTypeId, count); + + return this->constComposite(vectorTypeId, count, args.data()); + } + + + uint32_t SpirvModule::constComposite( + uint32_t typeId, + uint32_t constCount, + const uint32_t* constIds) { + return this->defConst( + spv::OpConstantComposite, + typeId, constCount, constIds); + } + + + uint32_t SpirvModule::constUndef( + uint32_t typeId) { + return this->defConst(spv::OpUndef, + typeId, 0, nullptr); + } + + + uint32_t SpirvModule::lateConst32( + uint32_t typeId) { + uint32_t resultId = this->allocateId(); + m_lateConsts.insert(resultId); + + m_typeConstDefs.putIns (spv::OpConstant, 4); + m_typeConstDefs.putWord(typeId); + m_typeConstDefs.putWord(resultId); + m_typeConstDefs.putWord(0); + return resultId; + } + + + void SpirvModule::setLateConst( + uint32_t constId, + const uint32_t* argIds) { + for (auto ins : m_typeConstDefs) { + if (ins.opCode() != spv::OpConstant + && ins.opCode() != spv::OpConstantComposite) + continue; + + if (ins.arg(2) != constId) + continue; + + for (uint32_t i = 3; i < ins.length(); i++) + ins.setArg(i, argIds[i - 3]); + + return; + } + } + + + uint32_t SpirvModule::specConstBool( + bool v) { + uint32_t typeId = this->defBoolType(); + uint32_t resultId = this->allocateId(); + + const spv::Op op = v + ? 
spv::OpSpecConstantTrue + : spv::OpSpecConstantFalse; + + m_typeConstDefs.putIns (op, 3); + m_typeConstDefs.putWord (typeId); + m_typeConstDefs.putWord (resultId); + return resultId; + } + + + uint32_t SpirvModule::specConst32( + uint32_t typeId, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_typeConstDefs.putIns (spv::OpSpecConstant, 4); + m_typeConstDefs.putWord (typeId); + m_typeConstDefs.putWord (resultId); + m_typeConstDefs.putWord (value); + return resultId; + } + + + void SpirvModule::decorate( + uint32_t object, + spv::Decoration decoration) { + m_annotations.putIns (spv::OpDecorate, 3); + m_annotations.putWord (object); + m_annotations.putWord (decoration); + } + + + void SpirvModule::decorateArrayStride( + uint32_t object, + uint32_t stride) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationArrayStride); + m_annotations.putInt32(stride); + } + + + void SpirvModule::decorateBinding( + uint32_t object, + uint32_t binding) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationBinding); + m_annotations.putInt32(binding); + } + + + void SpirvModule::decorateBlock(uint32_t object) { + m_annotations.putIns (spv::OpDecorate, 3); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationBlock); + } + + + void SpirvModule::decorateBuiltIn( + uint32_t object, + spv::BuiltIn builtIn) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationBuiltIn); + m_annotations.putWord (builtIn); + } + + + void SpirvModule::decorateComponent( + uint32_t object, + uint32_t location) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationComponent); + m_annotations.putInt32(location); + } + + + void SpirvModule::decorateDescriptorSet( + uint32_t object, + uint32_t set) { + 
m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationDescriptorSet); + m_annotations.putInt32(set); + } + + + void SpirvModule::decorateIndex( + uint32_t object, + uint32_t index) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationIndex); + m_annotations.putInt32(index); + } + + + void SpirvModule::decorateLocation( + uint32_t object, + uint32_t location) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationLocation); + m_annotations.putInt32(location); + } + + + void SpirvModule::decorateSpecId( + uint32_t object, + uint32_t specId) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationSpecId); + m_annotations.putInt32(specId); + } + + + void SpirvModule::decorateXfb( + uint32_t object, + uint32_t streamId, + uint32_t bufferId, + uint32_t offset, + uint32_t stride) { + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationStream); + m_annotations.putInt32(streamId); + + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationXfbBuffer); + m_annotations.putInt32(bufferId); + + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationXfbStride); + m_annotations.putInt32(stride); + + m_annotations.putIns (spv::OpDecorate, 4); + m_annotations.putWord (object); + m_annotations.putWord (spv::DecorationOffset); + m_annotations.putInt32(offset); + } + + + void SpirvModule::memberDecorateBuiltIn( + uint32_t structId, + uint32_t memberId, + spv::BuiltIn builtIn) { + m_annotations.putIns (spv::OpMemberDecorate, 5); + m_annotations.putWord (structId); + m_annotations.putWord (memberId); + m_annotations.putWord 
(spv::DecorationBuiltIn); + m_annotations.putWord (builtIn); + } + + + void SpirvModule::memberDecorate( + uint32_t structId, + uint32_t memberId, + spv::Decoration decoration) { + m_annotations.putIns (spv::OpMemberDecorate, 4); + m_annotations.putWord (structId); + m_annotations.putWord (memberId); + m_annotations.putWord (decoration); + } + + + void SpirvModule::memberDecorateMatrixStride( + uint32_t structId, + uint32_t memberId, + uint32_t stride) { + m_annotations.putIns (spv::OpMemberDecorate, 5); + m_annotations.putWord (structId); + m_annotations.putWord (memberId); + m_annotations.putWord (spv::DecorationMatrixStride); + m_annotations.putWord (stride); + } + + + void SpirvModule::memberDecorateOffset( + uint32_t structId, + uint32_t memberId, + uint32_t offset) { + m_annotations.putIns (spv::OpMemberDecorate, 5); + m_annotations.putWord (structId); + m_annotations.putWord (memberId); + m_annotations.putWord (spv::DecorationOffset); + m_annotations.putWord (offset); + } + + + uint32_t SpirvModule::defVoidType() { + return this->defType(spv::OpTypeVoid, 0, nullptr); + } + + + uint32_t SpirvModule::defBoolType() { + return this->defType(spv::OpTypeBool, 0, nullptr); + } + + + uint32_t SpirvModule::defIntType( + uint32_t width, + uint32_t isSigned) { + std::array args = {{ width, isSigned }}; + return this->defType(spv::OpTypeInt, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defFloatType( + uint32_t width) { + std::array args = {{ width }}; + return this->defType(spv::OpTypeFloat, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defVectorType( + uint32_t elementType, + uint32_t elementCount) { + std::array args = + {{ elementType, elementCount }}; + + return this->defType(spv::OpTypeVector, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defMatrixType( + uint32_t columnType, + uint32_t columnCount) { + std::array args = + {{ columnType, columnCount }}; + + return this->defType(spv::OpTypeMatrix, + args.size(), 
args.data()); + } + + + uint32_t SpirvModule::defArrayType( + uint32_t typeId, + uint32_t length) { + std::array args = {{ typeId, length }}; + + return this->defType(spv::OpTypeArray, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defArrayTypeUnique( + uint32_t typeId, + uint32_t length) { + uint32_t resultId = this->allocateId(); + + m_typeConstDefs.putIns (spv::OpTypeArray, 4); + m_typeConstDefs.putWord(resultId); + m_typeConstDefs.putWord(typeId); + m_typeConstDefs.putWord(length); + return resultId; + } + + + uint32_t SpirvModule::defRuntimeArrayType( + uint32_t typeId) { + std::array args = { typeId }; + + return this->defType(spv::OpTypeRuntimeArray, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defRuntimeArrayTypeUnique( + uint32_t typeId) { + uint32_t resultId = this->allocateId(); + + m_typeConstDefs.putIns (spv::OpTypeRuntimeArray, 3); + m_typeConstDefs.putWord(resultId); + m_typeConstDefs.putWord(typeId); + return resultId; + } + + + uint32_t SpirvModule::defFunctionType( + uint32_t returnType, + uint32_t argCount, + const uint32_t* argTypes) { + std::vector args; + args.push_back(returnType); + + for (uint32_t i = 0; i < argCount; i++) + args.push_back(argTypes[i]); + + return this->defType(spv::OpTypeFunction, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defStructType( + uint32_t memberCount, + const uint32_t* memberTypes) { + return this->defType(spv::OpTypeStruct, + memberCount, memberTypes); + } + + + uint32_t SpirvModule::defStructTypeUnique( + uint32_t memberCount, + const uint32_t* memberTypes) { + uint32_t resultId = this->allocateId(); + + m_typeConstDefs.putIns (spv::OpTypeStruct, 2 + memberCount); + m_typeConstDefs.putWord(resultId); + + for (uint32_t i = 0; i < memberCount; i++) + m_typeConstDefs.putWord(memberTypes[i]); + return resultId; + } + + + uint32_t SpirvModule::defPointerType( + uint32_t variableType, + spv::StorageClass storageClass) { + std::array args = {{ + 
static_cast(storageClass), + variableType, + }}; + + return this->defType(spv::OpTypePointer, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defSamplerType() { + return this->defType(spv::OpTypeSampler, 0, nullptr); + } + + + uint32_t SpirvModule::defImageType( + uint32_t sampledType, + spv::Dim dimensionality, + uint32_t depth, + uint32_t arrayed, + uint32_t multisample, + uint32_t sampled, + spv::ImageFormat format) { + std::array args = {{ + sampledType, + static_cast(dimensionality), + depth, arrayed, + multisample, + sampled, + static_cast(format) + }}; + + return this->defType(spv::OpTypeImage, + args.size(), args.data()); + } + + + uint32_t SpirvModule::defSampledImageType( + uint32_t imageType) { + return this->defType(spv::OpTypeSampledImage, 1, &imageType); + } + + + uint32_t SpirvModule::newVar( + uint32_t pointerType, + spv::StorageClass storageClass) { + uint32_t resultId = this->allocateId(); + + if (isInterfaceVar(storageClass)) + m_interfaceVars.push_back(resultId); + + auto& code = storageClass != spv::StorageClassFunction + ? m_variables : m_code; + + code.putIns (spv::OpVariable, 4); + code.putWord (pointerType); + code.putWord (resultId); + code.putWord (storageClass); + return resultId; + } + + + uint32_t SpirvModule::newVarInit( + uint32_t pointerType, + spv::StorageClass storageClass, + uint32_t initialValue) { + uint32_t resultId = this->allocateId(); + + if (isInterfaceVar(storageClass)) + m_interfaceVars.push_back(resultId); + + auto& code = storageClass != spv::StorageClassFunction + ? 
m_variables : m_code; + + code.putIns (spv::OpVariable, 5); + code.putWord (pointerType); + code.putWord (resultId); + code.putWord (storageClass); + code.putWord (initialValue); + return resultId; + } + + + void SpirvModule::functionBegin( + uint32_t returnType, + uint32_t functionId, + uint32_t functionType, + spv::FunctionControlMask functionControl) { + m_code.putIns (spv::OpFunction, 5); + m_code.putWord(returnType); + m_code.putWord(functionId); + m_code.putWord(functionControl); + m_code.putWord(functionType); + } + + + uint32_t SpirvModule::functionParameter( + uint32_t parameterType) { + uint32_t parameterId = this->allocateId(); + + m_code.putIns (spv::OpFunctionParameter, 3); + m_code.putWord(parameterType); + m_code.putWord(parameterId); + return parameterId; + } + + + void SpirvModule::functionEnd() { + m_code.putIns (spv::OpFunctionEnd, 1); + } + + + uint32_t SpirvModule::opAccessChain( + uint32_t resultType, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAccessChain, 4 + indexCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(composite); + + for (uint32_t i = 0; i < indexCount; i++) + m_code.putInt32(indexArray[i]); + return resultId; + } + + + uint32_t SpirvModule::opArrayLength( + uint32_t resultType, + uint32_t structure, + uint32_t memberId) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpArrayLength, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(structure); + m_code.putWord(memberId); + return resultId; + } + + + uint32_t SpirvModule::opAny( + uint32_t resultType, + uint32_t vector) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAny, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector); + return resultId; + } + + + uint32_t SpirvModule::opAll( + uint32_t resultType, + uint32_t vector) { + uint32_t resultId = 
this->allocateId(); + + m_code.putIns (spv::OpAll, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector); + return resultId; + } + + + uint32_t SpirvModule::opAtomicLoad( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicLoad, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + return resultId; + } + + + void SpirvModule::opAtomicStore( + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + m_code.putIns (spv::OpAtomicStore, 5); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + } + + + uint32_t SpirvModule::opAtomicExchange( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicExchange, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicCompareExchange( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t equal, + uint32_t unequal, + uint32_t value, + uint32_t comparator) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicCompareExchange, 9); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(equal); + m_code.putWord(unequal); + m_code.putWord(value); + m_code.putWord(comparator); + return resultId; + } + + + uint32_t SpirvModule::opAtomicIIncrement( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicIIncrement, 6); + 
m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + return resultId; + } + + + uint32_t SpirvModule::opAtomicIDecrement( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicIDecrement, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + return resultId; + } + + + uint32_t SpirvModule::opAtomicIAdd( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicIAdd, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicISub( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicISub, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicSMin( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicSMin, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicSMax( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns 
(spv::OpAtomicSMax, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicUMin( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicUMin, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicUMax( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicUMax, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicAnd( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicAnd, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicOr( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicOr, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opAtomicXor( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + 
uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpAtomicXor, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(pointer); + m_code.putWord(scope); + m_code.putWord(semantics); + m_code.putWord(value); + return resultId; + } + + + uint32_t SpirvModule::opBitcast( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitcast, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opBitCount( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitCount, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opBitReverse( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitReverse, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFindILsb( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FindILsb); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFindUMsb( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FindUMsb); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFindSMsb( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + 
m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FindSMsb); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opBitFieldInsert( + uint32_t resultType, + uint32_t base, + uint32_t insert, + uint32_t offset, + uint32_t count) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitFieldInsert, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(insert); + m_code.putWord(offset); + m_code.putWord(count); + return resultId; + } + + + uint32_t SpirvModule::opBitFieldSExtract( + uint32_t resultType, + uint32_t base, + uint32_t offset, + uint32_t count) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitFieldSExtract, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(offset); + m_code.putWord(count); + return resultId; + } + + + uint32_t SpirvModule::opBitFieldUExtract( + uint32_t resultType, + uint32_t base, + uint32_t offset, + uint32_t count) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitFieldUExtract, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(offset); + m_code.putWord(count); + return resultId; + } + + + uint32_t SpirvModule::opBitwiseAnd( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitwiseAnd, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opBitwiseOr( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitwiseOr, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + 
+ + uint32_t SpirvModule::opBitwiseXor( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpBitwiseXor, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opNot( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpNot, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opShiftLeftLogical( + uint32_t resultType, + uint32_t base, + uint32_t shift) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpShiftLeftLogical, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(shift); + return resultId; + } + + + uint32_t SpirvModule::opShiftRightArithmetic( + uint32_t resultType, + uint32_t base, + uint32_t shift) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpShiftRightArithmetic, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(shift); + return resultId; + } + + + uint32_t SpirvModule::opShiftRightLogical( + uint32_t resultType, + uint32_t base, + uint32_t shift) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpShiftRightLogical, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(base); + m_code.putWord(shift); + return resultId; + } + + + uint32_t SpirvModule::opConvertFtoS( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpConvertFToS, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opConvertFtoU( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + 
+ m_code.putIns (spv::OpConvertFToU, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opConvertStoF( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpConvertSToF, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opConvertUtoF( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpConvertUToF, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opCompositeConstruct( + uint32_t resultType, + uint32_t valueCount, + const uint32_t* valueArray) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpCompositeConstruct, 3 + valueCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + + for (uint32_t i = 0; i < valueCount; i++) + m_code.putWord(valueArray[i]); + return resultId; + } + + + uint32_t SpirvModule::opCompositeExtract( + uint32_t resultType, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpCompositeExtract, 4 + indexCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(composite); + + for (uint32_t i = 0; i < indexCount; i++) + m_code.putInt32(indexArray[i]); + return resultId; + } + + + uint32_t SpirvModule::opCompositeInsert( + uint32_t resultType, + uint32_t object, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpCompositeInsert, 5 + indexCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(object); + m_code.putWord(composite); + + for (uint32_t i = 0; i < indexCount; i++) + 
m_code.putInt32(indexArray[i]); + return resultId; + } + + + uint32_t SpirvModule::opDpdx( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdx, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDpdy( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdy, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDpdxCoarse( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdxCoarse, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDpdyCoarse( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdyCoarse, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDpdxFine( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdxFine, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDpdyFine( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDPdyFine, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opVectorExtractDynamic( + uint32_t resultType, + uint32_t vector, + uint32_t index) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpVectorExtractDynamic, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector); + 
m_code.putWord(index); + return resultId; + } + + + uint32_t SpirvModule::opVectorShuffle( + uint32_t resultType, + uint32_t vectorLeft, + uint32_t vectorRight, + uint32_t indexCount, + const uint32_t* indexArray) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpVectorShuffle, 5 + indexCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vectorLeft); + m_code.putWord(vectorRight); + + for (uint32_t i = 0; i < indexCount; i++) + m_code.putInt32(indexArray[i]); + return resultId; + } + + + uint32_t SpirvModule::opSNegate( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSNegate, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFNegate( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFNegate, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opSAbs( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450SAbs); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFAbs( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FAbs); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFSign( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + 
m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FSign); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFMix( + uint32_t resultType, + uint32_t x, + uint32_t y, + uint32_t a) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 8); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FMix); + m_code.putWord(x); + m_code.putWord(y); + m_code.putWord(a); + return resultId; + } + + + uint32_t SpirvModule::opCross( + uint32_t resultType, + uint32_t x, + uint32_t y) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Cross); + m_code.putWord(x); + m_code.putWord(y); + return resultId; + } + + + uint32_t SpirvModule::opIAdd( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpIAdd, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opISub( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpISub, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFAdd( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFAdd, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFSub( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFSub, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + 
m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opSDiv( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSDiv, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opUDiv( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpUDiv, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opSRem( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSRem, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opUMod( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpUMod, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFDiv( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFDiv, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opIMul( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpIMul, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFMul( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFMul, 5); + m_code.putWord(resultType); + 
m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opVectorTimesScalar( + uint32_t resultType, + uint32_t vector, + uint32_t scalar) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpVectorTimesScalar, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector); + m_code.putWord(scalar); + return resultId; + } + + + uint32_t SpirvModule::opMatrixTimesMatrix( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpMatrixTimesMatrix, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opMatrixTimesVector( + uint32_t resultType, + uint32_t matrix, + uint32_t vector) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpMatrixTimesVector, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(matrix); + m_code.putWord(vector); + return resultId; + } + + + uint32_t SpirvModule::opVectorTimesMatrix( + uint32_t resultType, + uint32_t vector, + uint32_t matrix) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpVectorTimesMatrix, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector); + m_code.putWord(matrix); + return resultId; + } + + + uint32_t SpirvModule::opTranspose( + uint32_t resultType, + uint32_t matrix) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpTranspose, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(matrix); + return resultId; + } + + + uint32_t SpirvModule::opInverse( + uint32_t resultType, + uint32_t matrix) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450MatrixInverse); + 
m_code.putWord(matrix); + return resultId; + } + + + uint32_t SpirvModule::opFFma( + uint32_t resultType, + uint32_t a, + uint32_t b, + uint32_t c) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 8); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Fma); + m_code.putWord(a); + m_code.putWord(b); + m_code.putWord(c); + return resultId; + } + + + uint32_t SpirvModule::opFMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opNMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450NMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opNMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450NMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opSMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t 
resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450SMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opSMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450SMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opUMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450UMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opUMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450UMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opFClamp( + uint32_t resultType, + uint32_t x, + uint32_t minVal, + uint32_t maxVal) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 8); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450FClamp); + m_code.putWord(x); + m_code.putWord(minVal); + m_code.putWord(maxVal); + return resultId; + } + + + uint32_t SpirvModule::opNClamp( + uint32_t resultType, + uint32_t x, + uint32_t minVal, + uint32_t maxVal) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 8); + m_code.putWord(resultType); 
+ m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450NClamp); + m_code.putWord(x); + m_code.putWord(minVal); + m_code.putWord(maxVal); + return resultId; + } + + + uint32_t SpirvModule::opIEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpIEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opINotEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpINotEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSLessThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSLessThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSGreaterThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns 
(spv::OpSGreaterThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opULessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpULessThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opULessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpULessThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opUGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpUGreaterThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opUGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpUGreaterThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFOrdEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdNotEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns 
(spv::OpFOrdNotEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFOrdLessThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFOrdLessThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFOrdGreaterThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opFOrdGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFOrdGreaterThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opLogicalEqual( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpLogicalEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opLogicalNotEqual( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = 
this->allocateId(); + + m_code.putIns (spv::OpLogicalNotEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opLogicalAnd( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpLogicalAnd, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opLogicalOr( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpLogicalOr, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opLogicalNot( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpLogicalNot, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opDot( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpDot, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSin( + uint32_t resultType, + uint32_t vector) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Sin); + m_code.putWord(vector); + return resultId; + } + + + uint32_t SpirvModule::opCos( + uint32_t resultType, + uint32_t vector) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + 
m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Cos); + m_code.putWord(vector); + return resultId; + } + + + uint32_t SpirvModule::opSqrt( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Sqrt); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opInverseSqrt( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450InverseSqrt); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opNormalize( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Normalize); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opReflect( + uint32_t resultType, + uint32_t incident, + uint32_t normal) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Reflect); + m_code.putWord(incident); + m_code.putWord(normal); + return resultId; + } + + + uint32_t SpirvModule::opLength( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Length); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opExp2( + uint32_t resultType, + uint32_t 
operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Exp2); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opExp( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Exp); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opLog2( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Log2); + m_code.putWord(operand); + return resultId; + } + + uint32_t SpirvModule::opPow( + uint32_t resultType, + uint32_t base, + uint32_t exponent) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Pow); + m_code.putWord(base); + m_code.putWord(exponent); + return resultId; + } + + uint32_t SpirvModule::opFract( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Fract); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opCeil( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Ceil); + m_code.putWord(operand); + return 
resultId; + } + + + uint32_t SpirvModule::opFloor( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Floor); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opRound( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Round); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opRoundEven( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450RoundEven); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opTrunc( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450Trunc); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFConvert( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFConvert, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opPackHalf2x16( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450PackHalf2x16); + m_code.putWord(operand); + 
return resultId; + } + + + uint32_t SpirvModule::opUnpackHalf2x16( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450UnpackHalf2x16); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opSelect( + uint32_t resultType, + uint32_t condition, + uint32_t operand1, + uint32_t operand2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSelect, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(condition); + m_code.putWord(operand1); + m_code.putWord(operand2); + return resultId; + } + + + uint32_t SpirvModule::opIsNan( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpIsNan, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opIsInf( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpIsInf, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opFunctionCall( + uint32_t resultType, + uint32_t functionId, + uint32_t argCount, + const uint32_t* argIds) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpFunctionCall, 4 + argCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(functionId); + + for (uint32_t i = 0; i < argCount; i++) + m_code.putWord(argIds[i]); + return resultId; + } + + + void SpirvModule::opLabel(uint32_t labelId) { + m_code.putIns (spv::OpLabel, 2); + m_code.putWord(labelId); + + m_blockId = labelId; + } + + + uint32_t SpirvModule::opLoad( + uint32_t typeId, + uint32_t pointerId) { + return opLoad(typeId, pointerId, SpirvMemoryOperands()); + } + 
+ + uint32_t SpirvModule::opLoad( + uint32_t typeId, + uint32_t pointerId, + const SpirvMemoryOperands& operands) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpLoad, 4 + getMemoryOperandWordCount(operands)); + m_code.putWord(typeId); + m_code.putWord(resultId); + m_code.putWord(pointerId); + + putMemoryOperands(operands); + return resultId; + } + + + void SpirvModule::opStore( + uint32_t pointerId, + uint32_t valueId) { + opStore(pointerId, valueId, SpirvMemoryOperands()); + } + + + void SpirvModule::opStore( + uint32_t pointerId, + uint32_t valueId, + const SpirvMemoryOperands& operands) { + m_code.putIns (spv::OpStore, 3 + getMemoryOperandWordCount(operands)); + m_code.putWord(pointerId); + m_code.putWord(valueId); + + putMemoryOperands(operands); + } + + + uint32_t SpirvModule::opInterpolateAtCentroid( + uint32_t resultType, + uint32_t interpolant) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450InterpolateAtCentroid); + m_code.putWord(interpolant); + return resultId; + } + + + uint32_t SpirvModule::opInterpolateAtSample( + uint32_t resultType, + uint32_t interpolant, + uint32_t sample) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450InterpolateAtSample); + m_code.putWord(interpolant); + m_code.putWord(sample); + return resultId; + } + + + uint32_t SpirvModule::opInterpolateAtOffset( + uint32_t resultType, + uint32_t interpolant, + uint32_t offset) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(GLSLstd450InterpolateAtOffset); + m_code.putWord(interpolant); + m_code.putWord(offset); + 
return resultId; + } + + + uint32_t SpirvModule::opImage( + uint32_t resultType, + uint32_t sampledImage) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpImage, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + return resultId; + } + + + uint32_t SpirvModule::opImageRead( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseRead + : spv::OpImageRead; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + void SpirvModule::opImageWrite( + uint32_t image, + uint32_t coordinates, + uint32_t texel, + const SpirvImageOperands& operands) { + m_code.putIns (spv::OpImageWrite, + 4 + getImageOperandWordCount(operands)); + m_code.putWord(image); + m_code.putWord(coordinates); + m_code.putWord(texel); + + putImageOperands(operands); + } + + + uint32_t SpirvModule::opImageSparseTexelsResident( + uint32_t resultType, + uint32_t residentCode) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageSparseTexelsResident, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(residentCode); + + return resultId; + } + + + uint32_t SpirvModule::opSampledImage( + uint32_t resultType, + uint32_t image, + uint32_t sampler) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSampledImage, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + m_code.putWord(sampler); + return resultId; + } + + + uint32_t SpirvModule::opImageTexelPointer( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + uint32_t sample) { + uint32_t resultId = this->allocateId(); + + m_code.putIns 
(spv::OpImageTexelPointer, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + m_code.putWord(coordinates); + m_code.putWord(sample); + return resultId; + } + + + uint32_t SpirvModule::opImageQuerySizeLod( + uint32_t resultType, + uint32_t image, + uint32_t lod) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageQuerySizeLod, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + m_code.putWord(lod); + return resultId; + } + + + uint32_t SpirvModule::opImageQuerySize( + uint32_t resultType, + uint32_t image) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageQuerySize, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + return resultId; + } + + + uint32_t SpirvModule::opImageQueryLevels( + uint32_t resultType, + uint32_t image) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageQueryLevels, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + return resultId; + } + + + uint32_t SpirvModule::opImageQueryLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageQueryLod, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + return resultId; + } + + + uint32_t SpirvModule::opImageQuerySamples( + uint32_t resultType, + uint32_t image) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpImageQuerySamples, 4); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + return resultId; + } + + + uint32_t SpirvModule::opImageFetch( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? 
spv::OpImageSparseFetch + : spv::OpImageFetch; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(image); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageGather( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t component, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseGather + : spv::OpImageGather; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(component); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageDrefGather( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseDrefGather + : spv::OpImageDrefGather; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(reference); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? 
spv::OpImageSparseSampleImplicitLod + : spv::OpImageSampleImplicitLod; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseSampleExplicitLod + : spv::OpImageSampleExplicitLod; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleProjImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseSampleProjImplicitLod + : spv::OpImageSampleProjImplicitLod; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleProjExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? 
spv::OpImageSparseSampleProjExplicitLod + : spv::OpImageSampleProjExplicitLod; + + m_code.putIns(op, 5 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleDrefImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseSampleDrefImplicitLod + : spv::OpImageSampleDrefImplicitLod; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(reference); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleDrefExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseSampleDrefExplicitLod + : spv::OpImageSampleDrefExplicitLod; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(reference); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleProjDrefImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? 
spv::OpImageSparseSampleProjDrefImplicitLod + : spv::OpImageSampleProjDrefImplicitLod; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(reference); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opImageSampleProjDrefExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands) { + uint32_t resultId = this->allocateId(); + + spv::Op op = operands.sparse + ? spv::OpImageSparseSampleProjDrefExplicitLod + : spv::OpImageSampleProjDrefExplicitLod; + + m_code.putIns(op, 6 + getImageOperandWordCount(operands)); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(sampledImage); + m_code.putWord(coordinates); + m_code.putWord(reference); + + putImageOperands(operands); + return resultId; + } + + + uint32_t SpirvModule::opGroupNonUniformBallot( + uint32_t resultType, + uint32_t execution, + uint32_t predicate) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpGroupNonUniformBallot, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(execution); + m_code.putWord(predicate); + return resultId; + } + + + uint32_t SpirvModule::opGroupNonUniformBallotBitCount( + uint32_t resultType, + uint32_t execution, + uint32_t operation, + uint32_t ballot) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpGroupNonUniformBallotBitCount, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(execution); + m_code.putWord(operation); + m_code.putWord(ballot); + return resultId; + } + + + uint32_t SpirvModule::opGroupNonUniformElect( + uint32_t resultType, + uint32_t execution) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpGroupNonUniformElect, 4); + m_code.putWord(resultType); + 
m_code.putWord(resultId); + m_code.putWord(execution); + return resultId; + } + + + uint32_t SpirvModule::opGroupNonUniformBroadcastFirst( + uint32_t resultType, + uint32_t execution, + uint32_t value) { + uint32_t resultId = this->allocateId(); + + m_code.putIns(spv::OpGroupNonUniformBroadcastFirst, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(execution); + m_code.putWord(value); + return resultId; + } + + + void SpirvModule::opControlBarrier( + uint32_t execution, + uint32_t memory, + uint32_t semantics) { + m_code.putIns (spv::OpControlBarrier, 4); + m_code.putWord(execution); + m_code.putWord(memory); + m_code.putWord(semantics); + } + + + void SpirvModule::opMemoryBarrier( + uint32_t memory, + uint32_t semantics) { + m_code.putIns (spv::OpMemoryBarrier, 3); + m_code.putWord(memory); + m_code.putWord(semantics); + } + + + void SpirvModule::opLoopMerge( + uint32_t mergeBlock, + uint32_t continueTarget, + uint32_t loopControl) { + m_code.putIns (spv::OpLoopMerge, 4); + m_code.putWord(mergeBlock); + m_code.putWord(continueTarget); + m_code.putWord(loopControl); + } + + + void SpirvModule::opSelectionMerge( + uint32_t mergeBlock, + uint32_t selectionControl) { + m_code.putIns (spv::OpSelectionMerge, 3); + m_code.putWord(mergeBlock); + m_code.putWord(selectionControl); + } + + + void SpirvModule::opBranch( + uint32_t label) { + m_code.putIns (spv::OpBranch, 2); + m_code.putWord(label); + + m_blockId = 0; + } + + + void SpirvModule::opBranchConditional( + uint32_t condition, + uint32_t trueLabel, + uint32_t falseLabel) { + m_code.putIns (spv::OpBranchConditional, 4); + m_code.putWord(condition); + m_code.putWord(trueLabel); + m_code.putWord(falseLabel); + + m_blockId = 0; + } + + + void SpirvModule::opSwitch( + uint32_t selector, + uint32_t jumpDefault, + uint32_t caseCount, + const SpirvSwitchCaseLabel* caseLabels) { + m_code.putIns (spv::OpSwitch, 3 + 2 * caseCount); + m_code.putWord(selector); + m_code.putWord(jumpDefault); + + 
for (uint32_t i = 0; i < caseCount; i++) { + m_code.putWord(caseLabels[i].literal); + m_code.putWord(caseLabels[i].labelId); + } + + m_blockId = 0; + } + + + uint32_t SpirvModule::opPhi( + uint32_t resultType, + uint32_t sourceCount, + const SpirvPhiLabel* sourceLabels) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpPhi, 3 + 2 * sourceCount); + m_code.putWord(resultType); + m_code.putWord(resultId); + + for (uint32_t i = 0; i < sourceCount; i++) { + m_code.putWord(sourceLabels[i].varId); + m_code.putWord(sourceLabels[i].labelId); + } + + return resultId; + } + + + void SpirvModule::opReturn() { + m_code.putIns (spv::OpReturn, 1); + m_blockId = 0; + } + + + void SpirvModule::opDemoteToHelperInvocation() { + m_code.putIns (spv::OpDemoteToHelperInvocation, 1); + } + + + void SpirvModule::opEmitVertex( + uint32_t streamId) { + if (streamId == 0) { + m_code.putIns (spv::OpEmitVertex, 1); + } else { + m_code.putIns (spv::OpEmitStreamVertex, 2); + m_code.putWord(streamId); + } + } + + + void SpirvModule::opEndPrimitive( + uint32_t streamId) { + if (streamId == 0) { + m_code.putIns (spv::OpEndPrimitive, 1); + } else { + m_code.putIns (spv::OpEndStreamPrimitive, 2); + m_code.putWord(streamId); + } + } + + + void SpirvModule::opBeginInvocationInterlock() { + m_code.putIns(spv::OpBeginInvocationInterlockEXT, 1); + } + + + void SpirvModule::opEndInvocationInterlock() { + m_code.putIns(spv::OpEndInvocationInterlockEXT, 1); + } + + + uint32_t SpirvModule::defType( + spv::Op op, + uint32_t argCount, + const uint32_t* argIds) { + // Since the type info is stored in the code buffer, + // we can use the code buffer to look up type IDs as + // well. Result IDs are always stored as argument 1. 
+ for (auto ins : m_typeConstDefs) { + bool match = ins.opCode() == op + && ins.length() == 2 + argCount; + + for (uint32_t i = 0; i < argCount && match; i++) + match &= ins.arg(2 + i) == argIds[i]; + + if (match) + return ins.arg(1); + } + + // Type not yet declared, create a new one. + uint32_t resultId = this->allocateId(); + m_typeConstDefs.putIns (op, 2 + argCount); + m_typeConstDefs.putWord(resultId); + + for (uint32_t i = 0; i < argCount; i++) + m_typeConstDefs.putWord(argIds[i]); + return resultId; + } + + + uint32_t SpirvModule::defConst( + spv::Op op, + uint32_t typeId, + uint32_t argCount, + const uint32_t* argIds) { + // Avoid declaring constants multiple times + for (auto ins : m_typeConstDefs) { + bool match = ins.opCode() == op + && ins.length() == 3 + argCount + && ins.arg(1) == typeId; + + for (uint32_t i = 0; i < argCount && match; i++) + match &= ins.arg(3 + i) == argIds[i]; + + if (!match) + continue; + + uint32_t id = ins.arg(2); + + if (m_lateConsts.find(id) == m_lateConsts.end()) + return id; + } + + // Constant not yet declared, make a new one + uint32_t resultId = this->allocateId(); + m_typeConstDefs.putIns (op, 3 + argCount); + m_typeConstDefs.putWord(typeId); + m_typeConstDefs.putWord(resultId); + + for (uint32_t i = 0; i < argCount; i++) + m_typeConstDefs.putWord(argIds[i]); + return resultId; + } + + + void SpirvModule::instImportGlsl450() { + m_instExtGlsl450 = this->allocateId(); + const char* name = "GLSL.std.450"; + + m_instExt.putIns (spv::OpExtInstImport, 2 + m_instExt.strLen(name)); + m_instExt.putWord(m_instExtGlsl450); + m_instExt.putStr (name); + } + + + uint32_t SpirvModule::getMemoryOperandWordCount( + const SpirvMemoryOperands& op) const { + const uint32_t result + = ((op.flags & spv::MemoryAccessAlignedMask) ? 1 : 0) + + ((op.flags & spv::MemoryAccessMakePointerAvailableMask) ? 1 : 0) + + ((op.flags & spv::MemoryAccessMakePointerVisibleMask) ? 1 : 0); + + return op.flags ? 
result + 1 : 0; + } + + + void SpirvModule::putMemoryOperands( + const SpirvMemoryOperands& op) { + if (op.flags) { + m_code.putWord(op.flags); + + if (op.flags & spv::MemoryAccessAlignedMask) + m_code.putWord(op.alignment); + + if (op.flags & spv::MemoryAccessMakePointerAvailableMask) + m_code.putWord(op.makeAvailable); + + if (op.flags & spv::MemoryAccessMakePointerVisibleMask) + m_code.putWord(op.makeVisible); + } + } + + + uint32_t SpirvModule::getImageOperandWordCount(const SpirvImageOperands& op) const { + // Each flag may add one or more operands + const uint32_t result + = ((op.flags & spv::ImageOperandsBiasMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsLodMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsConstOffsetMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsGradMask) ? 2 : 0) + + ((op.flags & spv::ImageOperandsOffsetMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsConstOffsetsMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsSampleMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsMinLodMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsMakeTexelAvailableMask) ? 1 : 0) + + ((op.flags & spv::ImageOperandsMakeTexelVisibleMask) ? 1 : 0); + + // Add a DWORD for the operand mask if it is non-zero + return op.flags ? 
result + 1 : 0; + } + + + void SpirvModule::putImageOperands(const SpirvImageOperands& op) { + if (op.flags) { + m_code.putWord(op.flags); + + if (op.flags & spv::ImageOperandsBiasMask) + m_code.putWord(op.sLodBias); + + if (op.flags & spv::ImageOperandsLodMask) + m_code.putWord(op.sLod); + + if (op.flags & spv::ImageOperandsConstOffsetMask) + m_code.putWord(op.sConstOffset); + + if (op.flags & spv::ImageOperandsGradMask) { + m_code.putWord(op.sGradX); + m_code.putWord(op.sGradY); + } + + if (op.flags & spv::ImageOperandsOffsetMask) + m_code.putWord(op.gOffset); + + if (op.flags & spv::ImageOperandsConstOffsetsMask) + m_code.putWord(op.gConstOffsets); + + if (op.flags & spv::ImageOperandsSampleMask) + m_code.putWord(op.sSampleId); + + if (op.flags & spv::ImageOperandsMinLodMask) + m_code.putWord(op.sMinLod); + + if (op.flags & spv::ImageOperandsMakeTexelAvailableMask) + m_code.putWord(op.makeAvailable); + + if (op.flags & spv::ImageOperandsMakeTexelVisibleMask) + m_code.putWord(op.makeVisible); + } + } + + + bool SpirvModule::isInterfaceVar( + spv::StorageClass sclass) const { + if (m_version < spvVersion(1, 4)) { + return sclass == spv::StorageClassInput + || sclass == spv::StorageClassOutput; + } else { + // All global variables need to be declared + return sclass != spv::StorageClassFunction; + } + } + +} \ No newline at end of file diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h new file mode 100644 index 0000000..a878e2e --- /dev/null +++ b/src/spirv/spirv_module.h @@ -0,0 +1,1320 @@ +#pragma once + +#include + +#include "spirv_code_buffer.h" + +namespace dxvk { + + struct SpirvPhiLabel { + uint32_t varId = 0; + uint32_t labelId = 0; + }; + + struct SpirvSwitchCaseLabel { + uint32_t literal = 0; + uint32_t labelId = 0; + }; + + struct SpirvMemoryOperands { + uint32_t flags = 0; + uint32_t alignment = 0; + uint32_t makeAvailable = 0; + uint32_t makeVisible = 0; + }; + + struct SpirvImageOperands { + uint32_t flags = 0; + uint32_t sLodBias = 0; 
/**
 * \brief Packs a SPIR-V version into a single word
 *
 * The major version is shifted left by 16 bits and the
 * minor version by 8 bits; both are then combined.
 * \param [in] major Major version number
 * \param [in] minor Minor version number
 * \returns Packed version word
 */
constexpr uint32_t spvVersion(uint32_t major, uint32_t minor) {
  return uint32_t(minor << 8)
       | uint32_t(major << 16);
}
spv::SourceLanguage language, + uint32_t version, + uint32_t file, + const char* source); + + void setDebugName( + uint32_t expressionId, + const char* debugName); + + void setDebugMemberName( + uint32_t structId, + uint32_t memberId, + const char* debugName); + + uint32_t constBool( + bool v); + + uint32_t consti32( + int32_t v); + + uint32_t consti64( + int64_t v); + + uint32_t constu32( + uint32_t v); + + uint32_t constu64( + uint64_t v); + + uint32_t constf32( + float v); + + uint32_t constf64( + double v); + + uint32_t constvec4i32( + int32_t x, + int32_t y, + int32_t z, + int32_t w); + + uint32_t constvec4b32( + bool x, + bool y, + bool z, + bool w); + + uint32_t constvec4u32( + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w); + + uint32_t constvec2f32( + float x, + float y); + + uint32_t constvec3f32( + float x, + float y, + float z); + + uint32_t constvec4f32( + float x, + float y, + float z, + float w); + + uint32_t constfReplicant( + float replicant, + uint32_t count); + + uint32_t constbReplicant( + bool replicant, + uint32_t count); + + uint32_t constiReplicant( + int32_t replicant, + uint32_t count); + + uint32_t constuReplicant( + int32_t replicant, + uint32_t count); + + uint32_t constComposite( + uint32_t typeId, + uint32_t constCount, + const uint32_t* constIds); + + uint32_t constUndef( + uint32_t typeId); + + uint32_t lateConst32( + uint32_t typeId); + + void setLateConst( + uint32_t constId, + const uint32_t* argIds); + + uint32_t specConstBool( + bool v); + + uint32_t specConst32( + uint32_t typeId, + uint32_t value); + + void decorate( + uint32_t object, + spv::Decoration decoration); + + void decorateArrayStride( + uint32_t object, + uint32_t stride); + + void decorateBinding( + uint32_t object, + uint32_t binding); + + void decorateBlock( + uint32_t object); + + void decorateBuiltIn( + uint32_t object, + spv::BuiltIn builtIn); + + void decorateComponent( + uint32_t object, + uint32_t location); + + void decorateDescriptorSet( + 
uint32_t object, + uint32_t set); + + void decorateIndex( + uint32_t object, + uint32_t index); + + void decorateLocation( + uint32_t object, + uint32_t location); + + void decorateSpecId( + uint32_t object, + uint32_t specId); + + void decorateXfb( + uint32_t object, + uint32_t streamId, + uint32_t bufferId, + uint32_t offset, + uint32_t stride); + + void memberDecorateBuiltIn( + uint32_t structId, + uint32_t memberId, + spv::BuiltIn builtIn); + + void memberDecorate( + uint32_t structId, + uint32_t memberId, + spv::Decoration decoration); + + void memberDecorateMatrixStride( + uint32_t structId, + uint32_t memberId, + uint32_t stride); + + void memberDecorateOffset( + uint32_t structId, + uint32_t memberId, + uint32_t offset); + + uint32_t defVoidType(); + + uint32_t defBoolType(); + + uint32_t defIntType( + uint32_t width, + uint32_t isSigned); + + uint32_t defFloatType( + uint32_t width); + + uint32_t defVectorType( + uint32_t elementType, + uint32_t elementCount); + + uint32_t defMatrixType( + uint32_t columnType, + uint32_t columnCount); + + uint32_t defArrayType( + uint32_t typeId, + uint32_t length); + + uint32_t defArrayTypeUnique( + uint32_t typeId, + uint32_t length); + + uint32_t defRuntimeArrayType( + uint32_t typeId); + + uint32_t defRuntimeArrayTypeUnique( + uint32_t typeId); + + uint32_t defFunctionType( + uint32_t returnType, + uint32_t argCount, + const uint32_t* argTypes); + + uint32_t defStructType( + uint32_t memberCount, + const uint32_t* memberTypes); + + uint32_t defStructTypeUnique( + uint32_t memberCount, + const uint32_t* memberTypes); + + uint32_t defPointerType( + uint32_t variableType, + spv::StorageClass storageClass); + + uint32_t defSamplerType(); + + uint32_t defImageType( + uint32_t sampledType, + spv::Dim dimensionality, + uint32_t depth, + uint32_t arrayed, + uint32_t multisample, + uint32_t sampled, + spv::ImageFormat format); + + uint32_t defSampledImageType( + uint32_t imageType); + + uint32_t newVar( + uint32_t pointerType, 
+ spv::StorageClass storageClass); + + uint32_t newVarInit( + uint32_t pointerType, + spv::StorageClass storageClass, + uint32_t initialValue); + + void functionBegin( + uint32_t returnType, + uint32_t functionId, + uint32_t functionType, + spv::FunctionControlMask functionControl); + + uint32_t functionParameter( + uint32_t parameterType); + + void functionEnd(); + + uint32_t opAccessChain( + uint32_t resultType, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray); + + uint32_t opArrayLength( + uint32_t resultType, + uint32_t structure, + uint32_t memberId); + + uint32_t opAny( + uint32_t resultType, + uint32_t vector); + + uint32_t opAll( + uint32_t resultType, + uint32_t vector); + + uint32_t opAtomicLoad( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics); + + void opAtomicStore( + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicExchange( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicCompareExchange( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t equal, + uint32_t unequal, + uint32_t value, + uint32_t comparator); + + uint32_t opAtomicIIncrement( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics); + + uint32_t opAtomicIDecrement( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics); + + uint32_t opAtomicIAdd( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicISub( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicSMin( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicSMax( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + 
uint32_t opAtomicUMin( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicUMax( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicAnd( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicOr( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opAtomicXor( + uint32_t resultType, + uint32_t pointer, + uint32_t scope, + uint32_t semantics, + uint32_t value); + + uint32_t opBitcast( + uint32_t resultType, + uint32_t operand); + + uint32_t opBitCount( + uint32_t resultType, + uint32_t operand); + + uint32_t opBitReverse( + uint32_t resultType, + uint32_t operand); + + uint32_t opFindILsb( + uint32_t resultType, + uint32_t operand); + + uint32_t opFindUMsb( + uint32_t resultType, + uint32_t operand); + + uint32_t opFindSMsb( + uint32_t resultType, + uint32_t operand); + + uint32_t opBitFieldInsert( + uint32_t resultType, + uint32_t base, + uint32_t insert, + uint32_t offset, + uint32_t count); + + uint32_t opBitFieldSExtract( + uint32_t resultType, + uint32_t base, + uint32_t offset, + uint32_t count); + + uint32_t opBitFieldUExtract( + uint32_t resultType, + uint32_t base, + uint32_t offset, + uint32_t count); + + uint32_t opBitwiseAnd( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opBitwiseOr( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opBitwiseXor( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opNot( + uint32_t resultType, + uint32_t operand); + + uint32_t opShiftLeftLogical( + uint32_t resultType, + uint32_t base, + uint32_t shift); + + uint32_t opShiftRightArithmetic( + uint32_t resultType, + uint32_t base, + uint32_t shift); + + uint32_t opShiftRightLogical( + uint32_t resultType, + uint32_t 
base, + uint32_t shift); + + uint32_t opConvertFtoS( + uint32_t resultType, + uint32_t operand); + + uint32_t opConvertFtoU( + uint32_t resultType, + uint32_t operand); + + uint32_t opConvertStoF( + uint32_t resultType, + uint32_t operand); + + uint32_t opConvertUtoF( + uint32_t resultType, + uint32_t operand); + + uint32_t opCompositeConstruct( + uint32_t resultType, + uint32_t valueCount, + const uint32_t* valueArray); + + uint32_t opCompositeExtract( + uint32_t resultType, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray); + + uint32_t opCompositeInsert( + uint32_t resultType, + uint32_t object, + uint32_t composite, + uint32_t indexCount, + const uint32_t* indexArray); + + uint32_t opDpdx( + uint32_t resultType, + uint32_t operand); + + uint32_t opDpdy( + uint32_t resultType, + uint32_t operand); + + uint32_t opDpdxCoarse( + uint32_t resultType, + uint32_t operand); + + uint32_t opDpdyCoarse( + uint32_t resultType, + uint32_t operand); + + uint32_t opDpdxFine( + uint32_t resultType, + uint32_t operand); + + uint32_t opDpdyFine( + uint32_t resultType, + uint32_t operand); + + uint32_t opVectorExtractDynamic( + uint32_t resultType, + uint32_t vector, + uint32_t index); + + uint32_t opVectorShuffle( + uint32_t resultType, + uint32_t vectorLeft, + uint32_t vectorRight, + uint32_t indexCount, + const uint32_t* indexArray); + + uint32_t opSNegate( + uint32_t resultType, + uint32_t operand); + + uint32_t opFNegate( + uint32_t resultType, + uint32_t operand); + + uint32_t opSAbs( + uint32_t resultType, + uint32_t operand); + + uint32_t opFAbs( + uint32_t resultType, + uint32_t operand); + + uint32_t opFSign( + uint32_t resultType, + uint32_t operand); + + uint32_t opFMix( + uint32_t resultType, + uint32_t x, + uint32_t y, + uint32_t a); + + uint32_t opCross( + uint32_t resultType, + uint32_t x, + uint32_t y); + + uint32_t opIAdd( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opISub( + uint32_t resultType, + uint32_t a, + 
uint32_t b); + + uint32_t opFAdd( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFSub( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opSDiv( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opUDiv( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opSRem( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opUMod( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFDiv( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opIMul( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFMul( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opVectorTimesScalar( + uint32_t resultType, + uint32_t vector, + uint32_t scalar); + + uint32_t opMatrixTimesMatrix( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opMatrixTimesVector( + uint32_t resultType, + uint32_t matrix, + uint32_t vector); + + uint32_t opVectorTimesMatrix( + uint32_t resultType, + uint32_t vector, + uint32_t matrix); + + uint32_t opTranspose( + uint32_t resultType, + uint32_t matrix); + + uint32_t opInverse( + uint32_t resultType, + uint32_t matrix); + + uint32_t opFFma( + uint32_t resultType, + uint32_t a, + uint32_t b, + uint32_t c); + + uint32_t opFMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opNMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opNMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opSMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opSMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opUMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opUMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFClamp( + uint32_t resultType, + uint32_t x, + uint32_t minVal, + uint32_t maxVal); + + uint32_t opNClamp( + uint32_t resultType, + 
uint32_t x, + uint32_t minVal, + uint32_t maxVal); + + uint32_t opIEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opINotEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opULessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opULessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opUGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opUGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdNotEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opFOrdGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opLogicalEqual( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opLogicalNotEqual( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opLogicalAnd( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opLogicalOr( + uint32_t resultType, + uint32_t operand1, + uint32_t operand2); + + uint32_t opLogicalNot( + uint32_t resultType, + uint32_t operand); 
+ + uint32_t opDot( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSin( + uint32_t resultType, + uint32_t vector); + + uint32_t opCos( + uint32_t resultType, + uint32_t vector); + + uint32_t opSqrt( + uint32_t resultType, + uint32_t operand); + + uint32_t opInverseSqrt( + uint32_t resultType, + uint32_t operand); + + uint32_t opNormalize( + uint32_t resultType, + uint32_t operand); + + uint32_t opReflect( + uint32_t resultType, + uint32_t incident, + uint32_t normal); + + uint32_t opLength( + uint32_t resultType, + uint32_t operand); + + uint32_t opExp2( + uint32_t resultType, + uint32_t operand); + + uint32_t opExp( + uint32_t resultType, + uint32_t operand); + + uint32_t opLog2( + uint32_t resultType, + uint32_t operand); + + uint32_t opPow( + uint32_t resultType, + uint32_t base, + uint32_t exponent); + + uint32_t opFract( + uint32_t resultType, + uint32_t operand); + + uint32_t opCeil( + uint32_t resultType, + uint32_t operand); + + uint32_t opFloor( + uint32_t resultType, + uint32_t operand); + + uint32_t opRound( + uint32_t resultType, + uint32_t operand); + + uint32_t opRoundEven( + uint32_t resultType, + uint32_t operand); + + uint32_t opTrunc( + uint32_t resultType, + uint32_t operand); + + uint32_t opFConvert( + uint32_t resultType, + uint32_t operand); + + uint32_t opPackHalf2x16( + uint32_t resultType, + uint32_t operand); + + uint32_t opUnpackHalf2x16( + uint32_t resultType, + uint32_t operand); + + uint32_t opSelect( + uint32_t resultType, + uint32_t condition, + uint32_t operand1, + uint32_t operand2); + + uint32_t opIsNan( + uint32_t resultType, + uint32_t operand); + + uint32_t opIsInf( + uint32_t resultType, + uint32_t operand); + + uint32_t opFunctionCall( + uint32_t resultType, + uint32_t functionId, + uint32_t argCount, + const uint32_t* argIds); + + void opLabel( + uint32_t labelId); + + uint32_t opLoad( + uint32_t typeId, + uint32_t pointerId); + + uint32_t opLoad( + uint32_t typeId, + uint32_t pointerId, + 
const SpirvMemoryOperands& operands); + + void opStore( + uint32_t pointerId, + uint32_t valueId); + + void opStore( + uint32_t pointerId, + uint32_t valueId, + const SpirvMemoryOperands& operands); + + uint32_t opInterpolateAtCentroid( + uint32_t resultType, + uint32_t interpolant); + + uint32_t opInterpolateAtSample( + uint32_t resultType, + uint32_t interpolant, + uint32_t sample); + + uint32_t opInterpolateAtOffset( + uint32_t resultType, + uint32_t interpolant, + uint32_t offset); + + uint32_t opImage( + uint32_t resultType, + uint32_t sampledImage); + + uint32_t opImageRead( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + const SpirvImageOperands& operands); + + void opImageWrite( + uint32_t image, + uint32_t coordinates, + uint32_t texel, + const SpirvImageOperands& operands); + + uint32_t opImageSparseTexelsResident( + uint32_t resultType, + uint32_t residentCode); + + uint32_t opImageTexelPointer( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + uint32_t sample); + + uint32_t opSampledImage( + uint32_t resultType, + uint32_t image, + uint32_t sampler); + + uint32_t opImageQuerySizeLod( + uint32_t resultType, + uint32_t image, + uint32_t lod); + + uint32_t opImageQuerySize( + uint32_t resultType, + uint32_t image); + + uint32_t opImageQueryLevels( + uint32_t resultType, + uint32_t image); + + uint32_t opImageQueryLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates); + + uint32_t opImageQuerySamples( + uint32_t resultType, + uint32_t image); + + uint32_t opImageFetch( + uint32_t resultType, + uint32_t image, + uint32_t coordinates, + const SpirvImageOperands& operands); + + uint32_t opImageGather( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t component, + const SpirvImageOperands& operands); + + uint32_t opImageDrefGather( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + 
uint32_t opImageSampleImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands); + + uint32_t opImageSampleExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands); + + uint32_t opImageSampleProjImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands); + + uint32_t opImageSampleProjExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + const SpirvImageOperands& operands); + + uint32_t opImageSampleDrefImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + uint32_t opImageSampleDrefExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + uint32_t opImageSampleProjDrefImplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + uint32_t opImageSampleProjDrefExplicitLod( + uint32_t resultType, + uint32_t sampledImage, + uint32_t coordinates, + uint32_t reference, + const SpirvImageOperands& operands); + + uint32_t opGroupNonUniformBallot( + uint32_t resultType, + uint32_t execution, + uint32_t predicate); + + uint32_t opGroupNonUniformBallotBitCount( + uint32_t resultType, + uint32_t execution, + uint32_t operation, + uint32_t ballot); + + uint32_t opGroupNonUniformElect( + uint32_t resultType, + uint32_t execution); + + uint32_t opGroupNonUniformBroadcastFirst( + uint32_t resultType, + uint32_t execution, + uint32_t value); + + void opControlBarrier( + uint32_t execution, + uint32_t memory, + uint32_t semantics); + + void opMemoryBarrier( + uint32_t memory, + uint32_t semantics); + + void opLoopMerge( + uint32_t mergeBlock, + uint32_t continueTarget, + uint32_t 
loopControl); + + void opSelectionMerge( + uint32_t mergeBlock, + uint32_t selectionControl); + + void opBranch( + uint32_t label); + + void opBranchConditional( + uint32_t condition, + uint32_t trueLabel, + uint32_t falseLabel); + + void opSwitch( + uint32_t selector, + uint32_t jumpDefault, + uint32_t caseCount, + const SpirvSwitchCaseLabel* caseLabels); + + uint32_t opPhi( + uint32_t resultType, + uint32_t sourceCount, + const SpirvPhiLabel* sourceLabels); + + void opReturn(); + + void opDemoteToHelperInvocation(); + + void opEmitVertex( + uint32_t streamId); + + void opEndPrimitive( + uint32_t streamId); + + void opBeginInvocationInterlock(); + + void opEndInvocationInterlock(); + + private: + + uint32_t m_version; + uint32_t m_id = 1; + uint32_t m_instExtGlsl450 = 0; + uint32_t m_blockId = 0; + + SpirvCodeBuffer m_capabilities; + SpirvCodeBuffer m_extensions; + SpirvCodeBuffer m_instExt; + SpirvCodeBuffer m_memoryModel; + SpirvCodeBuffer m_entryPoints; + SpirvCodeBuffer m_execModeInfo; + SpirvCodeBuffer m_debugNames; + SpirvCodeBuffer m_annotations; + SpirvCodeBuffer m_typeConstDefs; + SpirvCodeBuffer m_variables; + SpirvCodeBuffer m_code; + + std::unordered_set m_lateConsts; + + std::vector m_interfaceVars; + + uint32_t defType( + spv::Op op, + uint32_t argCount, + const uint32_t* argIds); + + uint32_t defConst( + spv::Op op, + uint32_t typeId, + uint32_t argCount, + const uint32_t* argIds); + + void instImportGlsl450(); + + uint32_t getMemoryOperandWordCount( + const SpirvMemoryOperands& op) const; + + void putMemoryOperands( + const SpirvMemoryOperands& op); + + uint32_t getImageOperandWordCount( + const SpirvImageOperands& op) const; + + void putImageOperands( + const SpirvImageOperands& op); + + bool isInterfaceVar( + spv::StorageClass sclass) const; + + }; + +} \ No newline at end of file diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt new file mode 100644 index 0000000..daccb2b --- /dev/null +++ b/src/util/CMakeLists.txt @@ -0,0 +1,33 @@ 
+add_library(dxbc-util STATIC) +target_sources(dxbc-util PRIVATE + com/com_guid.cpp + com/com_guid.h + com/com_include.h + com/com_object.h + com/com_pointer.h + com/com_private_data.cpp + com/com_private_data.h + + log/log_debug.cpp + log/log_debug.h + log/log.cpp + log/log.h + + rc/util_rc_ptr.h + rc/util_rc.h + + thread.cpp + thread.h + util_bit.h + util_enum.h + util_env.cpp + util_env.h + util_error.h + util_flags.h + util_likely.h + util_math.h + util_string.cpp + util_string.h +) +target_include_directories(dxbc-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(dxbc-util PUBLIC windows-headers) diff --git a/src/util/com/com_guid.cpp b/src/util/com/com_guid.cpp new file mode 100644 index 0000000..79b3750 --- /dev/null +++ b/src/util/com/com_guid.cpp @@ -0,0 +1,64 @@ +#include +#include + +#include "com_guid.h" + +#include "../log/log.h" + +#include "../../dxvk/dxvk_hash.h" + +#include "../thread.h" + +namespace dxvk { + + struct GuidPair { + GuidPair() { }; + GuidPair(IID a_, IID b_) + : a(a_), b(b_) { } + + IID a, b; + + size_t hash() const { + return size_t(a.Data1) ^ size_t(b.Data1); + } + + bool eq(const GuidPair& other) const { + return a == other.a && b == other.b; + } + }; + + dxvk::mutex g_loggedQueryInterfaceErrorMutex; + std::unordered_set g_loggedQueryInterfaceErrors; + + bool logQueryInterfaceError(REFIID objectGuid, REFIID requestedGuid) { + if (Logger::logLevel() > LogLevel::Warn) + return false; + + std::lock_guard lock(g_loggedQueryInterfaceErrorMutex); + return g_loggedQueryInterfaceErrors.emplace(objectGuid, requestedGuid).second; + } + +} + +std::ostream& operator << (std::ostream& os, REFIID guid) { + os << std::hex << std::setfill('0') + << std::setw(8) << guid.Data1 << '-'; + + os << std::hex << std::setfill('0') + << std::setw(4) << guid.Data2 << '-'; + + os << std::hex << std::setfill('0') + << std::setw(4) << guid.Data3 << '-'; + + os << std::hex << std::setfill('0') + << std::setw(2) << static_cast(guid.Data4[0]) + << 
std::setw(2) << static_cast(guid.Data4[1]) + << '-' + << std::setw(2) << static_cast(guid.Data4[2]) + << std::setw(2) << static_cast(guid.Data4[3]) + << std::setw(2) << static_cast(guid.Data4[4]) + << std::setw(2) << static_cast(guid.Data4[5]) + << std::setw(2) << static_cast(guid.Data4[6]) + << std::setw(2) << static_cast(guid.Data4[7]); + return os; +} diff --git a/src/util/com/com_guid.h b/src/util/com/com_guid.h new file mode 100644 index 0000000..f090c23 --- /dev/null +++ b/src/util/com/com_guid.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +#include "com_include.h" + +namespace dxvk { + + /** + * \brief Checks whether an unknown GUID should be logged + * + * \param [in] objectGuid GUID of the object that QueryInterface is called on + * \param [in] requestGuid Requested unsupported GUID + * \returns \c true if the error should be logged + */ + bool logQueryInterfaceError(REFIID objectGuid, REFIID requestedGuid); + +}; + +std::ostream& operator << (std::ostream& os, REFIID guid); diff --git a/src/util/com/com_include.h b/src/util/com/com_include.h new file mode 100644 index 0000000..3c11930 --- /dev/null +++ b/src/util/com/com_include.h @@ -0,0 +1,17 @@ +#pragma once + +// GCC complains about the COM interfaces +// not having virtual destructors +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#endif // __GNUC__ + +#define WIN32_LEAN_AND_MEAN +#include +#include + +// GCC: -std options disable certain keywords +// https://gcc.gnu.org/onlinedocs/gcc/Alternate-Keywords.html +#if defined(__WINE__) && !defined(typeof) +#define typeof __typeof +#endif diff --git a/src/util/com/com_object.h b/src/util/com/com_object.h new file mode 100644 index 0000000..f42fbcc --- /dev/null +++ b/src/util/com/com_object.h @@ -0,0 +1,123 @@ +#pragma once + +#include + +#include "com_include.h" + +#include "../util_likely.h" + +namespace dxvk { + + template + class NoWrapper : public T { + + public: + + virtual ~NoWrapper() { } + + }; + + /** + * 
\brief Reference-counted COM object + * + * This can serve as a templated base class for most + * COM objects. It implements AddRef and Release from + * \c IUnknown, and provides methods to increment and + * decrement private references which are not visible + * to the application. + * + * Having two reference counters is sadly necessary + * in order to not break games which steal internal + * references if the refefence count of an object is + + greater than they expect. DXVK sometimes requires + * holding on to objects which the application wants + * to delete. + */ + template + class ComObject : public Base { + + public: + + virtual ~ComObject() { } + + ULONG STDMETHODCALLTYPE AddRef() { + uint32_t refCount = m_refCount++; + if (unlikely(!refCount)) + AddRefPrivate(); + return refCount + 1; + } + + ULONG STDMETHODCALLTYPE Release() { + uint32_t refCount = --m_refCount; + if (unlikely(!refCount)) + ReleasePrivate(); + return refCount; + } + + + void AddRefPrivate() { + ++m_refPrivate; + } + + + void ReleasePrivate() { + uint32_t refPrivate = --m_refPrivate; + if (unlikely(!refPrivate)) { + m_refPrivate += 0x80000000; + delete this; + } + } + + ULONG GetPrivateRefCount() const { + return m_refPrivate.load(); + } + + bool HasLiveReferences() const { + return bool(m_refCount.load() | (m_refPrivate.load() & 0x7FFFFFFF)); + } + + protected: + + std::atomic m_refCount = { 0ul }; + std::atomic m_refPrivate = { 0ul }; + + }; + + /** + * \brief Clamped, reference-counted COM object + * + * This version of ComObject ensures that the reference + * count does not wrap around if a release happens at zero. + * eg. [m_refCount = 0] + * Release() + * [m_refCount = 0] + * This is a notable quirk of D3D9's COM implementation + * and is relied upon by some games. 
namespace dxvk {

  /**
   * \brief Increment public ref count
   *
   * If the pointer is not \c nullptr, this
   * calls \c AddRef for the given object.
   * \param [in] object Object to reference, may be \c nullptr
   * \returns Pointer to the object
   */
  template<typename T>
  T* ref(T* object) {
    if (object != nullptr)
      object->AddRef();
    return object;
  }


  /**
   * \brief Ref count methods for public references
   */
  template<typename T, bool Public>
  struct ComRef_ {
    static void incRef(T* ptr) { ptr->AddRef(); }
    static void decRef(T* ptr) { ptr->Release(); }
  };


  /**
   * \brief Ref count methods for private references
   */
  template<typename T>
  struct ComRef_<T, false> {
    static void incRef(T* ptr) { ptr->AddRefPrivate(); }
    static void decRef(T* ptr) { ptr->ReleasePrivate(); }
  };


  /**
   * \brief COM pointer
   *
   * Implements automatic reference counting for COM objects.
   * Holds one reference to the object it points to for as
   * long as it is not \c nullptr.
   * \tparam T Object type
   * \tparam Public Whether public (\c AddRef / \c Release) or
   *    private (\c AddRefPrivate / \c ReleasePrivate) references
   *    are taken
   */
  template<typename T, bool Public = true>
  class Com {
    using ComRef = ComRef_<T, Public>;
  public:

    Com() { }
    Com(std::nullptr_t) { }
    Com(T* object)
    : m_ptr(object) {
      this->incRef();
    }

    Com(const Com& other)
    : m_ptr(other.m_ptr) {
      this->incRef();
    }

    Com(Com&& other)
    : m_ptr(other.m_ptr) {
      other.m_ptr = nullptr;
    }

    Com& operator = (T* object) {
      // Take the new reference before dropping the old one. If the
      // caller assigns the pointer that is already held, releasing
      // first could destroy the object before it is referenced again.
      if (object != nullptr)
        ComRef::incRef(object);
      this->decRef();
      m_ptr = object;
      return *this;
    }

    Com& operator = (const Com& other) {
      // Same ordering as above, which makes self-assignment safe
      other.incRef();
      this->decRef();
      m_ptr = other.m_ptr;
      return *this;
    }

    Com& operator = (Com&& other) {
      // Detach the source first so that moving an
      // object onto itself keeps its reference
      T* taken = other.m_ptr;
      other.m_ptr = nullptr;
      this->decRef();
      m_ptr = taken;
      return *this;
    }

    Com& operator = (std::nullptr_t) {
      this->decRef();
      m_ptr = nullptr;
      return *this;
    }

    ~Com() {
      this->decRef();
    }

    T* operator -> () const {
      return m_ptr;
    }

    // Address-of yields the address of the raw pointer, so that the
    // object can be passed to functions that return an interface
    // through a T** parameter. The old reference is NOT released.
    T**       operator & ()       { return &m_ptr; }
    T* const* operator & () const { return &m_ptr; }

    // Uses ptr() rather than m_ptr since a Com with a different
    // Public flag is a distinct class with inaccessible privates
    template<bool Public_>
    bool operator == (const Com<T, Public_>& other) const { return m_ptr == other.ptr(); }
    template<bool Public_>
    bool operator != (const Com<T, Public_>& other) const { return m_ptr != other.ptr(); }

    bool operator == (const T* other) const { return m_ptr == other; }
    bool operator != (const T* other) const { return m_ptr != other; }

    bool operator == (std::nullptr_t) const { return m_ptr == nullptr; }
    bool operator != (std::nullptr_t) const { return m_ptr != nullptr; }

    /**
     * \brief Takes an additional public reference
     * \returns Raw pointer, owning one public reference
     */
    T* ref() const {
      return dxvk::ref(m_ptr);
    }

    /**
     * \brief Retrieves the raw pointer
     * \returns Raw pointer, without changing any ref count
     */
    T* ptr() const {
      return m_ptr;
    }

    Com<T, true>  pubRef() const { return m_ptr; }
    Com<T, false> prvRef() const { return m_ptr; }

  private:

    T* m_ptr = nullptr;

    void incRef() const {
      if (m_ptr != nullptr)
        ComRef::incRef(m_ptr);
    }

    void decRef() const {
      if (m_ptr != nullptr)
        ComRef::decRef(m_ptr);
    }

  };

}
+#include +#include + +#include "com_private_data.h" + +namespace dxvk { + + ComPrivateDataEntry::ComPrivateDataEntry() { } + ComPrivateDataEntry::ComPrivateDataEntry( + REFGUID guid, + UINT size, + const void* data) + : m_guid(guid), + m_type(ComPrivateDataType::Data), + m_size(size), + m_data(std::malloc(size)) { + std::memcpy(m_data, data, size); + } + + + ComPrivateDataEntry::ComPrivateDataEntry( + REFGUID guid, + const IUnknown* iface) + : m_guid (guid), + m_type(ComPrivateDataType::Iface), + m_iface (const_cast(iface)) { + if (m_iface) + m_iface->AddRef(); + } + + + ComPrivateDataEntry::~ComPrivateDataEntry() { + this->destroy(); + } + + + ComPrivateDataEntry::ComPrivateDataEntry(ComPrivateDataEntry&& other) + : m_guid (other.m_guid), + m_type (other.m_type), + m_size (other.m_size), + m_data (other.m_data), + m_iface (other.m_iface) { + other.m_guid = __uuidof(IUnknown); + other.m_type = ComPrivateDataType::None; + other.m_size = 0; + other.m_data = nullptr; + other.m_iface = nullptr; + } + + + ComPrivateDataEntry& ComPrivateDataEntry::operator = (ComPrivateDataEntry&& other) { + this->destroy(); + this->m_guid = other.m_guid; + this->m_type = other.m_type; + this->m_size = other.m_size; + this->m_data = other.m_data; + this->m_iface = other.m_iface; + + other.m_guid = __uuidof(IUnknown); + other.m_type = ComPrivateDataType::None; + other.m_size = 0; + other.m_data = nullptr; + other.m_iface = nullptr; + return *this; + } + + + HRESULT ComPrivateDataEntry::get(UINT& size, void* data) const { + UINT minSize = 0; + + if (m_type == ComPrivateDataType::Iface) minSize = sizeof(IUnknown*); + if (m_type == ComPrivateDataType::Data) minSize = m_size; + + if (!data) { + size = minSize; + return S_OK; + } + + HRESULT result = size < minSize + ? 
DXGI_ERROR_MORE_DATA + : S_OK; + + if (size >= minSize) { + if (m_type == ComPrivateDataType::Iface) { + if (m_iface) + m_iface->AddRef(); + std::memcpy(data, &m_iface, minSize); + } else { + std::memcpy(data, m_data, minSize); + } + } + + size = minSize; + return result; + } + + + void ComPrivateDataEntry::destroy() { + if (m_data) + std::free(m_data); + if (m_iface) + m_iface->Release(); + } + + + HRESULT ComPrivateData::setData( + REFGUID guid, + UINT size, + const void* data) { + if (!data) { + for (auto it = m_entries.begin(); it != m_entries.end(); ++it) { + if (it->hasGuid(guid)) { + m_entries.erase(it); + return S_OK; + } + } + return S_FALSE; + } + this->insertEntry(ComPrivateDataEntry(guid, size, data)); + return S_OK; + } + + + HRESULT ComPrivateData::setInterface( + REFGUID guid, + const IUnknown* iface) { + this->insertEntry(ComPrivateDataEntry(guid, iface)); + return S_OK; + } + + + HRESULT ComPrivateData::getData( + REFGUID guid, + UINT* size, + void* data) { + if (!size) + return E_INVALIDARG; + + auto entry = this->findEntry(guid); + + if (!entry) { + *size = 0; + return DXGI_ERROR_NOT_FOUND; + } + + return entry->get(*size, data); + } + + + ComPrivateDataEntry* ComPrivateData::findEntry(REFGUID guid) { + for (ComPrivateDataEntry& e : m_entries) { + if (e.hasGuid(guid)) + return &e; + } + + return nullptr; + } + + + void ComPrivateData::insertEntry(ComPrivateDataEntry&& entry) { + ComPrivateDataEntry srcEntry = std::move(entry); + ComPrivateDataEntry* dstEntry = this->findEntry(srcEntry.guid()); + + if (dstEntry) + *dstEntry = std::move(srcEntry); + else + m_entries.push_back(std::move(srcEntry)); + } + +} diff --git a/src/util/com/com_private_data.h b/src/util/com/com_private_data.h new file mode 100644 index 0000000..0673f8e --- /dev/null +++ b/src/util/com/com_private_data.h @@ -0,0 +1,115 @@ +#pragma once + +#include + +#include "com_include.h" + +namespace dxvk { + + /** + * \brief COM private data entry type + */ + enum ComPrivateDataType { + 
None, + Data, + Iface, + }; + + /** + * \brief Data entry for private storage + * Stores a single private storage item. + */ + class ComPrivateDataEntry { + + public: + + ComPrivateDataEntry(); + ComPrivateDataEntry( + REFGUID guid, + UINT size, + const void* data); + ComPrivateDataEntry( + REFGUID guid, + const IUnknown* iface); + ~ComPrivateDataEntry(); + + ComPrivateDataEntry (ComPrivateDataEntry&& other); + ComPrivateDataEntry& operator = (ComPrivateDataEntry&& other); + + /** + * \brief The entry's GUID + * \returns The GUID + */ + REFGUID guid() const { + return m_guid; + } + + /** + * \brief Checks whether the GUID matches another one + * + * GUIDs are used to identify private data entries. + * \param [in] guid The GUID to compare to + * \returns \c true if this entry holds the same GUID + */ + bool hasGuid(REFGUID guid) const { + return m_guid == guid; + } + + /** + * \brief Retrieves stored data + * + * \param [in,out] size Destination buffer size + * \param [in] data Appliaction-provided buffer + * \returns \c S_OK on success, or \c DXGI_ERROR_MORE_DATA + * if the destination buffer is too small + */ + HRESULT get(UINT& size, void* data) const; + + private: + + GUID m_guid = __uuidof(IUnknown); + ComPrivateDataType m_type = ComPrivateDataType::None; + UINT m_size = 0; + void* m_data = nullptr; + IUnknown* m_iface = nullptr; + + void destroy(); + + }; + + + /** + * \brief Private storage for DXGI objects + * + * Provides storage for application-defined + * byte arrays or COM interfaces that can be + * retrieved using GUIDs. 
+ */ + class ComPrivateData { + + public: + + HRESULT setData( + REFGUID guid, + UINT size, + const void* data); + + HRESULT setInterface( + REFGUID guid, + const IUnknown* iface); + + HRESULT getData( + REFGUID guid, + UINT* size, + void* data); + + private: + + std::vector m_entries; + + ComPrivateDataEntry* findEntry(REFGUID guid); + void insertEntry(ComPrivateDataEntry&& entry); + + }; + +} diff --git a/src/util/log/log.cpp b/src/util/log/log.cpp new file mode 100644 index 0000000..da8b620 --- /dev/null +++ b/src/util/log/log.cpp @@ -0,0 +1,132 @@ +#include + +#include "log.h" + +#include "../util_env.h" + +namespace dxvk { + + Logger::Logger(const std::string& fileName) + : m_minLevel(getMinLogLevel()), m_fileName(fileName) { + + } + + + Logger::~Logger() { } + + + void Logger::trace(const std::string& message) { + s_instance.emitMsg(LogLevel::Trace, message); + } + + + void Logger::debug(const std::string& message) { + s_instance.emitMsg(LogLevel::Debug, message); + } + + + void Logger::info(const std::string& message) { + s_instance.emitMsg(LogLevel::Info, message); + } + + + void Logger::warn(const std::string& message) { + s_instance.emitMsg(LogLevel::Warn, message); + } + + + void Logger::err(const std::string& message) { + s_instance.emitMsg(LogLevel::Error, message); + } + + + void Logger::log(LogLevel level, const std::string& message) { + s_instance.emitMsg(level, message); + } + + + void Logger::emitMsg(LogLevel level, const std::string& message) { + if (level >= m_minLevel) { + std::lock_guard lock(m_mutex); + + static std::array s_prefixes + = {{ "trace: ", "debug: ", "info: ", "warn: ", "err: " }}; + + const char* prefix = s_prefixes.at(static_cast(level)); + + if (!std::exchange(m_initialized, true)) { +#ifdef _WIN32 + HMODULE ntdll = GetModuleHandleA("ntdll.dll"); + + if (ntdll) + m_wineLogOutput = reinterpret_cast(GetProcAddress(ntdll, "__wine_dbg_output")); +#endif + auto path = getFileName(m_fileName); + + if (!path.empty()) + m_fileStream = 
std::ofstream(str::topath(path.c_str()).c_str()); + } + + std::stringstream stream(message); + std::string line; + + while (std::getline(stream, line, '\n')) { + std::stringstream outstream; + outstream << prefix << line << std::endl; + + std::string adjusted = outstream.str(); + + if (!adjusted.empty()) { + if (m_wineLogOutput) + m_wineLogOutput(adjusted.c_str()); + else + std::cerr << adjusted; + } + + if (m_fileStream) + m_fileStream << adjusted; + } + } + } + + + std::string Logger::getFileName(const std::string& base) { + std::string path = env::getEnvVar("DXVK_LOG_PATH"); + + if (path == "none") + return std::string(); + + // Don't create a log file if we're writing to wine's console output + if (path.empty() && m_wineLogOutput) + return std::string(); + + if (!path.empty() && *path.rbegin() != '/') + path += '/'; + + std::string exeName = env::getExeBaseName(); + path += exeName + "_" + base; + return path; + } + + + LogLevel Logger::getMinLogLevel() { + const std::array, 6> logLevels = {{ + { "trace", LogLevel::Trace }, + { "debug", LogLevel::Debug }, + { "info", LogLevel::Info }, + { "warn", LogLevel::Warn }, + { "error", LogLevel::Error }, + { "none", LogLevel::None }, + }}; + + const std::string logLevelStr = env::getEnvVar("DXVK_LOG_LEVEL"); + + for (const auto& pair : logLevels) { + if (logLevelStr == pair.first) + return pair.second; + } + + return LogLevel::Info; + } + +} diff --git a/src/util/log/log.h b/src/util/log/log.h new file mode 100644 index 0000000..3589d70 --- /dev/null +++ b/src/util/log/log.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +#include "../thread.h" + +namespace dxvk { + + enum class LogLevel : uint32_t { + Trace = 0, + Debug = 1, + Info = 2, + Warn = 3, + Error = 4, + None = 5, + }; + + using PFN_wineLogOutput = int (STDMETHODCALLTYPE *)(const char *); + + /** + * \brief Logger + * + * Logger for one DLL. Creates a text file and + * writes all log messages to that file. 
+ */ + class Logger { + + public: + + Logger(const std::string& file_name); + ~Logger(); + + static void trace(const std::string& message); + static void debug(const std::string& message); + static void info (const std::string& message); + static void warn (const std::string& message); + static void err (const std::string& message); + static void log (LogLevel level, const std::string& message); + + static LogLevel logLevel() { + return s_instance.m_minLevel; + } + + private: + + static Logger s_instance; + + const LogLevel m_minLevel; + const std::string m_fileName; + + dxvk::mutex m_mutex; + std::ofstream m_fileStream; + + bool m_initialized = false; + PFN_wineLogOutput m_wineLogOutput = nullptr; + + void emitMsg(LogLevel level, const std::string& message); + + std::string getFileName( + const std::string& base); + + static LogLevel getMinLogLevel(); + + }; + +} diff --git a/src/util/log/log_debug.cpp b/src/util/log/log_debug.cpp new file mode 100644 index 0000000..c67742a --- /dev/null +++ b/src/util/log/log_debug.cpp @@ -0,0 +1,11 @@ +#include "log_debug.h" + +namespace dxvk::debug { + + std::string methodName(const std::string& prettyName) { + size_t end = prettyName.find("("); + size_t begin = prettyName.substr(0, end).rfind(" ") + 1; + return prettyName.substr(begin,end - begin); + } + +} diff --git a/src/util/log/log_debug.h b/src/util/log/log_debug.h new file mode 100644 index 0000000..c508432 --- /dev/null +++ b/src/util/log/log_debug.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include "log.h" + +#ifdef _MSC_VER +#define METHOD_NAME __FUNCSIG__ +#else +#define METHOD_NAME __PRETTY_FUNCTION__ +#endif + +#define TRACE_ENABLED + +#ifdef TRACE_ENABLED +#define TRACE(...) \ + do { dxvk::debug::trace(METHOD_NAME, ##__VA_ARGS__); } while (0) +#else +#define TRACE(...) 
\ + do { } while (0) +#endif + +namespace dxvk::debug { + + std::string methodName(const std::string& prettyName); + + inline void traceArgs(std::stringstream& stream) { } + + template + void traceArgs(std::stringstream& stream, const Arg1& arg1) { + stream << arg1; + } + + template + void traceArgs(std::stringstream& stream, const Arg1& arg1, const Arg2& arg2, const Args&... args) { + stream << arg1 << ","; + traceArgs(stream, arg2, args...); + } + + template + void trace(const std::string& funcName, const Args&... args) { + std::stringstream stream; + stream << methodName(funcName) << "("; + traceArgs(stream, args...); + stream << ")"; + Logger::trace(stream.str()); + } + +} diff --git a/src/util/rc/util_rc.h b/src/util/rc/util_rc.h new file mode 100644 index 0000000..b92cf00 --- /dev/null +++ b/src/util/rc/util_rc.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include "../util_likely.h" + +namespace dxvk { + + /** + * \brief Reference-counted object + */ + class RcObject { + + public: + + /** + * \brief Increments reference count + * \returns New reference count + */ + force_inline uint32_t incRef() { + return ++m_refCount; + } + + /** + * \brief Decrements reference count + * \returns New reference count + */ + force_inline uint32_t decRef() { + return --m_refCount; + } + + private: + + std::atomic m_refCount = { 0u }; + + }; + +} \ No newline at end of file diff --git a/src/util/rc/util_rc_ptr.h b/src/util/rc/util_rc_ptr.h new file mode 100644 index 0000000..bc4b4bd --- /dev/null +++ b/src/util/rc/util_rc_ptr.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include + +namespace dxvk { + + /** + * \brief Pointer for reference-counted objects + * + * This only requires the given type to implement \c incRef + * and \c decRef methods that adjust the reference count. 
+ * \tparam T Object type + */ + template + class Rc { + template + friend class Rc; + public: + + Rc() { } + Rc(std::nullptr_t) { } + + Rc(T* object) + : m_object(object) { + this->incRef(); + } + + Rc(const Rc& other) + : m_object(other.m_object) { + this->incRef(); + } + + template + Rc(const Rc& other) + : m_object(other.m_object) { + this->incRef(); + } + + Rc(Rc&& other) + : m_object(other.m_object) { + other.m_object = nullptr; + } + + template + Rc(Rc&& other) + : m_object(other.m_object) { + other.m_object = nullptr; + } + + Rc& operator = (std::nullptr_t) { + this->decRef(); + m_object = nullptr; + return *this; + } + + Rc& operator = (const Rc& other) { + other.incRef(); + this->decRef(); + m_object = other.m_object; + return *this; + } + + template + Rc& operator = (const Rc& other) { + other.incRef(); + this->decRef(); + m_object = other.m_object; + return *this; + } + + Rc& operator = (Rc&& other) { + this->decRef(); + this->m_object = other.m_object; + other.m_object = nullptr; + return *this; + } + + template + Rc& operator = (Rc&& other) { + this->decRef(); + this->m_object = other.m_object; + other.m_object = nullptr; + return *this; + } + + ~Rc() { + this->decRef(); + } + + T& operator * () const { return *m_object; } + T* operator -> () const { return m_object; } + T* ptr() const { return m_object; } + + bool operator == (const Rc& other) const { return m_object == other.m_object; } + bool operator != (const Rc& other) const { return m_object != other.m_object; } + + bool operator == (std::nullptr_t) const { return m_object == nullptr; } + bool operator != (std::nullptr_t) const { return m_object != nullptr; } + + private: + + T* m_object = nullptr; + + force_inline void incRef() const { + if (m_object != nullptr) + m_object->incRef(); + } + + force_inline void decRef() const { + if (m_object != nullptr) { + if (m_object->decRef() == 0) + delete m_object; + } + } + + }; + +} + +template +std::ostream& operator << (std::ostream& os, const 
dxvk::Rc& rc) { + return os << rc.ptr(); +} diff --git a/src/util/thread.cpp b/src/util/thread.cpp new file mode 100644 index 0000000..5c91241 --- /dev/null +++ b/src/util/thread.cpp @@ -0,0 +1,114 @@ +#include + +#include "thread.h" +#include "util_likely.h" + +#ifdef _WIN32 + +namespace dxvk { + + thread::thread(ThreadProc&& proc) + : m_data(new ThreadData(std::move(proc))) { + m_data->handle = ::CreateThread(nullptr, 0x100000, + thread::threadProc, m_data, STACK_SIZE_PARAM_IS_A_RESERVATION, + &m_data->id); + + if (!m_data->handle) { + delete m_data; + throw std::system_error(std::make_error_code(std::errc::resource_unavailable_try_again), "Failed to create thread"); + } + } + + + thread::~thread() { + if (joinable()) + std::terminate(); + } + + + void thread::join() { + if (!joinable()) + throw std::system_error(std::make_error_code(std::errc::invalid_argument), "Thread not joinable"); + + if (get_id() == this_thread::get_id()) + throw std::system_error(std::make_error_code(std::errc::resource_deadlock_would_occur), "Cannot join current thread"); + + if(::WaitForSingleObjectEx(m_data->handle, INFINITE, FALSE) == WAIT_FAILED) + throw std::system_error(std::make_error_code(std::errc::invalid_argument), "Joining thread failed"); + + detach(); + } + + + void thread::set_priority(ThreadPriority priority) { + int32_t value; + switch (priority) { + default: + case ThreadPriority::Normal: value = THREAD_PRIORITY_NORMAL; break; + case ThreadPriority::Lowest: value = THREAD_PRIORITY_LOWEST; break; + } + + if (m_data) + ::SetThreadPriority(m_data->handle, int32_t(value)); + } + + + uint32_t thread::hardware_concurrency() { + SYSTEM_INFO info = { }; + ::GetSystemInfo(&info); + return info.dwNumberOfProcessors; + } + + + DWORD WINAPI thread::threadProc(void* arg) { + auto data = reinterpret_cast(arg); + DWORD exitCode = 0; + + try { + data->proc(); + } catch (...) 
{ + exitCode = 1; + } + + data->decRef(); + return exitCode; + } + +} + + +namespace dxvk::this_thread { + + bool isInModuleDetachment() { + using PFN_RtlDllShutdownInProgress = BOOLEAN (WINAPI *)(); + + static auto RtlDllShutdownInProgress = reinterpret_cast( + ::GetProcAddress(::GetModuleHandleW(L"ntdll.dll"), "RtlDllShutdownInProgress")); + + return RtlDllShutdownInProgress(); + } + +} + +#else + +namespace dxvk::this_thread { + + static std::atomic g_threadCtr = { 0u }; + static thread_local uint32_t g_threadId = 0u; + + // This implementation returns thread ids unique to the current instance. + // ie. if you use this across multiple .so's then you might get conflicting ids. + // + // This isn't an issue for us, as it is only used by the spinlock implementation, + // but may be for you if you use this elsewhere. + uint32_t get_id() { + if (unlikely(!g_threadId)) + g_threadId = ++g_threadCtr; + + return g_threadId; + } + +} + +#endif diff --git a/src/util/thread.h b/src/util/thread.h new file mode 100644 index 0000000..6e25f40 --- /dev/null +++ b/src/util/thread.h @@ -0,0 +1,342 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "util_error.h" + +#include "./com/com_include.h" + +#include "./rc/util_rc.h" +#include "./rc/util_rc_ptr.h" + +namespace dxvk { + + /** + * \brief Thread priority + */ + enum class ThreadPriority : int32_t { + Normal, + Lowest, + }; + +#ifdef _WIN32 + + using ThreadProc = std::function; + + + /** + * \brief Thread object + */ + struct ThreadData { + ThreadData(ThreadProc&& proc_) + : proc(std::move(proc_)) { } + + ~ThreadData() { + if (handle) + CloseHandle(handle); + } + + HANDLE handle = nullptr; + DWORD id = 0; + std::atomic refs = { 2u }; + ThreadProc proc; + + void decRef() { + if (refs.fetch_sub(1, std::memory_order_release) == 1) + delete this; + } + }; + + + /** + * \brief Thread wrapper + * + * Drop-in replacement for std::thread + * using plain win32 threads. 
+ */ + class thread { + + public: + + using id = uint32_t; + using native_handle_type = HANDLE; + + thread() { } + + explicit thread(ThreadProc&& proc); + + ~thread(); + + thread(thread&& other) + : m_data(std::exchange(other.m_data, nullptr)) { } + + thread& operator = (thread&& other) { + if (m_data) + m_data->decRef(); + + m_data = std::exchange(other.m_data, nullptr); + return *this; + } + + void detach() { + m_data->decRef(); + m_data = nullptr; + } + + bool joinable() const { + return m_data != nullptr; + } + + id get_id() const { + return joinable() ? m_data->id : id(); + } + + native_handle_type native_handle() const { + return joinable() ? m_data->handle : native_handle_type(); + } + + void swap(thread& other) { + std::swap(m_data, other.m_data); + } + + void join(); + + void set_priority(ThreadPriority priority); + + static uint32_t hardware_concurrency(); + + private: + + ThreadData* m_data = nullptr; + + static DWORD WINAPI threadProc(void* arg); + + }; + + + namespace this_thread { + inline void yield() { + SwitchToThread(); + } + + inline thread::id get_id() { + return thread::id(GetCurrentThreadId()); + } + + bool isInModuleDetachment(); + } + + + /** + * \brief SRW-based mutex implementation + * + * Drop-in replacement for \c std::mutex that uses Win32 + * SRW locks, which are implemented with \c futex in wine. + */ + class mutex { + + public: + + using native_handle_type = PSRWLOCK; + + mutex() { } + + mutex(const mutex&) = delete; + mutex& operator = (const mutex&) = delete; + + void lock() { + AcquireSRWLockExclusive(&m_lock); + } + + void unlock() { + ReleaseSRWLockExclusive(&m_lock); + } + + bool try_lock() { + return TryAcquireSRWLockExclusive(&m_lock); + } + + native_handle_type native_handle() { + return &m_lock; + } + + private: + + SRWLOCK m_lock = SRWLOCK_INIT; + + }; + + + /** + * \brief Recursive mutex implementation + * + * Drop-in replacement for \c std::recursive_mutex that + * uses Win32 critical sections. 
+ */ + class recursive_mutex { + + public: + + using native_handle_type = PCRITICAL_SECTION; + + recursive_mutex() { + InitializeCriticalSection(&m_lock); + } + + ~recursive_mutex() { + DeleteCriticalSection(&m_lock); + } + + recursive_mutex(const recursive_mutex&) = delete; + recursive_mutex& operator = (const recursive_mutex&) = delete; + + void lock() { + EnterCriticalSection(&m_lock); + } + + void unlock() { + LeaveCriticalSection(&m_lock); + } + + bool try_lock() { + return TryEnterCriticalSection(&m_lock); + } + + native_handle_type native_handle() { + return &m_lock; + } + + private: + + CRITICAL_SECTION m_lock; + + }; + + + /** + * \brief SRW-based condition variable implementation + * + * Drop-in replacement for \c std::condition_variable that + * uses Win32 condition variables on SRW locks. + */ + class condition_variable { + + public: + + using native_handle_type = PCONDITION_VARIABLE; + + condition_variable() { + InitializeConditionVariable(&m_cond); + } + + condition_variable(condition_variable&) = delete; + + condition_variable& operator = (condition_variable&) = delete; + + void notify_one() { + WakeConditionVariable(&m_cond); + } + + void notify_all() { + WakeAllConditionVariable(&m_cond); + } + + void wait(std::unique_lock& lock) { + auto srw = lock.mutex()->native_handle(); + SleepConditionVariableSRW(&m_cond, srw, INFINITE, 0); + } + + template + void wait(std::unique_lock& lock, Predicate pred) { + while (!pred()) + wait(lock); + } + + template + std::cv_status wait_until(std::unique_lock& lock, const std::chrono::time_point& time) { + auto now = Clock::now(); + + return (now < time) + ? 
wait_for(lock, now - time) + : std::cv_status::timeout; + } + + template + bool wait_until(std::unique_lock& lock, const std::chrono::time_point& time, Predicate pred) { + if (pred()) + return true; + + auto now = Clock::now(); + return now < time && wait_for(lock, now - time, pred); + } + + template + std::cv_status wait_for(std::unique_lock& lock, const std::chrono::duration& timeout) { + auto ms = std::chrono::duration_cast(timeout); + auto srw = lock.mutex()->native_handle(); + + return SleepConditionVariableSRW(&m_cond, srw, ms.count(), 0) + ? std::cv_status::no_timeout + : std::cv_status::timeout; + } + + template + bool wait_for(std::unique_lock& lock, const std::chrono::duration& timeout, Predicate pred) { + bool result = pred(); + + if (!result && wait_for(lock, timeout) == std::cv_status::no_timeout) + result = pred(); + + return result; + } + + native_handle_type native_handle() { + return &m_cond; + } + + private: + + CONDITION_VARIABLE m_cond; + + }; + +#else + class thread : public std::thread { + public: + using std::thread::thread; + + void set_priority(ThreadPriority priority) { + ::sched_param param = {}; + int32_t policy; + switch (priority) { + default: + case ThreadPriority::Normal: policy = SCHED_OTHER; break; + case ThreadPriority::Lowest: policy = SCHED_IDLE; break; + } + ::pthread_setschedparam(this->native_handle(), policy, ¶m); + } + }; + + using mutex = std::mutex; + using recursive_mutex = std::recursive_mutex; + using condition_variable = std::condition_variable; + + namespace this_thread { + inline void yield() { + std::this_thread::yield(); + } + + uint32_t get_id(); + + inline bool isInModuleDetachment() { + return false; + } + } +#endif + +} diff --git a/src/util/util_bit.h b/src/util/util_bit.h new file mode 100644 index 0000000..712be4b --- /dev/null +++ b/src/util/util_bit.h @@ -0,0 +1,519 @@ +#pragma once + +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) + #define DXVK_ARCH_X86 + #if 
defined(__x86_64__) || defined(_M_X64) + #define DXVK_ARCH_X86_64 + #endif +#elif defined(__aarch64__) || defined(_M_ARM64) + #define DXVK_ARCH_ARM64 +#else +#error "Unknown CPU Architecture" +#endif + +#ifdef DXVK_ARCH_X86 + #ifndef _MSC_VER + #if defined(__WINE__) && defined(__clang__) + #pragma push_macro("_WIN32") + #undef _WIN32 + #endif + #include + #if defined(__WINE__) && defined(__clang__) + #pragma pop_macro("_WIN32") + #endif + #else + #include + #endif +#endif + +#include "util_likely.h" +#include "util_math.h" + +#include +#include +#include +#include +#include + +namespace dxvk::bit { + + template + T cast(const J& src) { + static_assert(sizeof(T) == sizeof(J)); + static_assert(std::is_trivially_copyable::value && std::is_trivial::value); + + T dst; + std::memcpy(&dst, &src, sizeof(T)); + return dst; + } + + template + T extract(T value, uint32_t fst, uint32_t lst) { + return (value >> fst) & ~(~T(0) << (lst - fst + 1)); + } + + inline uint32_t popcntStep(uint32_t n, uint32_t mask, uint32_t shift) { + return (n & mask) + ((n & ~mask) >> shift); + } + + inline uint32_t popcnt(uint32_t n) { + n = popcntStep(n, 0x55555555, 1); + n = popcntStep(n, 0x33333333, 2); + n = popcntStep(n, 0x0F0F0F0F, 4); + n = popcntStep(n, 0x00FF00FF, 8); + n = popcntStep(n, 0x0000FFFF, 16); + return n; + } + + inline uint32_t tzcnt(uint32_t n) { + #if defined(_MSC_VER) && !defined(__clang__) + return _tzcnt_u32(n); + #elif defined(__BMI__) + return __tzcnt_u32(n); + #elif defined(DXVK_ARCH_X86) && (defined(__GNUC__) || defined(__clang__)) + // tzcnt is encoded as rep bsf, so we can use it on all + // processors, but the behaviour of zero inputs differs: + // - bsf: zf = 1, cf = ?, result = ? + // - tzcnt: zf = 0, cf = 1, result = 32 + // We'll have to handle this case manually. 
+ uint32_t res; + uint32_t tmp; + asm ( + "tzcnt %2, %0;" + "mov $32, %1;" + "test %2, %2;" + "cmovz %1, %0;" + : "=&r" (res), "=&r" (tmp) + : "r" (n) + : "cc"); + return res; + #elif defined(__GNUC__) || defined(__clang__) + return n != 0 ? __builtin_ctz(n) : 32; + #else + uint32_t r = 31; + n &= -n; + r -= (n & 0x0000FFFF) ? 16 : 0; + r -= (n & 0x00FF00FF) ? 8 : 0; + r -= (n & 0x0F0F0F0F) ? 4 : 0; + r -= (n & 0x33333333) ? 2 : 0; + r -= (n & 0x55555555) ? 1 : 0; + return n != 0 ? r : 32; + #endif + } + + inline uint32_t tzcnt(uint64_t n) { + #if defined(DXVK_ARCH_X86_64) && defined(_MSC_VER) && !defined(__clang__) + return (uint32_t)_tzcnt_u64(n); + #elif defined(DXVK_ARCH_X86_64) && defined(__BMI__) + return __tzcnt_u64(n); + #elif defined(DXVK_ARCH_X86_64) && (defined(__GNUC__) || defined(__clang__)) + uint64_t res; + uint64_t tmp; + asm ( + "tzcnt %2, %0;" + "mov $64, %1;" + "test %2, %2;" + "cmovz %1, %0;" + : "=&r" (res), "=&r" (tmp) + : "r" (n) + : "cc"); + return res; + #elif defined(__GNUC__) || defined(__clang__) + return n != 0 ? __builtin_ctzll(n) : 64; + #else + uint32_t lo = uint32_t(n); + if (lo) { + return tzcnt(lo); + } else { + uint32_t hi = uint32_t(n >> 32); + return tzcnt(hi) + 32; + } + #endif + } + + inline uint32_t lzcnt(uint32_t n) { + #if (defined(_MSC_VER) && !defined(__clang__)) || defined(__LZCNT__) + return _lzcnt_u32(n); + #elif defined(__GNUC__) || defined(__clang__) + return n != 0 ? __builtin_clz(n) : 32; + #else + uint32_t r = 0; + + if (n == 0) return 32; + + if (n <= 0x0000FFFF) { r += 16; n <<= 16; } + if (n <= 0x00FFFFFF) { r += 8; n <<= 8; } + if (n <= 0x0FFFFFFF) { r += 4; n <<= 4; } + if (n <= 0x3FFFFFFF) { r += 2; n <<= 2; } + if (n <= 0x7FFFFFFF) { r += 1; n <<= 1; } + + return r; + #endif + } + + template + uint32_t pack(T& dst, uint32_t& shift, T src, uint32_t count) { + constexpr uint32_t Bits = 8 * sizeof(T); + if (likely(shift < Bits)) + dst |= src << shift; + shift += count; + return shift > Bits ? 
shift - Bits : 0; + } + + template + uint32_t unpack(T& dst, T src, uint32_t& shift, uint32_t count) { + constexpr uint32_t Bits = 8 * sizeof(T); + if (likely(shift < Bits)) + dst = (src >> shift) & ((T(1) << count) - 1); + shift += count; + return shift > Bits ? shift - Bits : 0; + } + + /** + * \brief Compares two aligned structs bit by bit + * + * \param [in] a First struct + * \param [in] b Second struct + * \returns \c true if the structs are equal + */ + template + bool bcmpeq(const T* a, const T* b) { + static_assert(alignof(T) >= 16); + #if defined(DXVK_ARCH_X86) && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) + auto ai = reinterpret_cast(a); + auto bi = reinterpret_cast(b); + + size_t i = 0; + + #if defined(__clang__) + #pragma nounroll + #elif defined(__GNUC__) + #pragma GCC unroll 0 + #endif + + for ( ; i < 2 * (sizeof(T) / 32); i += 2) { + __m128i eq0 = _mm_cmpeq_epi8( + _mm_load_si128(ai + i), + _mm_load_si128(bi + i)); + __m128i eq1 = _mm_cmpeq_epi8( + _mm_load_si128(ai + i + 1), + _mm_load_si128(bi + i + 1)); + __m128i eq = _mm_and_si128(eq0, eq1); + + int mask = _mm_movemask_epi8(eq); + if (mask != 0xFFFF) + return false; + } + + for ( ; i < sizeof(T) / 16; i++) { + __m128i eq = _mm_cmpeq_epi8( + _mm_load_si128(ai + i), + _mm_load_si128(bi + i)); + + int mask = _mm_movemask_epi8(eq); + if (mask != 0xFFFF) + return false; + } + + return true; + #else + return !std::memcmp(a, b, sizeof(T)); + #endif + } + + template + class bitset { + static constexpr size_t Dwords = align(Bits, 32) / 32; + public: + + constexpr bitset() + : m_dwords() { + + } + + constexpr bool get(uint32_t idx) const { + uint32_t dword = 0; + uint32_t bit = idx; + + // Compiler doesn't remove this otherwise. + if constexpr (Dwords > 1) { + dword = idx / 32; + bit = idx % 32; + } + + return m_dwords[dword] & (1u << bit); + } + + constexpr void set(uint32_t idx, bool value) { + uint32_t dword = 0; + uint32_t bit = idx; + + // Compiler doesn't remove this otherwise. 
+ if constexpr (Dwords > 1) { + dword = idx / 32; + bit = idx % 32; + } + + if (value) + m_dwords[dword] |= 1u << bit; + else + m_dwords[dword] &= ~(1u << bit); + } + + constexpr bool exchange(uint32_t idx, bool value) { + bool oldValue = get(idx); + set(idx, value); + return oldValue; + } + + constexpr void flip(uint32_t idx) { + uint32_t dword = 0; + uint32_t bit = idx; + + // Compiler doesn't remove this otherwise. + if constexpr (Dwords > 1) { + dword = idx / 32; + bit = idx % 32; + } + + m_dwords[dword] ^= 1u << bit; + } + + constexpr void setAll() { + if constexpr (Bits % 32 == 0) { + for (size_t i = 0; i < Dwords; i++) + m_dwords[i] = std::numeric_limits::max(); + } + else { + for (size_t i = 0; i < Dwords - 1; i++) + m_dwords[i] = std::numeric_limits::max(); + + m_dwords[Dwords - 1] = (1u << (Bits % 32)) - 1; + } + } + + constexpr void clearAll() { + for (size_t i = 0; i < Dwords; i++) + m_dwords[i] = 0; + } + + constexpr bool any() const { + for (size_t i = 0; i < Dwords; i++) { + if (m_dwords[i] != 0) + return true; + } + + return false; + } + + constexpr uint32_t& dword(uint32_t idx) { + return m_dwords[idx]; + } + + constexpr size_t bitCount() { + return Bits; + } + + constexpr size_t dwordCount() { + return Dwords; + } + + constexpr bool operator [] (uint32_t idx) const { + return get(idx); + } + + constexpr void setN(uint32_t bits) { + uint32_t fullDwords = bits / 32; + uint32_t offset = bits % 32; + + for (size_t i = 0; i < fullDwords; i++) + m_dwords[i] = std::numeric_limits::max(); + + if (offset > 0) + m_dwords[fullDwords] = (1u << offset) - 1; + } + + private: + + uint32_t m_dwords[Dwords]; + + }; + + class bitvector { + public: + + bool get(uint32_t idx) const { + uint32_t dword = idx / 32; + uint32_t bit = idx % 32; + + return m_dwords[dword] & (1u << bit); + } + + void ensureSize(uint32_t bitCount) { + uint32_t dword = bitCount / 32; + if (unlikely(dword >= m_dwords.size())) { + m_dwords.resize(dword + 1); + } + m_bitCount = 
std::max(m_bitCount, bitCount);
+    }
+
+    void set(uint32_t idx, bool value) {
+      ensureSize(idx + 1);
+
+      uint32_t dword = idx / 32; // 32-bit word that stores bit idx (was hard-coded to word 0)
+      uint32_t bit = idx % 32;   // position inside that word; keeps the shift below 32
+
+      if (value)
+        m_dwords[dword] |= 1u << bit;
+      else
+        m_dwords[dword] &= ~(1u << bit);
+    }
+
+    bool exchange(uint32_t idx, bool value) {
+      ensureSize(idx + 1);
+
+      bool oldValue = get(idx);
+      set(idx, value);
+      return oldValue;
+    }
+
+    void flip(uint32_t idx) {
+      ensureSize(idx + 1);
+
+      uint32_t dword = idx / 32;
+      uint32_t bit = idx % 32;
+
+      m_dwords[dword] ^= 1u << bit;
+    }
+
+    void setAll() {
+      if (m_bitCount % 32 == 0) {
+        for (size_t i = 0; i < m_dwords.size(); i++)
+          m_dwords[i] = std::numeric_limits<uint32_t>::max();
+      }
+      else {
+        for (size_t i = 0; i < m_dwords.size() - 1; i++)
+          m_dwords[i] = std::numeric_limits<uint32_t>::max();
+
+        m_dwords[m_dwords.size() - 1] = (1u << (m_bitCount % 32)) - 1; // only the valid low bits of the last word
+      }
+    }
+
+    void clearAll() {
+      for (size_t i = 0; i < m_dwords.size(); i++)
+        m_dwords[i] = 0;
+    }
+
+    bool any() const {
+      for (size_t i = 0; i < m_dwords.size(); i++) {
+        if (m_dwords[i] != 0)
+          return true;
+      }
+
+      return false;
+    }
+
+    uint32_t& dword(uint32_t idx) {
+      return m_dwords[idx];
+    }
+
+    size_t bitCount() const {
+      return m_bitCount;
+    }
+
+    size_t dwordCount() const {
+      return m_dwords.size();
+    }
+
+    bool operator [] (uint32_t idx) const {
+      return get(idx);
+    }
+
+    void setN(uint32_t bits) {
+      ensureSize(bits);
+
+      uint32_t fullDwords = bits / 32;
+      uint32_t offset = bits % 32;
+
+      for (size_t i = 0; i < fullDwords; i++)
+        m_dwords[i] = std::numeric_limits<uint32_t>::max();
+
+      if (offset > 0)
+        m_dwords[fullDwords] = (1u << offset) - 1; // overwrites the partial word with its low `offset` bits set
+    }
+
+  private:
+
+    std::vector<uint32_t> m_dwords;
+    uint32_t m_bitCount = 0;
+
+  };
+
+  class BitMask {
+
+  public:
+
+    class iterator {
+    public:
+      using iterator_category = std::input_iterator_tag;
+      using value_type = uint32_t;
+      using difference_type = uint32_t;
+      using pointer = const uint32_t*;
+      using reference = uint32_t;
+
+      explicit iterator(uint32_t flags)
+        : m_mask(flags) { }
+
+      
iterator& operator ++ () { + m_mask &= m_mask - 1; + return *this; + } + + iterator operator ++ (int) { + iterator retval = *this; + m_mask &= m_mask - 1; + return retval; + } + + uint32_t operator * () const { +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__BMI__) && defined(DXVK_ARCH_X86) + uint32_t res; + asm ("tzcnt %1,%0" + : "=r" (res) + : "r" (m_mask) + : "cc"); + return res; +#else + return tzcnt(m_mask); +#endif + } + + bool operator == (iterator other) const { return m_mask == other.m_mask; } + bool operator != (iterator other) const { return m_mask != other.m_mask; } + + private: + + uint32_t m_mask; + + }; + + BitMask() + : m_mask(0) { } + + BitMask(uint32_t n) + : m_mask(n) { } + + iterator begin() { + return iterator(m_mask); + } + + iterator end() { + return iterator(0); + } + + private: + + uint32_t m_mask; + + }; +} diff --git a/src/util/util_enum.h b/src/util/util_enum.h new file mode 100644 index 0000000..85b9b21 --- /dev/null +++ b/src/util/util_enum.h @@ -0,0 +1,7 @@ +#pragma once + +#define ENUM_NAME(name) \ + case name: return os << #name + +#define ENUM_DEFAULT(name) \ + default: return os << static_cast(e) diff --git a/src/util/util_env.cpp b/src/util/util_env.cpp new file mode 100644 index 0000000..1b0901f --- /dev/null +++ b/src/util/util_env.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#endif + +#include "util_env.h" + +#include "./com/com_include.h" + +namespace dxvk::env { + + std::string getEnvVar(const char* name) { +#ifdef _WIN32 + std::vector result; + result.resize(MAX_PATH + 1); + + DWORD len = ::GetEnvironmentVariableW(str::tows(name).c_str(), result.data(), MAX_PATH); + result.resize(len); + + return str::fromws(result.data()); +#else + const char* result = std::getenv(name); + return result ? 
result : "";
+#endif
+  }
+
+
+  size_t matchFileExtension(const std::string& name, const char* ext) {
+    auto pos = name.find_last_of('.');
+
+    if (pos == std::string::npos)
+      return pos;
+
+    bool matches = std::accumulate(name.begin() + pos + 1, name.end(), true,
+      [&ext] (bool current, char a) {
+        if (a >= 'A' && a <= 'Z')
+          a += 'a' - 'A';
+        return current && *ext && a == *(ext++); // ext only advances while every character so far matched
+      });
+
+    return (matches && *ext == '\0') ? pos : std::string::npos; // reject names whose suffix is only a prefix of ext ("a.ex", "a.")
+  }
+
+
+  std::string getExeName() {
+    std::string fullPath = getExePath();
+    auto n = fullPath.find_last_of(env::PlatformDirSlash);
+
+    return (n != std::string::npos)
+      ? fullPath.substr(n + 1)
+      : fullPath;
+  }
+
+
+  std::string getExeBaseName() {
+    auto exeName = getExeName();
+#ifdef _WIN32
+    auto extp = matchFileExtension(exeName, "exe");
+
+    if (extp != std::string::npos)
+      exeName.erase(extp);
+#endif
+
+    return exeName;
+  }
+
+
+  std::string getExePath() {
+#if defined(_WIN32)
+    std::vector<WCHAR> exePath;
+    exePath.resize(MAX_PATH + 1);
+
+    DWORD len = ::GetModuleFileNameW(NULL, exePath.data(), MAX_PATH);
+    exePath.resize(len + 1); // keep the terminating NUL inside the vector; str::fromws scans for it
+
+    return str::fromws(exePath.data());
+#elif defined(__linux__)
+    std::array<char, PATH_MAX> exePath = {}; // NOTE(review): template arguments were lost from this text; <char, PATH_MAX> assumed - confirm
+
+    ssize_t count = readlink("/proc/self/exe", exePath.data(), exePath.size());
+    if (count < 0) count = 0; // readlink() returns -1 on failure: return an empty path instead of an out-of-range iterator
+    return std::string(exePath.begin(), exePath.begin() + count);
+#endif
+  }
+
+
+  void setThreadName(const std::string& name) {
+#ifdef _WIN32
+    using SetThreadDescriptionProc = HRESULT (WINAPI *) (HANDLE, PCWSTR);
+
+    static auto SetThreadDescription = reinterpret_cast<SetThreadDescriptionProc>(
+      ::GetProcAddress(::GetModuleHandleW(L"kernel32.dll"), "SetThreadDescription")); // looked up at run time, so the symbol may be absent
+
+    if (SetThreadDescription) {
+      std::array<wchar_t, 16> wideName = { }; // NOTE(review): template arguments were lost from this text; <wchar_t, 16> assumed - confirm
+
+      str::transcodeString(
+        wideName.data(), wideName.size() - 1,
+        name.data(), name.size());
+
+      SetThreadDescription(::GetCurrentThread(), wideName.data());
+    }
+#else
+    std::array<char, 16> posixName = {}; // NOTE(review): size taken from the 16-byte strlcpy bound below - confirm
+    dxvk::str::strlcpy(posixName.data(), name.c_str(), 16);
+    ::pthread_setname_np(pthread_self(), posixName.data());
+#endif
+  }
+
+
+  
bool createDirectory(const std::string& path) { +#ifdef _WIN32 + std::array widePath; + + size_t length = str::transcodeString( + widePath.data(), widePath.size() - 1, + path.data(), path.size()); + + widePath[length] = L'\0'; + return !!CreateDirectoryW(widePath.data(), nullptr); +#else + return std::filesystem::create_directories(path); +#endif + } + +} diff --git a/src/util/util_env.h b/src/util/util_env.h new file mode 100644 index 0000000..80efcbc --- /dev/null +++ b/src/util/util_env.h @@ -0,0 +1,79 @@ +#pragma once + +#include "util_string.h" + +namespace dxvk::env { + +#ifdef _WIN32 + constexpr char PlatformDirSlash = '\\'; +#else + constexpr char PlatformDirSlash = '/'; +#endif + + /** + * \brief Checks whether the host platform is 32-bit + */ + constexpr bool is32BitHostPlatform() { + return sizeof(void*) == 4; + } + + /** + * \brief Gets environment variable + * + * If the variable is not defined, this will return + * an empty string. Note that environment variables + * may be defined with an empty value. + * \param [in] name Name of the variable + * \returns Value of the variable + */ + std::string getEnvVar(const char* name); + + /** + * \brief Checks whether a file name has a given extension + * + * \param [in] name File name + * \param [in] ext Extension to match, in lowercase letters + * \returns Position of the extension within the file name, or + * \c std::string::npos if the file has a different extension + */ + size_t matchFileExtension(const std::string& name, const char* ext); + + /** + * \brief Gets the executable name + * + * Returns the base name (not the full path) of the + * program executable, including the file extension. + * This function should be used to identify programs. + * \returns Executable name + */ + std::string getExeName(); + + /** + * \brief Gets the executable name without extension + * + * Same as \ref getExeName but without the file extension. 
+ * \returns Executable name + */ + std::string getExeBaseName(); + + /** + * \brief Gets full path to executable + * \returns Path to executable + */ + std::string getExePath(); + + /** + * \brief Sets name of the calling thread + * \param [in] name Thread name + */ + void setThreadName(const std::string& name); + + /** + * \brief Creates a directory + * + * \param [in] path Path to directory + * \returns \c true on success + */ + bool createDirectory(const std::string& path); + +} diff --git a/src/util/util_error.h b/src/util/util_error.h new file mode 100644 index 0000000..2cfd45f --- /dev/null +++ b/src/util/util_error.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +namespace dxvk { + + /** + * \brief DXVK error + * + * A generic exception class that stores a + * message. Exceptions should be logged. + */ + class DxvkError { + + public: + + DxvkError() { } + DxvkError(std::string&& message) + : m_message(std::move(message)) { } + + const std::string& message() const { + return m_message; + } + + private: + + std::string m_message; + + }; + +} \ No newline at end of file diff --git a/src/util/util_flags.h b/src/util/util_flags.h new file mode 100644 index 0000000..f67b4a2 --- /dev/null +++ b/src/util/util_flags.h @@ -0,0 +1,110 @@ +#pragma once + +#include + +#include "util_bit.h" + +namespace dxvk { + + template + class Flags { + + public: + + using IntType = std::underlying_type_t; + + Flags() { } + + Flags(IntType t) + : m_bits(t) { } + + template + Flags(T f, Tx... fx) { + this->set(f, fx...); + } + + template + void set(Tx... fx) { + m_bits |= bits(fx...); + } + + void set(Flags flags) { + m_bits |= flags.m_bits; + } + + template + void clr(Tx... fx) { + m_bits &= ~bits(fx...); + } + + void clr(Flags flags) { + m_bits &= ~flags.m_bits; + } + + template + bool any(Tx... fx) const { + return (m_bits & bits(fx...)) != 0; + } + + template + bool all(Tx... 
fx) const { + const IntType mask = bits(fx...); + return (m_bits & mask) == mask; + } + + bool test(T f) const { + return this->any(f); + } + + bool isClear() const { + return m_bits == 0; + } + + void clrAll() { + m_bits = 0; + } + + IntType raw() const { + return m_bits; + } + + Flags operator & (const Flags& other) const { + return Flags(m_bits & other.m_bits); + } + + Flags operator | (const Flags& other) const { + return Flags(m_bits | other.m_bits); + } + + Flags operator ^ (const Flags& other) const { + return Flags(m_bits ^ other.m_bits); + } + + bool operator == (const Flags& other) const { + return m_bits == other.m_bits; + } + + bool operator != (const Flags& other) const { + return m_bits != other.m_bits; + } + + private: + + IntType m_bits = 0; + + static IntType bit(T f) { + return IntType(1) << static_cast(f); + } + + template + static IntType bits(T f, Tx... fx) { + return bit(f) | bits(fx...); + } + + static IntType bits() { + return 0; + } + + }; + +} \ No newline at end of file diff --git a/src/util/util_likely.h b/src/util/util_likely.h new file mode 100644 index 0000000..df71cee --- /dev/null +++ b/src/util/util_likely.h @@ -0,0 +1,11 @@ +#pragma once + +#ifdef __GNUC__ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) +#define force_inline inline __attribute__((always_inline)) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#define force_inline inline +#endif diff --git a/src/util/util_math.h b/src/util/util_math.h new file mode 100644 index 0000000..5457ef0 --- /dev/null +++ b/src/util/util_math.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +namespace dxvk { + + constexpr size_t CACHE_LINE_SIZE = 64; + + template + constexpr T clamp(T n, T lo, T hi) { + if (n < lo) return lo; + if (n > hi) return hi; + return n; + } + + template + constexpr T align(T what, U to) { + return (what + to - 1) & ~(to - 1); + } + + template + constexpr T alignDown(T what, U to) { + return (what / to) * to; + } + + // 
Equivalent of std::clamp for use with floating point numbers + // Handles (-){INFINITY,NAN} cases. + // Will return min in cases of NAN, etc. + inline float fclamp(float value, float min, float max) { + return std::fmin( + std::fmax(value, min), max); + } + + template + inline T divCeil(T dividend, T divisor) { + return (dividend + divisor - 1) / divisor; + } + +} \ No newline at end of file diff --git a/src/util/util_string.cpp b/src/util/util_string.cpp new file mode 100644 index 0000000..b555a88 --- /dev/null +++ b/src/util/util_string.cpp @@ -0,0 +1,234 @@ +#include "util_string.h" + +namespace dxvk::str { + + const uint8_t* decodeTypedChar( + const uint8_t* begin, + const uint8_t* end, + uint32_t& ch) { + uint32_t first = begin[0]; + + if (likely(first < 0x80)) { + // Basic ASCII character + ch = uint32_t(first); + return begin + 1; + } else if (unlikely(first < 0xC0)) { + // Character starts with a continuation byte, + // just skip until we find the next valid prefix + while ((begin < end) && (((*begin) & 0xC0) == 0x80)) + begin += 1; + + ch = uint32_t('?'); + return begin; + } else { + // The number of leading 1 bits in the first byte + // determines the length of this character + size_t length = bit::lzcnt((~first) << 24); + + if (unlikely(begin + length > end)) { + ch = uint32_t('?'); + return end; + } + + if (first < 0xE0) { + ch = ((uint32_t(begin[0]) & 0x1F) << 6) + | ((uint32_t(begin[1]) & 0x3F)); + } else if (first < 0xF0) { + ch = ((uint32_t(begin[0]) & 0x0F) << 12) + | ((uint32_t(begin[1]) & 0x3F) << 6) + | ((uint32_t(begin[2]) & 0x3F)); + } else if (first < 0xF8) { + ch = ((uint32_t(begin[0]) & 0x07) << 18) + | ((uint32_t(begin[1]) & 0x3F) << 12) + | ((uint32_t(begin[2]) & 0x3F) << 6) + | ((uint32_t(begin[3]) & 0x3F)); + } else { + // Invalid prefix + ch = uint32_t('?'); + } + + return begin + length; + } + } + + const uint16_t* decodeTypedChar( + const uint16_t* begin, + const uint16_t* end, + uint32_t& ch) { + uint32_t first = begin[0]; + + if 
(likely(first < 0xD800)) { + ch = first; + return begin + 1; + } else if (first < 0xDC00) { + if (unlikely(begin + 2 > end)) { + ch = uint32_t('?'); + return end; + } + + ch = 0x10000 + + ((uint32_t(begin[0]) & 0x3FF) << 10) + + ((uint32_t(begin[1]) & 0x3FF)); + return begin + 2; + } else if (unlikely(first < 0xE000)) { + // Stray low surrogate + ch = uint32_t('?'); + return begin + 1; + } else { + ch = first; + return begin + 1; + } + } + + + const uint32_t* decodeTypedChar( + const uint32_t* begin, + const uint32_t* end, + uint32_t& ch) { + ch = begin[0]; + return begin + 1; + } + + + size_t encodeTypedChar( + uint8_t* begin, + uint8_t* end, + uint32_t ch) { + if (likely(ch < 0x80)) { + if (begin) { + if (unlikely(begin + 1 > end)) + return 0; + + begin[0] = uint8_t(ch); + } + + return 1; + } else if (ch < 0x800) { + if (begin) { + if (unlikely(begin + 2 > end)) + return 0; + + begin[0] = uint8_t(0xC0 | (ch >> 6)); + begin[1] = uint8_t(0x80 | (ch & 0x3F)); + } + + return 2; + } else if (ch < 0x10000) { + if (begin) { + if (unlikely(begin + 3 > end)) + return 0; + + begin[0] = uint8_t(0xE0 | ((ch >> 12))); + begin[1] = uint8_t(0x80 | ((ch >> 6) & 0x3F)); + begin[2] = uint8_t(0x80 | ((ch >> 0) & 0x3F)); + } + + return 3; + } else if (ch < 0x200000) { + if (begin) { + if (unlikely(begin + 4 > end)) + return 0; + + begin[0] = uint8_t(0xF0 | ((ch >> 18))); + begin[1] = uint8_t(0x80 | ((ch >> 12) & 0x3F)); + begin[2] = uint8_t(0x80 | ((ch >> 6) & 0x3F)); + begin[3] = uint8_t(0x80 | ((ch >> 0) & 0x3F)); + } + + return 4; + } else { + // Invalid code point for UTF-8 + return 0; + } + } + + + size_t encodeTypedChar( + uint16_t* begin, + uint16_t* end, + uint32_t ch) { + if (likely(ch < 0xD800)) { + if (begin) { + if (unlikely(begin + 1 > end)) + return 0; + + begin[0] = ch; + } + + return 1; + } else if (ch < 0xE000) { + // Private use code points, + // we can't encode these + return 0; + } else if (ch < 0x10000) { + if (begin) { + if (unlikely(begin + 1 > end)) + return 
0; + + begin[0] = ch; + } + + return 1; + } else if (ch < 0x110000) { + if (begin) { + if (unlikely(begin + 2 > end)) + return 0; + + ch -= 0x10000; + begin[0] = uint16_t(0xD800 + (ch >> 10)); + begin[1] = uint16_t(0xDC00 + (ch & 0x3FF)); + } + + return 2; + } else { + // Invalid code point + return 0; + } + } + + + size_t encodeTypedChar( + uint32_t* begin, + uint32_t* end, + uint32_t ch) { + if (begin) { + if (unlikely(begin + 1 > end)) + return 0; + + begin[0] = ch; + } + + return 1; + } + + + std::string fromws(const WCHAR* ws) { + size_t srcLen = length(ws); + size_t dstLen = transcodeString( + nullptr, 0, ws, srcLen); + + std::string result; + result.resize(dstLen); + + transcodeString(result.data(), + dstLen, ws, srcLen); + + return result; + } + + + std::wstring tows(const char* mbs) { + size_t srcLen = length(mbs); + size_t dstLen = transcodeString( + nullptr, 0, mbs, srcLen); + + std::wstring result; + result.resize(dstLen); + + transcodeString(result.data(), + dstLen, mbs, srcLen); + + return result; + } + +} diff --git a/src/util/util_string.h b/src/util/util_string.h new file mode 100644 index 0000000..87b74af --- /dev/null +++ b/src/util/util_string.h @@ -0,0 +1,242 @@ +#pragma once + +#include +#include +#include +#include + +#include "./com/com_include.h" + +#include "util_bit.h" +#include "util_likely.h" + +namespace dxvk::str { + + template struct UnicodeChar { }; + template<> struct UnicodeChar<1> { using type = uint8_t; }; + template<> struct UnicodeChar<2> { using type = uint16_t; }; + template<> struct UnicodeChar<4> { using type = uint32_t; }; + + template + using UnicodeCharType = typename UnicodeChar::type; + + const uint8_t* decodeTypedChar( + const uint8_t* begin, + const uint8_t* end, + uint32_t& ch); + + const uint16_t* decodeTypedChar( + const uint16_t* begin, + const uint16_t* end, + uint32_t& ch); + + const uint32_t* decodeTypedChar( + const uint32_t* begin, + const uint32_t* end, + uint32_t& ch); + + size_t encodeTypedChar( + 
uint8_t* begin, + uint8_t* end, + uint32_t ch); + + size_t encodeTypedChar( + uint16_t* begin, + uint16_t* end, + uint32_t ch); + + size_t encodeTypedChar( + uint32_t* begin, + uint32_t* end, + uint32_t ch); + + /** + * \brief Decodes a single character + * + * Note that \c begin and \c end must not be equal. + * \param [in] begin Pointer to current position within the input string + * \param [in] end Pointer to the end of the input string + * \param [out] ch Pointer to the decoded character code + * \returns Pointer to next character in the input string + */ + template + const T* decodeChar( + const T* begin, + const T* end, + uint32_t& ch) { + using CharType = UnicodeCharType; + + const CharType* result = decodeTypedChar( + reinterpret_cast(begin), + reinterpret_cast(end), + ch); + + return reinterpret_cast(result); + } + + /** + * \brief Encodes a character + * + * Note that \c begin and \c end may be both be \c nullptr or equal, in + * which case only the length of the encoded character will be returned. + * \param [in] begin Pointer to current position within the output string + * \param [in] end Pointer to the end of the output string + * \param [in] ch Character to encode + * \returns If begin is \c nullptr , the number of units required to encode + * the character. Otherwise, the number of units written to the output. + * This may return \c 0 for characters that cannot be written or encoded. 
+ */ + template + size_t encodeChar( + T* begin, + T* end, + uint32_t ch) { + using CharType = UnicodeCharType; + + return encodeTypedChar( + reinterpret_cast(begin), + reinterpret_cast(end), + ch); + } + + /** + * \brief Computes length of a null-terminated string + * + * \param [in] begin Start of input string + * \returns Number of characters in input string, + * excluding the terminating null character + */ + template + size_t length(const S* string) { + size_t result = 0; + + while (string[result]) + result += 1; + + return result; + } + + /** + * \brief Converts string from one encoding to another + * + * The output string arguments may be \c nullptr. In that case, the + * total length of the transcoded string will be returned, in units + * of the output character type. The output string will only be + * null-terminated if the input string is also null-terminated. + * \tparam D Output character type + * \tparam S Input character type + * \param [in] dstBegin Start of output string + * \param [in] dstLength Length of output string + * \param [in] srcBegin Start of input string + * \param [in] srcLength Length of input string + * \returns If \c dstBegin is \c nullptr , the total number of output + * characters required to store the output string. Otherwise, the + * total number of characters written to the output string. 
+ */ + template + size_t transcodeString( + D* dstBegin, + size_t dstLength, + const S* srcBegin, + size_t srcLength) { + size_t totalLength = 0; + + auto dstEnd = dstBegin + dstLength; + auto srcEnd = srcBegin + srcLength; + + while (srcBegin < srcEnd) { + uint32_t ch; + + srcBegin = decodeChar(srcBegin, srcEnd, ch); + + if (dstBegin) + totalLength += encodeChar(dstBegin + totalLength, dstEnd, ch); + else + totalLength += encodeChar(nullptr, nullptr, ch); + + if (!ch) + break; + } + + return totalLength; + } + + /** + * \brief Creates string object from wide char array + * + * \param [in] ws Null-terminated wide string + * \returns Regular string object + */ + std::string fromws(const WCHAR* ws); + + /** + * \brief Creates wide string object from char array + * + * \param [in] mbs Null-terminated string + * \returns Wide string object + */ + std::wstring tows(const char* mbs); + +#ifdef _WIN32 + using path_string = std::wstring; + inline path_string topath(const char* mbs) { return tows(mbs); } +#else + using path_string = std::string; + inline path_string topath(const char* mbs) { return std::string(mbs); } +#endif + + inline void format1(std::stringstream&) { } + + template + void format1(std::stringstream& str, const WCHAR *arg, const Tx&... args) { + str << fromws(arg); + format1(str, args...); + } + + template + void format1(std::stringstream& str, const T& arg, const Tx&... args) { + str << arg; + format1(str, args...); + } + + template + std::string format(const Args&... 
args) { + std::stringstream stream; + format1(stream, args...); + return stream.str(); + } + + inline void strlcpy(char* dst, const char* src, size_t count) { + if (count > 0) { + std::strncpy(dst, src, count - 1); + dst[count - 1] = '\0'; + } + } + + /** + * \brief Split string at one or more delimiters characters + * + * \param [in] string String to split + * \param [in] delims Delimiter characters + * \returns Vector of substring views + */ + inline std::vector split(std::string_view string, std::string_view delims = " ") { + std::vector tokens; + + for (size_t start = 0; start < string.size(); ) { + // Find first delimiter + const auto end = string.find_first_of(delims, start); + + // Add non-empty tokens + if (start != end) + tokens.emplace_back(string.substr(start, end-start)); + + // Break at the end of string + if (end == std::string_view::npos) + break; + + start = end + 1; + } + return tokens; + } +}