1
Fork 0

Port initial commit

This commit is contained in:
n64 2020-06-18 11:58:18 +02:00
parent 05c7d7031c
commit 982af95fc2
76 changed files with 34726 additions and 352 deletions

4
.gitignore vendored
View file

@ -12,7 +12,6 @@
*.pch *.pch
# Libraries # Libraries
*.lib
*.a *.a
*.la *.la
*.lo *.lo
@ -43,6 +42,7 @@
.vscode/* .vscode/*
# General project-specific ignores # General project-specific ignores
__pycache__/*
doxygen/doxygen/* doxygen/doxygen/*
build/* build/*
*.dump *.dump
@ -50,6 +50,8 @@ build/*
*.z64 *.z64
*.map *.map
.assets-local.txt .assets-local.txt
sm64_save_file.bin
sm64config.txt
# Assets. Generally ignored, but ones with "custom" in the name are fine. # Assets. Generally ignored, but ones with "custom" in the name are fine.
/levels/**/*.png /levels/**/*.png

285
CHANGES
View file

@ -1,285 +0,0 @@
Refresh #10.1
1.) Diff update (#1033)
2.) Fix texture dimensions for exclamation boxes (#1034)
3.) Fix armips compilation on Windows by changing order of inclusion files (#1035)
4.) Embed libaudiofile into the repo as a single file (#1036)
5.) Fix some tools issues found while compiling on MSYS2 (#1037)
Refresh #10
1.) GCC 9 noreturn UB fixes (#961)
2.) List supported binutils variants in README.md (#960)
3.) fix fly guy's possible values for target roll
4.) clarify that 1 is an exclusive bound in random_float (#962)
5.) replace unkB8 with prevNumStarsForDialog (#963)
6.) Replace "BulletBillUnkF8" with "BulletBillInitialMoveYaw" (#966)
7.) Renamed more warp behaviors and spawn types (#949)
8.) EU bss cleanups (#965)
9.) Add -fno-zero-initialized-in-bss to GCC CFLAGS (#968)
10.) Use empty .SECONDARY target instead of .PRECIOUS (#964)
11.) Match EU seq_channel_layer_process_script (#969)
12.) Fix version variable overwriting bug for non-matching compilations (#974)
13.) small bhv_collect_star_init labeling (#972)
14.) Align SI DMA command buffers, add more names to osCont code (#973)
15.) Minor for loop fix in playback.c (#976)
16.) Add Docker config (Public #36)
17.) Fix usage output crash for skyconv (#977)
18.) Cleanup tools and their build flags (#978)
19.) Print more detailed file reading error messages in extract_assets (#982)
20.) Remove asmdiff (#983)
21.) Fixed tabledesign makefile bug (#985)
22.) Improve first-diff.py (#987)
23.) Bring in some enhancements such as widescreen enhancements (#981)
24.) Fix background shadow border if widescreen in shade_screen (#988)
25.) Merge common EU sections in osExceptionPreamble (#990)
26.) Introduce the NO_SEGMENTED_MEMORY define (#989)
27.) NO_SEGMENTED_MEMORY comments (#992)
28.) Introduce f3dex2e and a GBI_FLOATS setting (#993)
29.) Audio uses signed 16-bit integers as samples, not unsigned (#995)
30.) Assemble RSP ucode with armips (#970)
31.) Label Mr. Blizzard, Yoshi, Whomp, Wiggler, Wood Piece oFields. (#996)
32.) update crash enhancement (#1003)
33.) Update README.md to use new public Discord server invite
34.) Label variables in act_idle. (#1005)
35.) Fix a typo in camera.h (#1009)
36.) Fixed up profiler.c (#1010)
37.) Add -d to first-diff.py (#1012)
38.) Fix typo in first-diff.py (#1013)
39.) Remove binutils 2.27 dependency check (#1015)
40.) Update armips to v0.11.0 tag and add generator script (#1004)
41.) Extract IPL3 font from ROM (#1011)
42.) Documented audio playback flags (#1021)
43.) Change RSP register notation to dollar-prefixed numbers (#1022)
44.) Only check for QEMU_IRIX in Makefile when compiling with IDO (#1023)
45.) Clean up header files and update function prototypes (#1000)
46.) Named local variables in geo_update_layer_transparency (#1019)
47.) Extract crash screen font textures from EU and SH ROMs (#1025)
48.) Use ACTIVE_FLAG defines where appropriate (#1026)
49.) Changed type of actor collision from s16 to Collision (#1024)
50.) Implement sqrtf in assembly file (#1028)
51.) Rename Struct8032FCE8 as OpenableGrill and identify data members (#1029)
52.) Fix some mistakes in the abi doc (#1031)
53.) The README.md PR (#979)
Refresh #8
1.) Use INT_STATUS_ macros for oInteractStatus (#911)
2.) Update bullet_bill.inc.c (#912)
3.) Make geo.c in levels nonconst to reduce diffs with gcc enhancement patch in future (#913)
4.) Improve process_level_music_dynamics (#915)
5.) add AVOID_UB define for fixing libultra aliasing. (#916)
6.) const hack to TARGET_N64 and tweak ld script (#918)
7.) Replace raw level/course numbers with defines (#919)
8.) Label all graph node flags. (#920)
9.) readme.md capitalization
10.) The Big Function PR (Part 5) (#910)
11.) Label hardware registers (#922)
12.) Match EU synthesis_process_note (#923)
13.) Some quick value changes (#909)
14.) Added sequence player defines (#926)
15.) Shindou diffs 2 [merge game.c and display.c, required for SH] (#927)
16.) Match synthesis_process_notes and use -sopt for synthesis.c (#928)
17.) Fix mips64-binutils for Fedora 31 and similar Linux distros (#931)
18.) Update first-diff.py to work with Shindou (#933)
19.) Labelled behavior_script.c (#929)
20.) shindou diffs up to level_update (#932)
21.) Update diff.py and diff_settings.py to work with SH (#936)
22.) Add some Shindou resources to extract_assets.py (#939)
23.) Label and document gd_math.c (#930)
24.) Label all of Mario's actions. (#941)
25.) Label all particles. (#940)
26.) Renamed death related warpBhvs (#942)
27.) Small Shindou differences (#945)
28.) match player_performed_grab_escape_action in all versions (#943)
29.) add main alignment (aglab2, #947)
30.) Match a bunch of fake EU matchings (#944)
31.) Shindou diffs round 4 (#937)
32.) Easy PAL fake matchings (#946)
33.) Merge object_helpers and object_helpers2 headers (#948)
34.) match cap_check_quicksand (#950)
35.) Shindou crash screen diffs (#938)
36.) Thread6 (#951)
37.) Correct IPL3 function and label addresses (#952)
38.) Add COMPILER flag for Makefile and add official mips gcc support. (#953)
39.) add align to sequence_00 (fixes gcc build) (#957)
40.) Fixed itty bitty typo (#959)
Refresh #7
1.) update README.md (#861)
2.) [eu] fix decompilation bugs in synthesis_process_note (#862)
3.) Label and document Snufit.inc.c (#863)
4.) Label and document Snufit.inc.c (#863)
5.) Match append_bubble_vertex_buffer, append_snowflake_vertex_buffer (#860)
6.) Compile audio/effects.c with -sopt (#870)
7.) Match handle_dialog_text_and_pages (#872)
8.) Remove improperly used LEVEL_SA define. (#875)
9.) Remove unused EU ASM files (#873)
10.) Shindou build flag support (#876)
11.) The Big Function PR (Part 1) (#864)
12.) "Re-document" file_select and star_select for EU (#877)
13.) Document paintings.c (#868)
14.) Minor interaction.c labelling/documentation. (#882)
15.) Match synthesis_execute, prepare_reverb_ring_buffer, func_eu_802e00d8 (#886)
16.) The Big Function PR (Part 2) (#874)
17.) Additional painting documentation (#883)
18.) Some EU audio labelling (#885)
19.) Bring synthesis_do_one_audio_update a bit closer to matching (#887)
20.) Misleading comment, this actually controls Bowser's init action (#889)
21.) Match synthesis_do_one_audio_update (#890)
22.) Remove unneeded if(1) in synthesis_do_one_audio_update (#891)
23.) Match synthesis_resample_and_mix_reverb (#893)
24.) Set LANG=C (#896)
25.) Match note_apply_headset_pan_effects (#897)
26.) Match play_sequence (#898)
27.) Match sequence_channel_enable (#899)
28.) Match sequence_player_process_sequence (#900)
29.) Match (eu) alloc_bank_or_seq (#901)
30.) Match alloc_bank_or_seq (#903)
31.) Update README.md (again)
32.) Documented behaviour/fish.inc.c & blue_fish.inc.c (#894)
33.) Labelled 4 behavior files, made object_helper functions consistent (#881)
34.) Add a make target for .copt (#902)
35.) The Big Function PR (Part 3) (#888)
36.) The Big Function PR (Part 4) (#906)
37.) Rename src/audio/memory.c -> heap.c (#907)
38.) Fix BBH texture (#908)
Refresh #6
1.) Make tools work on MinGW (#804)
2.) Document mario_misc (#628)
3.) add patch_libmalloc.py script to work around compiler crashes (#811)
4.) Label the coffins file. (#829)
5.) Convert enhancements into patches (#827)
6.) Document Tweester.inc.c (#840)
7.) EU OK and cleanup EU (#782)
Refresh #5
1.) mem_error_screen.inc.c updated for C (#775)
2.) updated patch_libultra_math to work directly on libultra.a (#781)
3.) Enhancement: Debug boxes (#783)
4.) Label effects MemoryPool (#784)
5.) Use full-width characters for JP text and rename mislabeled stuff (#772)
6.) Small asset extraction fixes (#785)
7.) Fix incremental asset extraction after the last PR (#790)
8.) Labelled behavior script commands, tidied up behavior scripts and ren…
9.) Document camera.c (#741)
10.) Fix a HMC texture asset (#795)
11.) Fix UB in angle conversion for paintings. Needed for pc-port. (#796)
12.) Fix PSS texture assets (#801)
13.) Fix some camera typos (#802)
Refresh #4
1.) Label screen_transition.c (#737)
2.) Revamp macros (#738)
3.) Simplify the cake makefile logic (#739)
4.) Move zbuffer to C (#742)
5.) Audio blob endian/64-bit fixes (#740)
6.) Ub fixes (#745)
7.) process_level_music_dynamics: slightly better stack and regalloc (#746)
8.) move level order specific definitions out to levels/level_defines.h (#743)
9.) Renamed some mislabelled sound effects (#748)
10.) Very minor white space fixes in Mario.c (#752)
11.) Renamed GRAVITY to OBJ_PHYSICS (#755)
12.) Use defined macros instead of literal values for interactions in behavior_data.c (#754)
13.) iQue Player console detection + full support (#756)
14.) Fix Goddard Mario head for little endian etc. (#757)
15.) Small bits of audio doc (#749)
16.) Update diff script (#750)
17.) Nicer format for course/dialog text, enum for dialog IDs (#753)
18.) first-diff: store most recent mtime during check (#759)
19.) Use Lights1 struct for light values (#760)
20.) Detect mips64-elf in diff.py (#761)
21.) Name display lists in gd renderer.c (#764)
22.) Document a variety of PU type crashes. (#765)
23.) Label object_behaviors. (#716)
24.) Update swoop.inc.c (#767)
25.) Label tilting_pyramid.inc.c (#768)
26.) Label red_coin.inc.c (#770)
27.) Use more level defines (#758)
28.) Named Mario actions 6, 7, and 8 and noted causes of hands-free holding glitch (#769)
Refresh #3
1.) Small bits of sound sample labelling
2.) Append 'b' to fopen calls in n64graphics to satisfy Windows
3.) Document synthesis.c and abi.h
4.) Remove gbi_old.h and use gbi.h instead for F3D_OLD
5.) Fix and change most values used by f3d commands
6.) Convert actors to C
7.) Make extract_assets.py fail on bad baserom
8.) Fix some (pragma-ignored) warnings
9.) Convert trig tables and data/ buffers into C
10.) Document intro behaviors and some of camera.c
11.) Match dma_sample_data
12.) Convert levels, bin, text, goddard_tex_dl to C
13.) Remove .bss size hardcoding
14.) Add mips64 fallback and alt dump option to diff.sh
15.) Convert behavior_data to C
16.) Get rid of the enum in behavior_data.
17.) Change more values used by f3d commands
18.) Document game/skybox.c
19.) Actors levels fixup
20.) Convert Mario animations to C (kind of)
22.) generate skybox data from 248x248 images
23.) 64-bit fixes
24.) Move demo data to C
25.) Change uintptr_t to GeoLayout for actors
26.) Move libc includes to include/libc/
Refresh #2a (hotfix)
Changes from Refresh #2:
1.) Fixed text Makefile dependencies
Refresh #2
Changes from Refresh #1:
1.) Label src/game/behaviors/boo.inc.c
2.) Verify half of audio_defines.h
3.) Label ingame_menu.c and some dialog stuff
4.) Improve handle_dialog_text_and_pages EU non-matching
5.) Fix texture format for water splash/sparkle
6.) Merge ucode.bin and ucode_unk.bin
7.) 64-bit fixes
8.) [AUDIO] Match func_80316928
9.) Documented bug in Tuxie's mother's code
10.) [AUDIO] Slightly closer matching for process_level_music_dynamics
11.) Name channel scripts in sequence 0
12.) Nicer error message when sha1sum doesn't match
13.) Refactor object fields
14.) Document file_select.c
15.) Document star_select.c
16.) Add uncompressed string support to textconv
17.) Document main_menu geo and leveldata
18.) Document hud.c and rename char textures
19.) Correct gSPTextureRectangle values
20.) Linker script wildcard
- Fixes "undefined reference to x in discarded section y" linker errors.
- Also means that you no longer have to add linker script entries for source files.
21.) Reference qemu-irix package and add project structure
- Installing qemu-irix is as simple as installing the provided Debian package on the GitHub "Releases" tab
(https://github.com/n64decomp/qemu-irix).
- Old qemu-irix installs are unaffected by this change, and it is still possible to install qemu-irix with the older method.
Refresh #1
Changes from initial release:
1.) More README cleanups and documentation
2.) Fix .gitignore in tools/
3.) More README cleanups and corrections
4.) oUnk190 -> oInteractionSubtype
5.) Normalize line endings
6.) Replace bank_sets.s by sequences.json + build magic
7.) Fix tabledesign in .gitignore
8.) Cleaned and documented arrays in audio/
9.) Change minimum Python 3 requirement in README.md to 3.6
10.) Remove whitespace in ingame_menu.c, save_file.c, and star_select.c
11.) Documentation comment style fixed
12.) sm64.map -> sm64.version.map (ex: sm64.us.map)
13.) Decompile EU versions of HUD and obj behaviors
14.) Label ukiki.inc.c and ukiki_cage.inc.c
15.) Add missing OS header includes
16.) Fix alignment issue in model_ids.h
17.) A bunch of behavior changes:
a.) Made styles consistent with function names (ex: BehBehaviorLoop -> beh_behavior_loop)
b.) Split obj_behaviors_2.c into multiple .inc.c files in behaviors/
c.) bhvHauntedRoomCheck -> bhvCoffinManager
18.) Update README.md notes on WSL and add links
19.) Added tidy.sh and also bracing around single-line ifs and loops

223
Makefile
View file

@ -18,10 +18,65 @@ COMPARE ?= 1
# If NON_MATCHING is 1, define the NON_MATCHING and AVOID_UB macros when building (recommended) # If NON_MATCHING is 1, define the NON_MATCHING and AVOID_UB macros when building (recommended)
NON_MATCHING ?= 0 NON_MATCHING ?= 0
# Build for the N64 (turn this off for ports) # Build for the N64 (turn this off for ports)
TARGET_N64 ?= 1 TARGET_N64 ?= 0
# Build for Emscripten/WebGL
TARGET_WEB ?= 0
# Compiler to use (ido or gcc) # Compiler to use (ido or gcc)
COMPILER ?= ido COMPILER ?= ido
# Automatic settings only for ports
ifeq ($(TARGET_N64),0)
NON_MATCHING := 1
GRUCODE := f3dex2e
TARGET_WINDOWS := 0
ifeq ($(TARGET_WEB),0)
ifeq ($(OS),Windows_NT)
TARGET_WINDOWS := 1
else
# TODO: Detect Mac OS X, BSD, etc. For now, assume Linux
TARGET_LINUX := 1
endif
endif
ifeq ($(TARGET_WINDOWS),1)
# On Windows, default to DirectX 11
ifneq ($(ENABLE_OPENGL),1)
ifneq ($(ENABLE_DX12),1)
ENABLE_DX11 ?= 1
endif
endif
else
# On others, default to OpenGL
ENABLE_OPENGL ?= 1
endif
# Sanity checks
ifeq ($(ENABLE_DX11),1)
ifneq ($(TARGET_WINDOWS),1)
$(error The DirectX 11 backend is only supported on Windows)
endif
ifeq ($(ENABLE_OPENGL),1)
$(error Cannot specify multiple graphics backends)
endif
ifeq ($(ENABLE_DX12),1)
$(error Cannot specify multiple graphics backends)
endif
endif
ifeq ($(ENABLE_DX12),1)
ifneq ($(TARGET_WINDOWS),1)
$(error The DirectX 12 backend is only supported on Windows)
endif
ifeq ($(ENABLE_OPENGL),1)
$(error Cannot specify multiple graphics backends)
endif
ifeq ($(ENABLE_DX11),1)
$(error Cannot specify multiple graphics backends)
endif
endif
endif
ifeq ($(COMPILER),gcc) ifeq ($(COMPILER),gcc)
NON_MATCHING := 1 NON_MATCHING := 1
endif endif
@ -71,6 +126,11 @@ ifeq ($(GRUCODE), f3dex2) # Fast3DEX2
TARGET := $(TARGET).f3dex2 TARGET := $(TARGET).f3dex2
COMPARE := 0 COMPARE := 0
else else
ifeq ($(GRUCODE), f3dex2e) # Fast3DEX2 Extended (for PC)
GRUCODE_DEF := F3DEX_GBI_2E
TARGET := $(TARGET).f3dex2e
COMPARE := 0
else
ifeq ($(GRUCODE),f3d_new) # Fast3D 2.0H (Shindou) ifeq ($(GRUCODE),f3d_new) # Fast3D 2.0H (Shindou)
GRUCODE_DEF := F3D_NEW GRUCODE_DEF := F3D_NEW
TARGET := $(TARGET).f3d_new TARGET := $(TARGET).f3d_new
@ -86,14 +146,11 @@ endif
endif endif
endif endif
endif endif
endif
GRUCODE_CFLAGS := -D$(GRUCODE_DEF) GRUCODE_CFLAGS := -D$(GRUCODE_DEF)
GRUCODE_ASFLAGS := $(GRUCODE_ASFLAGS) --defsym $(GRUCODE_DEF)=1 GRUCODE_ASFLAGS := $(GRUCODE_ASFLAGS) --defsym $(GRUCODE_DEF)=1
ifeq ($(TARGET_N64),0)
NON_MATCHING := 1
endif
ifeq ($(NON_MATCHING),1) ifeq ($(NON_MATCHING),1)
MATCH_CFLAGS := -DNON_MATCHING -DAVOID_UB MATCH_CFLAGS := -DNON_MATCHING -DAVOID_UB
MATCH_ASFLAGS := --defsym AVOID_UB=1 MATCH_ASFLAGS := --defsym AVOID_UB=1
@ -131,9 +188,26 @@ endif
# BUILD_DIR is location where all build artifacts are placed # BUILD_DIR is location where all build artifacts are placed
BUILD_DIR_BASE := build BUILD_DIR_BASE := build
ifeq ($(TARGET_N64),1)
BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION) BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)
else
ifeq ($(TARGET_WEB),1)
BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)_web
else
BUILD_DIR := $(BUILD_DIR_BASE)/$(VERSION)_pc
endif
endif
LIBULTRA := $(BUILD_DIR)/libultra.a LIBULTRA := $(BUILD_DIR)/libultra.a
ifeq ($(TARGET_WEB),1)
EXE := $(BUILD_DIR)/$(TARGET).html
else
ifeq ($(TARGET_WINDOWS),1)
EXE := $(BUILD_DIR)/$(TARGET).exe
else
EXE := $(BUILD_DIR)/$(TARGET)
endif
endif
ROM := $(BUILD_DIR)/$(TARGET).z64 ROM := $(BUILD_DIR)/$(TARGET).z64
ELF := $(BUILD_DIR)/$(TARGET).elf ELF := $(BUILD_DIR)/$(TARGET).elf
LD_SCRIPT := sm64.ld LD_SCRIPT := sm64.ld
@ -144,8 +218,14 @@ ACTOR_DIR := actors
LEVEL_DIRS := $(patsubst levels/%,%,$(dir $(wildcard levels/*/header.h))) LEVEL_DIRS := $(patsubst levels/%,%,$(dir $(wildcard levels/*/header.h)))
# Directories containing source files # Directories containing source files
SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin data assets SRC_DIRS := src src/engine src/game src/audio src/menu src/buffers actors levels bin bin/$(VERSION) data assets
ASM_DIRS := asm lib ASM_DIRS := lib
ifeq ($(TARGET_N64),1)
ASM_DIRS := asm $(ASM_DIRS)
else
SRC_DIRS := $(SRC_DIRS) src/pc src/pc/gfx src/pc/audio src/pc/controller
ASM_DIRS :=
endif
BIN_DIRS := bin bin/$(VERSION) BIN_DIRS := bin bin/$(VERSION)
ULTRA_SRC_DIRS := lib/src lib/src/math ULTRA_SRC_DIRS := lib/src lib/src/math
@ -161,6 +241,8 @@ ifeq ($(COMPILER),gcc)
MIPSISET := -mips3 MIPSISET := -mips3
endif endif
ifeq ($(TARGET_N64),1)
ifeq ($(VERSION),eu) ifeq ($(VERSION),eu)
OPT_FLAGS := -O2 OPT_FLAGS := -O2
else else
@ -176,17 +258,49 @@ ifeq ($(COMPILER),gcc)
OPT_FLAGS := -O2 OPT_FLAGS := -O2
endif endif
else
ifeq ($(TARGET_WEB),1)
OPT_FLAGS := -O2 -g4 --source-map-base http://localhost:8080/
else
OPT_FLAGS := -O2
endif
endif
# File dependencies and variables for specific files # File dependencies and variables for specific files
include Makefile.split include Makefile.split
# Source code files # Source code files
LEVEL_C_FILES := $(wildcard levels/*/leveldata.c) $(wildcard levels/*/script.c) $(wildcard levels/*/geo.c) LEVEL_C_FILES := $(wildcard levels/*/leveldata.c) $(wildcard levels/*/script.c) $(wildcard levels/*/geo.c)
C_FILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.c)) $(LEVEL_C_FILES) C_FILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.c)) $(LEVEL_C_FILES)
CXX_FILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.cpp))
S_FILES := $(foreach dir,$(ASM_DIRS),$(wildcard $(dir)/*.s)) S_FILES := $(foreach dir,$(ASM_DIRS),$(wildcard $(dir)/*.s))
ULTRA_C_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.c)) ULTRA_C_FILES := $(foreach dir,$(ULTRA_SRC_DIRS),$(wildcard $(dir)/*.c))
GODDARD_C_FILES := $(foreach dir,$(GODDARD_SRC_DIRS),$(wildcard $(dir)/*.c)) GODDARD_C_FILES := $(foreach dir,$(GODDARD_SRC_DIRS),$(wildcard $(dir)/*.c))
ifeq ($(TARGET_N64),1)
ULTRA_S_FILES := $(foreach dir,$(ULTRA_ASM_DIRS),$(wildcard $(dir)/*.s)) ULTRA_S_FILES := $(foreach dir,$(ULTRA_ASM_DIRS),$(wildcard $(dir)/*.s))
GENERATED_C_FILES := $(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c endif
GENERATED_C_FILES := $(BUILD_DIR)/assets/mario_anim_data.c $(BUILD_DIR)/assets/demo_data.c \
$(addprefix $(BUILD_DIR)/bin/,$(addsuffix _skybox.c,$(notdir $(basename $(wildcard textures/skyboxes/*.png)))))
ifeq ($(TARGET_WINDOWS),0)
CXX_FILES :=
endif
ifneq ($(TARGET_N64),1)
ULTRA_C_FILES := \
alBnkfNew.c \
guLookAtRef.c \
guMtxF2L.c \
guNormalize.c \
guOrthoF.c \
guPerspectiveF.c \
guRotateF.c \
guScaleF.c \
guTranslateF.c
C_FILES := $(filter-out src/game/main.c,$(C_FILES))
ULTRA_C_FILES := $(addprefix lib/src/,$(ULTRA_C_FILES))
endif
ifeq ($(VERSION),sh) ifeq ($(VERSION),sh)
SOUND_BANK_FILES := $(wildcard sound/sound_banks/*.json) SOUND_BANK_FILES := $(wildcard sound/sound_banks/*.json)
@ -211,6 +325,7 @@ SOUND_OBJ_FILES := $(SOUND_BIN_DIR)/sound_data.o
# Object files # Object files
O_FILES := $(foreach file,$(C_FILES),$(BUILD_DIR)/$(file:.c=.o)) \ O_FILES := $(foreach file,$(C_FILES),$(BUILD_DIR)/$(file:.c=.o)) \
$(foreach file,$(CXX_FILES),$(BUILD_DIR)/$(file:.cpp=.o)) \
$(foreach file,$(S_FILES),$(BUILD_DIR)/$(file:.s=.o)) \ $(foreach file,$(S_FILES),$(BUILD_DIR)/$(file:.s=.o)) \
$(foreach file,$(GENERATED_C_FILES),$(file:.c=.o)) $(foreach file,$(GENERATED_C_FILES),$(file:.c=.o))
@ -233,6 +348,10 @@ endif
SEG_FILES := $(SEGMENT_ELF_FILES) $(ACTOR_ELF_FILES) $(LEVEL_ELF_FILES) SEG_FILES := $(SEGMENT_ELF_FILES) $(ACTOR_ELF_FILES) $(LEVEL_ELF_FILES)
##################### Compiler Options ####################### ##################### Compiler Options #######################
INCLUDE_CFLAGS := -I include -I $(BUILD_DIR) -I $(BUILD_DIR)/include -I src -I .
ENDIAN_BITWIDTH := $(BUILD_DIR)/endian-and-bitwidth
ifeq ($(TARGET_N64),1)
IRIX_ROOT := tools/ido5.3_compiler IRIX_ROOT := tools/ido5.3_compiler
ifeq ($(shell type mips-linux-gnu-ld >/dev/null 2>/dev/null; echo $$?), 0) ifeq ($(shell type mips-linux-gnu-ld >/dev/null 2>/dev/null; echo $$?), 0)
@ -302,6 +421,78 @@ endif
# Prevent a crash with -sopt # Prevent a crash with -sopt
export LANG := C export LANG := C
else # TARGET_N64
AS := as
ifneq ($(TARGET_WEB),1)
CC := gcc
CXX := g++
else
CC := emcc
endif
ifeq ($(TARGET_WINDOWS),1)
LD := $(CXX)
else
LD := $(CC)
endif
CPP := cpp -P
OBJDUMP := objdump
OBJCOPY := objcopy
PYTHON := python3
# Platform-specific compiler and linker flags
ifeq ($(TARGET_WINDOWS),1)
PLATFORM_CFLAGS := -DTARGET_WINDOWS
PLATFORM_LDFLAGS := -lm -lxinput9_1_0 -lole32 -no-pie -mwindows
endif
ifeq ($(TARGET_LINUX),1)
PLATFORM_CFLAGS := -DTARGET_LINUX `pkg-config --cflags libusb-1.0`
PLATFORM_LDFLAGS := -lm -lpthread `pkg-config --libs libusb-1.0` -lasound -lpulse -no-pie
endif
ifeq ($(TARGET_WEB),1)
PLATFORM_CFLAGS := -DTARGET_WEB
PLATFORM_LDFLAGS := -lm -no-pie -s TOTAL_MEMORY=20MB -g4 --source-map-base http://localhost:8080/ -s "EXTRA_EXPORTED_RUNTIME_METHODS=['callMain']"
endif
PLATFORM_CFLAGS += -DNO_SEGMENTED_MEMORY
# Compiler and linker flags for graphics backend
ifeq ($(ENABLE_OPENGL),1)
GFX_CFLAGS := -DENABLE_OPENGL
GFX_LDFLAGS :=
ifeq ($(TARGET_WINDOWS),1)
GFX_CFLAGS += $(shell sdl2-config --cflags) -DGLEW_STATIC
GFX_LDFLAGS += $(shell sdl2-config --libs) -lglew32 -lopengl32 -lwinmm -limm32 -lversion -loleaut32 -lsetupapi
endif
ifeq ($(TARGET_LINUX),1)
GFX_CFLAGS += $(shell sdl2-config --cflags)
GFX_LDFLAGS += -lGL $(shell sdl2-config --libs) -lX11 -lXrandr
endif
ifeq ($(TARGET_WEB),1)
GFX_CFLAGS += -s USE_SDL=2
GFX_LDFLAGS += -lGL -lSDL2
endif
endif
ifeq ($(ENABLE_DX11),1)
GFX_CFLAGS := -DENABLE_DX11
PLATFORM_LDFLAGS += -lgdi32 -static
endif
ifeq ($(ENABLE_DX12),1)
GFX_CFLAGS := -DENABLE_DX12
PLATFORM_LDFLAGS += -lgdi32 -static
endif
GFX_CFLAGS += -DWIDESCREEN
CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security -D_LANGUAGE_C $(VERSION_CFLAGS) $(MATCH_CFLAGS) $(PLATFORM_CFLAGS) $(GFX_CFLAGS) $(GRUCODE_CFLAGS)
CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) -D_LANGUAGE_C $(VERSION_CFLAGS) $(MATCH_CFLAGS) $(PLATFORM_CFLAGS) $(GFX_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv -march=native
ASFLAGS := -I include -I $(BUILD_DIR) $(VERSION_ASFLAGS)
LDFLAGS := $(PLATFORM_LDFLAGS) $(GFX_LDFLAGS)
endif
####################### Other Tools ######################### ####################### Other Tools #########################
# N64 tools # N64 tools
@ -334,10 +525,14 @@ endif
######################## Targets ############################# ######################## Targets #############################
ifeq ($(TARGET_N64),1)
all: $(ROM) all: $(ROM)
ifeq ($(COMPARE),1) ifeq ($(COMPARE),1)
@$(SHA1SUM) -c $(TARGET).sha1 || (echo 'The build succeeded, but did not match the official ROM. This is expected if you are making changes to the game.\nTo silence this message, use "make COMPARE=0"'. && false) @$(SHA1SUM) -c $(TARGET).sha1 || (echo 'The build succeeded, but did not match the official ROM. This is expected if you are making changes to the game.\nTo silence this message, use "make COMPARE=0"'. && false)
endif endif
else
all: $(EXE)
endif
clean: clean:
$(RM) -r $(BUILD_DIR_BASE) $(RM) -r $(BUILD_DIR_BASE)
@ -437,6 +632,7 @@ $(BUILD_DIR)/%.ci4: %.ci4.png
# compressed segment generation # compressed segment generation
ifeq ($(TARGET_N64),1)
# TODO: ideally this would be `-Trodata-segment=0x07000000` but that doesn't set the address # TODO: ideally this would be `-Trodata-segment=0x07000000` but that doesn't set the address
$(BUILD_DIR)/bin/%.elf: $(BUILD_DIR)/bin/%.o $(BUILD_DIR)/bin/%.elf: $(BUILD_DIR)/bin/%.o
@ -466,6 +662,7 @@ $(BUILD_DIR)/%.mio0.o: $(BUILD_DIR)/%.mio0.s
$(BUILD_DIR)/%.mio0.s: $(BUILD_DIR)/%.mio0 $(BUILD_DIR)/%.mio0.s: $(BUILD_DIR)/%.mio0
printf ".section .data\n\n.incbin \"$<\"\n" > $@ printf ".section .data\n\n.incbin \"$<\"\n" > $@
endif
$(BUILD_DIR)/%.table: %.aiff $(BUILD_DIR)/%.table: %.aiff
$(AIFF_EXTRACT_CODEBOOK) $< >$@ $(AIFF_EXTRACT_CODEBOOK) $< >$@
@ -583,6 +780,10 @@ $(GLOBAL_ASM_DEP).$(NON_MATCHING):
@rm -f $(GLOBAL_ASM_DEP).* @rm -f $(GLOBAL_ASM_DEP).*
touch $@ touch $@
$(BUILD_DIR)/%.o: %.cpp
@$(CXX) -fsyntax-only $(CFLAGS) -MMD -MP -MT $@ -MF $(BUILD_DIR)/$*.d $<
$(CXX) -c $(CFLAGS) -o $@ $<
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/%.o: %.c
@$(CC_CHECK) $(CC_CHECK_CFLAGS) -MMD -MP -MT $@ -MF $(BUILD_DIR)/$*.d $< @$(CC_CHECK) $(CC_CHECK_CFLAGS) -MMD -MP -MT $@ -MF $(BUILD_DIR)/$*.d $<
$(CC) -c $(CFLAGS) -o $@ $< $(CC) -c $(CFLAGS) -o $@ $<
@ -595,6 +796,7 @@ $(BUILD_DIR)/%.o: $(BUILD_DIR)/%.c
$(BUILD_DIR)/%.o: %.s $(BUILD_DIR)/%.o: %.s
$(AS) $(ASFLAGS) -MD $(BUILD_DIR)/$*.d -o $@ $< $(AS) $(ASFLAGS) -MD $(BUILD_DIR)/$*.d -o $@ $<
ifeq ($(TARGET_N64),1)
$(BUILD_DIR)/$(LD_SCRIPT): $(LD_SCRIPT) $(BUILD_DIR)/$(LD_SCRIPT): $(LD_SCRIPT)
$(CPP) $(VERSION_CFLAGS) -MMD -MP -MT $@ -MF $@.d -I include/ -I . -DBUILD_DIR=$(BUILD_DIR) -o $@ $< $(CPP) $(VERSION_CFLAGS) -MMD -MP -MT $@ -MF $@.d -I include/ -I . -DBUILD_DIR=$(BUILD_DIR) -o $@ $<
@ -615,6 +817,11 @@ $(ROM): $(ELF)
$(BUILD_DIR)/$(TARGET).objdump: $(ELF) $(BUILD_DIR)/$(TARGET).objdump: $(ELF)
$(OBJDUMP) -D $< > $@ $(OBJDUMP) -D $< > $@
else
$(EXE): $(O_FILES) $(MIO0_FILES:.mio0=.o) $(SOUND_OBJ_FILES) $(ULTRA_O_FILES) $(GODDARD_O_FILES)
$(LD) -L $(BUILD_DIR) -o $@ $(O_FILES) $(SOUND_OBJ_FILES) $(ULTRA_O_FILES) $(GODDARD_O_FILES) $(LDFLAGS)
endif
.PHONY: all clean distclean default diff test load libultra .PHONY: all clean distclean default diff test load libultra

View file

@ -1,28 +1,49 @@
# Super Mario 64 # Super Mario 64 Port
- This repo contains a full decompilation of Super Mario 64 (J), (U), and (E) with minor exceptions in the audio subsystem. - This repo contains a full decompilation of Super Mario 64 (J), (U), and (E) with minor exceptions in the audio subsystem.
- Naming and documentation of the source code and data structures are in progress. - Naming and documentation of the source code and data structures are in progress.
- Efforts to decompile the Shindou ROM steadily advance toward a matching build. - Efforts to decompile the Shindou ROM steadily advance toward a matching build.
- Beyond Nintendo 64, it can also target Linux and Windows natively.
It builds the following ROMs: This repo does not include all assets necessary for compiling the game.
* sm64.jp.z64 `sha1: 8a20a5c83d6ceb0f0506cfc9fa20d8f438cafe51`
* sm64.us.z64 `sha1: 9bef1128717f958171a4afac3ed78ee2bb4e86ce`
* sm64.eu.z64 `sha1: 4ac5721683d0e0b6bbb561b58a71740845dceea9`
This repo does not include all assets necessary for compiling the ROMs.
A prior copy of the game is required to extract the assets. A prior copy of the game is required to extract the assets.
## Quick Start (for Ubuntu) ## Building native executables
1. Install prerequisites: `sudo apt install -y build-essential git binutils-mips-linux-gnu python3 libaudiofile-dev` ### Linux
2. Clone the repo from within Linux: `git clone https://github.com/n64decomp/sm64.git`
1. Install prerequisites (Ubuntu): `sudo apt install -y git build-essential pkg-config libusb-1.0-0-dev libsdl2-dev`.
2. Clone the repo: `git clone https://github.com/sm64-port/sm64-port.git`.
3. Place a Super Mario 64 ROM called `baserom.<VERSION>.z64` into the project folder for asset extraction, where `VERSION` can be `us`, `jp`, or `eu`. 3. Place a Super Mario 64 ROM called `baserom.<VERSION>.z64` into the project folder for asset extraction, where `VERSION` can be `us`, `jp`, or `eu`.
4. Run `make` to build. Qualify the version through `make VERSION=<VERSION>`. Add `-j4` to improve build speed (hardware dependent). 4. Run `make` to build. Qualify the version through `make VERSION=<VERSION>`. Add `-j4` to improve build speed (hardware dependent; based on the number of CPU cores available).
5. The executable binary will be located at `build/<VERSION>_pc/sm64.<VERSION>.f3dex2e`.
### Windows
1. Install and update MSYS2, following the directions listed on https://www.msys2.org/.
2. Launch MSYS2 MinGW and install required packages depending on your machine (do **NOT** launch "MSYS2 MSYS"):
a. 64-bit: Launch "MSYS2 MinGW 64-bit" and install: `pacman -S git make python3 mingw-w64-x86_64-gcc`
b. 32-bit (will also work on 64-bit machines): Launch "MSYS2 MinGW 32-bit" and install: `pacman -S git make python3 mingw-w64-i686-gcc`
* Do **NOT** install `gcc`.
3. Clone the repo: `git clone https://github.com/sm64-port/sm64-port.git`, then enter it: `cd sm64-port`.
4. Place a *Super Mario 64* ROM called `baserom.<VERSION>.z64` into the project folder for asset extraction, where `VERSION` can be `us`, `jp`, or `eu`.
5. Run `make` to build. Qualify the version through `make VERSION=<VERSION>`. Add `-j4` to improve build speed (hardware dependent; based on the number of CPU cores available).
6. The executable binary will be located at `build/<VERSION>_pc/sm64.<VERSION>.f3dex2e.exe`.
### Debugging
The code can be debugged using `gdb`. On Linux, install the `gdb` package and run `gdb <executable>`. On MSYS2, install it by executing `pacman -S winpty gdb` and run `winpty gdb <executable>`; the `winpty` program makes sure the keyboard works correctly in the terminal. To include debugging symbols, make sure the Makefile compiles the sources using `-g` rather than `-O2`. See any online tutorial for how to use gdb.
## Quick Start ROM building (for Ubuntu)
1. Install prerequisites: `sudo apt install -y build-essential git binutils-mips-linux-gnu python3`.
2. Clone the repo from within Linux: `git clone https://github.com/n64decomp/sm64.git`.
3. Place a Super Mario 64 ROM called `baserom.<VERSION>.z64` into the project folder for asset extraction, where `VERSION` can be `us`, `jp`, or `eu`.
4. Run `make TARGET_N64=1` to build the ROM (plain `make` builds the native port, because `TARGET_N64` defaults to `0`). Qualify the version through `make TARGET_N64=1 VERSION=<VERSION>`. Add `-j4` to improve build speed (hardware dependent; based on the number of CPU cores available).
Ensure the repo path length does not exceed 255 characters. Long path names result in build errors. Ensure the repo path length does not exceed 255 characters. Long path names result in build errors.
## Installation ## Installation for ROM building
### Windows ### Windows
@ -43,17 +64,16 @@ There are 3 steps to set up a working build.
#### Step 1: Install dependencies #### Step 1: Install dependencies
The build system has the following package requirements: The build system has the following package requirements:
* binutils-mips * ``binutils-mips``
* python3 >= 3.6 * ``python3 >= 3.6``
* libaudiofile * ``qemu-irix`` (When building without GCC)
* qemu-irix
Dependency installation instructions for common Linux distros are provided below: Dependency installation instructions for common Linux distros are provided below:
##### Debian / Ubuntu ##### Debian / Ubuntu
To install build dependencies: To install build dependencies:
``` ```
sudo apt install -y build-essential git binutils-mips-linux-gnu python3 libaudiofile-dev sudo apt install -y build-essential git binutils-mips-linux-gnu python3
``` ```
Download latest package from [qemu-irix Releases.](https://github.com/n64decomp/qemu-irix/releases) Download latest package from [qemu-irix Releases.](https://github.com/n64decomp/qemu-irix/releases)
@ -66,7 +86,7 @@ sudo dpkg -i qemu-irix-2.11.0-2169-g32ab296eef_amd64.deb
##### Arch Linux ##### Arch Linux
To install build dependencies: To install build dependencies:
``` ```
sudo pacman -S base-devel python audiofile sudo pacman -S base-devel python
``` ```
Install the following AUR packages: Install the following AUR packages:
* [mips64-elf-binutils](https://aur.archlinux.org/packages/mips64-elf-binutils) (AUR) * [mips64-elf-binutils](https://aur.archlinux.org/packages/mips64-elf-binutils) (AUR)
@ -109,6 +129,12 @@ The full list of configurable variables are listed below, with the default being
* ``NON_MATCHING``: Use functionally equivalent C implementations for non-matchings. Also will avoid instances of undefined behavior. * ``NON_MATCHING``: Use functionally equivalent C implementations for non-matchings. Also will avoid instances of undefined behavior.
* ``CROSS``: Cross-compiler tool prefix (Example: ``mips64-elf-``). * ``CROSS``: Cross-compiler tool prefix (Example: ``mips64-elf-``).
* ``QEMU_IRIX``: Path to a ``qemu-irix`` binary. * ``QEMU_IRIX``: Path to a ``qemu-irix`` binary.
* ``TARGET_N64``: ``0`` If set to one, will build an N64 ROM. An unmodified repository will produce one of the following ROMs depending on what ``VERSION`` is set to:
* sm64.jp.z64 `sha1: 8a20a5c83d6ceb0f0506cfc9fa20d8f438cafe51`
* sm64.us.z64 `sha1: 9bef1128717f958171a4afac3ed78ee2bb4e86ce`
* sm64.eu.z64 `sha1: 4ac5721683d0e0b6bbb561b58a71740845dceea9`
### macOS ### macOS
@ -163,7 +189,8 @@ sm64
│ ├── engine: script processing engines and utils │ ├── engine: script processing engines and utils
│ ├── game: behaviors and rest of game source │ ├── game: behaviors and rest of game source
│ ├── goddard: Mario intro screen │ ├── goddard: Mario intro screen
│ └── menu: title screen and file, act, and debug level selection menus │ ├── menu: title screen and file, act, and debug level selection menus
│ └── pc: port code, audio and video renderer
├── text: dialog, level names, act names ├── text: dialog, level names, act names
├── textures: skybox and generic texture data ├── textures: skybox and generic texture data
└── tools: build tools └── tools: build tools
@ -176,4 +203,4 @@ discuss what you would like to change.
Run `clang-format` on your code to ensure it meets the project's coding standards. Run `clang-format` on your code to ensure it meets the project's coding standards.
Official Discord: https://discord.gg/DuYH3Fh Official Discord: https://discord.gg/7bcNTPK

View file

@ -14,8 +14,25 @@ typedef signed short int s16;
typedef unsigned short int u16; typedef unsigned short int u16;
typedef signed int s32; typedef signed int s32;
typedef unsigned int u32; typedef unsigned int u32;
typedef float f32;
typedef double f64;
#ifdef TARGET_N64
typedef signed long long int s64; typedef signed long long int s64;
typedef unsigned long long int u64; typedef unsigned long long int u64;
typedef u32 size_t;
typedef s32 ssize_t;
typedef u32 uintptr_t;
typedef s32 intptr_t;
typedef s32 ptrdiff_t;
#else
#include <stddef.h>
#include <stdint.h>
typedef ptrdiff_t ssize_t;
typedef int64_t s64;
typedef uint64_t u64;
#endif
typedef volatile u8 vu8; typedef volatile u8 vu8;
typedef volatile u16 vu16; typedef volatile u16 vu16;
@ -26,19 +43,4 @@ typedef volatile s16 vs16;
typedef volatile s32 vs32; typedef volatile s32 vs32;
typedef volatile s64 vs64; typedef volatile s64 vs64;
typedef float f32;
typedef double f64;
#ifdef TARGET_N64
typedef u32 size_t;
typedef s32 ssize_t;
typedef u32 uintptr_t;
typedef s32 intptr_t;
typedef s32 ptrdiff_t;
#else
#include <stddef.h>
#include <stdint.h>
typedef ptrdiff_t ssize_t;
#endif
#endif #endif

View file

@ -24,7 +24,15 @@ Note that RECT commands must be enhanced to support negative coordinates with th
*/ */
#ifdef WIDESCREEN #ifdef WIDESCREEN
#error "widescreen not implemented"
#include <math.h>
#include "pc/gfx/gfx_pc.h"
#define GFX_DIMENSIONS_FROM_LEFT_EDGE(v) (SCREEN_WIDTH / 2 - SCREEN_HEIGHT / 2 * gfx_current_dimensions.aspect_ratio + (v))
#define GFX_DIMENSIONS_FROM_RIGHT_EDGE(v) (SCREEN_WIDTH / 2 + SCREEN_HEIGHT / 2 * gfx_current_dimensions.aspect_ratio - (v))
#define GFX_DIMENSIONS_RECT_FROM_LEFT_EDGE(v) ((int)floorf(GFX_DIMENSIONS_FROM_LEFT_EDGE(v)))
#define GFX_DIMENSIONS_RECT_FROM_RIGHT_EDGE(v) ((int)ceilf(GFX_DIMENSIONS_FROM_RIGHT_EDGE(v)))
#define GFX_DIMENSIONS_ASPECT_RATIO (gfx_current_dimensions.aspect_ratio)
#else #else
#define GFX_DIMENSIONS_FROM_LEFT_EDGE(v) (v) #define GFX_DIMENSIONS_FROM_LEFT_EDGE(v) (v)

View file

@ -762,6 +762,7 @@ void func_eu_802e9bec(s32 player, s32 channel, s32 arg2) {
#else #else
#ifdef TARGET_N64
struct SPTask *create_next_audio_frame_task(void) { struct SPTask *create_next_audio_frame_task(void) {
u32 samplesRemainingInAI; u32 samplesRemainingInAI;
s32 writtenCmds; s32 writtenCmds;
@ -864,6 +865,22 @@ struct SPTask *create_next_audio_frame_task(void) {
decrease_sample_dma_ttls(); decrease_sample_dma_ttls();
return gAudioTask; return gAudioTask;
} }
#else
// Non-N64 build of create_next_audio_frame_task: no audio task is created
// here, so the function always returns NULL. The same #else branch defines
// create_next_audio_buffer, which is presumably what PC code calls instead.
struct SPTask *create_next_audio_frame_task(void) {
return NULL;
}
/**
 * Non-N64 audio step: advances the audio frame counter, runs the pending
 * game-sound update if the game loop has ticked since the last call, then
 * asks synthesis_execute to produce `num_samples` into `samples`.
 */
void create_next_audio_buffer(s16 *samples, u32 num_samples) {
    s32 numCmds;

    gAudioFrameCount += 1;

    // Consume the "game loop ticked" flag at most once per tick.
    if (sGameLoopTicked) {
        update_game_sound();
        sGameLoopTicked = 0;
    }

    synthesis_execute(gAudioCmdBuffers[0], &numCmds, samples, num_samples);
    gAudioRandom = (gAudioRandom + gAudioFrameCount) * gAudioFrameCount;
    decrease_sample_dma_ttls();
}
#endif
#endif #endif
void play_sound(s32 soundBits, f32 *pos) { void play_sound(s32 soundBits, f32 *pos) {

View file

@ -648,12 +648,14 @@ s32 audio_shut_down_and_reset_step(void) {
/** /**
* Waits until a specified number of audio frames have been created * Waits until a specified number of audio frames have been created
*/ */
void wait_for_audio_frames(s32 frames) { void wait_for_audio_frames(UNUSED s32 frames) {
gAudioFrameCount = 0; gAudioFrameCount = 0;
#ifdef TARGET_N64
// Sound thread will update gAudioFrameCount // Sound thread will update gAudioFrameCount
while (gAudioFrameCount < frames) { while (gAudioFrameCount < frames) {
// spin // spin
} }
#endif
} }
#endif #endif

View file

@ -876,13 +876,13 @@ void audio_init() {
UNUSED s8 pad[32]; UNUSED s8 pad[32];
u8 buf[0x10]; u8 buf[0x10];
#endif #endif
s32 i, j, k; s32 i, j, UNUSED k;
UNUSED s32 lim1; // lim1 unused in EU UNUSED s32 lim1; // lim1 unused in EU
#ifdef VERSION_EU #ifdef VERSION_EU
u8 buf[0x10]; u8 buf[0x10];
s32 UNUSED lim2, lim3; s32 UNUSED lim2, lim3;
#else #else
s32 lim2, lim3; s32 lim2, UNUSED lim3;
#endif #endif
u32 size; u32 size;
UNUSED u64 *ptr64; UNUSED u64 *ptr64;

View file

@ -34,6 +34,7 @@ void decrease_sample_dma_ttls(void);
s32 audio_shut_down_and_reset_step(void); s32 audio_shut_down_and_reset_step(void);
void func_802ad7ec(u32); void func_802ad7ec(u32);
#ifdef TARGET_N64
struct SPTask *create_next_audio_frame_task(void) { struct SPTask *create_next_audio_frame_task(void) {
u32 samplesRemainingInAI; u32 samplesRemainingInAI;
s32 writtenCmds; s32 writtenCmds;
@ -130,6 +131,32 @@ struct SPTask *create_next_audio_frame_task(void) {
task->yield_data_size = 0; task->yield_data_size = 0;
return gAudioTask; return gAudioTask;
} }
#else
// Non-N64 build of create_next_audio_frame_task: no audio task is created
// here, so the function always returns NULL. The same #else branch defines
// create_next_audio_buffer, which is presumably what PC code calls instead.
struct SPTask *create_next_audio_frame_task(void) {
return NULL;
}
/**
 * Non-N64 audio step for this audio implementation: bumps the frame counter,
 * handles a pending reset request and one pending queued message, then calls
 * synthesis_execute to produce `num_samples` into `samples`.
 */
void create_next_audio_buffer(s16 *samples, u32 num_samples) {
s32 writtenCmds;
OSMesg msg;
gAudioFrameCount++;
decrease_sample_dma_ttls();
// Poll queue 2; a received message carries the preset id to load and arms a reset.
// NOTE(review): the flag here is a literal 0 while the poll below uses
// OS_MESG_NOBLOCK - presumably the same value; confirm.
if (osRecvMesg(OSMesgQueues[2], &msg, 0) != -1) {
gAudioResetPresetIdToLoad = (u8) (s32) msg;
gAudioResetStatus = 5;
}
// Any pending reset is performed in full right here and then cleared.
if (gAudioResetStatus != 0) {
audio_reset_session();
gAudioResetStatus = 0;
}
// Poll queue 1 without blocking and forward the message value to func_802ad7ec.
if (osRecvMesg(OSMesgQueues[1], &msg, OS_MESG_NOBLOCK) != -1) {
func_802ad7ec((u32) msg);
}
synthesis_execute(gAudioCmdBuffers[0], &writtenCmds, samples, num_samples);
// Stir the audio RNG with the frame counter and the number of commands written.
gAudioRandom = ((gAudioRandom + gAudioFrameCount) * gAudioFrameCount);
gAudioRandom = gAudioRandom + writtenCmds / 8;
}
#endif
void eu_process_audio_cmd(struct EuAudioCmd *cmd) { void eu_process_audio_cmd(struct EuAudioCmd *cmd) {
s32 i; s32 i;

View file

@ -7,6 +7,9 @@
#include "seqplayer.h" #include "seqplayer.h"
#include "external.h" #include "external.h"
#ifndef TARGET_N64
#include "../pc/mixer.h"
#endif
#define DMEM_ADDR_TEMP 0x0 #define DMEM_ADDR_TEMP 0x0
#define DMEM_ADDR_UNCOMPRESSED_NOTE 0x180 #define DMEM_ADDR_UNCOMPRESSED_NOTE 0x180

View file

@ -23,7 +23,7 @@ ALIGNED8 u8 gGfxSPTaskYieldBuffer[OS_YIELD_DATA_SIZE];
// 0x200 bytes // 0x200 bytes
ALIGNED8 struct SaveBuffer gSaveBuffer; ALIGNED8 struct SaveBuffer gSaveBuffer;
// 0x190a0 bytes // 0x190a0 bytes
struct GfxPool gGfxPools[2]; struct GfxPool gGfxPools[GFX_NUM_POOLS];
// Yield buffer for audio, 0x400 bytes. Stubbed out post-JP since the audio // Yield buffer for audio, 0x400 bytes. Stubbed out post-JP since the audio
@ -37,8 +37,3 @@ ALIGNED8 u8 gAudioSPTaskYieldBuffer[OS_YIELD_AUDIO_SIZE];
#if !defined(F3DEX_GBI_SHARED) && !defined(VERSION_EU) #if !defined(F3DEX_GBI_SHARED) && !defined(VERSION_EU)
ALIGNED8 u8 gUnusedThread2Stack[0x1400]; ALIGNED8 u8 gUnusedThread2Stack[0x1400];
#endif #endif

View file

@ -28,6 +28,11 @@ extern struct SaveBuffer gSaveBuffer;
extern u8 gGfxSPTaskStack[]; extern u8 gGfxSPTaskStack[];
extern struct GfxPool gGfxPools[2]; #ifdef TARGET_N64
#define GFX_NUM_POOLS 2
#else
#define GFX_NUM_POOLS 1
#endif
extern struct GfxPool gGfxPools[GFX_NUM_POOLS];
#endif // BUFFERS_H #endif // BUFFERS_H

View file

@ -221,11 +221,13 @@ void create_task_structure(void) {
gGfxSPTask->msgqueue = &D_80339CB8; gGfxSPTask->msgqueue = &D_80339CB8;
gGfxSPTask->msg = (OSMesg) 2; gGfxSPTask->msg = (OSMesg) 2;
gGfxSPTask->task.t.type = M_GFXTASK; gGfxSPTask->task.t.type = M_GFXTASK;
#if TARGET_N64
gGfxSPTask->task.t.ucode_boot = rspF3DBootStart; gGfxSPTask->task.t.ucode_boot = rspF3DBootStart;
gGfxSPTask->task.t.ucode_boot_size = ((u8 *) rspF3DBootEnd - (u8 *) rspF3DBootStart); gGfxSPTask->task.t.ucode_boot_size = ((u8 *) rspF3DBootEnd - (u8 *) rspF3DBootStart);
gGfxSPTask->task.t.flags = 0; gGfxSPTask->task.t.flags = 0;
gGfxSPTask->task.t.ucode = rspF3DStart; gGfxSPTask->task.t.ucode = rspF3DStart;
gGfxSPTask->task.t.ucode_data = rspF3DDataStart; gGfxSPTask->task.t.ucode_data = rspF3DDataStart;
#endif
gGfxSPTask->task.t.ucode_size = SP_UCODE_SIZE; // (this size is ignored) gGfxSPTask->task.t.ucode_size = SP_UCODE_SIZE; // (this size is ignored)
gGfxSPTask->task.t.ucode_data_size = SP_UCODE_DATA_SIZE; gGfxSPTask->task.t.ucode_data_size = SP_UCODE_DATA_SIZE;
gGfxSPTask->task.t.dram_stack = (u64 *) gGfxSPTaskStack; gGfxSPTask->task.t.dram_stack = (u64 *) gGfxSPTaskStack;
@ -305,7 +307,7 @@ void rendering_init(void) {
} }
void config_gfx_pool(void) { void config_gfx_pool(void) {
gGfxPool = &gGfxPools[gGlobalTimer % 2]; gGfxPool = &gGfxPools[gGlobalTimer % GFX_NUM_POOLS];
set_segment_base_addr(1, gGfxPool->buffer); set_segment_base_addr(1, gGfxPool->buffer);
gGfxSPTask = &gGfxPool->spTask; gGfxSPTask = &gGfxPool->spTask;
gDisplayListHead = gGfxPool->buffer; gDisplayListHead = gGfxPool->buffer;
@ -579,10 +581,16 @@ void setup_game_memory(void) {
load_segment_decompress(2, _segment2_mio0SegmentRomStart, _segment2_mio0SegmentRomEnd); load_segment_decompress(2, _segment2_mio0SegmentRomStart, _segment2_mio0SegmentRomEnd);
} }
#ifndef TARGET_N64
static struct LevelCommand *levelCommandAddr;
#endif
// main game loop thread. runs forever as long as the game // main game loop thread. runs forever as long as the game
// continues. // continues.
void thread5_game_loop(UNUSED void *arg) { void thread5_game_loop(UNUSED void *arg) {
struct LevelCommand *addr; #ifdef TARGET_N64
struct LevelCommand *levelCommandAddr;
#endif
setup_game_memory(); setup_game_memory();
#ifdef VERSION_SH #ifdef VERSION_SH
@ -596,18 +604,30 @@ void thread5_game_loop(UNUSED void *arg) {
set_vblank_handler(2, &gGameVblankHandler, &gGameVblankQueue, (OSMesg) 1); set_vblank_handler(2, &gGameVblankHandler, &gGameVblankQueue, (OSMesg) 1);
// point addr to the entry point into the level script data. // point levelCommandAddr to the entry point into the level script data.
addr = segmented_to_virtual(level_script_entry); levelCommandAddr = segmented_to_virtual(level_script_entry);
play_music(SEQ_PLAYER_SFX, SEQUENCE_ARGS(0, SEQ_SOUND_PLAYER), 0); play_music(SEQ_PLAYER_SFX, SEQUENCE_ARGS(0, SEQ_SOUND_PLAYER), 0);
set_sound_mode(save_file_get_sound_mode()); set_sound_mode(save_file_get_sound_mode());
#ifdef TARGET_N64
rendering_init(); rendering_init();
while (1) { while (1) {
#else
gGlobalTimer++;
}
void game_loop_one_iteration(void) {
#endif
// if the reset timer is active, run the process to reset the game. // if the reset timer is active, run the process to reset the game.
if (gResetTimer) { if (gResetTimer) {
draw_reset_bars(); draw_reset_bars();
#ifdef TARGET_N64
continue; continue;
#else
return;
#endif
} }
profiler_log_thread5_time(THREAD5_START); profiler_log_thread5_time(THREAD5_START);
@ -623,7 +643,7 @@ void thread5_game_loop(UNUSED void *arg) {
audio_game_loop_tick(); audio_game_loop_tick();
config_gfx_pool(); config_gfx_pool();
read_controller_inputs(); read_controller_inputs();
addr = level_script_execute(addr); levelCommandAddr = level_script_execute(levelCommandAddr);
display_and_vsync(); display_and_vsync();
// when debug info is enabled, print the "BUF %d" information. // when debug info is enabled, print the "BUF %d" information.
@ -632,5 +652,7 @@ void thread5_game_loop(UNUSED void *arg) {
// amount of free space remaining. // amount of free space remaining.
print_text_fmt_int(180, 20, "BUF %d", gGfxPoolEnd - (u8 *) gDisplayListHead); print_text_fmt_int(180, 20, "BUF %d", gGfxPoolEnd - (u8 *) gDisplayListHead);
} }
#ifdef TARGET_N64
} }
#endif
} }

View file

@ -1,4 +1,7 @@
#include <PR/ultratypes.h> #include <PR/ultratypes.h>
#ifndef TARGET_N64
#include <string.h>
#endif
#include "sm64.h" #include "sm64.h"
@ -98,7 +101,7 @@ void *segmented_to_virtual(const void *addr) {
return (void *) addr; return (void *) addr;
} }
void *virtual_to_segmented(u32 segment, const void *addr) { void *virtual_to_segmented(UNUSED u32 segment, const void *addr) {
return (void *) addr; return (void *) addr;
} }
@ -242,8 +245,8 @@ u32 main_pool_pop_state(void) {
* function blocks until completion. * function blocks until completion.
*/ */
static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) { static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
#ifdef TARGET_N64
u32 size = ALIGN16(srcEnd - srcStart); u32 size = ALIGN16(srcEnd - srcStart);
osInvalDCache(dest, size); osInvalDCache(dest, size);
while (size != 0) { while (size != 0) {
u32 copySize = (size >= 0x1000) ? 0x1000 : size; u32 copySize = (size >= 0x1000) ? 0x1000 : size;
@ -256,6 +259,9 @@ static void dma_read(u8 *dest, u8 *srcStart, u8 *srcEnd) {
srcStart += copySize; srcStart += copySize;
size -= copySize; size -= copySize;
} }
#else
memcpy(dest, srcStart, srcEnd - srcStart);
#endif
} }
/** /**

View file

@ -2322,7 +2322,9 @@ void start_view_dl(struct ObjView *view) {
uly = lry - 1.0f; uly = lry - 1.0f;
} }
#ifdef TARGET_N64
gDPSetScissor(next_gfx(), G_SC_NON_INTERLACE, ulx, uly, lrx, lry); gDPSetScissor(next_gfx(), G_SC_NON_INTERLACE, ulx, uly, lrx, lry);
#endif
gSPClearGeometryMode(next_gfx(), 0xFFFFFFFF); gSPClearGeometryMode(next_gfx(), 0xFFFFFFFF);
gSPSetGeometryMode(next_gfx(), G_LIGHTING | G_CULL_BACK | G_SHADING_SMOOTH | G_SHADE); gSPSetGeometryMode(next_gfx(), G_LIGHTING | G_CULL_BACK | G_SHADING_SMOOTH | G_SHADE);
if (view->flags & VIEW_ALLOC_ZBUF) { if (view->flags & VIEW_ALLOC_ZBUF) {

168
src/pc/audio/audio_alsa.c Normal file
View file

@ -0,0 +1,168 @@
#if defined(__linux__) && !defined(TARGET_WEB)
/*
Simple sound playback using ALSA API and libasound.
Dependencies: libasound, alsa
Build-Dependencies: libasound-dev
Compile: gcc -lasound -o play sound_playback.c
Usage: ./play <sample_rate> <channels> < <file>
Examples:
./play 44100 2 5 < /dev/urandom
./play 22050 1 8 < /path/to/file.wav
Copyright (C) 2009 Alessandro Ghedini <al3xbio@gmail.com>
--------------------------------------------------------------
"THE BEER-WARE LICENSE" (Revision 42):
Alessandro Ghedini wrote this file. As long as you retain this
notice you can do whatever you want with this stuff. If we
meet some day, and you think this stuff is worth it, you can
buy me a beer in return.
--------------------------------------------------------------
*/
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/tcp.h>
#include <time.h>
#include <alsa/asoundlib.h>
#include <stdio.h>
#include "audio_api.h"
#define PCM_DEVICE "default"
static snd_pcm_t *pcm_handle;
static unsigned long int alsa_buffer_size;
/* Returns the current CLOCK_MONOTONIC reading converted to microseconds. */
static unsigned long get_time(void) {
    struct timespec now;
    unsigned long usec;

    clock_gettime(CLOCK_MONOTONIC, &now);

    usec = (unsigned long)now.tv_sec * 1000000;
    usec += now.tv_nsec / 1000;
    return usec;
}
static bool audio_alsa_init(void) {
int pcm;
unsigned int tmp;
unsigned int rate, channels;
snd_pcm_hw_params_t *params;
snd_pcm_uframes_t frames;
rate = 32000;
channels = 2;
/* Open the PCM device in playback mode */
if ((pcm = snd_pcm_open(&pcm_handle, PCM_DEVICE,
SND_PCM_STREAM_PLAYBACK, 0)) < 0) {
printf("ERROR: Can't open \"%s\" PCM device. %s\n",
PCM_DEVICE, snd_strerror(pcm));
return false;
}
/* Allocate parameters object and fill it with default values*/
snd_pcm_hw_params_alloca(&params);
snd_pcm_hw_params_any(pcm_handle, params);
/* Set parameters */
if ((pcm = snd_pcm_hw_params_set_access(pcm_handle, params,
SND_PCM_ACCESS_RW_INTERLEAVED)) < 0)
printf("ERROR: Can't set interleaved mode. %s\n", snd_strerror(pcm));
if ((pcm = snd_pcm_hw_params_set_format(pcm_handle, params,
SND_PCM_FORMAT_S16_LE)) < 0)
printf("ERROR: Can't set format. %s\n", snd_strerror(pcm));
if ((pcm = snd_pcm_hw_params_set_channels(pcm_handle, params, channels)) < 0)
printf("ERROR: Can't set channels number. %s\n", snd_strerror(pcm));
if ((pcm = snd_pcm_hw_params_set_rate_near(pcm_handle, params, &rate, 0)) < 0)
printf("ERROR: Can't set rate. %s\n", snd_strerror(pcm));
alsa_buffer_size = 1600 + 528 + 544; // five audio buffers from the game
if ((pcm = snd_pcm_hw_params_set_buffer_size_near(pcm_handle, params, &alsa_buffer_size)) < 0)
printf("ERROR: Can't set buffer size. %s\n", snd_strerror(pcm));
/* Write parameters */
if ((pcm = snd_pcm_hw_params(pcm_handle, params)) < 0)
printf("ERROR: Can't set harware parameters. %s\n", snd_strerror(pcm));
/* Resume information */
printf("PCM name: '%s'\n", snd_pcm_name(pcm_handle));
printf("PCM state: %s\n", snd_pcm_state_name(snd_pcm_state(pcm_handle)));
snd_pcm_hw_params_get_channels(params, &tmp);
printf("channels: %i ", tmp);
if (tmp == 1)
printf("(mono)\n");
else if (tmp == 2)
printf("(stereo)\n");
snd_pcm_hw_params_get_rate(params, &tmp, 0);
printf("rate: %d bps\n", tmp);
snd_pcm_hw_params_get_buffer_size(params, &alsa_buffer_size);
printf("buffer size: %lu\n", alsa_buffer_size);
/* Allocate buffer to hold single period */
snd_pcm_hw_params_get_period_size(params, &frames, 0);
printf("frames: %lu\n", frames);
snd_pcm_hw_params_get_period_time(params, &tmp, NULL);
printf("time: %d\n", tmp);
return true;
}
/**
 * Returns how much of the ALSA buffer is currently occupied, computed as
 * alsa_buffer_size minus what snd_pcm_avail reports as available. Returns 0
 * when no device is open, when the query fails, or when the difference would
 * be negative.
 */
static int audio_alsa_buffered(void) {
    snd_pcm_sframes_t avail;
    snd_pcm_sframes_t queued;

    if (!pcm_handle) {
        return 0;
    }

    avail = snd_pcm_avail(pcm_handle);
    if (avail < 0) {
        return 0;
    }

    queued = alsa_buffer_size - avail;
    return queued > 0 ? queued : 0;
}
/* Fill level this backend asks its caller to aim for (presumably in the same
   units as audio_alsa_buffered - confirm against the caller). */
static int audio_alsa_get_desired_buffered(void) {
    enum { ALSA_DESIRED_BUFFERED = 1100 };

    return ALSA_DESIRED_BUFFERED;
}
/**
 * Writes `len` bytes of interleaved audio from `buff` to the PCM device,
 * treating every 4 bytes as one frame.
 *
 * If the device has not been opened yet it is opened here; when that fails
 * the data is dropped instead of being written to an invalid handle.
 *
 * On an underrun (-EPIPE) the device is re-prepared and the data is written
 * again, preceded by 1100 frames of silence to avoid an immediate second
 * underrun.
 */
static void audio_alsa_play(const uint8_t* buff, size_t len) {
    int frames;
    int pcm;

    if (!pcm_handle && !audio_alsa_init()) {
        return;
    }

    frames = len / 4;
    if ((pcm = snd_pcm_writei(pcm_handle, buff, frames)) == -EPIPE) {
        printf("XRUN.\n");
        snd_pcm_prepare(pcm_handle);

        // Add some silence to avoid another XRUN
        char buf[1100 * 4 + len];
        memset(buf, 0, 1100 * 4);
        memcpy(buf + 1100 * 4, buff, len);
        if ((pcm = snd_pcm_writei(pcm_handle, buf, 1100 + frames)) < 0) {
            printf("Failed again %d\n", pcm);
        }
    } else if (pcm < 0) {
        printf("ERROR. Can't write to PCM device. %s\n", snd_strerror(pcm));
        return;
    }
}
struct AudioAPI audio_alsa = {
audio_alsa_init,
audio_alsa_buffered,
audio_alsa_get_desired_buffered,
audio_alsa_play
};
#endif

11
src/pc/audio/audio_alsa.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef AUDIO_ALSA_H
#define AUDIO_ALSA_H
#ifdef __linux__
extern struct AudioAPI audio_alsa;
#define HAVE_ALSA 1
#else
#define HAVE_ALSA 0
#endif
#endif

15
src/pc/audio/audio_api.h Normal file
View file

@ -0,0 +1,15 @@
#ifndef AUDIO_API_H
#define AUDIO_API_H
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
// Table of entry points that each audio backend (ALSA, PulseAudio, SDL,
// WASAPI, null) fills in and exports as a global.
struct AudioAPI {
// Prepares the backend; the backends return false when setup fails.
bool (*init)(void);
// Amount of audio currently queued in the backend. The backends visible here
// report it in units of 4 bytes (e.g. queued bytes / 4).
int (*buffered)(void);
// Fill level the backend would like the caller to maintain
// (presumably in the same units as buffered() - confirm against the caller).
int (*get_desired_buffered)(void);
// Submits `len` bytes of audio starting at `buf`; the backends treat every
// 4 bytes as one frame.
void (*play)(const uint8_t *buf, size_t len);
};
#endif

24
src/pc/audio/audio_null.c Normal file
View file

@ -0,0 +1,24 @@
#include "macros.h"
#include "audio_api.h"
static bool audio_null_init(void) {
return true;
}
static int audio_null_buffered(void) {
return 0;
}
static int audio_null_get_desired_buffered(void) {
return 0;
}
static void audio_null_play(UNUSED const uint8_t *buf, UNUSED size_t len) {
}
struct AudioAPI audio_null = {
audio_null_init,
audio_null_buffered,
audio_null_get_desired_buffered,
audio_null_play
};

View file

@ -0,0 +1,8 @@
#ifndef AUDIO_NULL_H
#define AUDIO_NULL_H
#include "audio_api.h"
extern struct AudioAPI audio_null;
#endif

204
src/pc/audio/audio_pulse.c Normal file
View file

@ -0,0 +1,204 @@
#ifdef __linux__
#include <stdio.h>
#include <stdbool.h>
#include <pulse/pulseaudio.h>
#include "macros.h"
#include "audio_api.h"
// State of the PulseAudio backend, shared by every function in this file.
static struct {
pa_mainloop *mainloop; // created in audio_pulse_init; driven manually via pa_mainloop_iterate
pa_context *context; // connection context created in audio_pulse_init
pa_stream *stream; // playback stream; NULL until audio_pulse_init succeeds in creating it
pa_buffer_attr attr; // buffer attributes read back from the stream after it connected
bool write_complete; // set by pas_write_complete, cleared again by audio_pulse_play
} pas;
/*
 * Context state callback: sets the bool that `userdata` points to once the
 * context reaches READY, TERMINATED or FAILED. Other states are ignored.
 */
static void pas_context_state_cb(pa_context *c, void *userdata) {
    bool *finished = (bool *)userdata;
    pa_context_state_t state = pa_context_get_state(c);

    if (state == PA_CONTEXT_READY || state == PA_CONTEXT_TERMINATED || state == PA_CONTEXT_FAILED) {
        *finished = true;
    }
}
/*
 * Stream state callback: sets the bool that `userdata` points to once the
 * stream reaches READY, FAILED or TERMINATED. Other states are ignored.
 */
static void pas_stream_state_cb(pa_stream *s, void *userdata) {
    bool *finished = (bool *)userdata;
    pa_stream_state_t state = pa_stream_get_state(s);

    if (state == PA_STREAM_READY || state == PA_STREAM_FAILED || state == PA_STREAM_TERMINATED) {
        *finished = true;
    }
}
// Stream write callback registered in audio_pulse_init. Intentionally does
// nothing; the commented-out lines below are leftover debugging output.
static void pas_stream_write_cb(UNUSED pa_stream *s, UNUSED size_t length, UNUSED void *userdata) {
//size_t ws = pa_stream_writable_size(pas.stream);
//printf("write cb: %d %d\n", (int)length, (int)ws);
}
/**
 * Creates the PulseAudio main loop, connects a context named
 * "Super Mario 64", and opens a 32000 Hz, 2-channel, signed 16-bit
 * little-endian playback stream with the buffer attributes set below. The
 * attributes that were actually applied are printed and stored in pas.attr.
 *
 * The main loop is iterated by hand until the context and then the stream
 * leave their transitional states. A main-loop error during either wait is
 * treated as an initialization failure rather than being retried forever.
 *
 * Returns true on success. On failure everything created so far is released,
 * the corresponding pas fields are reset to NULL, and false is returned.
 */
static bool audio_pulse_init(void) {
    bool done = false;
    pa_sample_spec ss;
    pa_buffer_attr attr;

    // Create mainloop
    pas.mainloop = pa_mainloop_new();
    if (pas.mainloop == NULL) {
        return false;
    }

    // Create context and connect
    pas.context = pa_context_new(pa_mainloop_get_api(pas.mainloop), "Super Mario 64");
    if (pas.context == NULL) {
        goto fail;
    }

    pa_context_set_state_callback(pas.context, pas_context_state_cb, &done);

    if (pa_context_connect(pas.context, NULL, 0, NULL) < 0) {
        goto fail;
    }

    while (!done) {
        if (pa_mainloop_iterate(pas.mainloop, true, NULL) < 0) {
            goto fail;
        }
    }
    pa_context_set_state_callback(pas.context, NULL, NULL);
    if (pa_context_get_state(pas.context) != PA_CONTEXT_READY) {
        goto fail;
    }

    // Create stream
    ss.format = PA_SAMPLE_S16LE;
    ss.rate = 32000;
    ss.channels = 2;

    // All sizes are in bytes; each frame is 4 bytes.
    attr.maxlength = (1600 + 544 + 528 + 1600) * 4;
    attr.tlength = (528*2 + 544) * 4;
    attr.prebuf = 1500 * 4;
    attr.minreq = 161 * 4;
    attr.fragsize = (uint32_t)-1;

    pas.stream = pa_stream_new(pas.context, "mario", &ss, NULL);
    if (pas.stream == NULL) {
        goto fail;
    }

    done = false;
    pa_stream_set_state_callback(pas.stream, pas_stream_state_cb, &done);
    pa_stream_set_write_callback(pas.stream, pas_stream_write_cb, NULL);
    if (pa_stream_connect_playback(pas.stream, NULL, &attr, PA_STREAM_ADJUST_LATENCY, NULL, NULL) < 0) {
        goto fail;
    }

    while (!done) {
        if (pa_mainloop_iterate(pas.mainloop, true, NULL) < 0) {
            goto fail;
        }
    }
    pa_stream_set_state_callback(pas.stream, NULL, NULL);
    if (pa_stream_get_state(pas.stream) != PA_STREAM_READY) {
        goto fail;
    }

    const pa_buffer_attr *applied_attr = pa_stream_get_buffer_attr(pas.stream);
    printf("maxlength: %u\ntlength: %u\nprebuf: %u\nminreq: %u\nfragsize: %u\n",
           applied_attr->maxlength, applied_attr->tlength, applied_attr->prebuf, applied_attr->minreq, applied_attr->fragsize);
    pas.attr = *applied_attr;

    return true;

fail:
    // Detach the callbacks first: they point at `done`, which dies with this frame.
    if (pas.stream != NULL) {
        pa_stream_set_state_callback(pas.stream, NULL, NULL);
        pa_stream_unref(pas.stream);
        pas.stream = NULL;
    }
    if (pas.context != NULL) {
        pa_context_set_state_callback(pas.context, NULL, NULL);
        pa_context_disconnect(pas.context);
        pa_context_unref(pas.context);
        pas.context = NULL;
    }
    if (pas.mainloop != NULL) {
        pa_mainloop_free(pas.mainloop);
        pas.mainloop = NULL;
    }
    return false;
}
/* Completion callback for the timing-info update: sets the bool that
   `userdata` points to. */
static void pas_update_complete(UNUSED pa_stream *stream, UNUSED int success, void *userdata) {
    bool *finished = (bool *)userdata;

    *finished = true;
}
static void pas_update(void) {
bool done = false;
pa_stream_update_timing_info(pas.stream, pas_update_complete, &done);
while (!done) {
pa_mainloop_iterate(pas.mainloop, true, NULL);
}
}
// Callback handed to pa_stream_write by audio_pulse_play; raises the flag
// that audio_pulse_play waits on after submitting a buffer.
static void pas_write_complete(UNUSED void *p) {
pas.write_complete = true;
}
/**
 * Returns how much audio is queued in the stream, computed as the distance
 * between the stream's write and read indices divided by 4 (bytes per frame).
 *
 * Returns 0 when the stream has not been created or when no timing
 * information is available.
 */
static int audio_pulse_buffered(void) {
    if (pas.stream == NULL) {
        return 0;
    }

    pas_update();

    const pa_timing_info *info = pa_stream_get_timing_info(pas.stream);
    if (info == NULL) {
        printf("pa_stream_get_timing_info failed, state is %d\n", pa_stream_get_state(pas.stream));
        // Nothing to measure; do not dereference the missing timing info.
        return 0;
    }

    return (info->write_index - info->read_index) / 4;
}
/* Fill level this backend asks its caller to aim for (presumably in the same
   units as audio_pulse_buffered - confirm against the caller). */
static int audio_pulse_get_desired_buffered(void) {
    enum { PULSE_DESIRED_BUFFERED = 1100 };

    return PULSE_DESIRED_BUFFERED;
}
/**
 * Submits up to `len` bytes from `buf` to the playback stream and waits until
 * PulseAudio signals, through pas_write_complete, that it is done with the
 * buffer.
 *
 * The stream is created on first use. The write is clamped to the space left
 * below pas.attr.maxlength; when no space is left (or `len` is 0) nothing is
 * submitted, because a zero-length write would leave this function waiting
 * for a completion callback that presumably never fires.
 */
static void audio_pulse_play(const uint8_t *buf, size_t len) {
    if (pas.stream == NULL) {
        if (!audio_pulse_init()) {
            return;
        }
    }

    // Compute the free space without letting the unsigned subtraction wrap
    // when more than maxlength bytes are reported as queued.
    int buffered = audio_pulse_buffered();
    size_t queued = buffered > 0 ? (size_t)buffered * 4 : 0;
    size_t ws = queued < pas.attr.maxlength ? pas.attr.maxlength - queued : 0;
    if (ws < len) {
        len = ws;
    }
    if (len == 0) {
        return;
    }

    pas.write_complete = false;
    if (pa_stream_write(pas.stream, buf, len, pas_write_complete, 0LL, PA_SEEK_RELATIVE) < 0) {
        printf("pa_stream_write failed\n");
        return;
    }

    while (!pas.write_complete) {
        if (pa_mainloop_iterate(pas.mainloop, true, NULL) < 0) {
            break;
        }
    }
    pas.write_complete = false;
}
struct AudioAPI audio_pulse = {
audio_pulse_init,
audio_pulse_buffered,
audio_pulse_get_desired_buffered,
audio_pulse_play
};
#endif

View file

@ -0,0 +1,11 @@
#ifndef AUDIO_PULSE_H
#define AUDIO_PULSE_H
#ifdef __linux__
extern struct AudioAPI audio_pulse;
#define HAVE_PULSE_AUDIO 1
#else
#define HAVE_PULSE_AUDIO 0
#endif
#endif

56
src/pc/audio/audio_sdl.c Normal file
View file

@ -0,0 +1,56 @@
#if !defined(_WIN32) && !defined(_WIN64)
#ifdef __MINGW32__
#include "SDL.h"
#else
#include "SDL2/SDL.h"
#endif
#include "audio_api.h"
static SDL_AudioDeviceID dev;
/**
 * Initializes SDL's audio subsystem and opens the default output device for
 * 32000 Hz, 2-channel, AUDIO_S16 queued playback (no callback), then unpauses
 * it. Returns false, after printing the SDL error to stderr, when either step
 * fails.
 */
static bool audio_sdl_init(void) {
    SDL_AudioSpec want, have;

    if (SDL_Init(SDL_INIT_AUDIO) != 0) {
        fprintf(stderr, "SDL init error: %s\n", SDL_GetError());
        return false;
    }

    SDL_zero(want);
    want.freq = 32000;
    want.format = AUDIO_S16;
    want.channels = 2;
    want.samples = 512;
    want.callback = NULL;

    dev = SDL_OpenAudioDevice(NULL, 0, &want, &have, 0);
    if (dev != 0) {
        // Devices open paused; start playback right away.
        SDL_PauseAudioDevice(dev, 0);
        return true;
    }

    fprintf(stderr, "SDL_OpenAudio error: %s\n", SDL_GetError());
    return false;
}
static int audio_sdl_buffered(void) {
return SDL_GetQueuedAudioSize(dev) / 4;
}
/* Fill level this backend asks its caller to aim for (presumably in the same
   units as audio_sdl_buffered - confirm against the caller). */
static int audio_sdl_get_desired_buffered(void) {
    enum { SDL_DESIRED_BUFFERED = 1100 };

    return SDL_DESIRED_BUFFERED;
}
/*
 * Queues `len` bytes from `buf` for playback, unless 6000 or more frames are
 * already queued, in which case the data is dropped so the queue does not
 * keep growing.
 */
static void audio_sdl_play(const uint8_t *buf, size_t len) {
    if (audio_sdl_buffered() >= 6000) {
        return;
    }

    SDL_QueueAudio(dev, buf, len);
}
struct AudioAPI audio_sdl = {
audio_sdl_init,
audio_sdl_buffered,
audio_sdl_get_desired_buffered,
audio_sdl_play
};
#endif

6
src/pc/audio/audio_sdl.h Normal file
View file

@ -0,0 +1,6 @@
#ifndef AUDIO_SDL_H
#define AUDIO_SDL_H
extern struct AudioAPI audio_sdl;
#endif

View file

@ -0,0 +1,210 @@
#if defined(_WIN32) || defined(_WIN64)
#include <stdint.h>
#include <string.h>
#include <windows.h>
#include <wrl/client.h>
#include "Objbase.h"
#include "Mmdeviceapi.h"
#include "audioclient.h"
#include "audio_api.h"
// These constants are currently missing from the MinGW headers.
#ifndef AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM
# define AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM 0x80000000
#endif
#ifndef AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY
# define AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY 0x08000000
#endif
using namespace Microsoft::WRL;
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);
static ComPtr<IMMDeviceEnumerator> immdev_enumerator;
// State of the current WASAPI output stream. Reset wholesale by assigning a
// default-constructed WasapiState whenever setup or a call on the client fails.
static struct WasapiState {
ComPtr<IMMDevice> device; // default render endpoint obtained in audio_wasapi_setup_stream
ComPtr<IAudioClient> client; // audio client activated on `device`
ComPtr<IAudioRenderClient> rclient; // render service used to obtain and release buffers
UINT32 buffer_frame_count; // buffer size reported by the client's GetBufferSize
bool initialized; // true once setup succeeded; cleared when the default device changes
bool started; // true once the client's Start() has been called
} wasapi;
// Endpoint notification sink. Its only real job is to mark the stream as
// uninitialized when the default console render device changes, so that the
// next audio_wasapi_buffered/audio_wasapi_play call sets the stream up again.
// All other notifications are acknowledged and ignored.
//
// NOTE(review): audio_wasapi_init registers a heap-allocated instance; the
// static instance `notification_client` declared at the end of this class is
// not referenced by the visible code - confirm whether it is needed.
static class NotificationClient : public IMMNotificationClient {
// COM reference count; the object deletes itself when it drops to zero.
LONG refcount;
public:
// Starts with one reference, owned by whoever created the object.
NotificationClient() : refcount(1) {
}
virtual HRESULT STDMETHODCALLTYPE OnDeviceStateChanged(LPCWSTR pwstrDeviceId, DWORD dwNewState) {
return S_OK;
}
virtual HRESULT STDMETHODCALLTYPE OnDeviceAdded(LPCWSTR pwstrDeviceId) {
return S_OK;
}
virtual HRESULT STDMETHODCALLTYPE OnDeviceRemoved(LPCWSTR pwstrDeviceId) {
return S_OK;
}
virtual HRESULT STDMETHODCALLTYPE OnDefaultDeviceChanged(EDataFlow flow, ERole role, LPCWSTR pwstrDefaultDeviceId) {
if (flow == eRender && role == eConsole) {
// This callback runs on a separate thread,
// but it's not important how fast this write takes effect.
wasapi.initialized = false;
}
return S_OK;
}
virtual HRESULT STDMETHODCALLTYPE OnPropertyValueChanged(LPCWSTR pwstrDeviceId, const PROPERTYKEY key) {
return S_OK;
}
virtual ULONG STDMETHODCALLTYPE AddRef() {
return InterlockedIncrement(&refcount);
}
virtual ULONG STDMETHODCALLTYPE Release() {
ULONG rc = InterlockedDecrement(&refcount);
if (rc == 0) {
// Last reference gone: only valid for instances created with `new`.
delete this;
}
return rc;
}
// Hands out IUnknown and IMMNotificationClient (adding a reference for the
// caller); any other interface is refused with E_NOINTERFACE.
virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, VOID **ppvInterface) {
if (riid == __uuidof(IUnknown)) {
AddRef();
*ppvInterface = (IUnknown *)this;
} else if (riid == __uuidof(IMMNotificationClient)) {
AddRef();
*ppvInterface = (IMMNotificationClient *)this;
} else {
*ppvInterface = nullptr;
return E_NOINTERFACE;
}
return S_OK;
}
} notification_client;
// Converts a failing HRESULT into a C++ exception carrying that HRESULT;
// successful results pass through silently.
static void ThrowIfFailed(HRESULT res) {
    if (!FAILED(res)) {
        return;
    }
    throw res;
}
bool audio_wasapi_init(void) {
try {
ThrowIfFailed(CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, IID_PPV_ARGS(&immdev_enumerator)));
} catch (HRESULT res) {
return false;
}
ThrowIfFailed(immdev_enumerator->RegisterEndpointNotificationCallback(new NotificationClient()));
return true;
}
// (Re)creates the output stream on the current default console render device.
// Starts from a clean WasapiState and, on any failing call, resets it again and returns false.
// On success `wasapi.initialized` is true and playback has not been started yet.
static bool audio_wasapi_setup_stream(void) {
wasapi = WasapiState();
try {
ThrowIfFailed(immdev_enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &wasapi.device));
ThrowIfFailed(wasapi.device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, IID_PPV_ARGS_Helper(&wasapi.client)));
// Source format: 16-bit stereo PCM at 32000 Hz, i.e. 4 bytes per frame.
WAVEFORMATEX desired;
desired.wFormatTag = WAVE_FORMAT_PCM;
desired.nChannels = 2;
desired.nSamplesPerSec = 32000;
desired.nAvgBytesPerSec = 32000 * 2 * 2;
desired.nBlockAlign = 4;
desired.wBitsPerSample = 16;
desired.cbSize = 0;
// Shared mode with automatic format conversion requested via the AUTOCONVERTPCM flag.
// The buffer duration argument (2000000) is in 100 ns units per the
// IAudioClient::Initialize documentation, i.e. 200 ms.
ThrowIfFailed(wasapi.client->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, 2000000, 0, &desired, nullptr));
ThrowIfFailed(wasapi.client->GetBufferSize(&wasapi.buffer_frame_count));
ThrowIfFailed(wasapi.client->GetService(IID_PPV_ARGS(&wasapi.rclient)));
wasapi.started = false;
wasapi.initialized = true;
} catch (HRESULT res) {
wasapi = WasapiState();
return false;
}
return true;
}
static int audio_wasapi_buffered(void) {
if (!wasapi.initialized) {
if (!audio_wasapi_setup_stream()) {
return 0;
}
}
try {
UINT32 padding;
ThrowIfFailed(wasapi.client->GetCurrentPadding(&padding));
return padding;
} catch (HRESULT res) {
wasapi = WasapiState();
return 0;
}
}
// Amount of queued audio the caller should aim for
// (presumably in the same unit that audio_wasapi_buffered reports — confirm against the caller).
static int audio_wasapi_get_desired_buffered(void) {
    const int desired = 1100;
    return desired;
}
//#include <stdio.h>
// Queues `len` bytes of audio from `buf` for playback.
// `len / 4` frames are offered (4 bytes per frame, matching nBlockAlign in the stream setup);
// frames that do not fit in the free part of the endpoint buffer are dropped.
// Sets the stream up first if needed. Any failing WASAPI call resets the stream state.
static void audio_wasapi_play(const uint8_t *buf, size_t len) {
if (!wasapi.initialized) {
if (!audio_wasapi_setup_stream()) {
return;
}
}
try {
UINT32 frames = len / 4;
UINT32 padding;
ThrowIfFailed(wasapi.client->GetCurrentPadding(&padding));
//printf("%u %u\n", frames, padding);
// Free space is the total buffer size minus what is still queued.
UINT32 available = wasapi.buffer_frame_count - padding;
if (available < frames) {
frames = available;
}
if (available == 0) {
return;
}
BYTE *data;
ThrowIfFailed(wasapi.rclient->GetBuffer(frames, &data));
memcpy(data, buf, frames * 4);
ThrowIfFailed(wasapi.rclient->ReleaseBuffer(frames, 0));
// Playback is only started once more than 1500 frames are queued.
if (!wasapi.started && padding + frames > 1500) {
wasapi.started = true;
ThrowIfFailed(wasapi.client->Start());
}
} catch (HRESULT res) {
wasapi = WasapiState();
}
}
// WASAPI implementation of the audio backend interface.
// NOTE(review): entries are positional; their order must match the field order of
// struct AudioAPI in audio_api.h (not visible here) — confirm when changing either.
struct AudioAPI audio_wasapi = {
audio_wasapi_init,
audio_wasapi_buffered,
audio_wasapi_get_desired_buffered,
audio_wasapi_play
};
#endif

View file

@ -0,0 +1,12 @@
#ifndef AUDIO_WASAPI_H
#define AUDIO_WASAPI_H
// The WASAPI backend is only declared on Windows builds.
// HAVE_WASAPI is always defined (1 or 0) so callers can test it with #if.
#if defined(_WIN32) || defined(_WIN64)
#include "audio_api.h"
extern struct AudioAPI audio_wasapi;
#define HAVE_WASAPI 1
#else
#define HAVE_WASAPI 0
#endif
#endif

236
src/pc/configfile.c Normal file
View file

@ -0,0 +1,236 @@
// configfile.c - handles loading and saving the configuration options
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include "configfile.h"
// Number of elements in a true array (not a pointer). The argument is parenthesized
// so that expressions such as ARRAY_LEN(*table) expand correctly.
#define ARRAY_LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
// Value type of a config option; selects which pointer of the ConfigOption union is valid
// and how the value is parsed and printed.
enum ConfigOptionType {
CONFIG_TYPE_BOOL, // written as "true" / "false"
CONFIG_TYPE_UINT, // parsed and written with %u
CONFIG_TYPE_FLOAT, // parsed and written with %f
};
// Describes one entry of the config file: its name in the file, its type,
// and a pointer to the variable that holds the value.
struct ConfigOption {
const char *name; // option name as it appears in the config file
enum ConfigOptionType type; // tells which union member below is in use
union {
bool *boolValue;
unsigned int *uintValue;
float *floatValue;
};
};
/*
* Config options and their default values.
* The values are overwritten by configfile_load() for every option found in the config file.
*/
bool configFullscreen = false;
// Keyboard mappings (scancode values)
// NOTE(review): the key names below are taken from the browser keymap table
// (keymap_browser); confirm they also hold for the other keyboard backends.
unsigned int configKeyA = 0x26; // L
unsigned int configKeyB = 0x33; // comma
unsigned int configKeyStart = 0x39; // space
unsigned int configKeyR = 0x36; // right shift
unsigned int configKeyZ = 0x25; // K
unsigned int configKeyCUp = 0x148; // arrow up
unsigned int configKeyCDown = 0x150; // arrow down
unsigned int configKeyCLeft = 0x14B; // arrow left
unsigned int configKeyCRight = 0x14D; // arrow right
unsigned int configKeyStickUp = 0x11; // W
unsigned int configKeyStickDown = 0x1F; // S
unsigned int configKeyStickLeft = 0x1E; // A
unsigned int configKeyStickRight = 0x20; // D
// Table of all options understood by configfile_load() and written by configfile_save().
// Options are written to the file in this order; a new option only needs an entry here
// plus the variable it points to.
static const struct ConfigOption options[] = {
{.name = "fullscreen", .type = CONFIG_TYPE_BOOL, .boolValue = &configFullscreen},
{.name = "key_a", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyA},
{.name = "key_b", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyB},
{.name = "key_start", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyStart},
{.name = "key_r", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyR},
{.name = "key_z", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyZ},
{.name = "key_cup", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyCUp},
{.name = "key_cdown", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyCDown},
{.name = "key_cleft", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyCLeft},
{.name = "key_cright", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyCRight},
{.name = "key_stickup", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyStickUp},
{.name = "key_stickdown", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyStickDown},
{.name = "key_stickleft", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyStickLeft},
{.name = "key_stickright", .type = CONFIG_TYPE_UINT, .uintValue = &configKeyStickRight},
};
// Reads an entire line from a file (excluding the newline character) and returns an allocated
// string that the caller must free().
// Returns NULL if no characters could be read (end of file or read error before any data)
// or if memory could not be allocated.
static char *read_file_line(FILE *file) {
    size_t bufferSize = 8;
    size_t offset = 0; // number of characters stored in buffer so far
    char *buffer = malloc(bufferSize);

    if (buffer == NULL)
        return NULL;

    while (1) {
        size_t chunkLen;
        char *grown;

        // Append the next piece of the line after what has already been read
        if (fgets(buffer + offset, (int) (bufferSize - offset), file) == NULL) {
            // End of file or read error. If an earlier pass already stored part of a line
            // (an unterminated last line that exactly filled the buffer), return that text
            // instead of discarding it.
            if (offset > 0) {
                buffer[offset] = '\0';
                break;
            }
            free(buffer);
            return NULL; // Nothing could be read.
        }
        chunkLen = strlen(buffer + offset);
        offset += chunkLen;

        // Nothing usable was appended (the data starts with a NUL byte):
        // stop here rather than look at buffer[offset - 1] with a stale or zero offset.
        if (chunkLen == 0)
            break;

        // If a newline was found, remove the trailing newline and exit
        if (buffer[offset - 1] == '\n') {
            buffer[offset - 1] = '\0';
            break;
        }

        if (feof(file)) // EOF was reached
            break;

        // If no newline or EOF was reached, then the whole line wasn't read.
        bufferSize *= 2; // Increase buffer size
        grown = realloc(buffer, bufferSize);
        if (grown == NULL) {
            free(buffer); // realloc leaves the old block allocated on failure
            return NULL;
        }
        buffer = grown;
    }
    return buffer;
}
// Returns the position of the first non-whitespace character
// (or of the terminating '\0' if the string contains only whitespace)
static char *skip_whitespace(char *str) {
    // isspace() is only defined for unsigned char values and EOF; the cast keeps bytes
    // >= 0x80 well-defined on platforms where plain char is signed
    while (isspace((unsigned char) *str))
        str++;
    return str;
}
// NUL-terminates the current whitespace-delimited word, and returns a pointer to the next word
// (or to the terminating '\0' when there is no further word)
static char *word_split(char *str) {
    // Precondition: str must not point to whitespace
    // (casts to unsigned char keep isspace() defined for bytes >= 0x80)
    assert(!isspace((unsigned char) *str));

    // Find either the next whitespace char or end of string
    while (*str != '\0' && !isspace((unsigned char) *str))
        str++;
    if (*str == '\0') // End of string
        return str;

    // Terminate current word
    *(str++) = '\0';

    // Skip whitespace to next word
    return skip_whitespace(str);
}
// Breaks 'str' into whitespace-separated words in place and records the start of each word
// in 'tokens'. At most 'maxTokens' entries (the capacity of 'tokens') are filled in.
// Returns how many tokens were stored.
static unsigned int tokenize_string(char *str, int maxTokens, char **tokens) {
    int numFound;
    char *cursor = skip_whitespace(str);

    for (numFound = 0; numFound < maxTokens && cursor[0] != '\0'; numFound++) {
        tokens[numFound] = cursor;
        cursor = word_split(cursor);
    }
    return numFound;
}
// Loads the config file specified by 'filename'.
// Each line has the form "<option name> <value>"; blank lines are skipped and anything after
// the second word is ignored. Unknown options and values that cannot be parsed are reported
// on stdout and leave the current setting untouched.
// If the file cannot be opened, a new one holding the current values is written instead.
void configfile_load(const char *filename) {
    FILE *file;
    char *line;

    printf("Loading configuration from '%s'\n", filename);

    file = fopen(filename, "r");
    if (file == NULL) {
        // Create a new config file and save defaults
        printf("Config file '%s' not found. Creating it.\n", filename);
        configfile_save(filename);
        return;
    }

    // Go through each line in the file
    while ((line = read_file_line(file)) != NULL) {
        char *tokens[2];
        // tokenize_string() skips leading whitespace itself
        int numTokens = tokenize_string(line, 2, tokens);

        if (numTokens == 2) {
            const struct ConfigOption *option = NULL;

            for (unsigned int i = 0; i < ARRAY_LEN(options); i++) {
                if (strcmp(tokens[0], options[i].name) == 0) {
                    option = &options[i];
                    break;
                }
            }

            if (option == NULL) {
                printf("unknown option '%s'\n", tokens[0]);
            } else {
                bool parsed = false; // whether tokens[1] was a valid value for this option

                switch (option->type) {
                    case CONFIG_TYPE_BOOL:
                        if (strcmp(tokens[1], "true") == 0) {
                            *option->boolValue = true;
                            parsed = true;
                        } else if (strcmp(tokens[1], "false") == 0) {
                            *option->boolValue = false;
                            parsed = true;
                        }
                        break;
                    case CONFIG_TYPE_UINT:
                        parsed = sscanf(tokens[1], "%u", option->uintValue) == 1;
                        break;
                    case CONFIG_TYPE_FLOAT:
                        parsed = sscanf(tokens[1], "%f", option->floatValue) == 1;
                        break;
                    default:
                        assert(0); // bad type
                }

                if (parsed)
                    printf("option: '%s', value: '%s'\n", tokens[0], tokens[1]);
                else
                    printf("error: invalid value '%s' for option '%s'\n", tokens[1], tokens[0]);
            }
        } else if (numTokens != 0) {
            puts("error: expected value");
        }
        free(line);
    }

    fclose(file);
}
// Writes the config file to 'filename', one "<option name> <value>" line per entry of the
// options table. Failures to open or write the file are reported on stderr.
void configfile_save(const char *filename) {
    FILE *file;

    printf("Saving configuration to '%s'\n", filename);

    file = fopen(filename, "w");
    if (file == NULL) {
        fprintf(stderr, "Could not open '%s' for writing\n", filename);
        return;
    }

    for (unsigned int i = 0; i < ARRAY_LEN(options); i++) {
        const struct ConfigOption *option = &options[i];

        switch (option->type) {
            case CONFIG_TYPE_BOOL:
                fprintf(file, "%s %s\n", option->name, *option->boolValue ? "true" : "false");
                break;
            case CONFIG_TYPE_UINT:
                fprintf(file, "%s %u\n", option->name, *option->uintValue);
                break;
            case CONFIG_TYPE_FLOAT:
                fprintf(file, "%s %f\n", option->name, *option->floatValue);
                break;
            default:
                assert(0); // unknown type
        }
    }

    // fclose() flushes buffered output, so this is where a write error would show up
    if (fclose(file) != 0)
        fprintf(stderr, "Error while writing '%s'\n", filename);
}

22
src/pc/configfile.h Normal file
View file

@ -0,0 +1,22 @@
#ifndef CONFIGFILE_H
#define CONFIGFILE_H

// Needed for the bool declaration below, so that this header can be included on its own
#include <stdbool.h>

// Current configuration values; defined together with their defaults in configfile.c
extern bool configFullscreen;
extern unsigned int configKeyA;
extern unsigned int configKeyB;
extern unsigned int configKeyStart;
extern unsigned int configKeyR;
extern unsigned int configKeyZ;
extern unsigned int configKeyCUp;
extern unsigned int configKeyCDown;
extern unsigned int configKeyCLeft;
extern unsigned int configKeyCRight;
extern unsigned int configKeyStickUp;
extern unsigned int configKeyStickDown;
extern unsigned int configKeyStickLeft;
extern unsigned int configKeyStickRight;

// Reads the options from 'filename'; writes a fresh file if it cannot be opened
void configfile_load(const char *filename);
// Writes the current option values to 'filename'
void configfile_save(const char *filename);

#endif

View file

@ -0,0 +1,11 @@
#ifndef CONTROLLER_API
#define CONTROLLER_API
#include <ultra64.h>
// Interface implemented by every controller backend.
struct ControllerAPI {
void (*init)(void); // called once for each backend from osContInit
void (*read)(OSContPad *pad); // called from osContGetReadData; adds this backend's input to *pad
};
#endif

View file

@ -0,0 +1,151 @@
#ifdef TARGET_WEB
#include <string.h>
#include <emscripten/html5.h>
#include "macros.h"
#include "controller_keyboard.h"
// Translation table from the browser's KeyboardEvent `code` string to the integer scancode
// that is handed to keyboard_on_key_down / keyboard_on_key_up.
// The handler searches it linearly and uses the first matching entry.
static const struct {
const char *code;
int scancode;
} keymap_browser[] = {
{"Escape", 0x01},
{"Digit1", 0x02 },
{"Digit2", 0x03 },
{"Digit3", 0x04 },
{"Digit4", 0x05 },
{"Digit5", 0x06 },
{"Digit6", 0x07 },
{"Digit7", 0x08 },
{"Digit8", 0x09 },
{"Digit9", 0x0a },
{"Digit0", 0x0b },
{"Minus", 0x0c },
{"Equal", 0x0d },
{"Backspace", 0x0e },
{"Tab", 0x0f },
{"KeyQ", 0x10 },
{"KeyW", 0x11 },
{"KeyE", 0x12 },
{"KeyR", 0x13 },
{"KeyT", 0x14 },
{"KeyY", 0x15 },
{"KeyU", 0x16 },
{"KeyI", 0x17 },
{"KeyO", 0x18 },
{"KeyP", 0x19 },
{"BracketLeft", 0x1a },
{"BracketRight", 0x1b },
{"Enter", 0x1c },
{"ControlLeft", 0x1d },
{"KeyA", 0x1e },
{"KeyS", 0x1f },
{"KeyD", 0x20 },
{"KeyF", 0x21 },
{"KeyG", 0x22 },
{"KeyH", 0x23 },
{"KeyJ", 0x24 },
{"KeyK", 0x25 },
{"KeyL", 0x26 },
{"Semicolon", 0x27 },
{"Quote", 0x28 },
{"Backquote", 0x29 },
{"ShiftLeft", 0x2a },
{"Backslash", 0x2b },
{"KeyZ", 0x2c },
{"KeyX", 0x2d },
{"KeyC", 0x2e },
{"KeyV", 0x2f },
{"KeyB", 0x30 },
{"KeyN", 0x31 },
{"KeyM", 0x32 },
{"Comma", 0x33 },
{"Period", 0x34 },
{"Slash", 0x35 },
{"ShiftRight", 0x36 },
{"NumpadMultiply", 0x37 },
{"AltLeft", 0x38 },
{"Space", 0x39 },
{"CapsLock", 0x3a },
{"F1", 0x3b },
{"F2", 0x3c },
{"F3", 0x3d },
{"F4", 0x3e },
{"F5", 0x3f },
{"F6", 0x40 },
{"F7", 0x41 },
{"F8", 0x42 },
{"F9", 0x43 },
{"F10", 0x44 },
{"NumLock", 0x45 },
{"ScrollLock", 0x46 },
{"Numpad7", 0x47 },
{"Numpad8", 0x48 },
{"Numpad9", 0x49 },
{"NumpadSubtract", 0x4a },
{"Numpad4", 0x4b },
{"Numpad5", 0x4c },
{"Numpad6", 0x4d },
{"NumpadAdd", 0x4e },
{"Numpad1", 0x4f },
{"Numpad2", 0x50 },
{"Numpad3", 0x51 },
{"Numpad0", 0x52 },
{"NumpadDecimal", 0x53 },
{"PrintScreen", 0x54 },
// 0x55
{"IntlBackslash", 0x56 },
{"F11", 0x57 },
{"F12", 0x58 },
{"IntlRo", 0x59 },
//{"Katakana", 0 },
//{"Hiragana", 0 },
// Entries from here on use values above 0xff
// (presumably marking extended-key scancodes — TODO confirm)
{"NumpadEnter", 0x11c },
{"ControlRight", 0x11d },
{"NumpadDivide", 0x135 },
{"AltRight", 0x138 },
{"Home", 0x147 },
{"ArrowUp", 0x148 },
{"PageUp", 0x149 },
{"ArrowLeft", 0x14b },
{"ArrowRight", 0x14d },
{"End", 0x14f },
{"ArrowDown", 0x150 },
{"PageDown", 0x151 },
{"Insert", 0x152 },
{"Delete", 0x153 },
{"Pause", 0x21d },
{"MetaLeft", 0x15b },
{"MetaRight", 0x15c },
{"ContextMenu", 0x15d },
};
// Key-down / key-up callback: looks the browser key code up in keymap_browser and forwards
// the matching scancode to the keyboard controller.
// Returns what the keyboard controller reports for that scancode, or EM_FALSE for
// unknown keys and for event types other than key-down / key-up.
static EM_BOOL controller_emscripten_keyboard_handler(int event_type, const EmscriptenKeyboardEvent *key_event, UNUSED void *user_data) {
    const size_t num_keys = sizeof(keymap_browser) / sizeof(keymap_browser[0]);
    size_t idx = 0;

    // Find the first table entry for this key
    while (idx < num_keys && strcmp(key_event->code, keymap_browser[idx].code) != 0) {
        idx++;
    }
    if (idx == num_keys) {
        return EM_FALSE;
    }

    if (event_type == EMSCRIPTEN_EVENT_KEYDOWN) {
        return keyboard_on_key_down(keymap_browser[idx].scancode);
    }
    if (event_type == EMSCRIPTEN_EVENT_KEYUP) {
        return keyboard_on_key_up(keymap_browser[idx].scancode);
    }
    return EM_FALSE;
}
// Blur callback: releases every key, since key-up events are presumably not delivered
// while the window is unfocused (TODO confirm). Always returns EM_TRUE.
static EM_BOOL controller_emscripten_keyboard_blur_handler(UNUSED int event_type, UNUSED const EmscriptenFocusEvent *focus_event, UNUSED void *user_data) {
keyboard_on_all_keys_up();
return EM_TRUE;
}
// Registers the key-down, key-up and blur callbacks on the window event target.
// No user data is passed, and the third argument (use-capture) is EM_FALSE for all three.
void controller_emscripten_keyboard_init(void) {
// Should be #window according to docs, but that crashes
const char *target = EMSCRIPTEN_EVENT_TARGET_WINDOW;
emscripten_set_keydown_callback(target, NULL, EM_FALSE, controller_emscripten_keyboard_handler);
emscripten_set_keyup_callback(target, NULL, EM_FALSE, controller_emscripten_keyboard_handler);
emscripten_set_blur_callback(target, NULL, EM_FALSE, controller_emscripten_keyboard_blur_handler);
}
#endif

View file

@ -0,0 +1,8 @@
#ifndef CONTROLLER_KEYBOARD_EMSCRIPTEN_H
#define CONTROLLER_KEYBOARD_EMSCRIPTEN_H
#ifdef TARGET_WEB
void controller_emscripten_keyboard_init(void);
#endif
#endif

View file

@ -0,0 +1,53 @@
#include "macros.h"
#include "lib/src/libultra_internal.h"
#include "lib/src/osContInternal.h"
#include "controller_recorded_tas.h"
#include "controller_keyboard.h"
#if defined(_WIN32) || defined(_WIN64)
#include "controller_xinput.h"
#else
#include "controller_sdl.h"
#endif
#ifdef __linux__
#include "controller_wup.h"
#endif
// All controller backends compiled into this build.
// osContInit initializes them and osContGetReadData reads them in this order, so a backend
// that assigns (rather than ORs) pad fields is overridden by the backends listed after it.
static struct ControllerAPI *controller_implementations[] = {
&controller_recorded_tas,
// XInput on Windows, SDL game controller everywhere else
#if defined(_WIN32) || defined(_WIN64)
&controller_xinput,
#else
&controller_sdl,
#endif
#ifdef __linux__
&controller_wup,
#endif
&controller_keyboard,
};
// PC replacement for osContInit: initializes every controller backend and reports a single
// connected controller (bit 0 of *controllerBits). Always returns 0.
s32 osContInit(UNUSED OSMesgQueue *mq, u8 *controllerBits, UNUSED OSContStatus *status) {
    struct ControllerAPI **backend = controller_implementations;
    struct ControllerAPI **const end =
        controller_implementations + sizeof(controller_implementations) / sizeof(struct ControllerAPI *);

    while (backend != end) {
        (*backend)->init();
        backend++;
    }

    *controllerBits = 1;
    return 0;
}
// PC replacement for osContStartReadData: does nothing and returns 0;
// the backends are polled directly in osContGetReadData.
s32 osContStartReadData(UNUSED OSMesgQueue *mesg) {
return 0;
}
// PC replacement for osContGetReadData: clears *pad, then lets every controller backend
// add its input to it, in the order of controller_implementations.
void osContGetReadData(OSContPad *pad) {
    struct ControllerAPI **backend = controller_implementations;
    struct ControllerAPI **const end =
        controller_implementations + sizeof(controller_implementations) / sizeof(struct ControllerAPI *);

    pad->button = 0;
    pad->stick_x = 0;
    pad->stick_y = 0;
    pad->errnum = 0;

    while (backend != end) {
        (*backend)->read(pad);
        backend++;
    }
}

View file

@ -0,0 +1,88 @@
#include <stdbool.h>
#include <ultra64.h>
#include "controller_api.h"
#ifdef TARGET_WEB
#include "controller_emscripten_keyboard.h"
#endif
#include "../configfile.h"
// Bitmask of everything currently held on the keyboard: the controller button bits plus the
// four stick-direction pseudo-bits 0x10000..0x80000 assigned in keyboard_init.
static int keyboard_buttons_down;
// One row per mapped key: [0] is the scancode, [1] the bitmask it produces.
// keyboard_init fills all 13 rows.
static int keyboard_mapping[13][2];
// Returns the combined bitmask of every mapping row bound to 'scancode'
// (0 if the key is not mapped).
static int keyboard_map_scancode(int scancode) {
    const size_t num_rows = sizeof(keyboard_mapping) / sizeof(keyboard_mapping[0]);
    int buttons = 0;

    for (size_t row = 0; row != num_rows; row++) {
        const int *entry = keyboard_mapping[row];
        buttons |= (entry[0] == scancode) ? entry[1] : 0;
    }
    return buttons;
}
// Marks the buttons bound to 'scancode' as held.
// Returns true if the key is mapped to anything, false otherwise.
bool keyboard_on_key_down(int scancode) {
    const int buttons = keyboard_map_scancode(scancode);

    if (buttons == 0) {
        return false;
    }
    keyboard_buttons_down |= buttons;
    return true;
}
// Marks the buttons bound to 'scancode' as released.
// Returns true if the key is mapped to anything, false otherwise.
bool keyboard_on_key_up(int scancode) {
    const int buttons = keyboard_map_scancode(scancode);

    if (buttons == 0) {
        return false;
    }
    keyboard_buttons_down &= ~buttons;
    return true;
}
// Releases every held button and stick direction at once
// (the web build calls this when the window loses focus).
void keyboard_on_all_keys_up(void) {
keyboard_buttons_down = 0;
}
// Binds 'scancode' to the button bitmask 'mask' in row 'index' of keyboard_mapping.
// The caller is responsible for keeping 'index' within the table.
static void set_keyboard_mapping(int index, int mask, int scancode) {
    int *entry = keyboard_mapping[index];

    entry[0] = scancode;
    entry[1] = mask;
}
// Fills keyboard_mapping from the configured scancodes.
// Exactly 13 rows are written, matching the size of keyboard_mapping; adding a mapping
// here requires growing that array as well.
static void keyboard_init(void) {
int i = 0;
// Stick directions use pseudo-button bits above the 16 controller button bits;
// keyboard_read turns them into stick_x / stick_y values.
set_keyboard_mapping(i++, 0x80000, configKeyStickUp);
set_keyboard_mapping(i++, 0x10000, configKeyStickLeft);
set_keyboard_mapping(i++, 0x40000, configKeyStickDown);
set_keyboard_mapping(i++, 0x20000, configKeyStickRight);
set_keyboard_mapping(i++, A_BUTTON, configKeyA);
set_keyboard_mapping(i++, B_BUTTON, configKeyB);
set_keyboard_mapping(i++, Z_TRIG, configKeyZ);
set_keyboard_mapping(i++, U_CBUTTONS, configKeyCUp);
set_keyboard_mapping(i++, L_CBUTTONS, configKeyCLeft);
set_keyboard_mapping(i++, D_CBUTTONS, configKeyCDown);
set_keyboard_mapping(i++, R_CBUTTONS, configKeyCRight);
set_keyboard_mapping(i++, R_TRIG, configKeyR);
set_keyboard_mapping(i++, START_BUTTON, configKeyStart);
#ifdef TARGET_WEB
// In the browser, key events arrive through Emscripten callbacks
controller_emscripten_keyboard_init();
#endif
}
// Adds the keyboard state to *pad: held buttons are ORed in, and the stick is pushed fully
// in a direction when exactly one key of an opposing pair is held
// (left/right = 0x10000/0x20000, down/up = 0x40000/0x80000). Holding both keys of a pair
// leaves that axis untouched.
static void keyboard_read(OSContPad *pad) {
    const int horizontal = keyboard_buttons_down & 0x30000;
    const int vertical = keyboard_buttons_down & 0xc0000;

    pad->button |= keyboard_buttons_down;

    switch (horizontal) {
        case 0x10000:
            pad->stick_x = -128;
            break;
        case 0x20000:
            pad->stick_x = 127;
            break;
        default:
            break;
    }

    switch (vertical) {
        case 0x40000:
            pad->stick_y = -128;
            break;
        case 0x80000:
            pad->stick_y = 127;
            break;
        default:
            break;
    }
}
// Keyboard backend: init, then read (positional, in the order of struct ControllerAPI)
struct ControllerAPI controller_keyboard = {
keyboard_init,
keyboard_read
};

View file

@ -0,0 +1,19 @@
#ifndef CONTROLLER_KEYBOARD_H
#define CONTROLLER_KEYBOARD_H
#include <stdbool.h>
#include "controller_api.h"
#ifdef __cplusplus
extern "C" {
#endif
// Entry points for the code that receives key events.
// Both key functions return true when the scancode is mapped to a button or stick direction.
bool keyboard_on_key_down(int scancode);
bool keyboard_on_key_up(int scancode);
// Releases everything that is currently held
void keyboard_on_all_keys_up(void);
#ifdef __cplusplus
}
#endif
extern struct ControllerAPI controller_keyboard;
#endif

View file

@ -0,0 +1,29 @@
#include <stdio.h>
#include <ultra64.h>
#include "controller_api.h"
// Open recording ("cont.m64"), or NULL when no recording is being played back
static FILE *fp;
// Opens the recorded input file "cont.m64" from the working directory, if there is one,
// and skips its first 0x400 bytes (presumably the .m64 movie header — TODO confirm).
// When the file is missing or shorter than that, playback stays disabled (fp == NULL).
static void tas_init(void) {
    fp = fopen("cont.m64", "rb");
    if (fp == NULL) {
        return;
    }

    uint8_t buf[0x400];
    if (fread(buf, 1, sizeof(buf), fp) != sizeof(buf)) {
        // Too short to hold any input frames: don't keep the file open
        fclose(fp);
        fp = NULL;
    }
}
// Replaces the pad state with the next recorded frame, if a recording is open.
// Each frame is 4 bytes: two button bytes (high byte first), then stick x and stick y.
// At the end of the recording (or on a read error) the file is closed and *pad is left alone.
static void tas_read(OSContPad *pad) {
    if (fp == NULL) {
        return;
    }

    uint8_t bytes[4];
    if (fread(bytes, 1, sizeof(bytes), fp) != sizeof(bytes)) {
        // No complete frame left: stop playback instead of reading again every frame
        fclose(fp);
        fp = NULL;
        return;
    }

    pad->button = (bytes[0] << 8) | bytes[1];
    pad->stick_x = bytes[2];
    pad->stick_y = bytes[3];
}
// Recorded-input backend: init, then read (positional, in the order of struct ControllerAPI)
struct ControllerAPI controller_recorded_tas = {
tas_init,
tas_read
};

View file

@ -0,0 +1,8 @@
#ifndef CONTROLLER_RECORDED_TAS_H
#define CONTROLLER_RECORDED_TAS_H
#include "controller_api.h"
extern struct ControllerAPI controller_recorded_tas;
#endif

View file

@ -0,0 +1,102 @@
#if !defined(_WIN32) && !defined(_WIN64)
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <SDL2/SDL.h>
#include <ultra64.h>
#include "controller_api.h"
// Radius of the left-stick deadzone, in raw SDL axis units
#define DEADZONE 4960
// Set once SDL's game controller subsystem was initialized successfully
static bool init_ok;
// Currently opened game controller, or NULL if none is open
static SDL_GameController *sdl_cntrl;
// Initializes SDL's game controller subsystem. On failure the error is printed to stderr
// and init_ok is left unset, which turns controller_sdl_read into a no-op.
static void controller_sdl_init(void) {
    const int status = SDL_Init(SDL_INIT_GAMECONTROLLER);

    if (status == 0) {
        init_ok = true;
        return;
    }
    fprintf(stderr, "SDL init error: %s\n", SDL_GetError());
}
// Adds the state of one SDL game controller to *pad.
// Buttons are ORed into pad->button; the stick values are only written when the left stick
// is outside the deadzone, so input from other backends is kept otherwise.
static void controller_sdl_read(OSContPad *pad) {
if (!init_ok) {
return;
}
SDL_GameControllerUpdate();
// Drop a controller that has been unplugged
if (sdl_cntrl != NULL && !SDL_GameControllerGetAttached(sdl_cntrl)) {
SDL_GameControllerClose(sdl_cntrl);
sdl_cntrl = NULL;
}
// No controller open: try to open the first joystick that SDL recognizes as a game controller
if (sdl_cntrl == NULL) {
for (int i = 0; i < SDL_NumJoysticks(); i++) {
if (SDL_IsGameController(i)) {
sdl_cntrl = SDL_GameControllerOpen(i);
if (sdl_cntrl != NULL) {
break;
}
}
}
if (sdl_cntrl == NULL) {
return;
}
}
// Digital buttons: both shoulder buttons and triggers map to Z (left) and R (right)
if (SDL_GameControllerGetButton(sdl_cntrl, SDL_CONTROLLER_BUTTON_START)) pad->button |= START_BUTTON;
if (SDL_GameControllerGetButton(sdl_cntrl, SDL_CONTROLLER_BUTTON_LEFTSHOULDER)) pad->button |= Z_TRIG;
if (SDL_GameControllerGetButton(sdl_cntrl, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER)) pad->button |= R_TRIG;
if (SDL_GameControllerGetButton(sdl_cntrl, SDL_CONTROLLER_BUTTON_A)) pad->button |= A_BUTTON;
if (SDL_GameControllerGetButton(sdl_cntrl, SDL_CONTROLLER_BUTTON_X)) pad->button |= B_BUTTON;
int16_t leftx = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_LEFTX);
int16_t lefty = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_LEFTY);
int16_t rightx = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_RIGHTX);
int16_t righty = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_RIGHTY);
int16_t ltrig = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_TRIGGERLEFT);
int16_t rtrig = SDL_GameControllerGetAxis(sdl_cntrl, SDL_CONTROLLER_AXIS_TRIGGERRIGHT);
#ifdef TARGET_WEB
// Firefox has a bug: https://bugzilla.mozilla.org/show_bug.cgi?id=1606562
// It sets down y to 32768.0f / 32767.0f, which is greater than the allowed 1.0f,
// which SDL then converts to a int16_t by multiplying by 32767.0f, which overflows into -32768.
// Maximum up will hence never become -32768 with the current version of SDL2,
// so this workaround should be safe in compliant browsers.
if (lefty == -32768) {
lefty = 32767;
}
if (righty == -32768) {
righty = 32767;
}
#endif
// The right stick acts as the four C buttons once pushed past +/-0x4000
if (rightx < -0x4000) pad->button |= L_CBUTTONS;
if (rightx > 0x4000) pad->button |= R_CBUTTONS;
if (righty < -0x4000) pad->button |= U_CBUTTONS;
if (righty > 0x4000) pad->button |= D_CBUTTONS;
// Analog triggers count as pressed above 30 * 256
if (ltrig > 30 * 256) pad->button |= Z_TRIG;
if (rtrig > 30 * 256) pad->button |= R_TRIG;
// Circular deadzone: compare squared magnitudes to avoid a square root
uint32_t magnitude_sq = (uint32_t)(leftx * leftx) + (uint32_t)(lefty * lefty);
if (magnitude_sq > (uint32_t)(DEADZONE * DEADZONE)) {
pad->stick_x = leftx / 0x100;
// The y axis is negated; -(-32768) / 0x100 would be 128, so that one value is clamped to 127
int stick_y = -lefty / 0x100;
pad->stick_y = stick_y == 128 ? 127 : stick_y;
}
}
// SDL game controller backend: init, then read (positional, in the order of struct ControllerAPI)
struct ControllerAPI controller_sdl = {
controller_sdl_init,
controller_sdl_read
};
#endif

View file

@ -0,0 +1,8 @@
#ifndef CONTROLLER_SDL_H
#define CONTROLLER_SDL_H
#include "controller_api.h"
extern struct ControllerAPI controller_sdl;
#endif

View file

@ -0,0 +1,51 @@
#ifdef __linux__
#include <stdbool.h>
#include <pthread.h>
#include <ultra64.h>
#include "controller_api.h"
// Implemented in wup.c.
// Thread entry point that runs the USB adapter event loop.
void *wup_start(void *a);
// Copies the button bits and the six axis values of the adapter's first port.
// Returns false when no input is available.
bool wup_get_controller_input(uint16_t *buttons, uint8_t axis[6]);
// Scales an axis offset by 3/2 (integer arithmetic, truncating toward zero) and clamps the
// result to the int8_t range [-128, 127].
static int8_t saturate(int v) {
    const int scaled = v * 3 / 2;

    if (scaled < -128) {
        return -128;
    }
    if (scaled > 127) {
        return 127;
    }
    return scaled;
}
// Starts the background thread that runs wup_start.
// NOTE(review): the result of pthread_create is not checked and the thread handle is
// discarded, so the thread is never joined or detached.
static void controller_wup_init(void) {
pthread_t pid;
pthread_create(&pid, NULL, wup_start, NULL);
}
// Adds the adapter's controller state to *pad, if wup_get_controller_input reports any.
// Buttons are ORed in; the stick values are only written when the stick is off-center.
static void controller_wup_read(OSContPad *pad) {
uint16_t buttons;
uint8_t axis[6];
if (wup_get_controller_input(&buttons, axis)) {
// Button bits as delivered by the adapter
if (buttons & 0x0001) pad->button |= START_BUTTON;
if (buttons & 0x0008) pad->button |= Z_TRIG;
if (buttons & 0x0004) pad->button |= R_TRIG;
if (buttons & 0x0100) pad->button |= A_BUTTON;
if (buttons & 0x0200) pad->button |= B_BUTTON;
if (buttons & 0x1000) pad->button |= L_TRIG;
// axis[2] / axis[3] act as the four C buttons; values are unsigned bytes
// (presumably the C-stick, centered near 0x80 — TODO confirm)
if (axis[2] < 0x40) pad->button |= L_CBUTTONS;
if (axis[2] > 0xC0) pad->button |= R_CBUTTONS;
if (axis[3] < 0x40) pad->button |= D_CBUTTONS;
if (axis[3] > 0xC0) pad->button |= U_CBUTTONS;
// axis[0] / axis[1] are re-centered around 128, then scaled and clamped by saturate()
int8_t stick_x = saturate(axis[0] - 128 - 0);
int8_t stick_y = saturate(axis[1] - 128 - 0);
if (stick_x != 0 || stick_y != 0) {
pad->stick_x = stick_x;
pad->stick_y = stick_y;
}
}
}
// USB adapter backend: init, then read (positional, in the order of struct ControllerAPI)
struct ControllerAPI controller_wup = {
controller_wup_init,
controller_wup_read
};
#endif

View file

@ -0,0 +1,12 @@
#ifndef CONTROLLER_WUP_H
#define CONTROLLER_WUP_H
#ifdef __linux__
#include "controller_api.h"
extern struct ControllerAPI controller_wup;
#endif
#endif

View file

@ -0,0 +1,49 @@
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <xinput.h>
#include <ultra64.h>
#include "controller_api.h"
// Radius of the left-stick deadzone, in raw XInput thumbstick units
#define DEADZONE 4960
// Nothing to set up: xinput_read polls the controllers on every call
static void xinput_init(void) {
}
// Adds the state of the first XInput controller that answers to *pad.
// Buttons are ORed in; the stick values are only written when the left stick is outside
// the deadzone.
static void xinput_read(OSContPad *pad) {
for (int i = 0; i < XUSER_MAX_COUNT; i++) {
XINPUT_STATE state;
memset(&state, 0, sizeof(XINPUT_STATE));
if (XInputGetState(i, &state) == ERROR_SUCCESS) {
XINPUT_GAMEPAD *gp = &state.Gamepad;
// Both the shoulder buttons and the analog triggers map to Z (left) and R (right)
if (gp->wButtons & XINPUT_GAMEPAD_START) pad->button |= START_BUTTON;
if (gp->wButtons & XINPUT_GAMEPAD_LEFT_SHOULDER) pad->button |= Z_TRIG;
if (gp->bLeftTrigger > XINPUT_GAMEPAD_TRIGGER_THRESHOLD) pad->button |= Z_TRIG;
if (gp->wButtons & XINPUT_GAMEPAD_RIGHT_SHOULDER) pad->button |= R_TRIG;
if (gp->bRightTrigger > XINPUT_GAMEPAD_TRIGGER_THRESHOLD) pad->button |= R_TRIG;
if (gp->wButtons & XINPUT_GAMEPAD_A) pad->button |= A_BUTTON;
if (gp->wButtons & XINPUT_GAMEPAD_X) pad->button |= B_BUTTON;
if (gp->wButtons & XINPUT_GAMEPAD_DPAD_LEFT) pad->button |= L_TRIG;
// The right stick acts as the four C buttons once pushed past +/-0x4000
if (gp->sThumbRX < -0x4000) pad->button |= L_CBUTTONS;
if (gp->sThumbRX > 0x4000) pad->button |= R_CBUTTONS;
if (gp->sThumbRY < -0x4000) pad->button |= D_CBUTTONS;
if (gp->sThumbRY > 0x4000) pad->button |= U_CBUTTONS;
// Circular deadzone: compare squared magnitudes to avoid a square root
uint32_t magnitude_sq = (uint32_t)(gp->sThumbLX * gp->sThumbLX) + (uint32_t)(gp->sThumbLY * gp->sThumbLY);
if (magnitude_sq > (uint32_t)(DEADZONE * DEADZONE)) {
pad->stick_x = gp->sThumbLX / 0x100;
pad->stick_y = gp->sThumbLY / 0x100;
}
// Only the first controller that answers is used
break;
}
}
}
// XInput backend: init, then read (positional, in the order of struct ControllerAPI)
struct ControllerAPI controller_xinput = {
xinput_init,
xinput_read
};
#endif

View file

@ -0,0 +1,12 @@
#ifndef CONTROLLER_XINPUT_H
#define CONTROLLER_XINPUT_H
#if defined(_WIN32) || defined(_WIN64)
#include "controller_api.h"
extern struct ControllerAPI controller_xinput;
#endif
#endif

362
src/pc/controller/wup.c Normal file
View file

@ -0,0 +1,362 @@
#if !defined(__MINGW32__) && !defined(TARGET_WEB)
// See LICENSE for license
#define _XOPEN_SOURCE 600
#include <time.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <linux/input.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <errno.h>
//#include <libudev.h>
#include <libusb.h>
#include <pthread.h>
#include "macros.h"
#if (!defined(LIBUSBX_API_VERSION) || LIBUSBX_API_VERSION < 0x01000102) && (!defined(LIBUSB_API_VERSION) || LIBUSB_API_VERSION < 0x01000102)
#error libusb(x) 1.0.16 or higher is required
#endif
#define EP_IN 0x81
#define EP_OUT 0x02
#define STATE_NORMAL 0x10
#define STATE_WAVEBIRD 0x20
// Linux input button code for each of the 16 bits of a port's button word (-1 = no button).
// NOTE(review): nothing in the visible code reads this table; it looks like a leftover
// from the uinput code that has been commented out — confirm before removing.
const int BUTTON_OFFSET_VALUES[16] = {
BTN_START,
BTN_TR2,
BTN_TR,
BTN_TL,
-1,
-1,
-1,
-1,
BTN_SOUTH,
BTN_WEST,
BTN_EAST,
BTN_NORTH,
BTN_DPAD_LEFT,
BTN_DPAD_RIGHT,
BTN_DPAD_DOWN,
BTN_DPAD_UP,
};
// Linux input axis code for each of the six axis bytes of a port.
// NOTE(review): nothing in the visible code reads this table — confirm before removing.
const int AXIS_OFFSET_VALUES[6] = {
ABS_X,
ABS_Y,
ABS_RX,
ABS_RY,
ABS_Z,
ABS_RZ
};
// State of one controller port of an adapter, updated by handle_payload
struct ports
{
bool connected; // a controller is currently plugged into this port
bool extra_power; // bit 0x04 of the port's status byte
unsigned char type; // STATE_NORMAL or STATE_WAVEBIRD while connected
uint16_t buttons; // button bits of the latest report
uint8_t axis[6]; // raw axis bytes of the latest report
};
// One opened USB adapter together with its polling thread; nodes form a singly linked list
struct adapter
{
volatile bool quitting; // asks adapter_thread to leave its loop
struct libusb_device *device; // device this adapter was opened from; used to find it on unplug
struct libusb_device_handle *handle; // open handle used for all transfers
pthread_t thread; // thread running adapter_thread for this adapter
unsigned char rumble[5]; // last rumble command that was sent
struct ports controllers[4]; // the adapter's four controller ports
struct adapter *next; // next adapter in the list, or NULL
};
// NOTE(review): raw_mode and uinput_path are not referenced anywhere in the visible code
static bool raw_mode;
// Ends the event loop in wup_start once set; nothing in the visible code sets it
static volatile int quitting;
// Dummy list head: only its `next` pointer is used, pointing at the first real adapter
static struct adapter adapters;
static const char *uinput_path;
bool wup_get_controller_input(uint16_t *buttons, uint8_t axis[6]) {
struct adapter *adapter = adapters.next;
if (adapter != NULL) {
*buttons = adapter->controllers[0].buttons;
memcpy(axis, adapter->controllers[0].axis, 6);
return true;
} else {
return false;
}
}
// Extracts the controller type from a port status byte.
// Returns STATE_NORMAL or STATE_WAVEBIRD when exactly one of those bits is set, and 0 when
// neither or both are set.
static unsigned char connected_type(unsigned char status)
{
    const unsigned char type = status & (STATE_NORMAL | STATE_WAVEBIRD);

    if (type == STATE_NORMAL || type == STATE_WAVEBIRD)
        return type;
    return 0;
}
// Updates one port from its 9-byte slice of an adapter report:
//   payload[0]    status byte (controller type bits, 0x04 = extra power)
//   payload[1..2] button bits, high byte first
//   payload[3..8] six axis bytes
// 'i' is the zero-based port index and is only used for diagnostics.
// Button and axis data are only stored while a controller is connected.
static void handle_payload(int i, struct ports *port, unsigned char *payload)
{
    unsigned char status = payload[0];
    unsigned char type = connected_type(status);

    // Track plug / unplug transitions
    if (type != 0 && !port->connected)
    {
        port->type = type;
        port->connected = true;
    }
    else if (type == 0 && port->connected)
    {
        port->connected = false;
    }

    if (!port->connected)
        return;

    port->extra_power = ((status & 0x04) != 0);

    if (type != port->type)
    {
        fprintf(stderr, "controller on port %d changed controller type???\n", i+1);
        port->type = type;
    }

    port->buttons = (uint16_t) payload[1] << 8 | (uint16_t) payload[2];

    for (int j = 0; j < 6; j++)
    {
        port->axis[j] = payload[j+3];
    }
}
// Converts a timespec to whole milliseconds.
// tv_sec is widened to 64 bits before the multiplication so the product cannot overflow
// on platforms where time_t is 32 bits wide.
static int64_t to_ms(const struct timespec *t) {
    return (int64_t) t->tv_sec * 1000 + t->tv_nsec / 1000000;
}
// Polling thread of one adapter ('data' is its struct adapter).
// Sends the one-byte start command 0x13, then keeps reading 37-byte reports (one header byte
// 0x21 followed by four 9-byte port slices) and hands each slice to handle_payload.
// A rumble command is sent whenever it differs from the last one sent.
// The loop ends when a->quitting is set or a transfer fails. Always returns NULL.
static void *adapter_thread(void *data)
{
    struct adapter *a = (struct adapter *)data;

    int bytes_transferred;
    unsigned char start_cmd[1] = { 0x13 };

    int transfer_ret = libusb_interrupt_transfer(a->handle, EP_OUT, start_cmd, sizeof(start_cmd), &bytes_transferred, 0);

    if (transfer_ret != 0) {
        fprintf(stderr, "libusb_interrupt_transfer: %s\n", libusb_error_name(transfer_ret));
        return NULL;
    }
    if (bytes_transferred != sizeof(start_cmd)) {
        // sizeof yields a size_t, which is printed with %zu
        fprintf(stderr, "libusb_interrupt_transfer %d/%zu bytes transferred.\n", bytes_transferred, sizeof(start_cmd));
        return NULL;
    }

    while (!a->quitting)
    {
        unsigned char payload[37];
        int size = 0;

        // Timeout 0: block until the adapter delivers a report
        transfer_ret = libusb_interrupt_transfer(a->handle, EP_IN, payload, sizeof(payload), &size, 0);
        if (transfer_ret != 0) {
            fprintf(stderr, "libusb_interrupt_transfer error %d\n", transfer_ret);
            a->quitting = true;
            break;
        }
        // Ignore short reports and reports with an unexpected header byte
        if (size != 37 || payload[0] != 0x21)
            continue;

        unsigned char *controller = &payload[1];

        // Rumble command: 0x11 followed by one on/off byte per port.
        // Rumble is never requested here, so all four bytes stay 0.
        unsigned char rumble[5] = { 0x11, 0, 0, 0, 0 };

        for (int i = 0; i < 4; i++, controller += 9)
        {
            handle_payload(i, &a->controllers[i], controller);
            rumble[i+1] = 0;
        }

        if (memcmp(rumble, a->rumble, sizeof(rumble)) != 0)
        {
            memcpy(a->rumble, rumble, sizeof(rumble));
            transfer_ret = libusb_interrupt_transfer(a->handle, EP_OUT, a->rumble, sizeof(a->rumble), &size, 0);
            if (transfer_ret != 0) {
                fprintf(stderr, "libusb_interrupt_transfer error %d\n", transfer_ret);
                a->quitting = true;
                break;
            }
        }
    }

    return NULL;
}
static void add_adapter(struct libusb_device *dev)
{
struct adapter *a = calloc(1, sizeof(struct adapter));
if (a == NULL)
{
fprintf(stderr, "FATAL: calloc() failed");
exit(-1);
}
a->device = dev;
if (libusb_open(a->device, &a->handle) != 0)
{
fprintf(stderr, "Error opening device 0x%p\n", a->device);
return;
}
if (libusb_kernel_driver_active(a->handle, 0) == 1) {
fprintf(stderr, "Detaching kernel driver\n");
if (libusb_detach_kernel_driver(a->handle, 0)) {
fprintf(stderr, "Error detaching handle 0x%p from kernel\n", a->handle);
return;
}
}
struct adapter *old_head = adapters.next;
adapters.next = a;
a->next = old_head;
pthread_create(&a->thread, NULL, adapter_thread, a);
fprintf(stderr, "adapter 0x%p connected\n", a->device);
}
// Shuts down and frees the adapter that was opened from 'dev', if it is in the list:
// asks its thread to quit, waits for it, closes the USB handle and unlinks the node.
// Does nothing when no adapter matches.
static void remove_adapter(struct libusb_device *dev)
{
    struct adapter **link = &adapters.next;

    for (; *link != NULL; link = &(*link)->next)
    {
        struct adapter *victim = *link;

        if (victim->device != dev)
            continue;

        victim->quitting = true;
        pthread_join(victim->thread, NULL);
        fprintf(stderr, "adapter 0x%p disconnected\n", victim->device);
        libusb_close(victim->handle);
        *link = victim->next;
        free(victim);
        return;
    }
}
// libusb hotplug callback: adds the adapter when it is plugged in and removes it when it is
// unplugged. Returns 0 in every case.
static int LIBUSB_CALL hotplug_callback(struct libusb_context *ctx, struct libusb_device *dev, libusb_hotplug_event event, void *user_data)
{
    (void)ctx;
    (void)user_data;

    switch (event)
    {
        case LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED:
            add_adapter(dev);
            break;
        case LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT:
            remove_adapter(dev);
            break;
        default:
            break;
    }

    return 0;
}
// Thread entry point of the adapter driver.
// Initializes libusb, opens every already connected adapter (USB id 057e:0337), registers for
// hotplug events where supported, then pumps libusb events until `quitting` is set.
// On the way out all adapters are removed and libusb is shut down.
// Returns NULL; if libusb cannot be initialized the function returns right away.
void *wup_start(UNUSED void *a)
{
    int init_ret = libusb_init(NULL);
    if (init_ret != 0) {
        fprintf(stderr, "libusb_init: %s\n", libusb_error_name(init_ret));
        return NULL;
    }

    struct libusb_device **devices;

    // A negative count is an error code; the list is not allocated in that case
    ssize_t count = libusb_get_device_list(NULL, &devices);

    for (ssize_t i = 0; i < count; i++)
    {
        struct libusb_device_descriptor desc;
        // Skip devices whose descriptor cannot be read rather than look at an unset struct
        if (libusb_get_device_descriptor(devices[i], &desc) != 0)
            continue;
        if (desc.idVendor == 0x057e && desc.idProduct == 0x0337)
            add_adapter(devices[i]);
    }

    // The list has to be freed whenever the call succeeded, including for an empty list
    if (count >= 0)
        libusb_free_device_list(devices, 1);

    libusb_hotplug_callback_handle callback;

    int hotplug_capability = libusb_has_capability(LIBUSB_CAP_HAS_HOTPLUG);
    if (hotplug_capability) {
        int hotplug_ret = libusb_hotplug_register_callback(NULL,
            LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED | LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT,
            LIBUSB_HOTPLUG_NO_FLAGS, 0x057e, 0x0337,
            LIBUSB_HOTPLUG_MATCH_ANY, hotplug_callback, NULL, &callback);

        if (hotplug_ret != LIBUSB_SUCCESS) {
            fprintf(stderr, "cannot register hotplug callback, hotplugging not enabled\n");
            hotplug_capability = 0;
        }
    }

    // pump events until shutdown & all helper threads finish cleaning up
    while (!quitting)
        libusb_handle_events_completed(NULL, (int *)&quitting);

    while (adapters.next)
        remove_adapter(adapters.next->device);

    if (hotplug_capability)
        libusb_hotplug_deregister_callback(NULL, callback);

    libusb_exit(NULL);
    return NULL;
}
#endif

19
src/pc/gfx/LICENSE.txt Normal file
View file

@ -0,0 +1,19 @@
Copyright (c) 2020 Emill, MaikelChan
Redistribution and use in source forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form are not allowed.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

33
src/pc/gfx/README.md Normal file
View file

@ -0,0 +1,33 @@
# Nintendo 64 Fast3D renderer
Implementation of a Fast3D renderer for games built originally for the Nintendo 64 platform.
For rendering, OpenGL, Direct3D 11 and Direct3D 12 are supported.
Supported windowing systems are GLX (used on Linux), DXGI (used on Windows) and SDL (generic).
# Usage
See `gfx_pc.h`. You will also need a copy of `PR/gbi.h`, found in libultra.
First call `gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi, const char *game_name, bool start_in_fullscreen)` and supply the desired backends at program start.
Some callbacks can be set on `wapi`. See `gfx_window_manager_api.h` for more info.
Each game main loop iteration should look like this:
```C
gfx_start_frame(); // Handles input events such as keyboard and window events
// perform game logic here
gfx_run(cmds); // submit display list and render a frame
// do more expensive work here, such as play audio
gfx_end_frame(); // this just waits until the frame is shown on the screen (vsync), to provide correct game timing
```
When you are ready to start the main loop, call `wapi->main_loop(one_iteration_func)`.
For the best experience, please change the Vtx and Mtx structures to use floats instead of fixed point arithmetic (`GBI_FLOATS`).
# License
See LICENSE.txt. Redistributions are allowed only in source form, not in binary form.

17258
src/pc/gfx/dxsdk/d3d12.h Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

1009
src/pc/gfx/dxsdk/d3dcommon.h Normal file

File diff suppressed because it is too large Load diff

3440
src/pc/gfx/dxsdk/d3dx12.h Normal file

File diff suppressed because it is too large Load diff

41
src/pc/gfx/gfx_cc.c Normal file
View file

@ -0,0 +1,41 @@
#include "gfx_cc.h"
/*
 * Decodes a packed shader id into a CCFeatures description.
 *
 * Bits 0..11 of shader_id hold four 3-bit input codes for row 0 and bits
 * 12..23 hold four 3-bit codes for row 1; the SHADER_OPT_* bits carry the
 * option flags. Besides unpacking, this derives which textures are sampled,
 * how many SHADER_INPUT_n inputs are needed, and a few structural shortcuts
 * (do_single / do_multiply / do_mix / color_alpha_same).
 */
void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features) {
    /* Unpack the eight 3-bit input codes. */
    for (int slot = 0; slot < 4; slot++) {
        const int shift = slot * 3;
        cc_features->c[0][slot] = (shader_id >> shift) & 7;
        cc_features->c[1][slot] = (shader_id >> (12 + shift)) & 7;
    }

    /* Option flags. */
    cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0;
    cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0;
    cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0;
    cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0;

    /* Scan all slots for texture usage and the highest numbered input. */
    bool uses_tex0 = false;
    bool uses_tex1 = false;
    int highest_input = 0;
    for (int row = 0; row < 2; row++) {
        for (int slot = 0; slot < 4; slot++) {
            const uint8_t code = cc_features->c[row][slot];
            if (code >= SHADER_INPUT_1 && code <= SHADER_INPUT_4) {
                if (code > highest_input) {
                    highest_input = code;
                }
            } else if (code == SHADER_TEXEL0 || code == SHADER_TEXEL0A) {
                uses_tex0 = true;
            } else if (code == SHADER_TEXEL1) {
                uses_tex1 = true;
            }
        }
    }
    cc_features->used_textures[0] = uses_tex0;
    cc_features->used_textures[1] = uses_tex1;
    cc_features->num_inputs = highest_input;

    /* Structural shortcuts, evaluated independently for each row. */
    for (int row = 0; row < 2; row++) {
        const uint8_t *in = cc_features->c[row];
        cc_features->do_single[row] = in[2] == 0;
        cc_features->do_multiply[row] = in[1] == 0 && in[3] == 0;
        cc_features->do_mix[row] = in[1] == in[3];
    }

    /* Both rows identical <=> the two 12-bit halves of the id match. */
    cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff);
}

58
src/pc/gfx/gfx_cc.h Normal file
View file

@ -0,0 +1,58 @@
#ifndef GFX_CC_H
#define GFX_CC_H
#include <stdint.h>
#include <stdbool.h>
// Color-combiner source identifiers.
// NOTE(review): not referenced by gfx_cc.c; presumably used by the renderer
// front end when translating combiner settings into SHADER_* codes - confirm.
enum {
CC_0,
CC_TEXEL0,
CC_TEXEL1,
CC_PRIM,
CC_SHADE,
CC_ENV,
CC_TEXEL0A,
CC_LOD
};
// Input codes stored in each 3-bit slot of a shader id (values 0..7).
// gfx_cc_get_features() uses the numeric value of SHADER_INPUT_1..SHADER_INPUT_4
// directly as the required input count, so their order and values matter.
enum {
SHADER_0,
SHADER_INPUT_1,
SHADER_INPUT_2,
SHADER_INPUT_3,
SHADER_INPUT_4,
SHADER_TEXEL0,
SHADER_TEXEL0A,
SHADER_TEXEL1
};
// Option flags kept in bits 24..27 of a shader id, above the eight 3-bit input
// codes in bits 0..23. gfx_cc_get_features() maps each flag to a CCFeatures bool.
#define SHADER_OPT_ALPHA (1 << 24)
#define SHADER_OPT_FOG (1 << 25)
#define SHADER_OPT_TEXTURE_EDGE (1 << 26)
#define SHADER_OPT_NOISE (1 << 27)
// Decoded form of a shader id, filled in by gfx_cc_get_features().
// NOTE(review): row 0 / row 1 of the two-element arrays presumably mean
// color / alpha (see color_alpha_same) - confirm against the shader generators.
struct CCFeatures {
uint8_t c[2][4]; // SHADER_* input code for each of the 4 slots of each row
bool opt_alpha; // SHADER_OPT_ALPHA set
bool opt_fog; // SHADER_OPT_FOG set
bool opt_texture_edge; // SHADER_OPT_TEXTURE_EDGE set
bool opt_noise; // SHADER_OPT_NOISE set
bool used_textures[2]; // [0]: TEXEL0/TEXEL0A appears in any slot, [1]: TEXEL1 appears
int num_inputs; // highest SHADER_INPUT_n found in any slot (0 if none)
bool do_single[2]; // slot 2 of the row is SHADER_0
bool do_multiply[2]; // slots 1 and 3 of the row are both SHADER_0
bool do_mix[2]; // slots 1 and 3 of the row hold the same code
bool color_alpha_same; // both rows hold identical codes
};
#ifdef __cplusplus
extern "C" {
#endif
void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,728 @@
#ifdef ENABLE_DX11
#include <cstdio>
#include <vector>
#include <cmath>
#include <windows.h>
#include <versionhelpers.h>
#include <wrl/client.h>
#include <dxgi1_3.h>
#include <d3d11.h>
#include <d3dcompiler.h>
#ifndef _LANGUAGE_C
#define _LANGUAGE_C
#endif
#include <PR/gbi.h>
#include "gfx_cc.h"
#include "gfx_window_manager_api.h"
#include "gfx_rendering_api.h"
#include "gfx_direct3d_common.h"
#define DECLARE_GFX_DXGI_FUNCTIONS
#include "gfx_dxgi.h"
#include "gfx_screen_config.h"
#define THREE_POINT_FILTERING 0
#define DEBUG_D3D 0
using namespace Microsoft::WRL; // For ComPtr
namespace {
// CPU-side copy of the per-frame pixel-shader constant buffer. It is bound to
// PS constant-buffer slot 0 in gfx_d3d11_init() and re-uploaded with memcpy in
// gfx_d3d11_start_frame(), so the member order is part of the GPU-visible layout.
struct PerFrameCB {
uint32_t noise_frame; // incremented every frame, reset to 0 once it exceeds 150
float noise_scale_x; // 120 * (current_width / current_height)
float noise_scale_y; // always 120
uint32_t padding; // fourth 4-byte member; brings the struct to 16 bytes
};
// CPU-side copy of the per-draw pixel-shader constant buffer (PS slot 1).
// Only written and uploaded by gfx_d3d11_draw_triangles() when
// THREE_POINT_FILTERING is enabled; one entry per texture unit.
struct PerDrawCB {
struct Texture {
uint32_t width; // texture width as passed to gfx_d3d11_upload_texture()
uint32_t height; // texture height as passed to gfx_d3d11_upload_texture()
uint32_t linear_filtering; // copy of TextureData::linear_filtering (0 or 1)
uint32_t padding; // fourth 4-byte member; brings each entry to 16 bytes
} textures[2];
};
// One entry of d3d.textures; the index into that vector is the texture id
// returned by gfx_d3d11_new_texture().
struct TextureData {
ComPtr<ID3D11ShaderResourceView> resource_view; // created by gfx_d3d11_upload_texture()
ComPtr<ID3D11SamplerState> sampler_state; // recreated by every gfx_d3d11_set_sampler_parameters() call
uint32_t width; // size in texels of the last upload
uint32_t height;
bool linear_filtering; // last linear_filter value passed to set_sampler_parameters
};
// One compiled shader program plus the pipeline objects derived from it.
// Instances live in d3d.shader_program_pool and are handed to the generic
// renderer as opaque `struct ShaderProgram *` pointers.
struct ShaderProgramD3D11 {
ComPtr<ID3D11VertexShader> vertex_shader;
ComPtr<ID3D11PixelShader> pixel_shader;
ComPtr<ID3D11InputLayout> input_layout; // matches the vertex layout built in create_and_load_new_shader
ComPtr<ID3D11BlendState> blend_state; // alpha blending enabled iff the shader has opt_alpha
uint32_t shader_id; // key used by gfx_d3d11_lookup_shader()
uint8_t num_inputs; // copied from CCFeatures::num_inputs
uint8_t num_floats; // floats per vertex; the vertex stride is num_floats * sizeof(float)
bool used_textures[2]; // copied from CCFeatures::used_textures
};
// All state of the Direct3D 11 backend: dynamically loaded entry points,
// device objects, resource pools, the state requested by the generic renderer
// ("current") and the state last sent to the device context ("last").
static struct {
// Dynamically loaded DLLs and the entry points resolved from them (see gfx_d3d11_init)
HMODULE d3d11_module;
PFN_D3D11_CREATE_DEVICE D3D11CreateDevice;
HMODULE d3dcompiler_module;
pD3DCompile D3DCompile;
D3D_FEATURE_LEVEL feature_level;
ComPtr<ID3D11Device> device;
ComPtr<IDXGISwapChain1> swap_chain;
ComPtr<ID3D11DeviceContext> context;
ComPtr<ID3D11RenderTargetView> backbuffer_view;
ComPtr<ID3D11DepthStencilView> depth_stencil_view;
ComPtr<ID3D11RasterizerState> rasterizer_state;
ComPtr<ID3D11DepthStencilState> depth_stencil_state;
ComPtr<ID3D11Buffer> vertex_buffer;
ComPtr<ID3D11Buffer> per_frame_cb;
ComPtr<ID3D11Buffer> per_draw_cb;
#if DEBUG_D3D
ComPtr<ID3D11Debug> debug;
#endif
DXGI_SAMPLE_DESC sample_description;
PerFrameCB per_frame_cb_data;
PerDrawCB per_draw_cb_data;
// Fixed-capacity pool; shader_program_pool_size counts the used entries
struct ShaderProgramD3D11 shader_program_pool[64];
uint8_t shader_program_pool_size;
// Indexed by texture id
std::vector<struct TextureData> textures;
// Tile and per-tile texture ids chosen by gfx_d3d11_select_texture()
int current_tile;
uint32_t current_texture_ids[2];
// Current state
struct ShaderProgramD3D11 *shader_program;
uint32_t current_width, current_height;
int8_t depth_test;
int8_t depth_mask;
int8_t zmode_decal;
// Previous states (to prevent setting states needlessly)
// The -1 / nullptr / 0 / UNDEFINED initial values never match a real state,
// which forces the first draw call to program everything.
struct ShaderProgramD3D11 *last_shader_program = nullptr;
uint32_t last_vertex_buffer_stride = 0;
ComPtr<ID3D11BlendState> last_blend_state = nullptr;
ComPtr<ID3D11ShaderResourceView> last_resource_views[2] = { nullptr, nullptr };
ComPtr<ID3D11SamplerState> last_sampler_states[2] = { nullptr, nullptr };
int8_t last_depth_test = -1;
int8_t last_depth_mask = -1;
int8_t last_zmode_decal = -1;
D3D_PRIMITIVE_TOPOLOGY last_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
} d3d;
// NOTE(review): none of these three variables is referenced by any function
// in this file as shown - candidates for removal, confirm before deleting.
static LARGE_INTEGER last_time, accumulated_time, frequency;
// (Re)creates the back-buffer render target view and a depth buffer of the
// same size as the swap chain, and records that size in d3d.current_width /
// d3d.current_height.
//
// is_resize: true when called after the window size changed; the old views
// are released and the swap chain buffers are resized first. false for the
// initial creation.
static void create_render_target_views(bool is_resize) {
DXGI_SWAP_CHAIN_DESC1 desc1;
if (is_resize) {
// Release previous stuff (if any)
d3d.backbuffer_view.Reset();
d3d.depth_stencil_view.Reset();
// Resize swap chain buffers
ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1));
// Zero buffer count / width / height and DXGI_FORMAT_UNKNOWN are passed here;
// only the existing flags are re-supplied.
ThrowIfFailed(d3d.swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, desc1.Flags),
gfx_dxgi_get_h_wnd(), "Failed to resize IDXGISwapChain buffers.");
}
// Get new size
ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1));
// Create back buffer
ComPtr<ID3D11Texture2D> backbuffer_texture;
ThrowIfFailed(d3d.swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID *) backbuffer_texture.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to get backbuffer from IDXGISwapChain.");
ThrowIfFailed(d3d.device->CreateRenderTargetView(backbuffer_texture.Get(), nullptr, d3d.backbuffer_view.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to create render target view.");
// Create depth buffer
D3D11_TEXTURE2D_DESC depth_stencil_texture_desc;
ZeroMemory(&depth_stencil_texture_desc, sizeof(D3D11_TEXTURE2D_DESC));
depth_stencil_texture_desc.Width = desc1.Width;
depth_stencil_texture_desc.Height = desc1.Height;
depth_stencil_texture_desc.MipLevels = 1;
depth_stencil_texture_desc.ArraySize = 1;
// 32-bit float depth on feature level 10.0 and above, D24S8 on the 9.x levels
depth_stencil_texture_desc.Format = d3d.feature_level >= D3D_FEATURE_LEVEL_10_0 ?
DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_D24_UNORM_S8_UINT;
depth_stencil_texture_desc.SampleDesc = d3d.sample_description;
depth_stencil_texture_desc.Usage = D3D11_USAGE_DEFAULT;
depth_stencil_texture_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
depth_stencil_texture_desc.CPUAccessFlags = 0;
depth_stencil_texture_desc.MiscFlags = 0;
// The texture itself is only held locally; the view created from it is what is stored.
ComPtr<ID3D11Texture2D> depth_stencil_texture;
ThrowIfFailed(d3d.device->CreateTexture2D(&depth_stencil_texture_desc, nullptr, depth_stencil_texture.GetAddressOf()));
ThrowIfFailed(d3d.device->CreateDepthStencilView(depth_stencil_texture.Get(), nullptr, d3d.depth_stencil_view.GetAddressOf()));
// Save resolution
d3d.current_width = desc1.Width;
d3d.current_height = desc1.Height;
}
// One-time backend setup: loads d3d11.dll and D3DCompiler_47.dll at run time,
// creates the device, swap chain and render target views, and allocates the
// dynamic vertex buffer and the two pixel-shader constant buffers.
//
// NOTE(review): the two GetProcAddress results are stored without a null
// check and are called later (D3D11CreateDevice below, D3DCompile when a
// shader is created).
static void gfx_d3d11_init(void) {
// Load d3d11.dll
d3d.d3d11_module = LoadLibraryW(L"d3d11.dll");
if (d3d.d3d11_module == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "d3d11.dll not found");
}
d3d.D3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d.d3d11_module, "D3D11CreateDevice");
// Load D3DCompiler_47.dll
d3d.d3dcompiler_module = LoadLibraryW(L"D3DCompiler_47.dll");
if (d3d.d3dcompiler_module == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "D3DCompiler_47.dll not found");
}
d3d.D3DCompile = (pD3DCompile)GetProcAddress(d3d.d3dcompiler_module, "D3DCompile");
// Create D3D11 device
// The callback is invoked with a candidate adapter. With test_only set, no
// device or context is created and only success/failure is reported; otherwise
// the device and context are stored in `d3d` and a failure is raised.
gfx_dxgi_create_factory_and_device(DEBUG_D3D, 11, [](IDXGIAdapter1 *adapter, bool test_only) {
#if DEBUG_D3D
UINT device_creation_flags = D3D11_CREATE_DEVICE_DEBUG;
#else
UINT device_creation_flags = 0;
#endif
// Listed from most to least capable
D3D_FEATURE_LEVEL FeatureLevels[] = {
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_9_3,
D3D_FEATURE_LEVEL_9_2,
D3D_FEATURE_LEVEL_9_1
};
HRESULT res = d3d.D3D11CreateDevice(
adapter,
D3D_DRIVER_TYPE_UNKNOWN, // since we use a specific adapter
nullptr,
device_creation_flags,
FeatureLevels,
ARRAYSIZE(FeatureLevels),
D3D11_SDK_VERSION,
test_only ? nullptr : d3d.device.GetAddressOf(),
&d3d.feature_level,
test_only ? nullptr : d3d.context.GetAddressOf());
if (test_only) {
return SUCCEEDED(res);
} else {
ThrowIfFailed(res, gfx_dxgi_get_h_wnd(), "Failed to create D3D11 device.");
return true;
}
});
// Sample description to be used in back buffer and depth buffer
d3d.sample_description.Count = 1;
d3d.sample_description.Quality = 0;
// Create the swap chain
d3d.swap_chain = gfx_dxgi_create_swap_chain(d3d.device.Get());
// Create D3D Debug device if in debug mode
#if DEBUG_D3D
ThrowIfFailed(d3d.device->QueryInterface(__uuidof(ID3D11Debug), (void **) d3d.debug.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to get ID3D11Debug device.");
#endif
// Create views
create_render_target_views(false);
// Create main vertex buffer
D3D11_BUFFER_DESC vertex_buffer_desc;
ZeroMemory(&vertex_buffer_desc, sizeof(D3D11_BUFFER_DESC));
// Dynamic + CPU write access: the buffer is refilled with Map/WRITE_DISCARD on every draw call
vertex_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
vertex_buffer_desc.ByteWidth = 256 * 26 * 3 * sizeof(float); // Same as buf_vbo size in gfx_pc
vertex_buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertex_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
vertex_buffer_desc.MiscFlags = 0;
ThrowIfFailed(d3d.device->CreateBuffer(&vertex_buffer_desc, nullptr, d3d.vertex_buffer.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to create vertex buffer.");
// Create per-frame constant buffer
D3D11_BUFFER_DESC constant_buffer_desc;
ZeroMemory(&constant_buffer_desc, sizeof(D3D11_BUFFER_DESC));
constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
constant_buffer_desc.ByteWidth = sizeof(PerFrameCB);
constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
constant_buffer_desc.MiscFlags = 0;
ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, nullptr, d3d.per_frame_cb.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to create per-frame constant buffer.");
// Bound once here to pixel-shader slot 0
d3d.context->PSSetConstantBuffers(0, 1, d3d.per_frame_cb.GetAddressOf());
// Create per-draw constant buffer
// Reuses the descriptor above; every field that matters is assigned again
constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
constant_buffer_desc.ByteWidth = sizeof(PerDrawCB);
constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
constant_buffer_desc.MiscFlags = 0;
ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, nullptr, d3d.per_draw_cb.GetAddressOf()),
gfx_dxgi_get_h_wnd(), "Failed to create per-draw constant buffer.");
// Bound once here to pixel-shader slot 1
d3d.context->PSSetConstantBuffers(1, 1, d3d.per_draw_cb.GetAddressOf());
}
// Tells the generic renderer that this backend uses a 0..1 depth range
// (always true for this backend).
static bool gfx_d3d11_z_is_from_0_to_1(void) {
return true;
}
// Intentionally empty: this backend has nothing to undo when a shader is unloaded.
static void gfx_d3d11_unload_shader(struct ShaderProgram *old_prg) {
}
// Makes `new_prg` the current program. Only the pointer is recorded here; the
// device context is updated lazily in gfx_d3d11_draw_triangles().
static void gfx_d3d11_load_shader(struct ShaderProgram *new_prg) {
d3d.shader_program = (struct ShaderProgramD3D11 *)new_prg;
}
static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shader_id) {
CCFeatures cc_features;
gfx_cc_get_features(shader_id, &cc_features);
char buf[4096];
size_t len, num_floats;
gfx_direct3d_common_build_shader(buf, len, num_floats, cc_features, false, THREE_POINT_FILTERING);
ComPtr<ID3DBlob> vs, ps;
ComPtr<ID3DBlob> error_blob;
#if DEBUG_D3D
UINT compile_flags = D3DCOMPILE_DEBUG;
#else
UINT compile_flags = D3DCOMPILE_OPTIMIZATION_LEVEL2;
#endif
HRESULT hr = d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_4_0_level_9_1", compile_flags, 0, vs.GetAddressOf(), error_blob.GetAddressOf());
if (FAILED(hr)) {
MessageBox(gfx_dxgi_get_h_wnd(), (char *) error_blob->GetBufferPointer(), "Error", MB_OK | MB_ICONERROR);
throw hr;
}
hr = d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_4_0_level_9_1", compile_flags, 0, ps.GetAddressOf(), error_blob.GetAddressOf());
if (FAILED(hr)) {
MessageBox(gfx_dxgi_get_h_wnd(), (char *) error_blob->GetBufferPointer(), "Error", MB_OK | MB_ICONERROR);
throw hr;
}
struct ShaderProgramD3D11 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++];
ThrowIfFailed(d3d.device->CreateVertexShader(vs->GetBufferPointer(), vs->GetBufferSize(), nullptr, prg->vertex_shader.GetAddressOf()));
ThrowIfFailed(d3d.device->CreatePixelShader(ps->GetBufferPointer(), ps->GetBufferSize(), nullptr, prg->pixel_shader.GetAddressOf()));
// Input Layout
D3D11_INPUT_ELEMENT_DESC ied[7];
uint8_t ied_index = 0;
ied[ied_index++] = { "POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
ied[ied_index++] = { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
if (cc_features.opt_fog) {
ied[ied_index++] = { "FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
for (unsigned int i = 0; i < cc_features.num_inputs; i++) {
DXGI_FORMAT format = cc_features.opt_alpha ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
ied[ied_index++] = { "INPUT", i, format, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
ThrowIfFailed(d3d.device->CreateInputLayout(ied, ied_index, vs->GetBufferPointer(), vs->GetBufferSize(), prg->input_layout.GetAddressOf()));
// Blend state
D3D11_BLEND_DESC blend_desc;
ZeroMemory(&blend_desc, sizeof(D3D11_BLEND_DESC));
if (cc_features.opt_alpha) {
blend_desc.RenderTarget[0].BlendEnable = true;
blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
blend_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
} else {
blend_desc.RenderTarget[0].BlendEnable = false;
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
}
ThrowIfFailed(d3d.device->CreateBlendState(&blend_desc, prg->blend_state.GetAddressOf()));
// Save some values
prg->shader_id = shader_id;
prg->num_inputs = cc_features.num_inputs;
prg->num_floats = num_floats;
prg->used_textures[0] = cc_features.used_textures[0];
prg->used_textures[1] = cc_features.used_textures[1];
return (struct ShaderProgram *)(d3d.shader_program = prg);
}
static struct ShaderProgram *gfx_d3d11_lookup_shader(uint32_t shader_id) {
for (size_t i = 0; i < d3d.shader_program_pool_size; i++) {
if (d3d.shader_program_pool[i].shader_id == shader_id) {
return (struct ShaderProgram *)&d3d.shader_program_pool[i];
}
}
return nullptr;
}
static void gfx_d3d11_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) {
struct ShaderProgramD3D11 *p = (struct ShaderProgramD3D11 *)prg;
*num_inputs = p->num_inputs;
used_textures[0] = p->used_textures[0];
used_textures[1] = p->used_textures[1];
}
// Appends an empty TextureData slot and returns its index as the texture id.
static uint32_t gfx_d3d11_new_texture(void) {
    const size_t new_id = d3d.textures.size();
    d3d.textures.resize(new_id + 1);
    return (uint32_t)new_id;
}
// Records `texture_id` as the texture for `tile` and makes `tile` the target
// of the next gfx_d3d11_upload_texture() call.
// `tile` indexes a two-element array and is not range-checked here.
static void gfx_d3d11_select_texture(int tile, uint32_t texture_id) {
d3d.current_tile = tile;
d3d.current_texture_ids[tile] = texture_id;
}
// Translates G_TX_* clamp/mirror bits into a D3D11 address mode.
// Clamp takes priority over mirror; with neither bit set the texture wraps.
static D3D11_TEXTURE_ADDRESS_MODE gfx_cm_to_d3d11(uint32_t val) {
    D3D11_TEXTURE_ADDRESS_MODE mode = D3D11_TEXTURE_ADDRESS_WRAP;
    if (val & G_TX_CLAMP) {
        mode = D3D11_TEXTURE_ADDRESS_CLAMP;
    } else if (val & G_TX_MIRROR) {
        mode = D3D11_TEXTURE_ADDRESS_MIRROR;
    }
    return mode;
}
static void gfx_d3d11_upload_texture(const uint8_t *rgba32_buf, int width, int height) {
// Create texture
D3D11_TEXTURE2D_DESC texture_desc;
ZeroMemory(&texture_desc, sizeof(D3D11_TEXTURE2D_DESC));
texture_desc.Width = width;
texture_desc.Height = height;
texture_desc.Usage = D3D11_USAGE_IMMUTABLE;
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texture_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
texture_desc.CPUAccessFlags = 0;
texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS ?
texture_desc.ArraySize = 1;
texture_desc.MipLevels = 1;
texture_desc.SampleDesc.Count = 1;
texture_desc.SampleDesc.Quality = 0;
D3D11_SUBRESOURCE_DATA resource_data;
resource_data.pSysMem = rgba32_buf;
resource_data.SysMemPitch = width * 4;
resource_data.SysMemSlicePitch = resource_data.SysMemPitch * height;
ComPtr<ID3D11Texture2D> texture;
ThrowIfFailed(d3d.device->CreateTexture2D(&texture_desc, &resource_data, texture.GetAddressOf()));
// Create shader resource view from texture
D3D11_SHADER_RESOURCE_VIEW_DESC resource_view_desc;
ZeroMemory(&resource_view_desc, sizeof(D3D11_SHADER_RESOURCE_VIEW_DESC));
resource_view_desc.Format = texture_desc.Format;
resource_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
resource_view_desc.Texture2D.MostDetailedMip = 0;
resource_view_desc.Texture2D.MipLevels = -1;
TextureData *texture_data = &d3d.textures[d3d.current_texture_ids[d3d.current_tile]];
texture_data->width = width;
texture_data->height = height;
if (texture_data->resource_view.Get() != nullptr) {
// Free the previous texture in this slot
texture_data->resource_view.Reset();
}
ThrowIfFailed(d3d.device->CreateShaderResourceView(texture.Get(), &resource_view_desc, texture_data->resource_view.GetAddressOf()));
}
static void gfx_d3d11_set_sampler_parameters(int tile, bool linear_filter, uint32_t cms, uint32_t cmt) {
D3D11_SAMPLER_DESC sampler_desc;
ZeroMemory(&sampler_desc, sizeof(D3D11_SAMPLER_DESC));
#if THREE_POINT_FILTERING
sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
#else
sampler_desc.Filter = linear_filter ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_POINT;
#endif
sampler_desc.AddressU = gfx_cm_to_d3d11(cms);
sampler_desc.AddressV = gfx_cm_to_d3d11(cmt);
sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
sampler_desc.MinLOD = 0;
sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;
TextureData *texture_data = &d3d.textures[d3d.current_texture_ids[tile]];
texture_data->linear_filtering = linear_filter;
// This function is called twice per texture, the first one only to set default values.
// Maybe that could be skipped? Anyway, make sure to release the first default sampler
// state before setting the actual one.
texture_data->sampler_state.Reset();
ThrowIfFailed(d3d.device->CreateSamplerState(&sampler_desc, texture_data->sampler_state.GetAddressOf()));
}
// The three setters below only record the requested state; the matching D3D11
// state objects are (re)created lazily in gfx_d3d11_draw_triangles() when the
// recorded value differs from the last one applied.

// Enables or disables depth testing for subsequent draws.
static void gfx_d3d11_set_depth_test(bool depth_test) {
d3d.depth_test = depth_test;
}
// Enables or disables depth writes for subsequent draws.
static void gfx_d3d11_set_depth_mask(bool depth_mask) {
d3d.depth_mask = depth_mask;
}
// Enables or disables decal mode (applied as a slope-scaled depth bias).
static void gfx_d3d11_set_zmode_decal(bool zmode_decal) {
d3d.zmode_decal = zmode_decal;
}
static void gfx_d3d11_set_viewport(int x, int y, int width, int height) {
D3D11_VIEWPORT viewport;
viewport.TopLeftX = x;
viewport.TopLeftY = d3d.current_height - y - height;
viewport.Width = width;
viewport.Height = height;
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f;
d3d.context->RSSetViewports(1, &viewport);
}
static void gfx_d3d11_set_scissor(int x, int y, int width, int height) {
D3D11_RECT rect;
rect.left = x;
rect.top = d3d.current_height - y - height;
rect.right = x + width;
rect.bottom = d3d.current_height - y;
d3d.context->RSSetScissorRects(1, &rect);
}
// Intentionally a no-op: blending is decided per shader program when its blend
// state is created, so the argument is ignored.
static void gfx_d3d11_set_use_alpha(bool use_alpha) {
// Already part of the pipeline state from shader info
}
// Draws `buf_vbo_num_tris` triangles from `buf_vbo` (`buf_vbo_len` floats)
// with the current shader program, textures and depth/decal settings.
//
// Device state is only touched when it differs from the "last_*" values cached
// in `d3d`. The vertex data is copied into the dynamic vertex buffer on every
// call.
//
// NOTE(review): none of the Map() results are checked; `ms` is zeroed first,
// so a failed Map would lead to a memcpy into a null pointer.
static void gfx_d3d11_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
// Depth test / depth write: rebuild the depth-stencil state when either changed
if (d3d.last_depth_test != d3d.depth_test || d3d.last_depth_mask != d3d.depth_mask) {
d3d.last_depth_test = d3d.depth_test;
d3d.last_depth_mask = d3d.depth_mask;
d3d.depth_stencil_state.Reset();
D3D11_DEPTH_STENCIL_DESC depth_stencil_desc;
ZeroMemory(&depth_stencil_desc, sizeof(D3D11_DEPTH_STENCIL_DESC));
depth_stencil_desc.DepthEnable = d3d.depth_test;
depth_stencil_desc.DepthWriteMask = d3d.depth_mask ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
depth_stencil_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
depth_stencil_desc.StencilEnable = false;
ThrowIfFailed(d3d.device->CreateDepthStencilState(&depth_stencil_desc, d3d.depth_stencil_state.GetAddressOf()));
d3d.context->OMSetDepthStencilState(d3d.depth_stencil_state.Get(), 0);
}
// Decal mode: rebuild the rasterizer state; decal is a slope-scaled depth bias of -2
if (d3d.last_zmode_decal != d3d.zmode_decal) {
d3d.last_zmode_decal = d3d.zmode_decal;
d3d.rasterizer_state.Reset();
D3D11_RASTERIZER_DESC rasterizer_desc;
ZeroMemory(&rasterizer_desc, sizeof(D3D11_RASTERIZER_DESC));
rasterizer_desc.FillMode = D3D11_FILL_SOLID;
rasterizer_desc.CullMode = D3D11_CULL_NONE;
rasterizer_desc.FrontCounterClockwise = true;
rasterizer_desc.DepthBias = 0;
rasterizer_desc.SlopeScaledDepthBias = d3d.zmode_decal ? -2.0f : 0.0f;
rasterizer_desc.DepthBiasClamp = 0.0f;
rasterizer_desc.DepthClipEnable = true;
rasterizer_desc.ScissorEnable = true;
rasterizer_desc.MultisampleEnable = false;
rasterizer_desc.AntialiasedLineEnable = false;
ThrowIfFailed(d3d.device->CreateRasterizerState(&rasterizer_desc, d3d.rasterizer_state.GetAddressOf()));
d3d.context->RSSetState(d3d.rasterizer_state.Get());
}
// Textures and samplers, for each unit the current shader samples.
// textures_changed can only become true when THREE_POINT_FILTERING is enabled.
// NOTE(review): the sampler comparison sits inside the "resource view changed"
// branch, so a sampler recreated by gfx_d3d11_set_sampler_parameters() for a
// texture whose view is already bound is not re-bound here - confirm this is intended.
bool textures_changed = false;
for (int i = 0; i < 2; i++) {
if (d3d.shader_program->used_textures[i]) {
if (d3d.last_resource_views[i].Get() != d3d.textures[d3d.current_texture_ids[i]].resource_view.Get()) {
d3d.last_resource_views[i] = d3d.textures[d3d.current_texture_ids[i]].resource_view.Get();
d3d.context->PSSetShaderResources(i, 1, d3d.textures[d3d.current_texture_ids[i]].resource_view.GetAddressOf());
#if THREE_POINT_FILTERING
d3d.per_draw_cb_data.textures[i].width = d3d.textures[d3d.current_texture_ids[i]].width;
d3d.per_draw_cb_data.textures[i].height = d3d.textures[d3d.current_texture_ids[i]].height;
d3d.per_draw_cb_data.textures[i].linear_filtering = d3d.textures[d3d.current_texture_ids[i]].linear_filtering;
textures_changed = true;
#endif
if (d3d.last_sampler_states[i].Get() != d3d.textures[d3d.current_texture_ids[i]].sampler_state.Get()) {
d3d.last_sampler_states[i] = d3d.textures[d3d.current_texture_ids[i]].sampler_state.Get();
d3d.context->PSSetSamplers(i, 1, d3d.textures[d3d.current_texture_ids[i]].sampler_state.GetAddressOf());
}
}
}
}
// Set per-draw constant buffer
if (textures_changed) {
D3D11_MAPPED_SUBRESOURCE ms;
ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
d3d.context->Map(d3d.per_draw_cb.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
memcpy(ms.pData, &d3d.per_draw_cb_data, sizeof(PerDrawCB));
d3d.context->Unmap(d3d.per_draw_cb.Get(), 0);
}
// Set vertex buffer data
// The copy size is not checked against the vertex buffer's ByteWidth here
D3D11_MAPPED_SUBRESOURCE ms;
ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
d3d.context->Map(d3d.vertex_buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
memcpy(ms.pData, buf_vbo, buf_vbo_len * sizeof(float));
d3d.context->Unmap(d3d.vertex_buffer.Get(), 0);
// The stride depends on the shader's vertex layout; re-bind only when it changes
uint32_t stride = d3d.shader_program->num_floats * sizeof(float);
uint32_t offset = 0;
if (d3d.last_vertex_buffer_stride != stride) {
d3d.last_vertex_buffer_stride = stride;
d3d.context->IASetVertexBuffers(0, 1, d3d.vertex_buffer.GetAddressOf(), &stride, &offset);
}
// Shader program: input layout, both shader stages and (if different) the blend state
if (d3d.last_shader_program != d3d.shader_program) {
d3d.last_shader_program = d3d.shader_program;
d3d.context->IASetInputLayout(d3d.shader_program->input_layout.Get());
d3d.context->VSSetShader(d3d.shader_program->vertex_shader.Get(), 0, 0);
d3d.context->PSSetShader(d3d.shader_program->pixel_shader.Get(), 0, 0);
if (d3d.last_blend_state.Get() != d3d.shader_program->blend_state.Get()) {
d3d.last_blend_state = d3d.shader_program->blend_state.Get();
d3d.context->OMSetBlendState(d3d.shader_program->blend_state.Get(), 0, 0xFFFFFFFF);
}
}
// The topology is always a triangle list; set it once
if (d3d.last_primitive_topology != D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) {
d3d.last_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
d3d.context->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
}
// Three vertices per triangle, starting at the beginning of the buffer
d3d.context->Draw(buf_vbo_num_tris * 3, 0);
}
// Resize hook: resizes the swap chain buffers and recreates the back-buffer
// and depth views at the new size.
static void gfx_d3d11_on_resize(void) {
create_render_target_views(true);
}
static void gfx_d3d11_start_frame(void) {
// Set render targets
d3d.context->OMSetRenderTargets(1, d3d.backbuffer_view.GetAddressOf(), d3d.depth_stencil_view.Get());
// Clear render targets
const float clearColor[] = { 0.0f, 0.0f, 0.0f, 1.0f };
d3d.context->ClearRenderTargetView(d3d.backbuffer_view.Get(), clearColor);
d3d.context->ClearDepthStencilView(d3d.depth_stencil_view.Get(), D3D11_CLEAR_DEPTH, 1.0f, 0);
// Set per-frame constant buffer
d3d.per_frame_cb_data.noise_frame++;
if (d3d.per_frame_cb_data.noise_frame > 150) {
// No high values, as noise starts to look ugly
d3d.per_frame_cb_data.noise_frame = 0;
}
float aspect_ratio = (float) d3d.current_width / (float) d3d.current_height;
d3d.per_frame_cb_data.noise_scale_x = 120 * aspect_ratio; // 120 = N64 height resolution (240) / 2
d3d.per_frame_cb_data.noise_scale_y = 120;
D3D11_MAPPED_SUBRESOURCE ms;
ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
d3d.context->Map(d3d.per_frame_cb.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
memcpy(ms.pData, &d3d.per_frame_cb_data, sizeof(PerFrameCB));
d3d.context->Unmap(d3d.per_frame_cb.Get(), 0);
}
// Intentionally empty: this backend does no work at the end of a frame.
static void gfx_d3d11_end_frame(void) {
}
// Intentionally empty: this backend does no work when rendering is finished.
static void gfx_d3d11_finish_render(void) {
}
} // namespace
// Function table exposing this backend to the generic renderer.
// The initializer is positional, so the entries must stay in the same order as
// the members of struct GfxRenderingAPI (declared in gfx_rendering_api.h, not
// shown here).
struct GfxRenderingAPI gfx_direct3d11_api = {
gfx_d3d11_z_is_from_0_to_1,
gfx_d3d11_unload_shader,
gfx_d3d11_load_shader,
gfx_d3d11_create_and_load_new_shader,
gfx_d3d11_lookup_shader,
gfx_d3d11_shader_get_info,
gfx_d3d11_new_texture,
gfx_d3d11_select_texture,
gfx_d3d11_upload_texture,
gfx_d3d11_set_sampler_parameters,
gfx_d3d11_set_depth_test,
gfx_d3d11_set_depth_mask,
gfx_d3d11_set_zmode_decal,
gfx_d3d11_set_viewport,
gfx_d3d11_set_scissor,
gfx_d3d11_set_use_alpha,
gfx_d3d11_draw_triangles,
gfx_d3d11_init,
gfx_d3d11_on_resize,
gfx_d3d11_start_frame,
gfx_d3d11_end_frame,
gfx_d3d11_finish_render
};
#endif

View file

@ -0,0 +1,12 @@
#ifdef ENABLE_DX11
#ifndef GFX_DIRECT3D11_H
#define GFX_DIRECT3D11_H
#include "gfx_rendering_api.h"
extern struct GfxRenderingAPI gfx_direct3d11_api;
#endif
#endif

View file

@ -0,0 +1,966 @@
#ifdef ENABLE_DX12
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <list>
#include <map>
#include <set>
#include <vector>
#include <windows.h>
#include <wrl/client.h>
// This is needed when compiling with MinGW, used in d3d12.h
#define __in_ecount_opt(size)
#include <dxgi.h>
#include <dxgi1_4.h>
#include "dxsdk/d3d12.h"
#include <d3dcompiler.h>
#include "gfx_direct3d12_guids.h"
#include "dxsdk/d3dx12.h"
#ifndef _LANGUAGE_C
#define _LANGUAGE_C
#endif
#include <PR/gbi.h>
#define DECLARE_GFX_DXGI_FUNCTIONS
#include "gfx_dxgi.h"
#include "gfx_cc.h"
#include "gfx_window_manager_api.h"
#include "gfx_rendering_api.h"
#include "gfx_direct3d_common.h"
#include "gfx_screen_config.h"
#define DEBUG_D3D 0
using namespace Microsoft::WRL; // For ComPtr
namespace {
// One compiled shader program of the Direct3D 12 backend. The shaders are kept
// as compiled blobs rather than device objects.
// NOTE(review): presumably the blobs are consumed when pipeline states are
// built (see PipelineDesc) - the code doing so is outside this excerpt.
struct ShaderProgramD3D12 {
uint32_t shader_id;
uint8_t num_inputs;
bool used_textures[2];
uint8_t num_floats;
uint8_t num_attribs;
ComPtr<ID3DBlob> vertex_shader;
ComPtr<ID3DBlob> pixel_shader;
ComPtr<ID3D12RootSignature> root_signature;
};
// Key identifying one pipeline state object (used as the key type of
// d3d.pipeline_states).
//
// Equality and ordering compare the raw bytes of the struct with memcmp, so
// every byte takes part in the comparison, including `_padding`.
// NOTE(review): `_padding` presumably exists so the struct has no
// compiler-inserted padding bytes; code constructing a PipelineDesc must then
// give `_padding` a fixed value - confirm at the construction sites.
struct PipelineDesc {
uint32_t shader_id;
bool depth_test;
bool depth_mask;
bool zmode_decal;
bool _padding;
bool operator==(const PipelineDesc& o) const {
return memcmp(this, &o, sizeof(*this)) == 0;
}
// Byte-wise ordering: consistent, but not a numeric ordering of the fields
bool operator<(const PipelineDesc& o) const {
return memcmp(this, &o, sizeof(*this)) < 0;
}
};
// A D3D12 heap that textures are placed into, plus a list of free entries.
// NOTE(review): free_list presumably holds free slot indices within the heap
// (compare TextureData::heap_offset) - confirm against the allocation code.
struct TextureHeap {
ComPtr<ID3D12Heap> heap;
std::vector<uint8_t> free_list;
};
// Per-texture bookkeeping of the Direct3D 12 backend; stored in d3d.textures.
// NOTE(review): the field meanings below are inferred from names and types
// only; the code using them is outside this excerpt.
struct TextureData {
ComPtr<ID3D12Resource> resource;
struct TextureHeap *heap; // presumably the heap `resource` was placed in
uint8_t heap_offset; // presumably the slot inside `heap`
uint64_t last_frame_counter; // presumably the value of d3d.frame_counter at last use
uint32_t descriptor_index;
int sampler_parameters;
};
// CPU-side copy of the noise constant buffer (see d3d.noise_cb_data).
// Four 4-byte members; `padding` brings the struct to 16 bytes.
struct NoiseCB {
uint32_t noise_frame;
float noise_scale_x;
float noise_scale_y;
uint32_t padding;
};
// All state of the Direct3D 12 backend: dynamically loaded entry points,
// device and queue objects, descriptor heaps, texture/upload heap pools,
// per-frame bookkeeping and the currently requested pipeline state.
static struct {
// Dynamically loaded DLLs and entry points
HMODULE d3d12_module;
PFN_D3D12_CREATE_DEVICE D3D12CreateDevice;
PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface;
HMODULE d3dcompiler_module;
pD3DCompile D3DCompile;
// Fixed-capacity shader pool and the number of used entries
struct ShaderProgramD3D12 shader_program_pool[64];
uint8_t shader_program_pool_size;
uint32_t current_width, current_height;
ComPtr<ID3D12Device> device;
ComPtr<ID3D12CommandQueue> command_queue;
ComPtr<ID3D12CommandQueue> copy_command_queue;
ComPtr<IDXGISwapChain3> swap_chain;
ComPtr<ID3D12DescriptorHeap> rtv_heap;
UINT rtv_descriptor_size;
ComPtr<ID3D12Resource> render_targets[2];
ComPtr<ID3D12CommandAllocator> command_allocator;
ComPtr<ID3D12CommandAllocator> copy_command_allocator;
ComPtr<ID3D12GraphicsCommandList> command_list;
ComPtr<ID3D12GraphicsCommandList> copy_command_list;
ComPtr<ID3D12DescriptorHeap> dsv_heap;
ComPtr<ID3D12Resource> depth_stencil_buffer;
ComPtr<ID3D12DescriptorHeap> srv_heap;
UINT srv_descriptor_size;
ComPtr<ID3D12DescriptorHeap> sampler_heap;
UINT sampler_descriptor_size;
// NOTE(review): the meaning of the pair / size_t keys of the maps below is
// not visible in this excerpt - confirm against the allocation code.
std::map<std::pair<uint32_t, uint32_t>, std::list<struct TextureHeap>> texture_heaps;
std::map<size_t, std::vector<ComPtr<ID3D12Resource>>> upload_heaps;
std::vector<std::pair<size_t, ComPtr<ID3D12Resource>>> upload_heaps_in_flight;
ComPtr<ID3D12Fence> copy_fence;
uint64_t copy_fence_value;
std::vector<struct TextureData> textures;
int current_tile;
uint32_t current_texture_ids[2];
uint32_t srv_pos;
int frame_index;
ComPtr<ID3D12Fence> fence;
HANDLE fence_event;
uint64_t frame_counter;
ComPtr<ID3D12Resource> noise_cb;
void *mapped_noise_cb_address;
struct NoiseCB noise_cb_data;
ComPtr<ID3D12Resource> vertex_buffer;
void *mapped_vbuf_address;
int vbuf_pos;
std::vector<ComPtr<ID3D12Resource>> resources_to_clean_at_end_of_frame;
std::vector<std::pair<struct TextureHeap *, uint8_t>> texture_heap_allocations_to_reclaim_at_end_of_frame;
// Cache of pipeline state objects, keyed by PipelineDesc (ordered by its operator<)
std::map<PipelineDesc, ComPtr<ID3D12PipelineState>> pipeline_states;
bool must_reload_pipeline;
// Current state:
ID3D12PipelineState *pipeline_state;
struct ShaderProgramD3D12 *shader_program;
bool depth_test;
bool depth_mask;
bool zmode_decal;
CD3DX12_VIEWPORT viewport;
CD3DX12_RECT scissor;
} d3d;
// Debug statistics: number of texture uploads in the current frame, and a
// running maximum updated in gfx_direct3d12_end_frame. Within this file the
// values are only consumed by commented-out printf calls.
static int texture_uploads = 0;
static int max_texture_uploads;
// Returns the CPU descriptor handle for the start of `heap`.
// Exists as a wrapper because the direct call cannot be used as-is when
// building with MinGW (see the explanation inside).
static D3D12_CPU_DESCRIPTOR_HANDLE get_cpu_descriptor_handle(ComPtr<ID3D12DescriptorHeap>& heap) {
#ifdef __MINGW32__
// We would like to do this:
// D3D12_CPU_DESCRIPTOR_HANDLE handle = heap->GetCPUDescriptorHandleForHeapStart();
// but MinGW64 doesn't follow the calling conventions of VC++ for some reason.
// Per MS documentation "User-defined types can be returned by value from global functions and static member functions"...
// "Otherwise, the caller assumes the responsibility of allocating memory and passing a pointer for the return value as the first argument".
// The method here is a non-static member function, and hence we need to pass the address to the return value as a parameter.
// MinGW32 has the same issue.
// Reinterpret the member function as one taking an explicit out-pointer and
// returning void, then call it through that signature.
auto fn = heap->GetCPUDescriptorHandleForHeapStart;
void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*fun)(D3D12_CPU_DESCRIPTOR_HANDLE *out) = (void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*)(D3D12_CPU_DESCRIPTOR_HANDLE *out))fn;
D3D12_CPU_DESCRIPTOR_HANDLE handle;
(heap.Get()->*fun)(&handle);
return handle;
#else
return heap->GetCPUDescriptorHandleForHeapStart();
#endif
}
// Returns the GPU descriptor handle for the start of `heap`.
// Uses the same MinGW return-by-value workaround as get_cpu_descriptor_handle:
// the member function is called through a signature with an explicit out-pointer.
static D3D12_GPU_DESCRIPTOR_HANDLE get_gpu_descriptor_handle(ComPtr<ID3D12DescriptorHeap>& heap) {
#ifdef __MINGW32__
// See get_cpu_descriptor_handle
auto fn = heap->GetGPUDescriptorHandleForHeapStart;
void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*fun)(D3D12_GPU_DESCRIPTOR_HANDLE *out) = (void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*)(D3D12_GPU_DESCRIPTOR_HANDLE *out))fn;
D3D12_GPU_DESCRIPTOR_HANDLE handle;
(heap.Get()->*fun)(&handle);
return handle;
#else
return heap->GetGPUDescriptorHandleForHeapStart();
#endif
}
// Queries d3d.device for the allocation size and alignment of a single
// resource described by `resource_desc` (visible mask 0, one descriptor).
// Uses the same MinGW return-by-value workaround as get_cpu_descriptor_handle.
static D3D12_RESOURCE_ALLOCATION_INFO get_resource_allocation_info(const D3D12_RESOURCE_DESC *resource_desc) {
#ifdef __MINGW32__
// See get_cpu_descriptor_handle
auto fn = d3d.device->GetResourceAllocationInfo;
void (STDMETHODCALLTYPE ID3D12Device::*fun)(D3D12_RESOURCE_ALLOCATION_INFO *out, UINT visibleMask, UINT numResourceDescs, const D3D12_RESOURCE_DESC *pResourceDescs) =
(void (STDMETHODCALLTYPE ID3D12Device::*)(D3D12_RESOURCE_ALLOCATION_INFO *out, UINT visibleMask, UINT numResourceDescs, const D3D12_RESOURCE_DESC *pResourceDescs))fn;
D3D12_RESOURCE_ALLOCATION_INFO out;
(d3d.device.Get()->*fun)(&out, 0, 1, resource_desc);
return out;
#else
return d3d.device->GetResourceAllocationInfo(0, 1, resource_desc);
#endif
}
// Reports to the caller that this backend uses a depth range of 0 to 1.
// Always returns true.
static bool gfx_direct3d12_z_is_from_0_to_1(void) {
    const bool depth_range_is_zero_to_one = true;
    return depth_range_is_zero_to_one;
}
// Intentionally a no-op: this backend has nothing to undo when a shader
// program stops being the current one.
static void gfx_direct3d12_unload_shader(struct ShaderProgram *old_prg) {
    (void)old_prg; // unused
}
// Makes `new_prg` the current shader program and flags the pipeline state as
// stale so that the next draw call looks it up again.
static void gfx_direct3d12_load_shader(struct ShaderProgram *new_prg) {
    struct ShaderProgramD3D12 *const program = (struct ShaderProgramD3D12 *)new_prg;
    d3d.must_reload_pipeline = true;
    d3d.shader_program = program;
}
static struct ShaderProgram *gfx_direct3d12_create_and_load_new_shader(uint32_t shader_id) {
/*static FILE *fp;
if (!fp) {
fp = fopen("shaders.txt", "w");
}
fprintf(fp, "0x%08x\n", shader_id);
fflush(fp);*/
struct ShaderProgramD3D12 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++];
CCFeatures cc_features;
gfx_cc_get_features(shader_id, &cc_features);
char buf[2048];
size_t len, num_floats;
gfx_direct3d_common_build_shader(buf, len, num_floats, cc_features, true, false);
//fwrite(buf, 1, len, stdout);
ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->vertex_shader, nullptr));
ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->pixel_shader, nullptr));
ThrowIfFailed(d3d.device->CreateRootSignature(0, prg->pixel_shader->GetBufferPointer(), prg->pixel_shader->GetBufferSize(), IID_PPV_ARGS(&prg->root_signature)));
prg->shader_id = shader_id;
prg->num_inputs = cc_features.num_inputs;
prg->used_textures[0] = cc_features.used_textures[0];
prg->used_textures[1] = cc_features.used_textures[1];
prg->num_floats = num_floats;
//prg->num_attribs = cnt;
d3d.must_reload_pipeline = true;
return (struct ShaderProgram *)(d3d.shader_program = prg);
}
static struct ShaderProgram *gfx_direct3d12_lookup_shader(uint32_t shader_id) {
for (size_t i = 0; i < d3d.shader_program_pool_size; i++) {
if (d3d.shader_program_pool[i].shader_id == shader_id) {
return (struct ShaderProgram *)&d3d.shader_program_pool[i];
}
}
return nullptr;
}
static void gfx_direct3d12_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) {
struct ShaderProgramD3D12 *p = (struct ShaderProgramD3D12 *)prg;
*num_inputs = p->num_inputs;
used_textures[0] = p->used_textures[0];
used_textures[1] = p->used_textures[1];
}
// Appends a value-initialised TextureData entry and returns its index, which
// serves as the texture id.
static uint32_t gfx_direct3d12_new_texture(void) {
    const size_t new_index = d3d.textures.size();
    d3d.textures.resize(new_index + 1);
    return (uint32_t)new_index;
}
// Binds `texture_id` to `tile` and remembers `tile` as the target of the next
// texture upload. `tile` indexes a two-element array and is not range-checked.
static void gfx_direct3d12_select_texture(int tile, uint32_t texture_id) {
    d3d.current_texture_ids[tile] = texture_id;
    d3d.current_tile = tile;
}
// Uploads a width x height RGBA32 image and makes it the content of the texture
// currently selected for d3d.current_tile.
//
// Steps: place a new texture resource in a slot of a shared heap, stage the
// pixels in an upload buffer, record the copy on the copy command list, record
// a state transition on the direct command list, and finally swap the new
// resource into the TextureData entry (the old resource, if any, is released
// at the end of the frame).
static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, int height) {
texture_uploads++;
ComPtr<ID3D12Resource> texture_resource;
// Describe and create a Texture2D.
D3D12_RESOURCE_DESC texture_desc = {};
texture_desc.MipLevels = 1;
texture_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
texture_desc.Width = width;
texture_desc.Height = height;
texture_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
texture_desc.DepthOrArraySize = 1;
texture_desc.SampleDesc.Count = 1;
texture_desc.SampleDesc.Quality = 0;
texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
// Request the small placement alignment when the image covers at most 16
// blocks of 32x32 pixels; otherwise 0 lets the runtime pick the alignment.
texture_desc.Alignment = ((width + 31) / 32) * ((height + 31) / 32) > 16 ? 0 : D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = get_resource_allocation_info(&texture_desc);
// Heaps are pooled per (allocation size, alignment) so that every slot in a heap has the same size.
std::list<struct TextureHeap>& heaps = d3d.texture_heaps[std::pair<uint32_t, uint32_t>(alloc_info.SizeInBytes, alloc_info.Alignment)];
struct TextureHeap *found_heap = nullptr;
// No early exit: the last heap in the list that still has a free slot wins.
for (struct TextureHeap& heap : heaps) {
if (!heap.free_list.empty()) {
found_heap = &heap;
}
}
if (found_heap == nullptr) {
heaps.resize(heaps.size() + 1);
found_heap = &heaps.back();
// In case of HD textures, make sure too much memory isn't wasted
// Aim for 524288 bytes per heap, clamped to between 1 and 64 slots.
int textures_per_heap = 524288 / alloc_info.SizeInBytes;
if (textures_per_heap < 1) {
textures_per_heap = 1;
} else if (textures_per_heap > 64) {
textures_per_heap = 64;
}
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = alloc_info.SizeInBytes * textures_per_heap;
// The heap itself uses the default placement alignment even when its
// resources use the small one.
if (alloc_info.Alignment == D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT) {
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
} else {
heap_desc.Alignment = alloc_info.Alignment;
}
heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
ThrowIfFailed(d3d.device->CreateHeap(&heap_desc, IID_PPV_ARGS(&found_heap->heap)));
// Every slot of the new heap starts out free.
for (int i = 0; i < textures_per_heap; i++) {
found_heap->free_list.push_back(i);
}
}
// heap_offset is a slot index; the byte offset is heap_offset * allocation size.
uint8_t heap_offset = found_heap->free_list.back();
found_heap->free_list.pop_back();
ThrowIfFailed(d3d.device->CreatePlacedResource(found_heap->heap.Get(), heap_offset * alloc_info.SizeInBytes, &texture_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&texture_resource)));
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
UINT num_rows;
UINT64 row_size_in_bytes;
UINT64 upload_buffer_size;
d3d.device->GetCopyableFootprints(&texture_desc, 0, 1, 0, &layout, &num_rows, &row_size_in_bytes, &upload_buffer_size);
// Reuse an idle upload buffer of exactly this size if one exists, otherwise create one.
std::vector<ComPtr<ID3D12Resource>>& upload_heaps = d3d.upload_heaps[upload_buffer_size];
ComPtr<ID3D12Resource> upload_heap;
if (upload_heaps.empty()) {
CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(upload_buffer_size);
ThrowIfFailed(d3d.device->CreateCommittedResource(
&hp,
D3D12_HEAP_FLAG_NONE,
&rdb,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&upload_heap)));
} else {
upload_heap = upload_heaps.back();
upload_heaps.pop_back();
}
{
// Copy the tightly packed source rows into the upload buffer using the
// row pitch reported by GetCopyableFootprints, then record the GPU copy.
D3D12_SUBRESOURCE_DATA texture_data = {};
texture_data.pData = rgba32_buf;
texture_data.RowPitch = width * 4; // RGBA
texture_data.SlicePitch = texture_data.RowPitch * height;
void *data;
// NOTE(review): the HRESULT of Map is not checked; `data` is used unconditionally.
upload_heap->Map(0, nullptr, &data);
D3D12_MEMCPY_DEST dest_data = { (uint8_t *)data + layout.Offset, layout.Footprint.RowPitch, SIZE_T(layout.Footprint.RowPitch) * SIZE_T(num_rows) };
MemcpySubresource(&dest_data, &texture_data, static_cast<SIZE_T>(row_size_in_bytes), num_rows, layout.Footprint.Depth);
upload_heap->Unmap(0, nullptr);
CD3DX12_TEXTURE_COPY_LOCATION dst(texture_resource.Get(), 0);
CD3DX12_TEXTURE_COPY_LOCATION src(upload_heap.Get(), layout);
d3d.copy_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
}
// The transition is recorded on the direct command list, while the copy
// above was recorded on the copy command list.
CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(texture_resource.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
d3d.command_list->ResourceBarrier(1, &barrier);
// Keep the upload buffer alive until gfx_direct3d12_finish_render recycles it.
d3d.upload_heaps_in_flight.push_back(std::make_pair((size_t)upload_buffer_size, std::move(upload_heap)));
struct TextureData& td = d3d.textures[d3d.current_texture_ids[d3d.current_tile]];
if (td.resource.Get() != nullptr) {
// Replacing an existing texture: defer releasing the old resource and its
// heap slot to the end of the frame, and force a fresh SRV for the new one.
d3d.resources_to_clean_at_end_of_frame.push_back(std::move(td.resource));
d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.push_back(std::make_pair(td.heap, td.heap_offset));
td.last_frame_counter = 0;
}
td.resource = std::move(texture_resource);
td.heap = found_heap;
td.heap_offset = heap_offset;
}
// Maps a texture clamp/mirror flag word to an address mode index:
// 2 when G_TX_CLAMP is set (regardless of mirror), 1 when only G_TX_MIRROR is
// set, 0 otherwise. The indices match the address_modes table built in
// gfx_direct3d12_init (wrap, mirror, clamp).
static int gfx_cm_to_index(uint32_t val) {
    if ((val & G_TX_CLAMP) != 0) {
        return 2;
    }
    if ((val & G_TX_MIRROR) != 0) {
        return 1;
    }
    return 0;
}
static void gfx_direct3d12_set_sampler_parameters(int tile, bool linear_filter, uint32_t cms, uint32_t cmt) {
d3d.textures[d3d.current_texture_ids[tile]].sampler_parameters = linear_filter * 9 + gfx_cm_to_index(cms) * 3 + gfx_cm_to_index(cmt);
}
// Records whether depth testing is enabled. The flag is part of the pipeline
// key, so the pipeline state is marked stale.
static void gfx_direct3d12_set_depth_test(bool depth_test) {
    d3d.must_reload_pipeline = true;
    d3d.depth_test = depth_test;
}
// Records whether depth writes are enabled (`z_upd`). The flag is part of the
// pipeline key, so the pipeline state is marked stale.
static void gfx_direct3d12_set_depth_mask(bool z_upd) {
    d3d.must_reload_pipeline = true;
    d3d.depth_mask = z_upd;
}
// Records whether decal depth mode is active (it enables a slope-scaled depth
// bias when the pipeline is built). The flag is part of the pipeline key, so
// the pipeline state is marked stale.
static void gfx_direct3d12_set_zmode_decal(bool zmode_decal) {
    d3d.must_reload_pipeline = true;
    d3d.zmode_decal = zmode_decal;
}
// Stores the viewport used by subsequent draw calls. The vertical position is
// mirrored against the current back buffer height (presumably because callers
// measure `y` from the bottom edge — confirm against the caller).
static void gfx_direct3d12_set_viewport(int x, int y, int width, int height) {
    // `auto` keeps the exact type of the original mixed signed/unsigned expression.
    const auto top_left_y = d3d.current_height - y - height;
    d3d.viewport = CD3DX12_VIEWPORT(x, top_left_y, width, height);
}
// Stores the scissor rectangle used by subsequent draw calls. Top and bottom
// are mirrored against the current back buffer height (presumably because
// callers measure `y` from the bottom edge — confirm against the caller).
static void gfx_direct3d12_set_scissor(int x, int y, int width, int height) {
    // `auto` keeps the exact type of the original mixed signed/unsigned expressions.
    const auto top = d3d.current_height - y - height;
    const auto bottom = d3d.current_height - y;
    d3d.scissor = CD3DX12_RECT(x, top, x + width, bottom);
}
// Intentionally a no-op: blending is already decided by the shader id when the
// pipeline state is built, so there is nothing to record here.
static void gfx_direct3d12_set_use_alpha(bool use_alpha) {
    (void)use_alpha; // unused
}
// Records one draw call for `buf_vbo_num_tris` triangles whose interleaved
// vertex data (`buf_vbo_len` floats) is given in `buf_vbo`.
//
// The pipeline state is looked up (and created on first use) whenever
// must_reload_pipeline is set. Root parameters, render targets, viewport and
// scissor are bound on every call, and the vertex data is appended to the
// persistently mapped vertex buffer.
static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
struct ShaderProgramD3D12 *prg = d3d.shader_program;
if (d3d.must_reload_pipeline) {
// operator[] inserts an empty ComPtr on a cache miss; it is filled in below.
ComPtr<ID3D12PipelineState>& pipeline_state = d3d.pipeline_states[PipelineDesc{
prg->shader_id,
d3d.depth_test,
d3d.depth_mask,
d3d.zmode_decal,
0
}];
if (pipeline_state.Get() == nullptr) {
// Input layout: position, then optional texcoord, optional fog, then one
// element per shader input.
// NOTE(review): the array holds 7 elements, which is only enough if
// num_inputs is at most 4 — confirm the upper bound of CCFeatures::num_inputs.
D3D12_INPUT_ELEMENT_DESC ied[7] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
};
uint32_t ied_pos = 1;
if (prg->used_textures[0] || prg->used_textures[1]) {
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
if (prg->shader_id & SHADER_OPT_FOG) {
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
for (int i = 0; i < prg->num_inputs; i++) {
// Inputs carry four floats when the shader uses alpha, three otherwise.
DXGI_FORMAT format = (prg->shader_id & SHADER_OPT_ALPHA) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"INPUT", (UINT)i, format, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
desc.InputLayout = { ied, ied_pos };
desc.pRootSignature = prg->root_signature.Get();
desc.VS = CD3DX12_SHADER_BYTECODE(prg->vertex_shader.Get());
desc.PS = CD3DX12_SHADER_BYTECODE(prg->pixel_shader.Get());
desc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
if (d3d.zmode_decal) {
desc.RasterizerState.SlopeScaledDepthBias = -2.0f;
}
desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
if (prg->shader_id & SHADER_OPT_ALPHA) {
// Alpha blending: the same blend description is applied to every render target slot.
D3D12_BLEND_DESC bd = {};
bd.AlphaToCoverageEnable = FALSE;
bd.IndependentBlendEnable = FALSE;
static const D3D12_RENDER_TARGET_BLEND_DESC default_rtbd = {
TRUE, FALSE,
D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
D3D12_LOGIC_OP_NOOP,
D3D12_COLOR_WRITE_ENABLE_ALL
};
for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) {
bd.RenderTarget[i] = default_rtbd;
}
desc.BlendState = bd;
} else {
desc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
}
desc.DepthStencilState.DepthEnable = d3d.depth_test;
desc.DepthStencilState.DepthWriteMask = d3d.depth_mask ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
desc.DSVFormat = d3d.depth_test ? DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_UNKNOWN;
desc.SampleMask = UINT_MAX;
desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
desc.NumRenderTargets = 1;
desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
desc.SampleDesc.Count = 1;
ThrowIfFailed(d3d.device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pipeline_state)));
}
d3d.pipeline_state = pipeline_state.Get();
d3d.must_reload_pipeline = false;
}
d3d.command_list->SetGraphicsRootSignature(prg->root_signature.Get());
d3d.command_list->SetPipelineState(d3d.pipeline_state);
ID3D12DescriptorHeap *heaps[] = { d3d.srv_heap.Get(), d3d.sampler_heap.Get() };
d3d.command_list->SetDescriptorHeaps(2, heaps);
// Root parameters are bound in this order: the noise constant buffer (only
// when both alpha and noise are enabled), then an SRV table and a sampler
// table for each used texture.
// NOTE(review): this order presumably has to match the root signature embedded
// in the generated shader source — confirm against gfx_direct3d_common_build_shader.
int root_param_index = 0;
if ((prg->shader_id & (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) == (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) {
d3d.command_list->SetGraphicsRootConstantBufferView(root_param_index++, d3d.noise_cb->GetGPUVirtualAddress());
}
for (int i = 0; i < 2; i++) {
if (prg->used_textures[i]) {
struct TextureData& td = d3d.textures[d3d.current_texture_ids[i]];
// Create the texture's SRV at most once per frame; srv_pos is reset in start_frame.
if (td.last_frame_counter != d3d.frame_counter) {
td.descriptor_index = d3d.srv_pos;
td.last_frame_counter = d3d.frame_counter;
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srv_desc.Texture2D.MipLevels = 1;
CD3DX12_CPU_DESCRIPTOR_HANDLE srv_handle(get_cpu_descriptor_handle(d3d.srv_heap), d3d.srv_pos++, d3d.srv_descriptor_size);
d3d.device->CreateShaderResourceView(td.resource.Get(), &srv_desc, srv_handle);
}
CD3DX12_GPU_DESCRIPTOR_HANDLE srv_gpu_handle(get_gpu_descriptor_handle(d3d.srv_heap), td.descriptor_index, d3d.srv_descriptor_size);
d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, srv_gpu_handle);
CD3DX12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle(get_gpu_descriptor_handle(d3d.sampler_heap), td.sampler_parameters, d3d.sampler_descriptor_size);
d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, sampler_gpu_handle);
}
}
CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);
d3d.command_list->RSSetViewports(1, &d3d.viewport);
d3d.command_list->RSSetScissorRects(1, &d3d.scissor);
// Append the vertex data to the mapped vertex buffer at the current position.
// NOTE(review): there is no check that the data fits into the buffer created
// in gfx_direct3d12_init.
int current_pos = d3d.vbuf_pos;
memcpy((uint8_t *)d3d.mapped_vbuf_address + current_pos, buf_vbo, buf_vbo_len * sizeof(float));
d3d.vbuf_pos += buf_vbo_len * sizeof(float);
// High-water mark of vertex buffer usage; only read by the commented-out printf.
static int maxpos;
if (d3d.vbuf_pos > maxpos) {
maxpos = d3d.vbuf_pos;
//printf("NEW MAXPOS: %d\n", maxpos);
}
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view;
vertex_buffer_view.BufferLocation = d3d.vertex_buffer->GetGPUVirtualAddress() + current_pos;
// Stride = floats per vertex * sizeof(float), with three vertices per triangle.
// NOTE(review): divides by zero when buf_vbo_num_tris is 0.
vertex_buffer_view.StrideInBytes = buf_vbo_len / (3 * buf_vbo_num_tris) * sizeof(float);
vertex_buffer_view.SizeInBytes = buf_vbo_len * sizeof(float);
d3d.command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
d3d.command_list->IASetVertexBuffers(0, 1, &vertex_buffer_view);
d3d.command_list->DrawInstanced(3 * buf_vbo_num_tris, 1, 0, 0);
}
// Begins recording a new frame: resets the per-frame counters and the direct
// command list, transitions the current back buffer to the render-target
// state, clears colour and depth, and refreshes the noise constant buffer.
static void gfx_direct3d12_start_frame(void) {
    ++d3d.frame_counter;
    d3d.srv_pos = 0; // SRV heap slots are handed out afresh every frame.
    texture_uploads = 0;

    ThrowIfFailed(d3d.command_allocator->Reset());
    ThrowIfFailed(d3d.command_list->Reset(d3d.command_allocator.Get(), nullptr));

    CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
        d3d.render_targets[d3d.frame_index].Get(),
        D3D12_RESOURCE_STATE_PRESENT,
        D3D12_RESOURCE_STATE_RENDER_TARGET);
    d3d.command_list->ResourceBarrier(1, &barrier);

    CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
    D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
    d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);

    // Clear to opaque black and to the far depth value.
    const float clear_color[] = { 0.0f, 0.0f, 0.0f, 1.0f };
    d3d.command_list->ClearRenderTargetView(rtv_handle, clear_color, 0, nullptr);
    d3d.command_list->ClearDepthStencilView(dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr);

    d3d.noise_cb_data.noise_frame++;
    if (d3d.noise_cb_data.noise_frame > 150) {
        // No high values, as noise starts to look ugly
        d3d.noise_cb_data.noise_frame = 0;
    }
    float aspect_ratio = (float) d3d.current_width / (float) d3d.current_height;
    d3d.noise_cb_data.noise_scale_x = 120 * aspect_ratio; // 120 = N64 height resolution (240) / 2
    d3d.noise_cb_data.noise_scale_y = 120;
    memcpy(d3d.mapped_noise_cb_address, &d3d.noise_cb_data, sizeof(struct NoiseCB));

    // Vertex data for this frame is written from the start of the vertex buffer.
    d3d.vbuf_pos = 0;
}
static void create_render_target_views(void) {
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = get_cpu_descriptor_handle(d3d.rtv_heap);
for (UINT i = 0; i < 2; i++) {
ThrowIfFailed(d3d.swap_chain->GetBuffer(i, IID_ID3D12Resource, (void **)&d3d.render_targets[i]));
d3d.device->CreateRenderTargetView(d3d.render_targets[i].Get(), nullptr, rtv_handle);
rtv_handle.ptr += d3d.rtv_descriptor_size;
}
}
static void create_depth_buffer(void) {
DXGI_SWAP_CHAIN_DESC1 desc1;
ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1));
UINT width = desc1.Width;
UINT height = desc1.Height;
d3d.current_width = width;
d3d.current_height = height;
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
dsv_desc.Format = DXGI_FORMAT_D32_FLOAT;
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
D3D12_CLEAR_VALUE depth_optimized_cv = {};
depth_optimized_cv.Format = DXGI_FORMAT_D32_FLOAT;
depth_optimized_cv.DepthStencil.Depth = 1.0f;
D3D12_HEAP_PROPERTIES hp = {};
hp.Type = D3D12_HEAP_TYPE_DEFAULT;
hp.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
hp.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
hp.CreationNodeMask = 1;
hp.VisibleNodeMask = 1;
D3D12_RESOURCE_DESC rd = {};
rd.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
rd.Alignment = 0;
rd.Width = width;
rd.Height = height;
rd.DepthOrArraySize = 1;
rd.MipLevels = 0;
rd.Format = DXGI_FORMAT_D32_FLOAT;
rd.SampleDesc.Count = 1;
rd.SampleDesc.Quality = 0;
rd.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
rd.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
ThrowIfFailed(d3d.device->CreateCommittedResource(&hp, D3D12_HEAP_FLAG_NONE, &rd, D3D12_RESOURCE_STATE_DEPTH_WRITE, &depth_optimized_cv, IID_PPV_ARGS(&d3d.depth_stencil_buffer)));
d3d.device->CreateDepthStencilView(d3d.depth_stencil_buffer.Get(), &dsv_desc, get_cpu_descriptor_handle(d3d.dsv_heap));
}
static void gfx_direct3d12_on_resize(void) {
if (d3d.render_targets[0].Get() != nullptr) {
d3d.render_targets[0].Reset();
d3d.render_targets[1].Reset();
ThrowIfFailed(d3d.swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT));
d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
create_render_target_views();
create_depth_buffer();
}
}
// One-time initialisation of the Direct3D 12 backend.
//
// Loads d3d12.dll and D3DCompiler_47.dll at run time, creates the device,
// the direct and copy command queues, the swap chain, all descriptor heaps
// (RTV, DSV, SRV, sampler), the noise constant buffer, command allocators and
// lists, the fences, and the persistently mapped vertex buffer.
// Failures are reported through ThrowIfFailed.
static void gfx_direct3d12_init(void ) {
// Load d3d12.dll
d3d.d3d12_module = LoadLibraryW(L"d3d12.dll");
if (d3d.d3d12_module == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "d3d12.dll not found");
}
// NOTE(review): the results of GetProcAddress are not checked for nullptr.
d3d.D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(d3d.d3d12_module, "D3D12CreateDevice");
#if DEBUG_D3D
d3d.D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(d3d.d3d12_module, "D3D12GetDebugInterface");
#endif
// Load D3DCompiler_47.dll
d3d.d3dcompiler_module = LoadLibraryW(L"D3DCompiler_47.dll");
if (d3d.d3dcompiler_module == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "D3DCompiler_47.dll not found");
}
d3d.D3DCompile = (pD3DCompile)GetProcAddress(d3d.d3dcompiler_module, "D3DCompile");
// Create device
{
// debug_flags is only written (inside the DEBUG_D3D section); it is not read
// anywhere in this function.
UINT debug_flags = 0;
#if DEBUG_D3D
ComPtr<ID3D12Debug> debug_controller;
if (SUCCEEDED(d3d.D3D12GetDebugInterface(IID_PPV_ARGS(&debug_controller)))) {
debug_controller->EnableDebugLayer();
debug_flags |= DXGI_CREATE_FACTORY_DEBUG;
}
#endif
// The callback is invoked with test_only == true to probe whether an adapter
// supports D3D12 (no device is created), and with test_only == false to
// actually create d3d.device.
gfx_dxgi_create_factory_and_device(DEBUG_D3D, 12, [](IDXGIAdapter1 *adapter, bool test_only) {
HRESULT res = d3d.D3D12CreateDevice(
adapter,
D3D_FEATURE_LEVEL_11_0,
IID_ID3D12Device,
test_only ? nullptr : IID_PPV_ARGS_Helper(&d3d.device));
if (test_only) {
return SUCCEEDED(res);
} else {
ThrowIfFailed(res, gfx_dxgi_get_h_wnd(), "Failed to create D3D12 device.");
return true;
}
});
}
// Create command queues
{
D3D12_COMMAND_QUEUE_DESC queue_desc = {};
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&d3d.command_queue)));
}
{
D3D12_COMMAND_QUEUE_DESC queue_desc = {};
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&d3d.copy_command_queue)));
}
// Create swap chain
{
ComPtr<IDXGISwapChain1> swap_chain1 = gfx_dxgi_create_swap_chain(d3d.command_queue.Get());
ThrowIfFailed(swap_chain1->QueryInterface(__uuidof(IDXGISwapChain3), &d3d.swap_chain));
d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
}
// Create render target views
{
D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc = {};
rtv_heap_desc.NumDescriptors = 2;
rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&rtv_heap_desc, IID_PPV_ARGS(&d3d.rtv_heap)));
d3d.rtv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
create_render_target_views();
}
// Create Z-buffer
{
D3D12_DESCRIPTOR_HEAP_DESC dsv_heap_desc = {};
dsv_heap_desc.NumDescriptors = 1;
dsv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
dsv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&dsv_heap_desc, IID_PPV_ARGS(&d3d.dsv_heap)));
create_depth_buffer();
}
// Create SRV heap for texture descriptors
{
D3D12_DESCRIPTOR_HEAP_DESC srv_heap_desc = {};
srv_heap_desc.NumDescriptors = 1024; // Max unique textures per frame
srv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
srv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&srv_heap_desc, IID_PPV_ARGS(&d3d.srv_heap)));
d3d.srv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Create sampler heap and descriptors
{
// 18 samplers = 2 filter modes x 3 address modes (S) x 3 address modes (T).
// They are created in the order linear_filter * 9 + cms * 3 + cmt, the same
// index that gfx_direct3d12_set_sampler_parameters computes.
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = {};
sampler_heap_desc.NumDescriptors = 18;
sampler_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
sampler_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&d3d.sampler_heap)));
d3d.sampler_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
// Indexed by the value returned from gfx_cm_to_index.
static const D3D12_TEXTURE_ADDRESS_MODE address_modes[] = {
D3D12_TEXTURE_ADDRESS_MODE_WRAP,
D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
D3D12_TEXTURE_ADDRESS_MODE_CLAMP
};
D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle = get_cpu_descriptor_handle(d3d.sampler_heap);
int pos = 0;
for (int linear_filter = 0; linear_filter < 2; linear_filter++) {
for (int cms = 0; cms < 3; cms++) {
for (int cmt = 0; cmt < 3; cmt++) {
D3D12_SAMPLER_DESC sampler_desc = {};
sampler_desc.Filter = linear_filter ? D3D12_FILTER_MIN_MAG_MIP_LINEAR : D3D12_FILTER_MIN_MAG_MIP_POINT;
sampler_desc.AddressU = address_modes[cms];
sampler_desc.AddressV = address_modes[cmt];
sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
sampler_desc.MinLOD = 0;
sampler_desc.MaxLOD = D3D12_FLOAT32_MAX;
sampler_desc.MipLODBias = 0.0f;
sampler_desc.MaxAnisotropy = 1;
sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
d3d.device->CreateSampler(&sampler_desc, CD3DX12_CPU_DESCRIPTOR_HANDLE(sampler_handle, pos++, d3d.sampler_descriptor_size));
}
}
}
}
// Create constant buffer view for noise
{
/*D3D12_DESCRIPTOR_HEAP_DESC cbv_heap_desc = {};
cbv_heap_desc.NumDescriptors = 1;
cbv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
srv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap*/
// A 256-byte upload-heap buffer that stays mapped for the lifetime of the
// backend; start_frame copies a NoiseCB struct into it every frame.
CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(256);
ThrowIfFailed(d3d.device->CreateCommittedResource(
&hp,
D3D12_HEAP_FLAG_NONE,
&rdb,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&d3d.noise_cb)));
CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU
ThrowIfFailed(d3d.noise_cb->Map(0, &read_range, &d3d.mapped_noise_cb_address));
}
ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&d3d.command_allocator)));
ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&d3d.copy_command_allocator)));
ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, d3d.command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.command_list)));
ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, d3d.copy_command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.copy_command_list)));
// Only the direct list is closed here, because start_frame resets it before
// use. The copy list is left open so that texture uploads can record into it
// until end_frame closes it.
ThrowIfFailed(d3d.command_list->Close());
ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d.fence)));
d3d.fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (d3d.fence_event == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d.copy_fence)));
{
// Create a buffer of 1 MB in size. With a 120 star speed run 192 kB seems to be max usage.
CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(256 * 1024 * sizeof(float));
ThrowIfFailed(d3d.device->CreateCommittedResource(
&hp,
D3D12_HEAP_FLAG_NONE,
&rdb,
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&d3d.vertex_buffer)));
CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU
ThrowIfFailed(d3d.vertex_buffer->Map(0, &read_range, &d3d.mapped_vbuf_address));
}
}
// Finishes recording the frame and submits it to the GPU.
//
// The copy command list (texture uploads) is submitted first and followed by
// a fence signal on the copy queue; the direct queue is made to wait for that
// fence value before the rendering command list is executed, so that draws
// never sample a texture whose upload has not completed.
// Failures of Close, Signal and Wait are reported through ThrowIfFailed.
static void gfx_direct3d12_end_frame(void) {
    // Debug statistic: running maximum of uploads per frame.
    // NOTE(review): the excluded counts 38, 34 and 29 look like leftovers from a
    // profiling session — confirm whether they are still meaningful.
    if (max_texture_uploads < texture_uploads && texture_uploads != 38 && texture_uploads != 34 && texture_uploads != 29) {
        max_texture_uploads = texture_uploads;
    }
    texture_uploads = 0;

    // Submit the texture uploads recorded during this frame.
    ThrowIfFailed(d3d.copy_command_list->Close());
    {
        ID3D12CommandList *lists[] = { d3d.copy_command_list.Get() };
        d3d.copy_command_queue->ExecuteCommandLists(1, lists);
        ThrowIfFailed(d3d.copy_command_queue->Signal(d3d.copy_fence.Get(), ++d3d.copy_fence_value));
    }

    // Hand the back buffer over for presentation.
    CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
        d3d.render_targets[d3d.frame_index].Get(),
        D3D12_RESOURCE_STATE_RENDER_TARGET,
        D3D12_RESOURCE_STATE_PRESENT);
    d3d.command_list->ResourceBarrier(1, &barrier);

    // GPU-side wait: rendering starts only after the uploads have finished.
    ThrowIfFailed(d3d.command_queue->Wait(d3d.copy_fence.Get(), d3d.copy_fence_value));

    ThrowIfFailed(d3d.command_list->Close());
    {
        ID3D12CommandList *lists[] = { d3d.command_list.Get() };
        d3d.command_queue->ExecuteCommandLists(1, lists);
    }
}
// Blocks until the GPU has finished the submitted frame, then recycles the
// per-frame resources and prepares the copy command list for the next frame.
static void gfx_direct3d12_finish_render(void) {
    // Signal a new fence value on the direct queue and wait for it on the CPU.
    static UINT64 fence_value;
    ThrowIfFailed(d3d.command_queue->Signal(d3d.fence.Get(), ++fence_value));
    if (d3d.fence->GetCompletedValue() < fence_value) {
        ThrowIfFailed(d3d.fence->SetEventOnCompletion(fence_value, d3d.fence_event));
        WaitForSingleObject(d3d.fence_event, INFINITE);
    }

    // The GPU no longer references anything from this frame: release replaced
    // textures, return upload buffers to their pools and free the heap slots.
    d3d.resources_to_clean_at_end_of_frame.clear();
    for (std::pair<size_t, ComPtr<ID3D12Resource>>& heap : d3d.upload_heaps_in_flight) {
        d3d.upload_heaps[heap.first].push_back(std::move(heap.second));
    }
    d3d.upload_heaps_in_flight.clear();
    for (std::pair<struct TextureHeap *, uint8_t>& item : d3d.texture_heap_allocations_to_reclaim_at_end_of_frame) {
        item.first->free_list.push_back(item.second);
    }
    d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.clear();

    d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();

    // Reopen the copy command list so that texture uploads can be recorded again.
    ThrowIfFailed(d3d.copy_command_allocator->Reset());
    ThrowIfFailed(d3d.copy_command_list->Reset(d3d.copy_command_allocator.Get(), nullptr));
}
} // namespace
// Entry-point table exposing the Direct3D 12 backend through the generic GfxRenderingAPI
// interface. The initializer is positional, so the order of the entries below must match
// the member order of struct GfxRenderingAPI (declared outside this file).
struct GfxRenderingAPI gfx_direct3d12_api = {
gfx_direct3d12_z_is_from_0_to_1,
gfx_direct3d12_unload_shader,
gfx_direct3d12_load_shader,
gfx_direct3d12_create_and_load_new_shader,
gfx_direct3d12_lookup_shader,
gfx_direct3d12_shader_get_info,
gfx_direct3d12_new_texture,
gfx_direct3d12_select_texture,
gfx_direct3d12_upload_texture,
gfx_direct3d12_set_sampler_parameters,
gfx_direct3d12_set_depth_test,
gfx_direct3d12_set_depth_mask,
gfx_direct3d12_set_zmode_decal,
gfx_direct3d12_set_viewport,
gfx_direct3d12_set_scissor,
gfx_direct3d12_set_use_alpha,
gfx_direct3d12_draw_triangles,
gfx_direct3d12_init,
gfx_direct3d12_on_resize,
gfx_direct3d12_start_frame,
gfx_direct3d12_end_frame,
gfx_direct3d12_finish_render
};
#endif

View file

@ -0,0 +1,12 @@
#ifdef ENABLE_DX12
#ifndef GFX_DIRECT3D12_H
#define GFX_DIRECT3D12_H
#include "gfx_rendering_api.h"
// Direct3D 12 implementation of the rendering API; only declared when ENABLE_DX12 is defined.
extern struct GfxRenderingAPI gfx_direct3d12_api;
#endif
#endif

View file

@ -0,0 +1,62 @@
#ifndef GFX_DIRECT3D12_GUIDS_H
#define GFX_DIRECT3D12_GUIDS_H
#ifdef __MINGW32__
// This file is only needed due to missing MinGW-specific headers for d3d12.h.
// It will define IID_* symbols having the "selectany" attribute (assuming
// d3d12.h was earlier included), as well as make __uuidof(...) work.
//
// DEF_GUID(type, l, w1, w2, b1..b8) attaches the GUID given by its fields to `type`
// through __CRT_UUID_DECL and defines the matching IID_<type> constant from __uuidof(type).
// The macro expansion has no trailing semicolon; each use below supplies it.
#define DEF_GUID(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \
__CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \
const GUID IID_##type = __uuidof(type)
DEF_GUID(ID3D12Object,0xc4fec28f,0x7966,0x4e95,0x9f,0x94,0xf4,0x31,0xcb,0x56,0xc3,0xb8);
DEF_GUID(ID3D12DeviceChild,0x905db94b,0xa00c,0x4140,0x9d,0xf5,0x2b,0x64,0xca,0x9e,0xa3,0x57);
DEF_GUID(ID3D12RootSignature,0xc54a6b66,0x72df,0x4ee8,0x8b,0xe5,0xa9,0x46,0xa1,0x42,0x92,0x14);
DEF_GUID(ID3D12RootSignatureDeserializer,0x34AB647B,0x3CC8,0x46AC,0x84,0x1B,0xC0,0x96,0x56,0x45,0xC0,0x46);
DEF_GUID(ID3D12VersionedRootSignatureDeserializer,0x7F91CE67,0x090C,0x4BB7,0xB7,0x8E,0xED,0x8F,0xF2,0xE3,0x1D,0xA0);
DEF_GUID(ID3D12Pageable,0x63ee58fb,0x1268,0x4835,0x86,0xda,0xf0,0x08,0xce,0x62,0xf0,0xd6);
DEF_GUID(ID3D12Heap,0x6b3b2502,0x6e51,0x45b3,0x90,0xee,0x98,0x84,0x26,0x5e,0x8d,0xf3);
DEF_GUID(ID3D12Resource,0x696442be,0xa72e,0x4059,0xbc,0x79,0x5b,0x5c,0x98,0x04,0x0f,0xad);
DEF_GUID(ID3D12CommandAllocator,0x6102dee4,0xaf59,0x4b09,0xb9,0x99,0xb4,0x4d,0x73,0xf0,0x9b,0x24);
DEF_GUID(ID3D12Fence,0x0a753dcf,0xc4d8,0x4b91,0xad,0xf6,0xbe,0x5a,0x60,0xd9,0x5a,0x76);
DEF_GUID(ID3D12Fence1,0x433685fe,0xe22b,0x4ca0,0xa8,0xdb,0xb5,0xb4,0xf4,0xdd,0x0e,0x4a);
DEF_GUID(ID3D12PipelineState,0x765a30f3,0xf624,0x4c6f,0xa8,0x28,0xac,0xe9,0x48,0x62,0x24,0x45);
DEF_GUID(ID3D12DescriptorHeap,0x8efb471d,0x616c,0x4f49,0x90,0xf7,0x12,0x7b,0xb7,0x63,0xfa,0x51);
DEF_GUID(ID3D12QueryHeap,0x0d9658ae,0xed45,0x469e,0xa6,0x1d,0x97,0x0e,0xc5,0x83,0xca,0xb4);
DEF_GUID(ID3D12CommandSignature,0xc36a797c,0xec80,0x4f0a,0x89,0x85,0xa7,0xb2,0x47,0x50,0x82,0xd1);
DEF_GUID(ID3D12CommandList,0x7116d91c,0xe7e4,0x47ce,0xb8,0xc6,0xec,0x81,0x68,0xf4,0x37,0xe5);
DEF_GUID(ID3D12GraphicsCommandList,0x5b160d0f,0xac1b,0x4185,0x8b,0xa8,0xb3,0xae,0x42,0xa5,0xa4,0x55);
DEF_GUID(ID3D12GraphicsCommandList1,0x553103fb,0x1fe7,0x4557,0xbb,0x38,0x94,0x6d,0x7d,0x0e,0x7c,0xa7);
DEF_GUID(ID3D12GraphicsCommandList2,0x38C3E585,0xFF17,0x412C,0x91,0x50,0x4F,0xC6,0xF9,0xD7,0x2A,0x28);
DEF_GUID(ID3D12CommandQueue,0x0ec870a6,0x5d7e,0x4c22,0x8c,0xfc,0x5b,0xaa,0xe0,0x76,0x16,0xed);
DEF_GUID(ID3D12Device,0x189819f1,0x1db6,0x4b57,0xbe,0x54,0x18,0x21,0x33,0x9b,0x85,0xf7);
DEF_GUID(ID3D12PipelineLibrary,0xc64226a8,0x9201,0x46af,0xb4,0xcc,0x53,0xfb,0x9f,0xf7,0x41,0x4f);
DEF_GUID(ID3D12PipelineLibrary1,0x80eabf42,0x2568,0x4e5e,0xbd,0x82,0xc3,0x7f,0x86,0x96,0x1d,0xc3);
DEF_GUID(ID3D12Device1,0x77acce80,0x638e,0x4e65,0x88,0x95,0xc1,0xf2,0x33,0x86,0x86,0x3e);
DEF_GUID(ID3D12Device2,0x30baa41e,0xb15b,0x475c,0xa0,0xbb,0x1a,0xf5,0xc5,0xb6,0x43,0x28);
DEF_GUID(ID3D12Device3,0x81dadc15,0x2bad,0x4392,0x93,0xc5,0x10,0x13,0x45,0xc4,0xaa,0x98);
DEF_GUID(ID3D12ProtectedSession,0xA1533D18,0x0AC1,0x4084,0x85,0xB9,0x89,0xA9,0x61,0x16,0x80,0x6B);
DEF_GUID(ID3D12ProtectedResourceSession,0x6CD696F4,0xF289,0x40CC,0x80,0x91,0x5A,0x6C,0x0A,0x09,0x9C,0x3D);
DEF_GUID(ID3D12Device4,0xe865df17,0xa9ee,0x46f9,0xa4,0x63,0x30,0x98,0x31,0x5a,0xa2,0xe5);
DEF_GUID(ID3D12LifetimeOwner,0xe667af9f,0xcd56,0x4f46,0x83,0xce,0x03,0x2e,0x59,0x5d,0x70,0xa8);
DEF_GUID(ID3D12SwapChainAssistant,0xf1df64b6,0x57fd,0x49cd,0x88,0x07,0xc0,0xeb,0x88,0xb4,0x5c,0x8f);
DEF_GUID(ID3D12LifetimeTracker,0x3fd03d36,0x4eb1,0x424a,0xa5,0x82,0x49,0x4e,0xcb,0x8b,0xa8,0x13);
DEF_GUID(ID3D12StateObject,0x47016943,0xfca8,0x4594,0x93,0xea,0xaf,0x25,0x8b,0x55,0x34,0x6d);
DEF_GUID(ID3D12StateObjectProperties,0xde5fa827,0x9bf9,0x4f26,0x89,0xff,0xd7,0xf5,0x6f,0xde,0x38,0x60);
DEF_GUID(ID3D12Device5,0x8b4f173b,0x2fea,0x4b80,0x8f,0x58,0x43,0x07,0x19,0x1a,0xb9,0x5d);
DEF_GUID(ID3D12DeviceRemovedExtendedDataSettings,0x82BC481C,0x6B9B,0x4030,0xAE,0xDB,0x7E,0xE3,0xD1,0xDF,0x1E,0x63);
DEF_GUID(ID3D12DeviceRemovedExtendedData,0x98931D33,0x5AE8,0x4791,0xAA,0x3C,0x1A,0x73,0xA2,0x93,0x4E,0x71);
DEF_GUID(ID3D12Device6,0xc70b221b,0x40e4,0x4a17,0x89,0xaf,0x02,0x5a,0x07,0x27,0xa6,0xdc);
DEF_GUID(ID3D12Resource1,0x9D5E227A,0x4430,0x4161,0x88,0xB3,0x3E,0xCA,0x6B,0xB1,0x6E,0x19);
DEF_GUID(ID3D12Heap1,0x572F7389,0x2168,0x49E3,0x96,0x93,0xD6,0xDF,0x58,0x71,0xBF,0x6D);
DEF_GUID(ID3D12GraphicsCommandList3,0x6FDA83A7,0xB84C,0x4E38,0x9A,0xC8,0xC7,0xBD,0x22,0x01,0x6B,0x3D);
DEF_GUID(ID3D12MetaCommand,0xDBB84C27,0x36CE,0x4FC9,0xB8,0x01,0xF0,0x48,0xC4,0x6A,0xC5,0x70);
DEF_GUID(ID3D12GraphicsCommandList4,0x8754318e,0xd3a9,0x4541,0x98,0xcf,0x64,0x5b,0x50,0xdc,0x48,0x74);
DEF_GUID(ID3D12Tools,0x7071e1f0,0xe84b,0x4b33,0x97,0x4f,0x12,0xfa,0x49,0xde,0x65,0xc5);
DEF_GUID(ID3D12GraphicsCommandList5,0x55050859,0x4024,0x474c,0x87,0xf5,0x64,0x72,0xea,0xee,0x44,0xea);
#endif
#endif

View file

@ -0,0 +1,313 @@
#if defined(ENABLE_DX11) || defined(ENABLE_DX12)
#include <cstdio>
#include "gfx_direct3d_common.h"
#include "gfx_cc.h"
void get_cc_features(uint32_t shader_id, CCFeatures *cc_features) {
for (int i = 0; i < 4; i++) {
cc_features->c[0][i] = (shader_id >> (i * 3)) & 7;
cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7;
}
cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0;
cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0;
cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0;
cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0;
cc_features->used_textures[0] = false;
cc_features->used_textures[1] = false;
cc_features->num_inputs = 0;
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 4; j++) {
if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) {
if (cc_features->c[i][j] > cc_features->num_inputs) {
cc_features->num_inputs = cc_features->c[i][j];
}
}
if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) {
cc_features->used_textures[0] = true;
}
if (cc_features->c[i][j] == SHADER_TEXEL1) {
cc_features->used_textures[1] = true;
}
}
}
cc_features->do_single[0] = cc_features->c[0][2] == 0;
cc_features->do_single[1] = cc_features->c[1][2] == 0;
cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0;
cc_features->do_multiply[1] = cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0;
cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3];
cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3];
cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff);
}
// Copies the characters of `str` (without its terminating NUL) into `buf` starting at
// offset *len and advances *len past them. No bounds checking is performed and the
// output is not NUL-terminated; the caller must guarantee `buf` is large enough.
static void append_str(char *buf, size_t *len, const char *str) {
    size_t pos = *len;
    for (const char *src = str; *src != '\0'; ++src) {
        buf[pos] = *src;
        ++pos;
    }
    *len = pos;
}
// Same as appending `str` to `buf` at offset *len, followed by a CR LF line ending.
// *len is advanced past everything written. No bounds checking is performed and the
// output is not NUL-terminated; the caller must guarantee `buf` is large enough.
static void append_line(char *buf, size_t *len, const char *str) {
    size_t pos = *len;
    for (const char *src = str; *src != '\0'; ++src) {
        buf[pos] = *src;
        ++pos;
    }
    buf[pos] = '\r';
    buf[pos + 1] = '\n';
    *len = pos + 2;
}
// Maps a colour-combiner selector to the HLSL expression that reads it.
//
//   item                 - one of the SHADER_* selectors; unknown values are treated like SHADER_0.
//   with_alpha           - the surrounding expression is float4 (otherwise float3).
//   only_alpha           - emit a scalar alpha expression; the remaining flags are then ignored.
//   inputs_have_alpha    - vertex inputs are declared float4, so ".rgb" is needed to narrow them.
//   hint_single_element  - SHADER_TEXEL0A may be emitted as a scalar instead of a splatted vector.
//
// Returns a pointer to a string literal (never NULL).
static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_alpha, bool inputs_have_alpha, bool hint_single_element) {
    if (only_alpha) {
        switch (item) {
            case SHADER_INPUT_1: return "input.input1.a";
            case SHADER_INPUT_2: return "input.input2.a";
            case SHADER_INPUT_3: return "input.input3.a";
            case SHADER_INPUT_4: return "input.input4.a";
            case SHADER_TEXEL0:
            case SHADER_TEXEL0A: return "texVal0.a";
            case SHADER_TEXEL1:  return "texVal1.a";
            case SHADER_0:
            default:             return "0.0";
        }
    }

    // A vertex input can be used unmodified when the expression wants four components
    // or when the input itself only has three.
    const bool use_input_as_is = with_alpha || !inputs_have_alpha;

    switch (item) {
        case SHADER_INPUT_1:
            return use_input_as_is ? "input.input1" : "input.input1.rgb";
        case SHADER_INPUT_2:
            return use_input_as_is ? "input.input2" : "input.input2.rgb";
        case SHADER_INPUT_3:
            return use_input_as_is ? "input.input3" : "input.input3.rgb";
        case SHADER_INPUT_4:
            return use_input_as_is ? "input.input4" : "input.input4.rgb";
        case SHADER_TEXEL0:
            return with_alpha ? "texVal0" : "texVal0.rgb";
        case SHADER_TEXEL0A:
            if (hint_single_element) {
                return "texVal0.a";
            }
            if (with_alpha) {
                return "float4(texVal0.a, texVal0.a, texVal0.a, texVal0.a)";
            }
            return "float3(texVal0.a, texVal0.a, texVal0.a)";
        case SHADER_TEXEL1:
            return with_alpha ? "texVal1" : "texVal1.rgb";
        case SHADER_0:
        default:
            return with_alpha ? "float4(0.0, 0.0, 0.0, 0.0)" : "float3(0.0, 0.0, 0.0)";
    }
}
// Appends the HLSL expression for one combiner row to `buf` at offset *len.
//
// The row is c[only_alpha] (row 1 for the alpha formula, row 0 otherwise) and holds the
// four selectors a, b, c, d of the general formula (a - b) * c + d. The do_* flags pick a
// simplified form, tested in this order: single -> "d", multiply -> "a * c",
// mix -> "lerp(b, a, c)", otherwise the full formula. The third operand is always
// requested with the single-element hint.
static void append_formula(char *buf, size_t *len, const uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
    const uint8_t *sel = c[only_alpha];

    if (do_single) {
        append_str(buf, len, shader_item_to_str(sel[3], with_alpha, only_alpha, opt_alpha, false));
        return;
    }

    if (do_multiply) {
        append_str(buf, len, shader_item_to_str(sel[0], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, " * ");
        append_str(buf, len, shader_item_to_str(sel[2], with_alpha, only_alpha, opt_alpha, true));
        return;
    }

    if (do_mix) {
        append_str(buf, len, "lerp(");
        append_str(buf, len, shader_item_to_str(sel[1], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, ", ");
        append_str(buf, len, shader_item_to_str(sel[0], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, ", ");
        append_str(buf, len, shader_item_to_str(sel[2], with_alpha, only_alpha, opt_alpha, true));
        append_str(buf, len, ")");
        return;
    }

    // General case: (a - b) * c + d
    append_str(buf, len, "(");
    append_str(buf, len, shader_item_to_str(sel[0], with_alpha, only_alpha, opt_alpha, false));
    append_str(buf, len, " - ");
    append_str(buf, len, shader_item_to_str(sel[1], with_alpha, only_alpha, opt_alpha, false));
    append_str(buf, len, ") * ");
    append_str(buf, len, shader_item_to_str(sel[2], with_alpha, only_alpha, opt_alpha, true));
    append_str(buf, len, " + ");
    append_str(buf, len, shader_item_to_str(sel[3], with_alpha, only_alpha, opt_alpha, false));
}
// Generates the HLSL source (vertex shader VSMain and pixel shader PSMain) for the
// combiner configuration described by cc_features.
//
//   buf                    - output buffer, declared as 4096 bytes. Nothing in this function
//                            checks the remaining space, and the text is NOT NUL-terminated
//                            on return (the last write is an append_line); use `len`.
//   len                    - set to the number of bytes written to buf.
//   num_floats             - set to 4 (position) plus 2 if a texture is used, 4 if fog is
//                            enabled and 3 or 4 per colour input; presumably the number of
//                            floats per vertex expected by the caller -- TODO confirm.
//   cc_features            - decoded shader features.
//   include_root_signature - emit a "#define RS ..." root signature and attach it to PSMain.
//   three_point_filtering  - emit the PerDrawCB constant buffer and the 3-point filter helper.
//
// All generated lines end in CR LF.
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering) {
len = 0;
num_floats = 4;
// Pixel shader input struct
if (include_root_signature) {
// NOTE(review): the root signature declares b0, t0/s0 and t1/s1 only; it has no entry for
// the b1 constant buffer emitted when three_point_filtering is set -- confirm the two
// options are never combined.
append_str(buf, &len, "#define RS \"RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | DENY_VERTEX_SHADER_ROOT_ACCESS)");
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_str(buf, &len, ",CBV(b0, visibility = SHADER_VISIBILITY_PIXEL)");
}
if (cc_features.used_textures[0]) {
append_str(buf, &len, ",DescriptorTable(SRV(t0), visibility = SHADER_VISIBILITY_PIXEL)");
append_str(buf, &len, ",DescriptorTable(Sampler(s0), visibility = SHADER_VISIBILITY_PIXEL)");
}
if (cc_features.used_textures[1]) {
append_str(buf, &len, ",DescriptorTable(SRV(t1), visibility = SHADER_VISIBILITY_PIXEL)");
append_str(buf, &len, ",DescriptorTable(Sampler(s1), visibility = SHADER_VISIBILITY_PIXEL)");
}
append_line(buf, &len, "\"");
}
append_line(buf, &len, "struct PSInput {");
append_line(buf, &len, " float4 position : SV_POSITION;");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_line(buf, &len, " float2 uv : TEXCOORD;");
num_floats += 2;
}
// screenPos is filled in by VSMain from the position, so it does not add to num_floats.
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(buf, &len, " float4 screenPos : TEXCOORD1;");
}
if (cc_features.opt_fog) {
append_line(buf, &len, " float4 fog : FOG;");
num_floats += 4;
}
// Colour inputs are float4 when alpha is enabled, float3 otherwise.
for (int i = 0; i < cc_features.num_inputs; i++) {
len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc_features.opt_alpha ? 4 : 3, i + 1, i);
num_floats += cc_features.opt_alpha ? 4 : 3;
}
append_line(buf, &len, "};");
// Textures and samplers
if (cc_features.used_textures[0]) {
append_line(buf, &len, "Texture2D g_texture0 : register(t0);");
append_line(buf, &len, "SamplerState g_sampler0 : register(s0);");
}
if (cc_features.used_textures[1]) {
append_line(buf, &len, "Texture2D g_texture1 : register(t1);");
append_line(buf, &len, "SamplerState g_sampler1 : register(s1);");
}
// Constant buffer and random function
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(buf, &len, "cbuffer PerFrameCB : register(b0) {");
append_line(buf, &len, " uint noise_frame;");
append_line(buf, &len, " float2 noise_scale;");
append_line(buf, &len, "}");
append_line(buf, &len, "float random(in float3 value) {");
append_line(buf, &len, " float random = dot(value, float3(12.9898, 78.233, 37.719));");
append_line(buf, &len, " return frac(sin(random) * 143758.5453);");
append_line(buf, &len, "}");
}
// 3 point texture filtering
// Original author: ArthurCarvalho
// Based on GLSL implementation by twinaphex, mupen64plus-libretro project.
if (three_point_filtering && (cc_features.used_textures[0] || cc_features.used_textures[1])) {
append_line(buf, &len, "cbuffer PerDrawCB : register(b1) {");
append_line(buf, &len, " struct {");
append_line(buf, &len, " uint width;");
append_line(buf, &len, " uint height;");
append_line(buf, &len, " bool linear_filtering;");
append_line(buf, &len, " } textures[2];");
append_line(buf, &len, "}");
append_line(buf, &len, "#define TEX_OFFSET(tex, tSampler, texCoord, off, texSize) tex.Sample(tSampler, texCoord - off / texSize)");
append_line(buf, &len, "float4 tex2D3PointFilter(in Texture2D tex, in SamplerState tSampler, in float2 texCoord, in float2 texSize) {");
append_line(buf, &len, " float2 offset = frac(texCoord * texSize - float2(0.5, 0.5));");
append_line(buf, &len, " offset -= step(1.0, offset.x + offset.y);");
append_line(buf, &len, " float4 c0 = TEX_OFFSET(tex, tSampler, texCoord, offset, texSize);");
append_line(buf, &len, " float4 c1 = TEX_OFFSET(tex, tSampler, texCoord, float2(offset.x - sign(offset.x), offset.y), texSize);");
append_line(buf, &len, " float4 c2 = TEX_OFFSET(tex, tSampler, texCoord, float2(offset.x, offset.y - sign(offset.y)), texSize);");
append_line(buf, &len, " return c0 + abs(offset.x)*(c1-c0) + abs(offset.y)*(c2-c0);");
append_line(buf, &len, "}");
}
// Vertex shader
// VSMain is a pass-through: every attribute is copied unchanged into PSInput.
append_str(buf, &len, "PSInput VSMain(float4 position : POSITION");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_str(buf, &len, ", float2 uv : TEXCOORD");
}
if (cc_features.opt_fog) {
append_str(buf, &len, ", float4 fog : FOG");
}
for (int i = 0; i < cc_features.num_inputs; i++) {
len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc_features.opt_alpha ? 4 : 3, i + 1, i);
}
append_line(buf, &len, ") {");
append_line(buf, &len, " PSInput result;");
append_line(buf, &len, " result.position = position;");
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(buf, &len, " result.screenPos = position;");
}
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_line(buf, &len, " result.uv = uv;");
}
if (cc_features.opt_fog) {
append_line(buf, &len, " result.fog = fog;");
}
for (int i = 0; i < cc_features.num_inputs; i++) {
len += sprintf(buf + len, " result.input%d = input%d;\r\n", i + 1, i + 1);
}
append_line(buf, &len, " return result;");
append_line(buf, &len, "}");
// Pixel shader
if (include_root_signature) {
append_line(buf, &len, "[RootSignature(RS)]");
}
append_line(buf, &len, "float4 PSMain(PSInput input) : SV_TARGET {");
// Sample each used texture into texVal0 / texVal1, optionally through the 3-point filter.
if (cc_features.used_textures[0]) {
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal0;");
append_line(buf, &len, " if (textures[0].linear_filtering)");
append_line(buf, &len, " texVal0 = tex2D3PointFilter(g_texture0, g_sampler0, input.uv, float2(textures[0].width, textures[0].height));");
append_line(buf, &len, " else");
append_line(buf, &len, " texVal0 = g_texture0.Sample(g_sampler0, input.uv);");
} else {
append_line(buf, &len, " float4 texVal0 = g_texture0.Sample(g_sampler0, input.uv);");
}
}
if (cc_features.used_textures[1]) {
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal1;");
append_line(buf, &len, " if (textures[1].linear_filtering)");
append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));");
append_line(buf, &len, " else");
append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
} else {
append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
}
}
// Combiner output: when colour and alpha use different formulas (and alpha is enabled),
// build float4(rgb formula, alpha formula); otherwise one formula covers all components.
append_str(buf, &len, cc_features.opt_alpha ? " float4 texel = " : " float3 texel = ");
if (!cc_features.color_alpha_same && cc_features.opt_alpha) {
append_str(buf, &len, "float4(");
append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true);
append_str(buf, &len, ", ");
append_formula(buf, &len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true);
append_str(buf, &len, ")");
} else {
append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha);
}
append_line(buf, &len, ";");
// Texture edge mode: alpha above 0.3 becomes fully opaque, everything else is discarded.
if (cc_features.opt_texture_edge && cc_features.opt_alpha) {
append_line(buf, &len, " if (texel.a > 0.3) texel.a = 1.0; else discard;");
}
// TODO discard if alpha is 0?
// Fog blends only the colour channels towards fog.rgb by fog.a.
if (cc_features.opt_fog) {
if (cc_features.opt_alpha) {
append_line(buf, &len, " texel = float4(lerp(texel.rgb, input.fog.rgb, input.fog.a), texel.a);");
} else {
append_line(buf, &len, " texel = lerp(texel, input.fog.rgb, input.fog.a);");
}
}
// Noise multiplies alpha by a pseudo-random 0 or 1 derived from the scaled screen
// position and noise_frame.
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(buf, &len, " float2 coords = (input.screenPos.xy / input.screenPos.w) * noise_scale;");
append_line(buf, &len, " texel.a *= round(random(float3(floor(coords), noise_frame)));");
}
if (cc_features.opt_alpha) {
append_line(buf, &len, " return texel;");
} else {
append_line(buf, &len, " return float4(texel, 1.0);");
}
append_line(buf, &len, "}");
}
#endif

View file

@ -0,0 +1,14 @@
#if defined(ENABLE_DX11) || defined(ENABLE_DX12)
#ifndef GFX_DIRECT3D_COMMON_H
#define GFX_DIRECT3D_COMMON_H
#include <stdint.h>
#include "gfx_cc.h"
// Writes HLSL source for the given combiner features into buf and reports the number of
// bytes written through len. The [4096] in the parameter type is documentation only (the
// parameter is a plain char pointer); the caller must supply a buffer large enough.
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering);
#endif
#endif

614
src/pc/gfx/gfx_dxgi.cpp Normal file
View file

@ -0,0 +1,614 @@
#if defined(ENABLE_DX11) || defined(ENABLE_DX12)
#include <stdint.h>
#include <math.h>
#include <map>
#include <set>
#include <string>
#include <windows.h>
#include <wrl/client.h>
#include <dxgi1_3.h>
#include <versionhelpers.h>
#include <shellscalingapi.h>
#ifndef _LANGUAGE_C
#define _LANGUAGE_C
#endif
#include <PR/gbi.h>
#include "gfx_window_manager_api.h"
#include "gfx_rendering_api.h"
#include "gfx_direct3d_common.h"
#include "gfx_screen_config.h"
#include "gfx_pc.h"
#define DECLARE_GFX_DXGI_FUNCTIONS
#include "gfx_dxgi.h"
#define WINCLASS_NAME L"N64GAME"
#define GFX_API_NAME "DirectX"
#ifdef VERSION_EU
#define FRAME_INTERVAL_US_NUMERATOR 40000
#define FRAME_INTERVAL_US_DENOMINATOR 1
#else
#define FRAME_INTERVAL_US_NUMERATOR 100000
#define FRAME_INTERVAL_US_DENOMINATOR 3
#endif
using namespace Microsoft::WRL; // For ComPtr
// File-local state of the DXGI window backend. Being a static object it starts out
// zero-initialized (null handles and callbacks, false flags).
static struct {
// Main window handle, created in gfx_dxgi_init.
HWND h_wnd;
// While true, WM_PAINT is passed to DefWindowProcW instead of running a game iteration.
bool showing_error;
// Swap chain size, refreshed in gfx_dxgi_on_resize.
uint32_t current_width, current_height;
std::string game_name;
// dxgi.dll handle and the factory entry points resolved from it in load_dxgi_library;
// an entry point is null when the export could not be resolved.
HMODULE dxgi_module;
HRESULT (__stdcall *CreateDXGIFactory1)(REFIID riid, void **factory);
HRESULT (__stdcall *CreateDXGIFactory2)(UINT flags, REFIID iid, void **factory);
// Set once the SetProcessDpiAwareness fallback in run_as_dpi_aware has been attempted.
bool process_dpi_awareness_done;
// Window rectangle and maximized state saved when entering borderless full screen,
// used to restore the window when leaving it.
RECT last_window_rect;
bool is_full_screen, last_maximized_state;
ComPtr<IDXGIFactory2> factory;
ComPtr<IDXGISwapChain1> swap_chain;
HANDLE waitable_object;
// QueryPerformanceCounter value and frequency captured in gfx_dxgi_init.
uint64_t qpc_init, qpc_freq;
uint64_t frame_timestamp; // in units of 1/FRAME_INTERVAL_US_DENOMINATOR microseconds
std::map<UINT, DXGI_FRAME_STATISTICS> frame_stats;
std::set<std::pair<UINT, UINT>> pending_frame_stats;
bool dropped_frame;
bool sync_interval_means_frames_to_wait;
UINT length_in_vsync_frames;
// Callbacks registered by the game; each may be null and is checked before being called.
void (*on_fullscreen_changed)(bool is_now_fullscreen);
void (*run_one_game_iter)(void);
bool (*on_key_down)(int scancode);
bool (*on_key_up)(int scancode);
void (*on_all_keys_up)(void);
} dxgi;
// Loads dxgi.dll at run time and resolves the factory creation entry points.
//
// On return dxgi.CreateDXGIFactory1 / dxgi.CreateDXGIFactory2 are either valid function
// pointers or nullptr (library missing, or the export is not provided by this Windows
// version). Callers must check the pointer they intend to use.
static void load_dxgi_library(void) {
    dxgi.dxgi_module = LoadLibraryW(L"dxgi.dll");
    if (dxgi.dxgi_module == nullptr) {
        // Do not hand a null module to GetProcAddress; leave both entry points
        // explicitly unset instead.
        dxgi.CreateDXGIFactory1 = nullptr;
        dxgi.CreateDXGIFactory2 = nullptr;
        return;
    }
    // GetProcAddress returns a generic FARPROC; store it through a FARPROC view of the
    // typed function pointer members.
    *(FARPROC *)&dxgi.CreateDXGIFactory1 = GetProcAddress(dxgi.dxgi_module, "CreateDXGIFactory1");
    *(FARPROC *)&dxgi.CreateDXGIFactory2 = GetProcAddress(dxgi.dxgi_module, "CreateDXGIFactory2");
}
// Runs f() with per-monitor DPI awareness in effect. The per-thread awareness context is
// restored afterwards when SetThreadDpiAwarenessContext is available; the per-process
// fallback cannot be undone.
//   f - callable invoked exactly once with no arguments.
template <typename Fun>
static void run_as_dpi_aware(Fun f) {
// Make sure Windows 8.1 or newer doesn't upscale/downscale the rendered images.
// This is an issue on Windows 8.1 and newer, where moving the window between
// monitors that have different scaling settings will by default cause the
// DirectX image to be scaled accordingly.
// The resulting scale factor is the current monitor's scale factor divided by
// the initial monitor's scale factor. Setting per-monitor aware disables scaling.
// On Windows 10 1607 and later, that is solved by setting the awareness per window,
// which is done by using SetThreadDpiAwarenessContext before and after creating
// any window. When the message handler runs, the corresponding context also applies.
// From windef.h, missing in MinGW.
DECLARE_HANDLE(DPI_AWARENESS_CONTEXT);
#define DPI_AWARENESS_CONTEXT_UNAWARE ((DPI_AWARENESS_CONTEXT)-1)
#define DPI_AWARENESS_CONTEXT_SYSTEM_AWARE ((DPI_AWARENESS_CONTEXT)-2)
#define DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE ((DPI_AWARENESS_CONTEXT)-3)
#define DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2 ((DPI_AWARENESS_CONTEXT)-4)
#define DPI_AWARENESS_CONTEXT_UNAWARE_GDISCALED ((DPI_AWARENESS_CONTEXT)-5)
// Resolved dynamically; the pointer is null when user32.dll does not export the function.
DPI_AWARENESS_CONTEXT (WINAPI *SetThreadDpiAwarenessContext)(DPI_AWARENESS_CONTEXT dpiContext);
*(FARPROC *)&SetThreadDpiAwarenessContext = GetProcAddress(GetModuleHandleW(L"user32.dll"), "SetThreadDpiAwarenessContext");
// Only assigned, and only read below, when SetThreadDpiAwarenessContext is non-null.
DPI_AWARENESS_CONTEXT old_awareness_context;
if (SetThreadDpiAwarenessContext != nullptr) {
old_awareness_context = SetThreadDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2);
} else {
// Solution for Windows 8.1 and newer, but before Windows 10 1607.
// SetProcessDpiAwareness must be called before any drawing related API is called.
if (!dxgi.process_dpi_awareness_done) {
HMODULE shcore_module = LoadLibraryW(L"SHCore.dll");
if (shcore_module != nullptr) {
HRESULT (WINAPI *SetProcessDpiAwareness)(PROCESS_DPI_AWARENESS value);
*(FARPROC *)&SetProcessDpiAwareness = GetProcAddress(shcore_module, "SetProcessDpiAwareness");
if (SetProcessDpiAwareness != nullptr) {
SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE);
// Ignore result, will fail if already called or manifest already specifies dpi awareness.
}
FreeLibrary(shcore_module);
}
// Attempt the process-wide fallback only once, whether or not it succeeded.
dxgi.process_dpi_awareness_done = true;
}
}
f();
// Restore the old context
if (SetThreadDpiAwarenessContext != nullptr && old_awareness_context != nullptr) {
SetThreadDpiAwarenessContext(old_awareness_context);
}
}
static void toggle_borderless_window_full_screen(bool enable, bool call_callback) {
// Windows 7 + flip mode + waitable object can't go to exclusive fullscreen,
// so do borderless instead. If DWM is enabled, this means we get one monitor
// sync interval of latency extra. On Win 10 however (maybe Win 8 too), due to
// "fullscreen optimizations" the latency is eliminated.
if (enable == dxgi.is_full_screen) {
return;
}
if (!enable) {
RECT r = dxgi.last_window_rect;
// Set in window mode with the last saved position and size
SetWindowLongPtr(dxgi.h_wnd, GWL_STYLE, WS_VISIBLE | WS_OVERLAPPEDWINDOW);
if (dxgi.last_maximized_state) {
SetWindowPos(dxgi.h_wnd, NULL, 0, 0, 0, 0, SWP_FRAMECHANGED | SWP_NOMOVE | SWP_NOSIZE);
ShowWindow(dxgi.h_wnd, SW_MAXIMIZE);
} else {
SetWindowPos(dxgi.h_wnd, NULL, r.left, r.top, r.right - r.left, r.bottom - r.top, SWP_FRAMECHANGED);
ShowWindow(dxgi.h_wnd, SW_RESTORE);
}
ShowCursor(TRUE);
dxgi.is_full_screen = false;
} else {
// Save if window is maximized or not
WINDOWPLACEMENT window_placement;
window_placement.length = sizeof(WINDOWPLACEMENT);
GetWindowPlacement(dxgi.h_wnd, &window_placement);
dxgi.last_maximized_state = window_placement.showCmd == SW_SHOWMAXIMIZED;
// Save window position and size if the window is not maximized
GetWindowRect(dxgi.h_wnd, &dxgi.last_window_rect);
// Get in which monitor the window is
HMONITOR h_monitor = MonitorFromWindow(dxgi.h_wnd, MONITOR_DEFAULTTONEAREST);
// Get info from that monitor
MONITORINFOEX monitor_info;
monitor_info.cbSize = sizeof(MONITORINFOEX);
GetMonitorInfo(h_monitor, &monitor_info);
RECT r = monitor_info.rcMonitor;
// Set borderless full screen to that monitor
SetWindowLongPtr(dxgi.h_wnd, GWL_STYLE, WS_VISIBLE | WS_POPUP);
SetWindowPos(dxgi.h_wnd, HWND_TOP, r.left, r.top, r.right - r.left, r.bottom - r.top, SWP_FRAMECHANGED);
ShowCursor(FALSE);
dxgi.is_full_screen = true;
}
if (dxgi.on_fullscreen_changed != nullptr && call_callback) {
dxgi.on_fullscreen_changed(enable);
}
}
static void gfx_dxgi_on_resize(void) {
if (dxgi.swap_chain.Get() != nullptr) {
gfx_get_current_rendering_api()->on_resize();
DXGI_SWAP_CHAIN_DESC1 desc1;
ThrowIfFailed(dxgi.swap_chain->GetDesc1(&desc1));
dxgi.current_width = desc1.Width;
dxgi.current_height = desc1.Height;
}
}
// Forwards a key press to the registered on_key_down callback, if any.
// The value passed is bits 16..24 of l_param; per the Win32 keystroke message layout these
// hold the scan code (bits 16-23) and the extended-key flag (bit 24). w_param is unused.
static void onkeydown(WPARAM w_param, LPARAM l_param) {
    if (dxgi.on_key_down == nullptr) {
        return;
    }
    const int scancode = (l_param >> 16) & 0x1ff;
    dxgi.on_key_down(scancode);
}
// Forwards a key release to the registered on_key_up callback, if any.
// The value passed is bits 16..24 of l_param; per the Win32 keystroke message layout these
// hold the scan code (bits 16-23) and the extended-key flag (bit 24). w_param is unused.
static void onkeyup(WPARAM w_param, LPARAM l_param) {
    if (dxgi.on_key_up == nullptr) {
        return;
    }
    const int scancode = (l_param >> 16) & 0x1ff;
    dxgi.on_key_up(scancode);
}
// Window procedure of the game window.
//
// Messages handled here return 0; everything else (including WM_PAINT while an error is
// being shown, and WM_SYSKEYDOWN other than a fresh Alt+Enter) is forwarded to
// DefWindowProcW. WM_DESTROY terminates the process.
static LRESULT CALLBACK gfx_dxgi_wnd_proc(HWND h_wnd, UINT message, WPARAM w_param, LPARAM l_param) {
    switch (message) {
        case WM_SIZE:
            gfx_dxgi_on_resize();
            return 0;

        case WM_DESTROY:
            exit(0);

        case WM_PAINT:
            if (dxgi.showing_error) {
                break; // let the default handler paint
            }
            // One game iteration is run per paint message.
            if (dxgi.run_one_game_iter != nullptr) {
                dxgi.run_one_game_iter();
            }
            return 0;

        case WM_ACTIVATEAPP:
            // Application activation changed: report every key as released.
            if (dxgi.on_all_keys_up != nullptr) {
                dxgi.on_all_keys_up();
            }
            return 0;

        case WM_KEYDOWN:
            onkeydown(w_param, l_param);
            return 0;

        case WM_KEYUP:
            onkeyup(w_param, l_param);
            return 0;

        case WM_SYSKEYDOWN: {
            // Bit 30 of l_param is set when the key was already down (auto-repeat).
            const bool was_already_down = (l_param & 1 << 30) != 0;
            if (w_param != VK_RETURN || was_already_down) {
                break;
            }
            toggle_borderless_window_full_screen(!dxgi.is_full_screen, true);
            return 0;
        }

        default:
            break;
    }
    return DefWindowProcW(h_wnd, message, w_param, l_param);
}
static void gfx_dxgi_init(const char *game_name, bool start_in_fullscreen) {
LARGE_INTEGER qpc_init, qpc_freq;
QueryPerformanceCounter(&qpc_init);
QueryPerformanceFrequency(&qpc_freq);
dxgi.qpc_init = qpc_init.QuadPart;
dxgi.qpc_freq = qpc_freq.QuadPart;
// Prepare window title
char title[512];
wchar_t w_title[512];
int len = sprintf(title, "%s (%s)", game_name, GFX_API_NAME);
mbstowcs(w_title, title, len + 1);
dxgi.game_name = game_name;
// Create window
WNDCLASSEXW wcex;
wcex.cbSize = sizeof(WNDCLASSEX);
wcex.style = CS_HREDRAW | CS_VREDRAW;
wcex.lpfnWndProc = gfx_dxgi_wnd_proc;
wcex.cbClsExtra = 0;
wcex.cbWndExtra = 0;
wcex.hInstance = nullptr;
wcex.hIcon = nullptr;
wcex.hCursor = LoadCursor(nullptr, IDC_ARROW);
wcex.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH);
wcex.lpszMenuName = nullptr;
wcex.lpszClassName = WINCLASS_NAME;
wcex.hIconSm = nullptr;
ATOM winclass = RegisterClassExW(&wcex);
run_as_dpi_aware([&] () {
// We need to be dpi aware when calculating the size
RECT wr = {0, 0, DESIRED_SCREEN_WIDTH, DESIRED_SCREEN_HEIGHT};
AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
dxgi.h_wnd = CreateWindowW(WINCLASS_NAME, w_title, WS_OVERLAPPEDWINDOW,
CW_USEDEFAULT, 0, wr.right - wr.left, wr.bottom - wr.top, nullptr, nullptr, nullptr, nullptr);
});
load_dxgi_library();
ShowWindow(dxgi.h_wnd, SW_SHOW);
UpdateWindow(dxgi.h_wnd);
if (start_in_fullscreen) {
toggle_borderless_window_full_screen(true, false);
}
}
// Registers the function to call when the full screen state changes.
// Passing nullptr disables the notification.
static void gfx_dxgi_set_fullscreen_changed_callback(void (*on_fullscreen_changed)(bool is_now_fullscreen)) {
    dxgi.on_fullscreen_changed = on_fullscreen_changed;
}
// Enters (enable == true) or leaves borderless full screen and notifies the registered
// fullscreen-changed callback.
static void gfx_dxgi_set_fullscreen(bool enable) {
    const bool notify_callback = true;
    toggle_borderless_window_full_screen(enable, notify_callback);
}
// Registers the keyboard callbacks used by the window procedure:
//   on_key_down    - called with the scancode of a pressed key,
//   on_key_up      - called with the scancode of a released key,
//   on_all_keys_up - called when WM_ACTIVATEAPP is received.
// Any of them may be nullptr to ignore that event.
static void gfx_dxgi_set_keyboard_callbacks(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void)) {
    dxgi.on_all_keys_up = on_all_keys_up;
    dxgi.on_key_up = on_key_up;
    dxgi.on_key_down = on_key_down;
}
static void gfx_dxgi_main_loop(void (*run_one_game_iter)(void)) {
dxgi.run_one_game_iter = run_one_game_iter;
MSG msg;
while (GetMessage(&msg, nullptr, 0, 0)) {
TranslateMessage(&msg);
DispatchMessage(&msg);
}
}
// Reports the size cached by the last gfx_dxgi_on_resize.
//   width, height - output pointers; both must be non-null.
static void gfx_dxgi_get_dimensions(uint32_t *width, uint32_t *height) {
    *width = dxgi.current_width;
    *height = dxgi.current_height;
}
// Intentionally does nothing: messages are pumped by gfx_dxgi_main_loop instead.
// The PeekMessage-based polling loop below is kept commented out.
static void gfx_dxgi_handle_events(void) {
/*MSG msg;
while (PeekMessageW(&msg, nullptr, 0, 0, PM_REMOVE)) {
TranslateMessage(&msg);
DispatchMessage(&msg);
}*/
}
// Converts a QueryPerformanceCounter tick count to microseconds using dxgi.qpc_freq.
// Whole seconds and the sub-second remainder are converted separately so that the
// multiplication by 1000000 is applied to smaller values than qpc itself.
static uint64_t qpc_to_us(uint64_t qpc) {
    const uint64_t ticks_per_second = dxgi.qpc_freq;
    const uint64_t whole_seconds = qpc / ticks_per_second;
    const uint64_t leftover_ticks = qpc % ticks_per_second;
    return whole_seconds * 1000000 + leftover_ticks * 1000000 / ticks_per_second;
}
// Decides how long (in vsync intervals) the frame about to be rendered should
// stay on screen, and whether it should be rendered at all.
//
// Steps:
//  1. Sample the swap chain's frame statistics and remember the most recent
//     distinct samples (at most 3, keyed by PresentCount).
//  2. Forget queued presents that are older than the newest statistics sample,
//     and cap the queue at 15 entries.
//  3. Advance the ideal frame timestamp by one game frame.
//  4. With at least two samples, estimate the vsync interval and the time the
//     last queued frame finishes, then derive how many vsyncs to wait.
//
// Returns false (and sets dxgi.dropped_frame) when the frame should be dropped
// because it is already too late; returns true otherwise, with
// dxgi.length_in_vsync_frames holding the sync interval to pass to Present().
static bool gfx_dxgi_start_frame(void) {
DXGI_FRAME_STATISTICS stats;
// Only trust the statistics when the call succeeds and at least one of the sync fields is non-zero.
if (dxgi.swap_chain->GetFrameStatistics(&stats) == S_OK && (stats.SyncRefreshCount != 0 || stats.SyncQPCTime.QuadPart != 0ULL)) {
{
LARGE_INTEGER t0;
QueryPerformanceCounter(&t0);
//printf("Get frame stats: %llu\n", (unsigned long long)(t0.QuadPart - dxgi.qpc_init));
}
//printf("stats: %u %u %u %u %u %.6f\n", dxgi.pending_frame_stats.rbegin()->first, dxgi.pending_frame_stats.rbegin()->second, stats.PresentCount, stats.PresentRefreshCount, stats.SyncRefreshCount, (double)(stats.SyncQPCTime.QuadPart - dxgi.qpc_init) / dxgi.qpc_freq);
// Record the sample only if it belongs to a present we have not recorded yet.
if (dxgi.frame_stats.empty() || dxgi.frame_stats.rbegin()->second.PresentCount != stats.PresentCount) {
dxgi.frame_stats.insert(std::make_pair(stats.PresentCount, stats));
}
// Keep a sliding window of the 3 newest samples.
if (dxgi.frame_stats.size() > 3) {
dxgi.frame_stats.erase(dxgi.frame_stats.begin());
}
}
if (!dxgi.frame_stats.empty()) {
// Presents older than the newest statistics sample are no longer "in flight".
while (!dxgi.pending_frame_stats.empty() && dxgi.pending_frame_stats.begin()->first < dxgi.frame_stats.rbegin()->first) {
dxgi.pending_frame_stats.erase(dxgi.pending_frame_stats.begin());
}
}
while (dxgi.pending_frame_stats.size() > 15) {
// Just make sure the list doesn't grow too large if GetFrameStatistics fails.
dxgi.pending_frame_stats.erase(dxgi.pending_frame_stats.begin());
}
// frame_timestamp is kept multiplied by FRAME_INTERVAL_US_DENOMINATOR (it is divided by it wherever it is used below).
dxgi.frame_timestamp += FRAME_INTERVAL_US_NUMERATOR;
if (dxgi.frame_stats.size() >= 2) {
DXGI_FRAME_STATISTICS *first = &dxgi.frame_stats.begin()->second;
DXGI_FRAME_STATISTICS *last = &dxgi.frame_stats.rbegin()->second;
uint64_t sync_qpc_diff = last->SyncQPCTime.QuadPart - first->SyncQPCTime.QuadPart;
UINT sync_vsync_diff = last->SyncRefreshCount - first->SyncRefreshCount;
// NOTE(review): present_vsync_diff and present_diff are computed but not used below.
UINT present_vsync_diff = last->PresentRefreshCount - first->PresentRefreshCount;
UINT present_diff = last->PresentCount - first->PresentCount;
// Avoid dividing by zero when both samples report the same refresh count.
if (sync_vsync_diff == 0) {
sync_vsync_diff = 1;
}
// Average number of QPC ticks per vsync over the sample window.
double estimated_vsync_interval = (double)sync_qpc_diff / (double)sync_vsync_diff;
// qpc_to_us is defined elsewhere; presumably converts QPC ticks to microseconds.
uint64_t estimated_vsync_interval_us = qpc_to_us(estimated_vsync_interval);
//printf("Estimated vsync_interval: %d\n", (int)estimated_vsync_interval_us);
if (estimated_vsync_interval_us < 2 || estimated_vsync_interval_us > 1000000) {
// Unreasonable, maybe a monitor change
estimated_vsync_interval_us = 16666;
estimated_vsync_interval = estimated_vsync_interval_us * dxgi.qpc_freq / 1000000;
}
// Sum the sync intervals of the presents still queued; the first entry is
// skipped when dxgi.sync_interval_means_frames_to_wait is set.
UINT queued_vsyncs = 0;
bool is_first = true;
for (const std::pair<UINT, UINT>& p : dxgi.pending_frame_stats) {
if (is_first && dxgi.sync_interval_means_frames_to_wait) {
is_first = false;
continue;
}
queued_vsyncs += p.second;
}
// Estimated time (relative to qpc_init) at which the last queued frame stops being displayed.
uint64_t last_frame_present_end_qpc = (last->SyncQPCTime.QuadPart - dxgi.qpc_init) + estimated_vsync_interval * queued_vsyncs;
uint64_t last_end_us = qpc_to_us(last_frame_present_end_qpc);
// Distance, in vsync intervals, between that time and the ideal time of this frame.
double vsyncs_to_wait = (double)(int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) / estimated_vsync_interval_us;
//printf("ts: %llu, last_end_us: %llu, Init v: %f\n", dxgi.frame_timestamp / 3, last_end_us, vsyncs_to_wait);
if (vsyncs_to_wait <= 0) {
// Too late
if ((int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) < -66666) {
// The application must have been paused or similar
// Restart the schedule: show this frame for one nominal game frame (at least one vsync).
vsyncs_to_wait = round(((double)FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) / estimated_vsync_interval_us);
if (vsyncs_to_wait < 1) {
vsyncs_to_wait = 1;
}
dxgi.frame_timestamp = FRAME_INTERVAL_US_DENOMINATOR * (last_end_us + vsyncs_to_wait * estimated_vsync_interval_us);
} else {
// Drop frame
//printf("Dropping frame\n");
dxgi.dropped_frame = true;
return false;
}
}
// Round a fractional wait to whichever neighbouring vsync is closer to the
// desired time, after nudging the desired time by 2000 us.
if (floor(vsyncs_to_wait) != vsyncs_to_wait) {
uint64_t left = last_end_us + floor(vsyncs_to_wait) * estimated_vsync_interval_us;
uint64_t right = last_end_us + ceil(vsyncs_to_wait) * estimated_vsync_interval_us;
uint64_t adjusted_desired_time = dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR + (last_end_us + (FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) > dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR ? 2000 : -2000);
int64_t diff_left = adjusted_desired_time - left;
int64_t diff_right = right - adjusted_desired_time;
if (diff_left < 0) {
diff_left = -diff_left;
}
if (diff_right < 0) {
diff_right = -diff_right;
}
if (diff_left < diff_right) {
vsyncs_to_wait = floor(vsyncs_to_wait);
} else {
vsyncs_to_wait = ceil(vsyncs_to_wait);
}
if (vsyncs_to_wait == 0) {
//printf("vsyncs_to_wait became 0 so dropping frame\n");
dxgi.dropped_frame = true;
return false;
}
}
//printf("v: %d\n", (int)vsyncs_to_wait);
if (vsyncs_to_wait > 4) {
// Invalid, so change to 4
vsyncs_to_wait = 4;
}
dxgi.length_in_vsync_frames = vsyncs_to_wait;
} else {
// Not enough statistics yet: fall back to a fixed sync interval of 2.
dxgi.length_in_vsync_frames = 2;
}
return true;
}
static void gfx_dxgi_swap_buffers_begin(void) {
//dxgi.length_in_vsync_frames = 1;
ThrowIfFailed(dxgi.swap_chain->Present(dxgi.length_in_vsync_frames, 0));
UINT this_present_id;
if (dxgi.swap_chain->GetLastPresentCount(&this_present_id) == S_OK) {
dxgi.pending_frame_stats.insert(std::make_pair(this_present_id, dxgi.length_in_vsync_frames));
}
dxgi.dropped_frame = false;
}
static void gfx_dxgi_swap_buffers_end(void) {
LARGE_INTEGER t0, t1, t2;
QueryPerformanceCounter(&t0);
QueryPerformanceCounter(&t1);
if (!dxgi.dropped_frame) {
if (dxgi.waitable_object != nullptr) {
WaitForSingleObject(dxgi.waitable_object, INFINITE);
}
// else TODO: maybe sleep until some estimated time the frame will be shown to reduce lag
}
DXGI_FRAME_STATISTICS stats;
dxgi.swap_chain->GetFrameStatistics(&stats);
QueryPerformanceCounter(&t2);
dxgi.sync_interval_means_frames_to_wait = dxgi.pending_frame_stats.rbegin()->first == stats.PresentCount;
//printf("done %llu gpu:%d wait:%d freed:%llu frame:%u %u monitor:%u t:%llu\n", (unsigned long long)(t0.QuadPart - dxgi.qpc_init), (int)(t1.QuadPart - t0.QuadPart), (int)(t2.QuadPart - t0.QuadPart), (unsigned long long)(t2.QuadPart - dxgi.qpc_init), dxgi.pending_frame_stats.rbegin()->first, stats.PresentCount, stats.SyncRefreshCount, (unsigned long long)(stats.SyncQPCTime.QuadPart - dxgi.qpc_init));
}
static double gfx_dxgi_get_time(void) {
LARGE_INTEGER t;
QueryPerformanceCounter(&t);
return (double)(t.QuadPart - dxgi.qpc_init) / dxgi.qpc_freq;
}
void gfx_dxgi_create_factory_and_device(bool debug, int d3d_version, bool (*create_device_fn)(IDXGIAdapter1 *adapter, bool test_only)) {
if (dxgi.CreateDXGIFactory2 != nullptr) {
ThrowIfFailed(dxgi.CreateDXGIFactory2(debug ? DXGI_CREATE_FACTORY_DEBUG : 0, __uuidof(IDXGIFactory2), &dxgi.factory));
} else {
ThrowIfFailed(dxgi.CreateDXGIFactory1(__uuidof(IDXGIFactory2), &dxgi.factory));
}
ComPtr<IDXGIAdapter1> adapter;
for (UINT i = 0; dxgi.factory->EnumAdapters1(i, &adapter) != DXGI_ERROR_NOT_FOUND; i++) {
DXGI_ADAPTER_DESC1 desc;
adapter->GetDesc1(&desc);
if (desc.Flags & 2/*DXGI_ADAPTER_FLAG_SOFTWARE*/) { // declaration missing in mingw headers
continue;
}
if (create_device_fn(adapter.Get(), true)) {
break;
}
}
create_device_fn(adapter.Get(), false);
char title[512];
wchar_t w_title[512];
int len = sprintf(title, "%s (Direct3D %d)", dxgi.game_name.c_str(), d3d_version);
mbstowcs(w_title, title, len + 1);
SetWindowTextW(dxgi.h_wnd, w_title);
}
// Creates the swap chain for the DXGI window and configures frame latency.
//
// device - the rendering device (or command queue) the swap chain is created for;
//          passed straight to CreateSwapChainForHwnd.
//
// Side effects: stores the swap chain in dxgi.swap_chain, may store a frame
// latency waitable object in dxgi.waitable_object, disables DXGI's Alt+Enter
// handling for the window, and records the resulting buffer size in
// dxgi.current_width / dxgi.current_height.
// Failures are reported through ThrowIfFailed.
// Returns the created swap chain.
ComPtr<IDXGISwapChain1> gfx_dxgi_create_swap_chain(IUnknown *device) {
bool win8 = IsWindows8OrGreater(); // DXGI_SCALING_NONE is only supported on Win8 and beyond
bool dxgi_13 = dxgi.CreateDXGIFactory2 != nullptr; // DXGI 1.3 introduced waitable object
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
swap_chain_desc.BufferCount = 2;
// Width and Height are left at 0 here; the actual size is read back via GetDesc1 below.
swap_chain_desc.Width = 0;
swap_chain_desc.Height = 0;
swap_chain_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swap_chain_desc.Scaling = win8 ? DXGI_SCALING_NONE : DXGI_SCALING_STRETCH;
swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; // Apparently this was backported to Win 7 Platform Update
swap_chain_desc.Flags = dxgi_13 ? DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT : 0;
swap_chain_desc.SampleDesc.Count = 1;
run_as_dpi_aware([&] () {
// When setting size for the buffers, the values that DXGI puts into the desc (that can later be retrieved by GetDesc1)
// have been divided by the current scaling factor. By making this call dpi aware, no division will be performed.
// The same goes for IDXGISwapChain::ResizeBuffers(), however that function is currently only called from the message handler.
ThrowIfFailed(dxgi.factory->CreateSwapChainForHwnd(device, dxgi.h_wnd, &swap_chain_desc, nullptr, nullptr, &dxgi.swap_chain));
});
ThrowIfFailed(dxgi.factory->MakeWindowAssociation(dxgi.h_wnd, DXGI_MWA_NO_ALT_ENTER));
ComPtr<IDXGISwapChain2> swap_chain2;
// Prefer the per-swap-chain latency control when IDXGISwapChain2 is available.
if (dxgi.swap_chain->QueryInterface(__uuidof(IDXGISwapChain2), &swap_chain2) == S_OK) {
ThrowIfFailed(swap_chain2->SetMaximumFrameLatency(1));
dxgi.waitable_object = swap_chain2->GetFrameLatencyWaitableObject();
// Wait on the object once right away, before the first frame is presented.
WaitForSingleObject(dxgi.waitable_object, INFINITE);
} else {
// Otherwise limit the latency on the device itself.
ComPtr<IDXGIDevice1> device1;
ThrowIfFailed(device->QueryInterface(IID_PPV_ARGS(&device1)));
ThrowIfFailed(device1->SetMaximumFrameLatency(1));
}
// Read back the size DXGI actually chose for the buffers.
ThrowIfFailed(dxgi.swap_chain->GetDesc1(&swap_chain_desc));
dxgi.current_width = swap_chain_desc.Width;
dxgi.current_height = swap_chain_desc.Height;
return dxgi.swap_chain;
}
// Returns the handle of the window managed by this DXGI backend (dxgi.h_wnd).
HWND gfx_dxgi_get_h_wnd(void) {
return dxgi.h_wnd;
}
// Throws `res` (as an HRESULT) if it denotes failure, after logging it to stderr.
// Does nothing for success codes.
void ThrowIfFailed(HRESULT res) {
    if (FAILED(res)) {
        // HRESULT is a signed type; %X expects an unsigned int, so convert explicitly.
        fprintf(stderr, "Error: 0x%08X\n", (unsigned int)res);
        throw res;
    }
}
// Throws `res` (as an HRESULT) if it denotes failure, after showing `message`
// and the HRESULT in a modal error box owned by `h_wnd`.
// dxgi.showing_error is set before the box is shown so that other code can
// tell an error dialog is up.
void ThrowIfFailed(HRESULT res, HWND h_wnd, const char *message) {
    if (FAILED(res)) {
        char full_message[256];
        // Bounded formatting: `message` has arbitrary length and is truncated if needed.
        // HRESULT is signed; %X expects an unsigned int, so convert explicitly.
        snprintf(full_message, sizeof(full_message), "%s\n\nHRESULT: 0x%08X", message, (unsigned int)res);
        dxgi.showing_error = true;
        MessageBox(h_wnd, full_message, "Error", MB_OK | MB_ICONERROR);
        throw res;
    }
}
// Window-manager backend table for DXGI.
// The initializer is positional, so the order of the entries must match the
// member order of struct GfxWindowManagerAPI (declared elsewhere).
struct GfxWindowManagerAPI gfx_dxgi_api = {
gfx_dxgi_init,
gfx_dxgi_set_keyboard_callbacks,
gfx_dxgi_set_fullscreen_changed_callback,
gfx_dxgi_set_fullscreen,
gfx_dxgi_main_loop,
gfx_dxgi_get_dimensions,
gfx_dxgi_handle_events,
gfx_dxgi_start_frame,
gfx_dxgi_swap_buffers_begin,
gfx_dxgi_swap_buffers_end,
gfx_dxgi_get_time,
};
#endif

16
src/pc/gfx/gfx_dxgi.h Normal file
View file

@ -0,0 +1,16 @@
// Public interface of the DXGI window-manager backend.
#ifndef GFX_DXGI_H
#define GFX_DXGI_H
// NOTE(review): the only type used unconditionally below is struct
// GfxWindowManagerAPI; confirm that this is the intended header to include.
#include "gfx_rendering_api.h"
// The helpers below use Windows/DXGI types, so they are only declared for
// translation units that define DECLARE_GFX_DXGI_FUNCTIONS (and are expected
// to have included the necessary Windows headers first).
#ifdef DECLARE_GFX_DXGI_FUNCTIONS
// Creates the DXGI factory and the device (through create_device_fn) and updates the window title.
void gfx_dxgi_create_factory_and_device(bool debug, int d3d_version, bool (*create_device_fn)(IDXGIAdapter1 *adapter, bool test_only));
// Creates the swap chain for the backend's window.
Microsoft::WRL::ComPtr<IDXGISwapChain1> gfx_dxgi_create_swap_chain(IUnknown *device);
// Returns the backend's window handle.
HWND gfx_dxgi_get_h_wnd(void);
// Throw the HRESULT when it denotes failure; the second overload also shows a message box.
void ThrowIfFailed(HRESULT res);
void ThrowIfFailed(HRESULT res, HWND h_wnd, const char *message);
#endif
// Backend function table, defined in the DXGI implementation file.
extern struct GfxWindowManagerAPI gfx_dxgi_api;
#endif

611
src/pc/gfx/gfx_glx.c Normal file
View file

@ -0,0 +1,611 @@
#ifdef __linux__
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <X11/X.h>
#include <X11/Xlib.h>
#include <GL/glx.h>
#include <X11/extensions/Xrandr.h>
#include <X11/XKBlib.h>
#include <X11/Xatom.h>
#include "gfx_window_manager_api.h"
#include "gfx_screen_config.h"
// Text placed in parentheses after the game name in the window title.
#define GFX_API_NAME "GLX - OpenGL"
// Game frame interval in microseconds, expressed as the fraction
// FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR so that
// timestamps can be accumulated without rounding error:
// 40000/1 us (1/25 s) on EU, 100000/3 us (1/30 s) otherwise.
#ifdef VERSION_EU
#define FRAME_INTERVAL_US_NUMERATOR 40000
#define FRAME_INTERVAL_US_DENOMINATOR 1
#else
#define FRAME_INTERVAL_US_NUMERATOR 100000
#define FRAME_INTERVAL_US_DENOMINATOR 3
#endif
// Translation table from XKB symbolic key names (as reported by XkbGetNames)
// to the scancodes handed to the keyboard callbacks.
// init_keymap() looks each X keycode's name up here and stores the scancode in
// glx.keymap; a scancode of 0 means the key is ignored by the event handler.
// NOTE(review): the values appear to be PC (set 1) scancodes with 0x100 added
// for extended keys — confirm against the consumers of the callbacks.
const struct {
const char *name;
int scancode;
} keymap_name_to_scancode[] = {
{"ESC", 0x01},
{"AE01", 0x02 },
{"AE02", 0x03 },
{"AE03", 0x04 },
{"AE04", 0x05 },
{"AE05", 0x06 },
{"AE06", 0x07 },
{"AE07", 0x08 },
{"AE08", 0x09 },
{"AE09", 0x0a },
{"AE10", 0x0b },
{"AE11", 0x0c },
{"AE12", 0x0d },
{"BKSP", 0x0e },
{"TAB", 0x0f },
{"AD01", 0x10 },
{"AD02", 0x11 },
{"AD03", 0x12 },
{"AD04", 0x13 },
{"AD05", 0x14 },
{"AD06", 0x15 },
{"AD07", 0x16 },
{"AD08", 0x17 },
{"AD09", 0x18 },
{"AD10", 0x19 },
{"AD11", 0x1a },
{"AD12", 0x1b },
{"RTRN", 0x1c },
{"LCTL", 0x1d },
{"AC01", 0x1e },
{"AC02", 0x1f },
{"AC03", 0x20 },
{"AC04", 0x21 },
{"AC05", 0x22 },
{"AC06", 0x23 },
{"AC07", 0x24 },
{"AC08", 0x25 },
{"AC09", 0x26 },
{"AC10", 0x27 },
{"AC11", 0x28 },
{"TLDE", 0x29 },
{"LFSH", 0x2a },
{"BKSL", 0x2b },
{"AB01", 0x2c },
{"AB02", 0x2d },
{"AB03", 0x2e },
{"AB04", 0x2f },
{"AB05", 0x30 },
{"AB06", 0x31 },
{"AB07", 0x32 },
{"AB08", 0x33 },
{"AB09", 0x34 },
{"AB10", 0x35 },
{"RTSH", 0x36 },
{"KPMU", 0x37 },
{"LALT", 0x38 },
{"SPCE", 0x39 },
{"CAPS", 0x3a },
{"FK01", 0x3b },
{"FK02", 0x3c },
{"FK03", 0x3d },
{"FK04", 0x3e },
{"FK05", 0x3f },
{"FK06", 0x40 },
{"FK07", 0x41 },
{"FK08", 0x42 },
{"FK09", 0x43 },
{"FK10", 0x44 },
{"NMLK", 0x45 },
{"SCLK", 0x46 },
{"KP7", 0x47 },
{"KP8", 0x48 },
{"KP9", 0x49 },
{"KPSU", 0x4a },
{"KP4", 0x4b },
{"KP5", 0x4c },
{"KP6", 0x4d },
{"KPAD", 0x4e },
{"KP1", 0x4f },
{"KP2", 0x50 },
{"KP3", 0x51 },
{"KP0", 0x52 },
{"KPDL", 0x53 },
{"LVL3", 0x54 }, // correct?
// NOTE(review): the empty name below also matches every keycode whose XKB name is empty.
{"", 0x55 }, // not mapped?
{"LSGT", 0x56 },
{"FK11", 0x57 },
{"FK12", 0x58 },
{"AB11", 0x59 },
{"KATA", 0 },
{"HIRA", 0 },
{"HENK", 0 },
{"HKTG", 0 },
{"MUHE", 0 },
{"JPCM", 0 },
{"KPEN", 0x11c },
{"RCTL", 0x11d },
{"KPDV", 0x135 },
{"PRSC", 0x54 }, // ?
{"RALT", 0x138 },
{"LNFD", 0 },
{"HOME", 0x147 },
{"UP", 0x148 },
{"PGUP", 0x149 },
{"LEFT", 0x14b },
{"RGHT", 0x14d },
{"END", 0x14f },
{"DOWN", 0x150 },
{"PGDN", 0x151 },
{"INS", 0x152 },
{"DELE", 0x153 },
{"PAUS", 0x21d },
{"LWIN", 0x15b },
{"RWIN", 0x15c },
{"COMP", 0x15d },
};
// All state of the GLX window-manager backend.
static struct {
// X connection and windows, set up in gfx_glx_init.
Display *dpy;
Window root;
Window win;
// Atoms used to ask the window manager to enter/leave fullscreen.
Atom atom_wm_state;
Atom atom_wm_state_fullscreen;
bool is_fullscreen;
void (*on_fullscreen_changed)(bool is_now_fullscreen);
// X keycode -> scancode, filled by init_keymap; 0 means "ignore this key".
int keymap[256];
// Keyboard callbacks registered by the client; may be NULL.
bool (*on_key_down)(int scancode);
bool (*on_key_up)(int scancode);
void (*on_all_keys_up)(void);
// GLX extension entry points; only loaded when the matching extension is advertised.
PFNGLXGETSYNCVALUESOMLPROC glXGetSyncValuesOML;
PFNGLXSWAPBUFFERSMSCOMLPROC glXSwapBuffersMscOML;
PFNGLXWAITFORSBCOMLPROC glXWaitForSbcOML;
PFNGLXSWAPINTERVALEXTPROC glXSwapIntervalEXT;
PFNGLXSWAPINTERVALSGIPROC glXSwapIntervalSGI;
PFNGLXGETVIDEOSYNCSGIPROC glXGetVideoSyncSGI;
PFNGLXWAITVIDEOSYNCSGIPROC glXWaitVideoSyncSGI;
// True when glXGetSyncValuesOML was available and succeeded during init.
bool has_oml_sync_control;
// Time origin: timestamps stored below are relative to this value.
uint64_t ust0;
// Vsync counter value observed at the end of the previous swap.
int64_t last_msc;
uint64_t wanted_ust; // multiplied by FRAME_INTERVAL_US_DENOMINATOR
// Current estimate of the time between two vsyncs, in microseconds (starts at 16666).
uint64_t vsync_interval;
// Timestamp (relative to ust0) observed at the end of the previous swap.
uint64_t last_ust;
// Vsync counter value the current swap is aimed at.
int64_t target_msc;
// Set by swap_buffers_begin when the current frame was skipped.
bool dropped_frame;
// True when GLX_SGI_video_sync is used as the fallback timing source.
bool has_sgi_video_sync;
// Last 32-bit SGI counter value widened to 64 bits (see adjust_sync_counter).
uint64_t last_sync_counter;
// Counter and time recorded after the swap on the SGI path, consumed by swap_buffers_end.
int64_t this_msc;
int64_t this_ust;
} glx;
// Returns the current CLOCK_MONOTONIC time in microseconds.
static int64_t get_time(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    int64_t whole_seconds_us = (int64_t)now.tv_sec * 1000000;
    return whole_seconds_us + now.tv_nsec / 1000;
}
// Widens a 32-bit video sync counter to 64 bits.
// The upper 32 bits of glx.last_sync_counter are incremented when the previous
// low word was in the upper half of the 32-bit range and the new value is in
// the lower half, which is taken to mean that the counter wrapped around.
// Stores and returns the widened value.
static int64_t adjust_sync_counter(uint32_t counter) {
    const uint32_t half_range = 0x80000000U;
    uint32_t previous_low = (uint32_t)glx.last_sync_counter;
    uint32_t high_word = glx.last_sync_counter >> 32;

    bool wrapped = previous_low >= half_range && counter < half_range;
    if (wrapped) {
        high_word++;
    }

    uint64_t widened = ((uint64_t)high_word << 32) | counter;
    glx.last_sync_counter = widened;
    return glx.last_sync_counter;
}
// Calls glXWaitVideoSyncSGI with divisor 1 and remainder 0 and returns the
// counter it reports, widened to 64 bits.
// If the call does not write the counter, 0 is passed on to adjust_sync_counter.
static int64_t glXWaitVideoSyncSGI_wrapper(void) {
    unsigned int raw_count = 0;
    glx.glXWaitVideoSyncSGI(1, 0, &raw_count);

    int64_t widened = adjust_sync_counter(raw_count);
    return widened;
}
// Reads the current video sync counter without waiting and returns it widened
// to 64 bits.
// If the call does not write the counter, 0 is passed on to adjust_sync_counter.
static int64_t glXGetVideoSyncSGI_wrapper(void) {
    unsigned int raw_count = 0;
    glx.glXGetVideoSyncSGI(&raw_count);

    int64_t widened = adjust_sync_counter(raw_count);
    return widened;
}
static void init_keymap(void) {
XkbDescPtr desc = XkbGetMap(glx.dpy, 0, XkbUseCoreKbd);
XkbGetNames(glx.dpy, XkbKeyNamesMask, desc);
for (int i = desc->min_key_code; i <= desc->max_key_code && i < 256; i++) {
char name[XkbKeyNameLength + 1];
memcpy(name, desc->names->keys[i].name, XkbKeyNameLength);
name[XkbKeyNameLength] = '\0';
for (size_t j = 0; j < sizeof(keymap_name_to_scancode) / sizeof(keymap_name_to_scancode[0]); j++) {
if (strcmp(keymap_name_to_scancode[j].name, name) == 0) {
glx.keymap[i] = keymap_name_to_scancode[j].scancode;
break;
}
}
}
XkbFreeNames(desc, XkbKeyNamesMask, True);
XkbFreeKeyboard(desc, 0, True);
}
// Hides or restores the mouse cursor over the game window.
// Hiding is done by installing a cursor built from an all-zero 8x8 bitmap, so
// the pointer does not distract during fullscreen play; restoring removes the
// window's cursor definition again.
static void gfx_glx_hide_mouse(bool hide) {
    if (!hide) {
        XUndefineCursor(glx.dpy, glx.win);
        XSync(glx.dpy, False);
        return;
    }

    static char noData[] = { 0,0,0,0,0,0,0,0 };
    XColor black;
    black.red = 0;
    black.green = 0;
    black.blue = 0;

    // The same empty bitmap serves as both shape and mask.
    Pixmap blank_bitmap = XCreateBitmapFromData(glx.dpy, glx.win, noData, 8, 8);
    Cursor invisible_cursor = XCreatePixmapCursor(glx.dpy, blank_bitmap, blank_bitmap, &black, &black, 0, 0);
    XDefineCursor(glx.dpy, glx.win, invisible_cursor);
    XSync(glx.dpy, False);

    XFreeCursor(glx.dpy, invisible_cursor);
    XFreePixmap(glx.dpy, blank_bitmap);
}
// Switches the window into or out of fullscreen by sending a _NET_WM_STATE
// client message to the root window, and hides the mouse cursor while in
// fullscreen.
//
// on            - desired fullscreen state; nothing happens if it is already active.
// call_callback - whether to notify glx.on_fullscreen_changed about the change.
static void gfx_glx_set_fullscreen_state(bool on, bool call_callback) {
    if (glx.is_fullscreen == on) {
        return;
    }
    glx.is_fullscreen = on;

    XEvent xev;
    // Zero the whole event first so that no uninitialized bytes (serial,
    // send_event, display, data.l[4]) are sent to the X server.
    memset(&xev, 0, sizeof(xev));
    xev.xany.type = ClientMessage;
    xev.xclient.message_type = glx.atom_wm_state;
    xev.xclient.format = 32;
    xev.xclient.window = glx.win;
    xev.xclient.data.l[0] = on; // 1 adds the state, 0 removes it
    xev.xclient.data.l[1] = glx.atom_wm_state_fullscreen;
    xev.xclient.data.l[2] = 0;
    xev.xclient.data.l[3] = 0;
    XSendEvent(glx.dpy, glx.root, 0, SubstructureNotifyMask | SubstructureRedirectMask, &xev);

    gfx_glx_hide_mouse(on);
    if (glx.on_fullscreen_changed != NULL && call_callback) {
        glx.on_fullscreen_changed(on);
    }
}
// Returns true if `extension` appears as a complete, space-delimited token in
// the space-separated list `extensions`.
// A NULL list, a NULL name or an empty name never matches.
static bool gfx_glx_check_extension(const char *extensions, const char *extension) {
    if (extensions == NULL || extension == NULL || extension[0] == '\0') {
        return false;
    }

    size_t len = strlen(extension);
    const char *pos = extensions;
    while ((pos = strstr(pos, extension)) != NULL) {
        // A substring hit only counts when it is delimited on both sides.
        bool starts_token = pos == extensions || pos[-1] == ' ';
        bool ends_token = pos[len] == ' ' || pos[len] == '\0';
        if (starts_token && ends_token) {
            return true;
        }
        // Continue right after the start of this hit so no candidate is skipped.
        pos++;
    }
    return false;
}
// Opens the X display, creates the game window and its OpenGL context, builds
// the key map and selects the frame timing method.
//
// game_name           - shown in the window title, followed by GFX_API_NAME.
// start_in_fullscreen - enter fullscreen right after mapping the window.
//
// Timing method selection:
//  - GLX_OML_sync_control when glXGetSyncValuesOML works (has_oml_sync_control);
//  - otherwise a swap interval of 1 is requested and GLX_SGI_video_sync is used
//    when glXGetVideoSyncSGI works (has_sgi_video_sync);
//  - otherwise neither flag is set and swap_buffers_begin falls back to sleeping.
// Exits the process if the display, a visual or a GL context cannot be obtained.
static void gfx_glx_init(const char *game_name, bool start_in_fullscreen) {
    // On NVIDIA proprietary driver, make the driver queue up to two frames on glXSwapBuffers,
    // which means that glXSwapBuffers should be non-blocking,
    // if we are sure to wait at least one vsync interval between calls.
    setenv("__GL_MaxFramesAllowed", "2", true);

    glx.dpy = XOpenDisplay(NULL);
    if (glx.dpy == NULL) {
        fprintf(stderr, "Cannot connect to X server\n");
        exit(1);
    }
    int screen = DefaultScreen(glx.dpy);
    glx.root = RootWindow(glx.dpy, screen);

    GLint att[] = { GLX_RGBA, GLX_DEPTH_SIZE, 24, GLX_DOUBLEBUFFER, None };
    // Choose the visual on the same screen the root window belongs to;
    // a visual from another screen cannot be used with that root window.
    XVisualInfo *vi = glXChooseVisual(glx.dpy, screen, att);
    if (vi == NULL) {
        fprintf(stderr, "No appropriate GLX visual found\n");
        exit(1);
    }

    Colormap cmap = XCreateColormap(glx.dpy, glx.root, vi->visual, AllocNone);
    XSetWindowAttributes swa;
    swa.colormap = cmap;
    swa.event_mask = ExposureMask | KeyPressMask | KeyReleaseMask | FocusChangeMask;
    glx.win = XCreateWindow(glx.dpy, glx.root, 0, 0, DESIRED_SCREEN_WIDTH, DESIRED_SCREEN_HEIGHT, 0, vi->depth, InputOutput, vi->visual, CWColormap | CWEventMask, &swa);

    glx.atom_wm_state = XInternAtom(glx.dpy, "_NET_WM_STATE", False);
    glx.atom_wm_state_fullscreen = XInternAtom(glx.dpy, "_NET_WM_STATE_FULLSCREEN", False);
    XMapWindow(glx.dpy, glx.win);

    if (start_in_fullscreen) {
        gfx_glx_set_fullscreen_state(true, false);
    }

    char title[512];
    // Bounded formatting: the game name has arbitrary length and is truncated if needed.
    snprintf(title, sizeof(title), "%s (%s)", game_name, GFX_API_NAME);
    XStoreName(glx.dpy, glx.win, title);

    GLXContext glc = glXCreateContext(glx.dpy, vi, NULL, GL_TRUE);
    if (glc == NULL) {
        fprintf(stderr, "Cannot create GLX context\n");
        exit(1);
    }
    glXMakeCurrent(glx.dpy, glx.win, glc);
    // The visual info is no longer needed once the window and context exist.
    XFree(vi);

    init_keymap();

    const char *extensions = glXQueryExtensionsString(glx.dpy, screen);
    if (extensions == NULL) {
        // Treat a failed query as "no extensions".
        extensions = "";
    }

    if (gfx_glx_check_extension(extensions, "GLX_OML_sync_control")) {
        glx.glXGetSyncValuesOML = (PFNGLXGETSYNCVALUESOMLPROC)glXGetProcAddressARB((const GLubyte *)"glXGetSyncValuesOML");
        glx.glXSwapBuffersMscOML = (PFNGLXSWAPBUFFERSMSCOMLPROC)glXGetProcAddressARB((const GLubyte *)"glXSwapBuffersMscOML");
        glx.glXWaitForSbcOML = (PFNGLXWAITFORSBCOMLPROC)glXGetProcAddressARB((const GLubyte *)"glXWaitForSbcOML");
    }
    if (gfx_glx_check_extension(extensions, "GLX_EXT_swap_control")) {
        glx.glXSwapIntervalEXT = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddressARB((const GLubyte *)"glXSwapIntervalEXT");
    }
    if (gfx_glx_check_extension(extensions, "GLX_SGI_swap_control")) {
        glx.glXSwapIntervalSGI = (PFNGLXSWAPINTERVALSGIPROC)glXGetProcAddressARB((const GLubyte *)"glXSwapIntervalSGI");
    }
    if (gfx_glx_check_extension(extensions, "GLX_SGI_video_sync")) {
        glx.glXGetVideoSyncSGI = (PFNGLXGETVIDEOSYNCSGIPROC)glXGetProcAddressARB((const GLubyte *)"glXGetVideoSyncSGI");
        glx.glXWaitVideoSyncSGI = (PFNGLXWAITVIDEOSYNCSGIPROC)glXGetProcAddressARB((const GLubyte *)"glXWaitVideoSyncSGI");
    }

    int64_t ust, msc, sbc;
    if (glx.glXGetSyncValuesOML != NULL && glx.glXGetSyncValuesOML(glx.dpy, glx.win, &ust, &msc, &sbc)) {
        glx.has_oml_sync_control = true;
        glx.ust0 = (uint64_t)ust;
    } else {
        glx.ust0 = get_time();
        if (glx.glXSwapIntervalEXT != NULL) {
            glx.glXSwapIntervalEXT(glx.dpy, glx.win, 1);
        } else if (glx.glXSwapIntervalSGI != NULL) {
            glx.glXSwapIntervalSGI(1);
        }
        if (glx.glXGetVideoSyncSGI != NULL) {
            // Try if it really works
            unsigned int count;
            if (glx.glXGetVideoSyncSGI(&count) == 0) {
                glx.last_sync_counter = count;
                glx.has_sgi_video_sync = true;
            }
        }
    }

    // Initial guess (60 Hz); refined from measurements in swap_buffers_end.
    glx.vsync_interval = 16666;
}
// Registers the function to call when the fullscreen state changes
// (see gfx_glx_set_fullscreen_state). Passing NULL disables the notification.
static void gfx_glx_set_fullscreen_changed_callback(void (*on_fullscreen_changed)(bool is_now_fullscreen)) {
glx.on_fullscreen_changed = on_fullscreen_changed;
}
// Enters or leaves fullscreen and notifies the fullscreen-changed callback.
static void gfx_glx_set_fullscreen(bool enable) {
gfx_glx_set_fullscreen_state(enable, true);
}
// Registers the keyboard callbacks used by gfx_glx_handle_events:
// on_key_down / on_key_up receive the scancode of a pressed / released key,
// on_all_keys_up is called when the window loses input focus.
// Any of them may be NULL.
static void gfx_glx_set_keyboard_callbacks(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void)) {
glx.on_key_down = on_key_down;
glx.on_key_up = on_key_up;
glx.on_all_keys_up = on_all_keys_up;
}
// Runs the game forever by calling run_one_game_iter back to back.
// This function never returns.
static void gfx_glx_main_loop(void (*run_one_game_iter)(void)) {
    for (;;) {
        run_one_game_iter();
    }
}
// Writes the current width and height of the game window to *width and *height,
// as reported by XGetWindowAttributes.
static void gfx_glx_get_dimensions(uint32_t *width, uint32_t *height) {
    XWindowAttributes window_attrs;
    XGetWindowAttributes(glx.dpy, glx.win, &window_attrs);

    *width = window_attrs.width;
    *height = window_attrs.height;
}
// Drains the X event queue and dispatches keyboard-related events:
//  - FocusOut: report that all keys are up;
//  - KeyPress / KeyRelease: translate the keycode through glx.keymap and call
//    the key-down / key-up callback; keys mapped to scancode 0 are ignored.
// Pressing F10 (scancode 0x44) additionally toggles fullscreen before the
// key-down callback runs.
static void gfx_glx_handle_events(void) {
    while (XPending(glx.dpy)) {
        XEvent event;
        XNextEvent(glx.dpy, &event);

        if (event.type == FocusOut) {
            if (glx.on_all_keys_up != NULL) {
                glx.on_all_keys_up();
            }
            continue;
        }

        bool is_press = event.type == KeyPress;
        if (!is_press && event.type != KeyRelease) {
            continue;
        }
        if (event.xkey.keycode >= 256) {
            continue;
        }

        int scancode = glx.keymap[event.xkey.keycode];
        if (scancode == 0) {
            continue;
        }

        if (!is_press) {
            if (glx.on_key_up != NULL) {
                glx.on_key_up(scancode);
            }
            continue;
        }

        if (scancode == 0x44) { // F10
            gfx_glx_set_fullscreen_state(!glx.is_fullscreen, true);
        }
        if (glx.on_key_down != NULL) {
            glx.on_key_down(scancode);
        }
    }
}
// Always allows the frame to be rendered; on GLX the decision to drop a frame
// is made later, in gfx_glx_swap_buffers_begin.
static bool gfx_glx_start_frame(void) {
return true;
}
// Schedules and issues the buffer swap for the frame that was just rendered.
//
// The ideal presentation time (glx.wanted_ust) is advanced by one game frame,
// then one of three strategies is used:
//  - no sync extension: sleep until the ideal time, then glXSwapBuffers;
//  - GLX_OML_sync_control: ask the driver to swap at a target vsync count;
//  - GLX_SGI_video_sync: wait for the vsync counter manually around the swap.
// glx.dropped_frame tells gfx_glx_swap_buffers_end whether a swap was issued.
static void gfx_glx_swap_buffers_begin(void) {
glx.wanted_ust += FRAME_INTERVAL_US_NUMERATOR; // advance 1/30 seconds on JP/US or 1/25 seconds on EU
if (!glx.has_oml_sync_control && !glx.has_sgi_video_sync) {
// Fallback without any vsync information: pace frames with the monotonic clock.
glFlush();
uint64_t target = glx.wanted_ust / FRAME_INTERVAL_US_DENOMINATOR;
uint64_t now;
// Sleep for the remaining time; if nanosleep is interrupted (non-zero result), re-check the clock and retry.
while (target > (now = (uint64_t)get_time() - glx.ust0)) {
struct timespec ts = {(target - now) / 1000000, ((target - now) % 1000000) * 1000};
if (nanosleep(&ts, NULL) == 0) {
break;
}
}
// `now` is the last clock sample taken above (before any completed sleep).
// More than 2 frame intervals late: drop the frame; more than 32: restart the schedule.
if (target + 2 * FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR < now) {
if (target + 32 * FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR >= now) {
printf("Dropping frame\n");
glx.dropped_frame = true;
return;
} else {
// Reset timer since we are way out of sync
glx.wanted_ust = now * FRAME_INTERVAL_US_DENOMINATOR;
}
}
glXSwapBuffers(glx.dpy, glx.win);
glx.dropped_frame = false;
return;
}
// Distance, in vsync intervals, from the previous swap to the ideal time of this frame.
double vsyncs_to_wait = (int64_t)(glx.wanted_ust / FRAME_INTERVAL_US_DENOMINATOR - glx.last_ust) / (double)glx.vsync_interval;
if (vsyncs_to_wait <= 0) {
printf("Dropping frame\n");
// Drop frame
glx.dropped_frame = true;
return;
}
// Round a fractional wait to whichever neighbouring vsync is closer to the
// ideal time, after nudging the ideal time by 2000 us.
if (floor(vsyncs_to_wait) != vsyncs_to_wait) {
uint64_t left_ust = glx.last_ust + floor(vsyncs_to_wait) * glx.vsync_interval;
uint64_t right_ust = glx.last_ust + ceil(vsyncs_to_wait) * glx.vsync_interval;
uint64_t adjusted_wanted_ust = glx.wanted_ust / FRAME_INTERVAL_US_DENOMINATOR + (glx.last_ust + FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR > glx.wanted_ust / FRAME_INTERVAL_US_DENOMINATOR ? 2000 : -2000);
int64_t diff_left = adjusted_wanted_ust - left_ust;
int64_t diff_right = right_ust - adjusted_wanted_ust;
if (diff_left < 0) {
diff_left = -diff_left;
}
if (diff_right < 0) {
diff_right = -diff_right;
}
if (diff_left < diff_right) {
vsyncs_to_wait = floor(vsyncs_to_wait);
} else {
vsyncs_to_wait = ceil(vsyncs_to_wait);
}
// NOTE(review): vsyncs_to_wait was > 0 before rounding, so it is >= 0 here and
// the "<= -4" branch looks unreachable; a result of 0 is raised to 1 below.
if (vsyncs_to_wait <= -4) {
printf("vsyncs_to_wait became -4 or less so dropping frame\n");
glx.dropped_frame = true;
return;
} else if (vsyncs_to_wait < 1) {
vsyncs_to_wait = 1;
}
}
glx.dropped_frame = false;
//printf("Vsyncs to wait: %d, diff: %d\n", (int)vsyncs_to_wait, (int)(glx.last_ust + (int64_t)vsyncs_to_wait * glx.vsync_interval - glx.wanted_ust / 3));
if (vsyncs_to_wait > 30) {
// Unreasonable, so change to 2
vsyncs_to_wait = 2;
}
glx.target_msc = glx.last_msc + vsyncs_to_wait;
if (glx.has_oml_sync_control) {
// Let the driver perform the swap once the vsync counter reaches target_msc.
glx.glXSwapBuffersMscOML(glx.dpy, glx.win, glx.target_msc, 0, 0);
} else if (glx.has_sgi_video_sync) {
glFlush(); // Try to submit pending work. Don't use glFinish since that busy loops on NVIDIA proprietary driver.
//uint64_t counter0;
uint64_t counter1, counter2;
//uint64_t before_wait = get_time();
counter1 = glXGetVideoSyncSGI_wrapper();
//counter0 = counter1;
//int waits = 0;
// Wait until one vsync before the target, so the swap lands on the target vsync.
while (counter1 < (uint64_t)glx.target_msc - 1) {
counter1 = glXWaitVideoSyncSGI_wrapper();
//++waits;
}
//uint64_t before = get_time();
glXSwapBuffers(glx.dpy, glx.win);
// Then wait until the target vsync itself has been reached.
counter2 = glXGetVideoSyncSGI_wrapper();
while (counter2 < (uint64_t)glx.target_msc) {
counter2 = glXWaitVideoSyncSGI_wrapper();
}
uint64_t after = get_time();
//printf("%.3f %.3f %.3f\t%.3f\t%u %d %.2f %u %d\n", before_wait * 0.000060, before * 0.000060, after * 0.000060, (after - before) * 0.000060, counter0, counter2 - counter0, vsyncs_to_wait, (unsigned int)glx.target_msc, waits);
// Hand the observed counter and time to gfx_glx_swap_buffers_end.
glx.this_msc = counter2;
glx.this_ust = after;
}
}
// Completes the swap started by gfx_glx_swap_buffers_begin: obtains the time
// and vsync count at which the frame was shown, refines the vsync interval
// estimate from them and resets the frame schedule if the frame was far too late.
// Does nothing when the frame was dropped or no sync extension is in use.
static void gfx_glx_swap_buffers_end(void) {
if (glx.dropped_frame || (!glx.has_oml_sync_control && !glx.has_sgi_video_sync)) {
return;
}
int64_t ust, msc, sbc;
if (glx.has_oml_sync_control) {
if (!glx.glXWaitForSbcOML(glx.dpy, glx.win, 0, &ust, &msc, &sbc)) {
// X connection broke or something?
// Assume the swap happened exactly at the target vsync.
glx.last_ust += (glx.target_msc - glx.last_msc) * glx.vsync_interval;
glx.last_msc = glx.target_msc;
return;
}
} else {
// SGI path: use the values recorded right after the swap.
ust = glx.this_ust;
msc = glx.this_msc;
}
uint64_t this_ust = ust - glx.ust0;
uint64_t vsyncs_passed = msc - glx.last_msc;
bool bad_vsync_interval = false;
// Skip the estimate for the very first swap (last_ust still 0) and when no vsync passed.
if (glx.last_ust != 0 && vsyncs_passed != 0) {
uint64_t new_vsync_interval = (this_ust - glx.last_ust) / vsyncs_passed;
if (new_vsync_interval <= 500000) {
// Should be less than 0.5 seconds to be trusted
glx.vsync_interval = new_vsync_interval;
} else {
bad_vsync_interval = true;
}
//printf("glx.vsync_interval: %d\n", (int)glx.vsync_interval);
}
glx.last_ust = this_ust;
glx.last_msc = msc;
if (msc != glx.target_msc) {
printf("Frame too late by %d vsyncs\n", (int)(msc - glx.target_msc));
}
if (msc - glx.target_msc >= 8 || bad_vsync_interval) {
// Frame arrived way too late, so reset timer from here
printf("Reseting timer\n");
glx.wanted_ust = this_ust * FRAME_INTERVAL_US_DENOMINATOR;
}
}
// Time query of the window-manager API; this backend always reports 0.0.
static double gfx_glx_get_time(void) {
return 0.0;
}
// Window-manager backend table for GLX.
// The initializer is positional, so the order of the entries must match the
// member order of struct GfxWindowManagerAPI (declared in gfx_window_manager_api.h).
struct GfxWindowManagerAPI gfx_glx = {
gfx_glx_init,
gfx_glx_set_keyboard_callbacks,
gfx_glx_set_fullscreen_changed_callback,
gfx_glx_set_fullscreen,
gfx_glx_main_loop,
gfx_glx_get_dimensions,
gfx_glx_handle_events,
gfx_glx_start_frame,
gfx_glx_swap_buffers_begin,
gfx_glx_swap_buffers_end,
gfx_glx_get_time
};
#endif

8
src/pc/gfx/gfx_glx.h Normal file
View file

@ -0,0 +1,8 @@
// Public interface of the GLX window-manager backend.
#ifndef GFX_GLX_H
#define GFX_GLX_H
#include "gfx_window_manager_api.h"
// Backend function table, defined in gfx_glx.c.
// Declared `extern` so that including this header does not create an
// additional (tentative) definition in every translation unit, which fails to
// link when the compiler uses -fno-common.
extern struct GfxWindowManagerAPI gfx_glx;
#endif

531
src/pc/gfx/gfx_opengl.c Normal file
View file

@ -0,0 +1,531 @@
#ifdef ENABLE_OPENGL
#include <stdint.h>
#include <stdbool.h>
#ifndef _LANGUAGE_C
#define _LANGUAGE_C
#endif
#include <PR/gbi.h>
#ifdef __MINGW32__
#define FOR_WINDOWS 1
#else
#define FOR_WINDOWS 0
#endif
#if FOR_WINDOWS
#include <GL/glew.h>
#include "SDL.h"
#define GL_GLEXT_PROTOTYPES 1
#include "SDL_opengl.h"
#else
#include <SDL2/SDL.h>
#define GL_GLEXT_PROTOTYPES 1
#include <SDL2/SDL_opengles2.h>
#endif
#include "gfx_cc.h"
#include "gfx_rendering_api.h"
// A compiled and linked OpenGL program generated for one color-combiner
// configuration, together with the vertex layout it expects.
struct ShaderProgram {
// Identifier of the combiner configuration this program was generated for.
uint32_t shader_id;
GLuint opengl_program_id;
uint8_t num_inputs;
bool used_textures[2];
// Number of floats per vertex; used as the vertex stride.
uint8_t num_floats;
// Location and component count of each vertex attribute, in buffer order.
GLint attrib_locations[7];
uint8_t attrib_sizes[7];
uint8_t num_attribs;
// When set, the two uniform locations below are uploaded on every shader load.
bool used_noise;
GLint frame_count_location;
GLint window_height_location;
};
// Fixed-size pool of generated programs and the number of entries in use.
static struct ShaderProgram shader_program_pool[64];
static uint8_t shader_program_pool_size;
static GLuint opengl_vbo;
// Values uploaded to the noise uniforms (see gfx_opengl_set_uniforms).
static uint32_t frame_count;
static uint32_t current_height;
// Reports whether this rendering backend uses a 0..1 depth range; the OpenGL
// backend answers false.
static bool gfx_opengl_z_is_from_0_to_1(void) {
return false;
}
static void gfx_opengl_vertex_array_set_attribs(struct ShaderProgram *prg) {
size_t num_floats = prg->num_floats;
size_t pos = 0;
for (int i = 0; i < prg->num_attribs; i++) {
glEnableVertexAttribArray(prg->attrib_locations[i]);
glVertexAttribPointer(prg->attrib_locations[i], prg->attrib_sizes[i], GL_FLOAT, GL_FALSE, num_floats * sizeof(float), (void *) (pos * sizeof(float)));
pos += prg->attrib_sizes[i];
}
}
// Uploads the per-frame uniforms of `prg`. Only programs that use noise have
// any: the current frame count and the current window height.
static void gfx_opengl_set_uniforms(struct ShaderProgram *prg) {
    if (!prg->used_noise) {
        return;
    }

    glUniform1i(prg->frame_count_location, frame_count);
    glUniform1i(prg->window_height_location, current_height);
}
// Disables every vertex attribute that `old_prg` had enabled.
// A NULL program (nothing loaded yet) is ignored.
static void gfx_opengl_unload_shader(struct ShaderProgram *old_prg) {
    if (old_prg == NULL) {
        return;
    }

    for (int attrib = 0; attrib < old_prg->num_attribs; attrib++) {
        glDisableVertexAttribArray(old_prg->attrib_locations[attrib]);
    }
}
// Makes `new_prg` the active program, then sets up its vertex attributes and
// uploads its uniforms. The program is bound first because the uniform upload
// applies to the currently active program.
static void gfx_opengl_load_shader(struct ShaderProgram *new_prg) {
glUseProgram(new_prg->opengl_program_id);
gfx_opengl_vertex_array_set_attribs(new_prg);
gfx_opengl_set_uniforms(new_prg);
}
// Appends the characters of `str` (without its terminating NUL) to `buf` at
// offset *len and advances *len accordingly.
// No bounds checking is done: the caller must make sure `buf` is large enough.
static void append_str(char *buf, size_t *len, const char *str) {
    size_t write_pos = *len;
    for (const char *src = str; *src != '\0'; src++) {
        buf[write_pos] = *src;
        write_pos++;
    }
    *len = write_pos;
}
// Appends the characters of `str` followed by a newline to `buf` at offset
// *len and advances *len accordingly. No terminating NUL is written.
// No bounds checking is done: the caller must make sure `buf` is large enough.
static void append_line(char *buf, size_t *len, const char *str) {
    size_t write_pos = *len;
    for (const char *src = str; *src != '\0'; src++) {
        buf[write_pos] = *src;
        write_pos++;
    }
    buf[write_pos] = '\n';
    *len = write_pos + 1;
}
// Returns the GLSL expression for one color-combiner operand.
//
// item                - one of the SHADER_* operand identifiers.
// with_alpha          - the expression is part of a vec4 (otherwise vec3) formula.
// only_alpha          - the expression is part of the scalar alpha formula; when
//                       set, the other flags are ignored.
// inputs_have_alpha   - the vInputN varyings are vec4, so ".rgb" is needed to
//                       use them in a vec3 formula.
// hint_single_element - a scalar is acceptable where the operand is a
//                       replicated alpha value.
//
// An unknown `item` yields the same expression as SHADER_0 (a zero of the
// matching type), so the function always returns a valid string instead of
// falling off the end.
static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_alpha, bool inputs_have_alpha, bool hint_single_element) {
    if (!only_alpha) {
        switch (item) {
            case SHADER_0:
                return with_alpha ? "vec4(0.0, 0.0, 0.0, 0.0)" : "vec3(0.0, 0.0, 0.0)";
            case SHADER_INPUT_1:
                return with_alpha || !inputs_have_alpha ? "vInput1" : "vInput1.rgb";
            case SHADER_INPUT_2:
                return with_alpha || !inputs_have_alpha ? "vInput2" : "vInput2.rgb";
            case SHADER_INPUT_3:
                return with_alpha || !inputs_have_alpha ? "vInput3" : "vInput3.rgb";
            case SHADER_INPUT_4:
                return with_alpha || !inputs_have_alpha ? "vInput4" : "vInput4.rgb";
            case SHADER_TEXEL0:
                return with_alpha ? "texVal0" : "texVal0.rgb";
            case SHADER_TEXEL0A:
                return hint_single_element ? "texVal0.a" :
                    (with_alpha ? "vec4(texVal0.a, texVal0.a, texVal0.a, texVal0.a)" : "vec3(texVal0.a, texVal0.a, texVal0.a)");
            case SHADER_TEXEL1:
                return with_alpha ? "texVal1" : "texVal1.rgb";
            default:
                break;
        }
        // Unknown operand: behave like SHADER_0.
        return with_alpha ? "vec4(0.0, 0.0, 0.0, 0.0)" : "vec3(0.0, 0.0, 0.0)";
    }

    switch (item) {
        case SHADER_0:
            return "0.0";
        case SHADER_INPUT_1:
            return "vInput1.a";
        case SHADER_INPUT_2:
            return "vInput2.a";
        case SHADER_INPUT_3:
            return "vInput3.a";
        case SHADER_INPUT_4:
            return "vInput4.a";
        case SHADER_TEXEL0:
            return "texVal0.a";
        case SHADER_TEXEL0A:
            return "texVal0.a";
        case SHADER_TEXEL1:
            return "texVal1.a";
        default:
            break;
    }
    // Unknown operand: behave like SHADER_0.
    return "0.0";
}
// Appends the GLSL expression of one color-combiner formula to `buf`.
//
// c[only_alpha] holds the four operands (A, B, C, D) of the color (row 0) or
// alpha (row 1) combiner. Depending on the flags, the emitted expression is:
//   do_single:   D
//   do_multiply: A * C
//   do_mix:      mix(B, A, C)
//   otherwise:   (A - B) * C + D
// with_alpha, only_alpha and opt_alpha are passed on to shader_item_to_str;
// operand C is always requested with the single-element hint.
static void append_formula(char *buf, size_t *len, uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
    const uint8_t *operands = c[only_alpha];

    if (do_single) {
        append_str(buf, len, shader_item_to_str(operands[3], with_alpha, only_alpha, opt_alpha, false));
        return;
    }

    if (do_multiply) {
        append_str(buf, len, shader_item_to_str(operands[0], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, " * ");
        append_str(buf, len, shader_item_to_str(operands[2], with_alpha, only_alpha, opt_alpha, true));
        return;
    }

    if (do_mix) {
        append_str(buf, len, "mix(");
        append_str(buf, len, shader_item_to_str(operands[1], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, ", ");
        append_str(buf, len, shader_item_to_str(operands[0], with_alpha, only_alpha, opt_alpha, false));
        append_str(buf, len, ", ");
        append_str(buf, len, shader_item_to_str(operands[2], with_alpha, only_alpha, opt_alpha, true));
        append_str(buf, len, ")");
        return;
    }

    // General form: (A - B) * C + D
    append_str(buf, len, "(");
    append_str(buf, len, shader_item_to_str(operands[0], with_alpha, only_alpha, opt_alpha, false));
    append_str(buf, len, " - ");
    append_str(buf, len, shader_item_to_str(operands[1], with_alpha, only_alpha, opt_alpha, false));
    append_str(buf, len, ") * ");
    append_str(buf, len, shader_item_to_str(operands[2], with_alpha, only_alpha, opt_alpha, true));
    append_str(buf, len, " + ");
    append_str(buf, len, shader_item_to_str(operands[3], with_alpha, only_alpha, opt_alpha, false));
}
static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shader_id) {
struct CCFeatures cc_features;
gfx_cc_get_features(shader_id, &cc_features);
char vs_buf[1024];
char fs_buf[1024];
size_t vs_len = 0;
size_t fs_len = 0;
size_t num_floats = 4;
// Vertex shader
append_line(vs_buf, &vs_len, "#version 110");
append_line(vs_buf, &vs_len, "attribute vec4 aVtxPos;");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_line(vs_buf, &vs_len, "attribute vec2 aTexCoord;");
append_line(vs_buf, &vs_len, "varying vec2 vTexCoord;");
num_floats += 2;
}
if (cc_features.opt_fog) {
append_line(vs_buf, &vs_len, "attribute vec4 aFog;");
append_line(vs_buf, &vs_len, "varying vec4 vFog;");
num_floats += 4;
}
for (int i = 0; i < cc_features.num_inputs; i++) {
vs_len += sprintf(vs_buf + vs_len, "attribute vec%d aInput%d;\n", cc_features.opt_alpha ? 4 : 3, i + 1);
vs_len += sprintf(vs_buf + vs_len, "varying vec%d vInput%d;\n", cc_features.opt_alpha ? 4 : 3, i + 1);
num_floats += cc_features.opt_alpha ? 4 : 3;
}
append_line(vs_buf, &vs_len, "void main() {");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_line(vs_buf, &vs_len, "vTexCoord = aTexCoord;");
}
if (cc_features.opt_fog) {
append_line(vs_buf, &vs_len, "vFog = aFog;");
}
for (int i = 0; i < cc_features.num_inputs; i++) {
vs_len += sprintf(vs_buf + vs_len, "vInput%d = aInput%d;\n", i + 1, i + 1);
}
append_line(vs_buf, &vs_len, "gl_Position = aVtxPos;");
append_line(vs_buf, &vs_len, "}");
// Fragment shader
append_line(fs_buf, &fs_len, "#version 110");
//append_line(fs_buf, &fs_len, "precision mediump float;");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
append_line(fs_buf, &fs_len, "varying vec2 vTexCoord;");
}
if (cc_features.opt_fog) {
append_line(fs_buf, &fs_len, "varying vec4 vFog;");
}
for (int i = 0; i < cc_features.num_inputs; i++) {
fs_len += sprintf(fs_buf + fs_len, "varying vec%d vInput%d;\n", cc_features.opt_alpha ? 4 : 3, i + 1);
}
if (cc_features.used_textures[0]) {
append_line(fs_buf, &fs_len, "uniform sampler2D uTex0;");
}
if (cc_features.used_textures[1]) {
append_line(fs_buf, &fs_len, "uniform sampler2D uTex1;");
}
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(fs_buf, &fs_len, "uniform int frame_count;");
append_line(fs_buf, &fs_len, "uniform int window_height;");
append_line(fs_buf, &fs_len, "float random(in vec3 value) {");
append_line(fs_buf, &fs_len, " float random = dot(sin(value), vec3(12.9898, 78.233, 37.719));");
append_line(fs_buf, &fs_len, " return fract(sin(random) * 143758.5453);");
append_line(fs_buf, &fs_len, "}");
}
append_line(fs_buf, &fs_len, "void main() {");
if (cc_features.used_textures[0]) {
append_line(fs_buf, &fs_len, "vec4 texVal0 = texture2D(uTex0, vTexCoord);");
}
if (cc_features.used_textures[1]) {
append_line(fs_buf, &fs_len, "vec4 texVal1 = texture2D(uTex1, vTexCoord);");
}
append_str(fs_buf, &fs_len, cc_features.opt_alpha ? "vec4 texel = " : "vec3 texel = ");
if (!cc_features.color_alpha_same && cc_features.opt_alpha) {
append_str(fs_buf, &fs_len, "vec4(");
append_formula(fs_buf, &fs_len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true);
append_str(fs_buf, &fs_len, ", ");
append_formula(fs_buf, &fs_len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true);
append_str(fs_buf, &fs_len, ")");
} else {
append_formula(fs_buf, &fs_len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha);
}
append_line(fs_buf, &fs_len, ";");
if (cc_features.opt_texture_edge && cc_features.opt_alpha) {
append_line(fs_buf, &fs_len, "if (texel.a > 0.3) texel.a = 1.0; else discard;");
}
// TODO discard if alpha is 0?
if (cc_features.opt_fog) {
if (cc_features.opt_alpha) {
append_line(fs_buf, &fs_len, "texel = vec4(mix(texel.rgb, vFog.rgb, vFog.a), texel.a);");
} else {
append_line(fs_buf, &fs_len, "texel = mix(texel, vFog.rgb, vFog.a);");
}
}
if (cc_features.opt_alpha && cc_features.opt_noise) {
append_line(fs_buf, &fs_len, "texel.a *= floor(random(vec3(floor(gl_FragCoord.xy * (240.0 / float(window_height))), float(frame_count))) + 0.5);");
}
if (cc_features.opt_alpha) {
append_line(fs_buf, &fs_len, "gl_FragColor = texel;");
} else {
append_line(fs_buf, &fs_len, "gl_FragColor = vec4(texel, 1.0);");
}
append_line(fs_buf, &fs_len, "}");
vs_buf[vs_len] = '\0';
fs_buf[fs_len] = '\0';
/*puts("Vertex shader:");
puts(vs_buf);
puts("Fragment shader:");
puts(fs_buf);
puts("End");*/
const GLchar *sources[2] = { vs_buf, fs_buf };
const GLint lengths[2] = { vs_len, fs_len };
GLint success;
GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &sources[0], &lengths[0]);
glCompileShader(vertex_shader);
glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &success);
if (!success) {
GLint max_length = 0;
glGetShaderiv(vertex_shader, GL_INFO_LOG_LENGTH, &max_length);
char error_log[1024];
fprintf(stderr, "Vertex shader compilation failed\n");
glGetShaderInfoLog(vertex_shader, max_length, &max_length, &error_log[0]);
fprintf(stderr, "%s\n", &error_log[0]);
abort();
}
GLuint fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &sources[1], &lengths[1]);
glCompileShader(fragment_shader);
glGetShaderiv(fragment_shader, GL_COMPILE_STATUS, &success);
if (!success) {
GLint max_length = 0;
glGetShaderiv(fragment_shader, GL_INFO_LOG_LENGTH, &max_length);
char error_log[1024];
fprintf(stderr, "Fragment shader compilation failed\n");
glGetShaderInfoLog(fragment_shader, max_length, &max_length, &error_log[0]);
fprintf(stderr, "%s\n", &error_log[0]);
abort();
}
GLuint shader_program = glCreateProgram();
glAttachShader(shader_program, vertex_shader);
glAttachShader(shader_program, fragment_shader);
glLinkProgram(shader_program);
size_t cnt = 0;
struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_size++];
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aVtxPos");
prg->attrib_sizes[cnt] = 4;
++cnt;
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aTexCoord");
prg->attrib_sizes[cnt] = 2;
++cnt;
}
if (cc_features.opt_fog) {
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aFog");
prg->attrib_sizes[cnt] = 4;
++cnt;
}
for (int i = 0; i < cc_features.num_inputs; i++) {
char name[16];
sprintf(name, "aInput%d", i + 1);
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, name);
prg->attrib_sizes[cnt] = cc_features.opt_alpha ? 4 : 3;
++cnt;
}
prg->shader_id = shader_id;
prg->opengl_program_id = shader_program;
prg->num_inputs = cc_features.num_inputs;
prg->used_textures[0] = cc_features.used_textures[0];
prg->used_textures[1] = cc_features.used_textures[1];
prg->num_floats = num_floats;
prg->num_attribs = cnt;
gfx_opengl_load_shader(prg);
if (cc_features.used_textures[0]) {
GLint sampler_location = glGetUniformLocation(shader_program, "uTex0");
glUniform1i(sampler_location, 0);
}
if (cc_features.used_textures[1]) {
GLint sampler_location = glGetUniformLocation(shader_program, "uTex1");
glUniform1i(sampler_location, 1);
}
if (cc_features.opt_alpha && cc_features.opt_noise) {
prg->frame_count_location = glGetUniformLocation(shader_program, "frame_count");
prg->window_height_location = glGetUniformLocation(shader_program, "window_height");
prg->used_noise = true;
} else {
prg->used_noise = false;
}
return prg;
}
static struct ShaderProgram *gfx_opengl_lookup_shader(uint32_t shader_id) {
for (size_t i = 0; i < shader_program_pool_size; i++) {
if (shader_program_pool[i].shader_id == shader_id) {
return &shader_program_pool[i];
}
}
return NULL;
}
/*
 * Copies the input count and the two texture-usage flags stored in `prg`
 * into the caller-provided outputs.
 */
static void gfx_opengl_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) {
    *num_inputs = prg->num_inputs;
    for (int tex = 0; tex < 2; tex++) {
        used_textures[tex] = prg->used_textures[tex];
    }
}
/* Asks GL for one fresh texture name and returns it. */
static GLuint gfx_opengl_new_texture(void) {
    GLuint texture_name;
    glGenTextures(1, &texture_name);
    return texture_name;
}
/* Makes texture unit `tile` active and binds `texture_id` to its 2D target. */
static void gfx_opengl_select_texture(int tile, GLuint texture_id) {
    const GLenum texture_unit = GL_TEXTURE0 + tile;
    glActiveTexture(texture_unit);
    glBindTexture(GL_TEXTURE_2D, texture_id);
}
static void gfx_opengl_upload_texture(const uint8_t *rgba32_buf, int width, int height) {
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba32_buf);
}
/*
 * Translates an N64 texture clamp/mirror flag word into a GL wrap mode.
 * G_TX_CLAMP wins over G_TX_MIRROR; with neither bit set the texture repeats.
 */
static uint32_t gfx_cm_to_opengl(uint32_t val) {
    uint32_t wrap_mode = GL_REPEAT;
    if (val & G_TX_CLAMP) {
        wrap_mode = GL_CLAMP_TO_EDGE;
    } else if (val & G_TX_MIRROR) {
        wrap_mode = GL_MIRRORED_REPEAT;
    }
    return wrap_mode;
}
/*
 * Sets filtering and S/T wrap modes on the 2D texture bound to unit `tile`.
 * The same filter (linear or nearest) is used for minification and magnification.
 */
static void gfx_opengl_set_sampler_parameters(int tile, bool linear_filter, uint32_t cms, uint32_t cmt) {
    glActiveTexture(GL_TEXTURE0 + tile);

    if (linear_filter) {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    } else {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    }

    const uint32_t wrap_s = gfx_cm_to_opengl(cms);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrap_s);
    const uint32_t wrap_t = gfx_cm_to_opengl(cmt);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrap_t);
}
/* Turns GL depth testing on or off. */
static void gfx_opengl_set_depth_test(bool depth_test) {
    if (!depth_test) {
        glDisable(GL_DEPTH_TEST);
        return;
    }
    glEnable(GL_DEPTH_TEST);
}
static void gfx_opengl_set_depth_mask(bool z_upd) {
glDepthMask(z_upd ? GL_TRUE : GL_FALSE);
}
/*
 * Decal mode: applies a polygon offset of (-2, -2) and enables offset fill.
 * Otherwise resets the offset to (0, 0) and disables it again.
 */
static void gfx_opengl_set_zmode_decal(bool zmode_decal) {
    if (!zmode_decal) {
        glPolygonOffset(0, 0);
        glDisable(GL_POLYGON_OFFSET_FILL);
        return;
    }
    glPolygonOffset(-2, -2);
    glEnable(GL_POLYGON_OFFSET_FILL);
}
/* Sets the GL viewport and remembers its height in `current_height`. */
static void gfx_opengl_set_viewport(int x, int y, int width, int height) {
    current_height = height;
    glViewport(x, y, width, height);
}
/* Forwards the scissor rectangle unchanged to glScissor. */
static void gfx_opengl_set_scissor(int x, int y, int width, int height) {
glScissor(x, y, width, height);
}
/* Turns GL blending on when alpha is in use, off otherwise. */
static void gfx_opengl_set_use_alpha(bool use_alpha) {
    if (!use_alpha) {
        glDisable(GL_BLEND);
        return;
    }
    glEnable(GL_BLEND);
}
/*
 * Streams `buf_vbo_len` floats into the bound array buffer and draws
 * `buf_vbo_num_tris` triangles (three vertices each) from it.
 */
static void gfx_opengl_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
    const size_t upload_bytes = sizeof(float) * buf_vbo_len;
    const size_t vertex_total = 3 * buf_vbo_num_tris;

    glBufferData(GL_ARRAY_BUFFER, upload_bytes, buf_vbo, GL_STREAM_DRAW);
    glDrawArrays(GL_TRIANGLES, 0, vertex_total);
}
/*
 * One-time GL setup: loads extension entry points through GLEW on Windows
 * builds, creates and binds the single vertex buffer used for all draws, and
 * sets the depth comparison and alpha blend functions.
 *
 * Aborts if GLEW fails to initialise, because the extension entry points used
 * right after (e.g. glGenBuffers) would otherwise be unusable.
 */
static void gfx_opengl_init(void) {
#if FOR_WINDOWS
    GLenum glew_status = glewInit();
    if (glew_status != GLEW_OK) {
        fprintf(stderr, "glewInit failed: %s\n", (const char *)glewGetErrorString(glew_status));
        abort();
    }
#endif

    glGenBuffers(1, &opengl_vbo);
    glBindBuffer(GL_ARRAY_BUFFER, opengl_vbo);

    glDepthFunc(GL_LEQUAL);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
}
/* Resize hook of the rendering API; this backend does nothing here. */
static void gfx_opengl_on_resize(void) {
}
/*
 * Begins a frame: bumps the frame counter and clears colour and depth for the
 * whole framebuffer. The scissor test is switched off around the clear and
 * back on afterwards.
 */
static void gfx_opengl_start_frame(void) {
    ++frame_count;

    glDisable(GL_SCISSOR_TEST);
    // Depth writes have to be on, otherwise glClear leaves the Z-buffer untouched.
    glDepthMask(GL_TRUE);
    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glEnable(GL_SCISSOR_TEST);
}
/* End-of-frame hook of the rendering API; this backend does nothing here. */
static void gfx_opengl_end_frame(void) {
}
/* Finish-render hook of the rendering API; this backend does nothing here. */
static void gfx_opengl_finish_render(void) {
}
struct GfxRenderingAPI gfx_opengl_api = {
gfx_opengl_z_is_from_0_to_1,
gfx_opengl_unload_shader,
gfx_opengl_load_shader,
gfx_opengl_create_and_load_new_shader,
gfx_opengl_lookup_shader,
gfx_opengl_shader_get_info,
gfx_opengl_new_texture,
gfx_opengl_select_texture,
gfx_opengl_upload_texture,
gfx_opengl_set_sampler_parameters,
gfx_opengl_set_depth_test,
gfx_opengl_set_depth_mask,
gfx_opengl_set_zmode_decal,
gfx_opengl_set_viewport,
gfx_opengl_set_scissor,
gfx_opengl_set_use_alpha,
gfx_opengl_draw_triangles,
gfx_opengl_init,
gfx_opengl_on_resize,
gfx_opengl_start_frame,
gfx_opengl_end_frame,
gfx_opengl_finish_render
};
#endif

8
src/pc/gfx/gfx_opengl.h Normal file
View file

@ -0,0 +1,8 @@
/* Public handle to the OpenGL rendering backend. */
#ifndef GFX_OPENGL_H
#define GFX_OPENGL_H
#include "gfx_rendering_api.h"
/* Function table of the OpenGL backend; defined in gfx_opengl.c. */
extern struct GfxRenderingAPI gfx_opengl_api;
#endif

1655
src/pc/gfx/gfx_pc.c Normal file

File diff suppressed because it is too large Load diff

30
src/pc/gfx/gfx_pc.h Normal file
View file

@ -0,0 +1,30 @@
/*
 * Entry points of the PC graphics layer.
 *
 * NOTE(review): `Gfx` is not declared here; the includer must have the
 * display-list type in scope before including this header.
 */
#ifndef GFX_PC_H
#define GFX_PC_H
#include <stdbool.h>
#include <stdint.h> /* uint32_t in struct GfxDimensions */
struct GfxRenderingAPI;
struct GfxWindowManagerAPI;
/* Current output size in pixels plus its aspect ratio. */
struct GfxDimensions {
    uint32_t width, height;
    float aspect_ratio;
};
extern struct GfxDimensions gfx_current_dimensions;
#ifdef __cplusplus
extern "C" {
#endif
/* Initialises the graphics layer with the given window-manager and rendering backends. */
void gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi, const char *game_name, bool start_in_fullscreen);
/* Returns the rendering backend currently in use. */
struct GfxRenderingAPI *gfx_get_current_rendering_api(void);
void gfx_start_frame(void);
/* Processes the display list starting at `commands`. */
void gfx_run(Gfx *commands);
void gfx_end_frame(void);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,35 @@
/*
 * Backend-neutral rendering interface: a table of function pointers that a
 * rendering backend fills in (gfx_opengl_api is the OpenGL instance).
 */
#ifndef GFX_RENDERING_API_H
#define GFX_RENDERING_API_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
/* Opaque to users of the API; each backend defines its own layout. */
struct ShaderProgram;
struct GfxRenderingAPI {
bool (*z_is_from_0_to_1)(void);
void (*unload_shader)(struct ShaderProgram *old_prg);
void (*load_shader)(struct ShaderProgram *new_prg);
/* Builds a program for `shader_id`, makes it current and returns it. */
struct ShaderProgram *(*create_and_load_new_shader)(uint32_t shader_id);
/* Returns a previously created program for `shader_id`, or NULL. */
struct ShaderProgram *(*lookup_shader)(uint32_t shader_id);
/* Reports the program's input count and which of the two textures it samples. */
void (*shader_get_info)(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]);
uint32_t (*new_texture)(void);
void (*select_texture)(int tile, uint32_t texture_id);
/* `rgba32_buf` holds width * height pixels of 8-bit RGBA. */
void (*upload_texture)(const uint8_t *rgba32_buf, int width, int height);
void (*set_sampler_parameters)(int sampler, bool linear_filter, uint32_t cms, uint32_t cmt);
void (*set_depth_test)(bool depth_test);
void (*set_depth_mask)(bool z_upd);
void (*set_zmode_decal)(bool zmode_decal);
void (*set_viewport)(int x, int y, int width, int height);
void (*set_scissor)(int x, int y, int width, int height);
void (*set_use_alpha)(bool use_alpha);
/* `buf_vbo_len` counts floats in `buf_vbo`; `buf_vbo_num_tris` counts triangles. */
void (*draw_triangles)(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris);
void (*init)(void);
void (*on_resize)(void);
void (*start_frame)(void);
void (*end_frame)(void);
void (*finish_render)(void);
};
#endif

View file

@ -0,0 +1,7 @@
/* Default window size in pixels, used for the initial window and when leaving fullscreen. */
#ifndef GFX_SCREEN_CONFIG_H
#define GFX_SCREEN_CONFIG_H
#define DESIRED_SCREEN_WIDTH 640
#define DESIRED_SCREEN_HEIGHT 480
#endif

8
src/pc/gfx/gfx_sdl.h Normal file
View file

@ -0,0 +1,8 @@
/* Public handle to the SDL window-manager backend. */
#ifndef GFX_SDL_H
#define GFX_SDL_H
#include "gfx_window_manager_api.h"
/* Function table of the SDL backend; defined in gfx_sdl2.c. */
extern struct GfxWindowManagerAPI gfx_sdl;
#endif

313
src/pc/gfx/gfx_sdl2.c Normal file
View file

@ -0,0 +1,313 @@
#if !defined(__linux__) && defined(ENABLE_OPENGL)
#ifdef __MINGW32__
#define FOR_WINDOWS 1
#else
#define FOR_WINDOWS 0
#endif
#if FOR_WINDOWS
#include <GL/glew.h>
#include "SDL.h"
#define GL_GLEXT_PROTOTYPES 1
#include "SDL_opengl.h"
#else
#include <SDL2/SDL.h>
#define GL_GLEXT_PROTOTYPES 1
#include <SDL2/SDL_opengles2.h>
#endif
#include "gfx_window_manager_api.h"
#include "gfx_screen_config.h"
#define GFX_API_NAME "SDL2 - OpenGL"
/* The single SDL window created by gfx_sdl_init. */
static SDL_Window *wnd;
/* SDL scancode -> game scancode; filled by gfx_sdl_init from the tables below. */
static int inverted_scancode_table[512];
/* Set by test_vsync: non-zero when swap timing looked vsync-paced. */
static int vsync_enabled = 0;
/* Current window size; updated on fullscreen toggle and on SDL size-changed events. */
static unsigned int window_width = DESIRED_SCREEN_WIDTH;
static unsigned int window_height = DESIRED_SCREEN_HEIGHT;
static bool fullscreen_state;
/* Callbacks registered by the game; NULL until set. */
static void (*on_fullscreen_changed_callback)(bool is_now_fullscreen);
static bool (*on_key_down_callback)(int scancode);
static bool (*on_key_up_callback)(int scancode);
static void (*on_all_keys_up_callback)(void);
/*
 * Game scancode -> SDL scancode. The array index is the game-side scancode
 * (presumably the Windows/set-1 keyboard scancode, going by the name -- confirm);
 * each pair of rows covers 16 indices and the trailing comment on a row is the
 * high hex digit of those indices. gfx_sdl_init inverts this table into
 * inverted_scancode_table.
 */
const SDL_Scancode windows_scancode_table[] =
{
/* 0 1 2 3 4 5 6 7 */
/* 8 9 A B C D E F */
SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_ESCAPE, SDL_SCANCODE_1, SDL_SCANCODE_2, SDL_SCANCODE_3, SDL_SCANCODE_4, SDL_SCANCODE_5, SDL_SCANCODE_6, /* 0 */
SDL_SCANCODE_7, SDL_SCANCODE_8, SDL_SCANCODE_9, SDL_SCANCODE_0, SDL_SCANCODE_MINUS, SDL_SCANCODE_EQUALS, SDL_SCANCODE_BACKSPACE, SDL_SCANCODE_TAB, /* 0 */
SDL_SCANCODE_Q, SDL_SCANCODE_W, SDL_SCANCODE_E, SDL_SCANCODE_R, SDL_SCANCODE_T, SDL_SCANCODE_Y, SDL_SCANCODE_U, SDL_SCANCODE_I, /* 1 */
SDL_SCANCODE_O, SDL_SCANCODE_P, SDL_SCANCODE_LEFTBRACKET, SDL_SCANCODE_RIGHTBRACKET, SDL_SCANCODE_RETURN, SDL_SCANCODE_LCTRL, SDL_SCANCODE_A, SDL_SCANCODE_S, /* 1 */
SDL_SCANCODE_D, SDL_SCANCODE_F, SDL_SCANCODE_G, SDL_SCANCODE_H, SDL_SCANCODE_J, SDL_SCANCODE_K, SDL_SCANCODE_L, SDL_SCANCODE_SEMICOLON, /* 2 */
SDL_SCANCODE_APOSTROPHE, SDL_SCANCODE_GRAVE, SDL_SCANCODE_LSHIFT, SDL_SCANCODE_BACKSLASH, SDL_SCANCODE_Z, SDL_SCANCODE_X, SDL_SCANCODE_C, SDL_SCANCODE_V, /* 2 */
SDL_SCANCODE_B, SDL_SCANCODE_N, SDL_SCANCODE_M, SDL_SCANCODE_COMMA, SDL_SCANCODE_PERIOD, SDL_SCANCODE_SLASH, SDL_SCANCODE_RSHIFT, SDL_SCANCODE_PRINTSCREEN,/* 3 */
SDL_SCANCODE_LALT, SDL_SCANCODE_SPACE, SDL_SCANCODE_CAPSLOCK, SDL_SCANCODE_F1, SDL_SCANCODE_F2, SDL_SCANCODE_F3, SDL_SCANCODE_F4, SDL_SCANCODE_F5, /* 3 */
SDL_SCANCODE_F6, SDL_SCANCODE_F7, SDL_SCANCODE_F8, SDL_SCANCODE_F9, SDL_SCANCODE_F10, SDL_SCANCODE_NUMLOCKCLEAR, SDL_SCANCODE_SCROLLLOCK, SDL_SCANCODE_HOME, /* 4 */
SDL_SCANCODE_UP, SDL_SCANCODE_PAGEUP, SDL_SCANCODE_KP_MINUS, SDL_SCANCODE_LEFT, SDL_SCANCODE_KP_5, SDL_SCANCODE_RIGHT, SDL_SCANCODE_KP_PLUS, SDL_SCANCODE_END, /* 4 */
SDL_SCANCODE_DOWN, SDL_SCANCODE_PAGEDOWN, SDL_SCANCODE_INSERT, SDL_SCANCODE_DELETE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_NONUSBACKSLASH,SDL_SCANCODE_F11, /* 5 */
SDL_SCANCODE_F12, SDL_SCANCODE_PAUSE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_LGUI, SDL_SCANCODE_RGUI, SDL_SCANCODE_APPLICATION, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 5 */
SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_F13, SDL_SCANCODE_F14, SDL_SCANCODE_F15, SDL_SCANCODE_F16, /* 6 */
SDL_SCANCODE_F17, SDL_SCANCODE_F18, SDL_SCANCODE_F19, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 6 */
SDL_SCANCODE_INTERNATIONAL2, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL1, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 7 */
SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL4, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL5, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL3, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN /* 7 */
};
/*
 * Pairs {key, base key}: gfx_sdl_init gives `key` the game scancode of
 * `base key` plus 0x100 (presumably the "extended" flag -- confirm).
 */
const SDL_Scancode scancode_rmapping_extended[][2] = {
{SDL_SCANCODE_KP_ENTER, SDL_SCANCODE_RETURN},
{SDL_SCANCODE_RALT, SDL_SCANCODE_LALT},
{SDL_SCANCODE_RCTRL, SDL_SCANCODE_LCTRL},
{SDL_SCANCODE_KP_DIVIDE, SDL_SCANCODE_SLASH},
//{SDL_SCANCODE_KP_PLUS, SDL_SCANCODE_CAPSLOCK}
};
/*
 * Pairs {keypad key, navigation key} that share a base scancode in
 * windows_scancode_table. Going by the name, the keypad key is meant to keep
 * the plain code while the navigation key becomes the +0x100 variant.
 * NOTE(review): confirm against the remapping loops in gfx_sdl_init.
 */
const SDL_Scancode scancode_rmapping_nonextended[][2] = {
{SDL_SCANCODE_KP_7, SDL_SCANCODE_HOME},
{SDL_SCANCODE_KP_8, SDL_SCANCODE_UP},
{SDL_SCANCODE_KP_9, SDL_SCANCODE_PAGEUP},
{SDL_SCANCODE_KP_4, SDL_SCANCODE_LEFT},
{SDL_SCANCODE_KP_6, SDL_SCANCODE_RIGHT},
{SDL_SCANCODE_KP_1, SDL_SCANCODE_END},
{SDL_SCANCODE_KP_2, SDL_SCANCODE_DOWN},
{SDL_SCANCODE_KP_3, SDL_SCANCODE_PAGEDOWN},
{SDL_SCANCODE_KP_0, SDL_SCANCODE_INSERT},
{SDL_SCANCODE_KP_PERIOD, SDL_SCANCODE_DELETE},
{SDL_SCANCODE_KP_MULTIPLY, SDL_SCANCODE_PRINTSCREEN}
};
/*
 * Switches the window between fullscreen (at the desktop resolution of
 * display 0) and the default windowed size.
 *
 * on            - requested state; a no-op when it equals the current state.
 * call_callback - when true, the registered fullscreen-changed callback (if
 *                 any) is notified after the switch.
 *
 * If the desktop display mode cannot be queried, the request is dropped and
 * the window stays as it is; previously the uninitialised mode was used.
 */
static void set_fullscreen(bool on, bool call_callback) {
    if (fullscreen_state == on) {
        return;
    }

    if (on) {
        SDL_DisplayMode mode;
        if (SDL_GetDesktopDisplayMode(0, &mode) != 0) {
            fprintf(stderr, "SDL_GetDesktopDisplayMode failed: %s\n", SDL_GetError());
            return;
        }
        window_width = mode.w;
        window_height = mode.h;
    } else {
        window_width = DESIRED_SCREEN_WIDTH;
        window_height = DESIRED_SCREEN_HEIGHT;
    }
    fullscreen_state = on;

    SDL_SetWindowSize(wnd, window_width, window_height);
    SDL_SetWindowFullscreen(wnd, on ? SDL_WINDOW_FULLSCREEN : 0);

    if (on_fullscreen_changed_callback != NULL && call_callback) {
        on_fullscreen_changed_callback(on);
    }
}
/*
 * Measures how fast buffer swaps complete to decide whether VSync really works,
 * then picks a swap interval that paces the game at roughly 30 frames/second.
 *
 * Even if SDL_GL_SetSwapInterval succeeds, it doesn't mean that VSync actually works.
 * A 60 Hz monitor should have a swap interval of 16.67 milliseconds.
 * Try to detect the length of a vsync by swapping buffers some times.
 * Since the graphics card may enqueue a fixed number of frames,
 * first send in eight dummy frames to hopefully fill the queue.
 * This method will fail if the refresh rate is changed, which, in
 * combination with that we can't control the queue size (i.e. lag)
 * is a reason this generic SDL2 backend should only be used as last resort.
 *
 * Sets `vsync_enabled` and returns its new value (1 = VSync usable, 0 = not).
 * The original had no return statement despite the int return type.
 */
int test_vsync(void) {
    enum { WARMUP_SWAPS = 8, TIMED_SWAPS = 4 };
    Uint32 start;
    Uint32 end;

    for (int i = 0; i < WARMUP_SWAPS; i++) {
        SDL_GL_SwapWindow(wnd);
    }
    start = SDL_GetTicks();
    for (int i = 0; i < TIMED_SWAPS; i++) {
        SDL_GL_SwapWindow(wnd);
    }
    end = SDL_GetTicks();

    if (end == start) {
        // Swaps returned within the same millisecond: not vsync-paced, and the
        // rate below would divide by zero.
        vsync_enabled = 0;
        return vsync_enabled;
    }

    // Swaps per second observed with a swap interval of 1.
    float average = 4.0 * 1000.0 / (end - start);

    vsync_enabled = 1;
    if (average > 27 && average < 33) {
        SDL_GL_SetSwapInterval(1);
    } else if (average > 57 && average < 63) {
        SDL_GL_SetSwapInterval(2);
    } else if (average > 86 && average < 94) {
        SDL_GL_SetSwapInterval(3);
    } else if (average > 115 && average < 125) {
        SDL_GL_SetSwapInterval(4);
    } else {
        vsync_enabled = 0;
    }
    return vsync_enabled;
}
/*
 * Creates the SDL window and GL context, probes VSync, and builds
 * inverted_scancode_table (SDL scancode -> game scancode).
 *
 * game_name           - shown in the window title, followed by the API name.
 * start_in_fullscreen - switch to fullscreen right after creating the window.
 *
 * Fixes relative to the original:
 *  - the last remapping loop walked scancode_rmapping_nonextended's length
 *    while indexing scancode_rmapping_extended, reading past its end and
 *    never applying the keypad/navigation remapping;
 *  - the title is now formatted with a bounded snprintf.
 */
static void gfx_sdl_init(const char *game_name, bool start_in_fullscreen) {
    SDL_Init(SDL_INIT_VIDEO);

    SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24);
    SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

    //SDL_GL_SetAttribute(SDL_GL_MULTISAMPLEBUFFERS, 1);
    //SDL_GL_SetAttribute(SDL_GL_MULTISAMPLESAMPLES, 4);

    char title[512];
    snprintf(title, sizeof(title), "%s (%s)", game_name, GFX_API_NAME);

    wnd = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
            window_width, window_height, SDL_WINDOW_OPENGL | SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE);

    if (start_in_fullscreen) {
        set_fullscreen(true, false);
    }

    SDL_GL_CreateContext(wnd);

    SDL_GL_SetSwapInterval(1);
    test_vsync();
    if (!vsync_enabled)
        puts("Warning: VSync is not enabled or not working. Falling back to timer for synchronization");

    // Base mapping: invert windows_scancode_table.
    for (size_t i = 0; i < sizeof(windows_scancode_table) / sizeof(SDL_Scancode); i++) {
        inverted_scancode_table[windows_scancode_table[i]] = i;
    }

    // Keys that reuse another key's code with the 0x100 flag added.
    for (size_t i = 0; i < sizeof(scancode_rmapping_extended) / sizeof(scancode_rmapping_extended[0]); i++) {
        inverted_scancode_table[scancode_rmapping_extended[i][0]] = inverted_scancode_table[scancode_rmapping_extended[i][1]] + 0x100;
    }

    // Keypad keys take over the plain code; their navigation twins get the 0x100 flag.
    for (size_t i = 0; i < sizeof(scancode_rmapping_nonextended) / sizeof(scancode_rmapping_nonextended[0]); i++) {
        inverted_scancode_table[scancode_rmapping_nonextended[i][0]] = inverted_scancode_table[scancode_rmapping_nonextended[i][1]];
        inverted_scancode_table[scancode_rmapping_nonextended[i][1]] += 0x100;
    }
}
/* Registers the function set_fullscreen notifies after a fullscreen change (may be NULL). */
static void gfx_sdl_set_fullscreen_changed_callback(void (*on_fullscreen_changed)(bool is_now_fullscreen)) {
on_fullscreen_changed_callback = on_fullscreen_changed;
}
/* API entry point for fullscreen changes; always notifies the registered callback. */
static void gfx_sdl_set_fullscreen(bool enable) {
set_fullscreen(enable, true);
}
/* Stores the three keyboard callbacks; any of them may be NULL. */
static void gfx_sdl_set_keyboard_callbacks(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void)) {
    on_all_keys_up_callback = on_all_keys_up;
    on_key_up_callback = on_key_up;
    on_key_down_callback = on_key_down;
}
/* Calls `run_one_game_iter` forever; never returns. */
static void gfx_sdl_main_loop(void (*run_one_game_iter)(void)) {
    for (;;) {
        run_one_game_iter();
    }
}
/* Reports the current window size in pixels. */
static void gfx_sdl_get_dimensions(uint32_t *width, uint32_t *height) {
    const uint32_t current_w = window_width;
    const uint32_t current_h = window_height;
    *width = current_w;
    *height = current_h;
}
/*
 * Maps an SDL scancode to the game's scancode via inverted_scancode_table.
 * Values outside the table, including negative ones (which the original let
 * through as an out-of-bounds index), map to 0.
 */
static int translate_scancode(int scancode) {
    const int table_len = (int)(sizeof(inverted_scancode_table) / sizeof(inverted_scancode_table[0]));
    if (scancode < 0 || scancode >= table_len) {
        return 0;
    }
    return inverted_scancode_table[scancode];
}
/* Translates the SDL scancode and passes it to the key-down callback, if one is set. */
static void gfx_sdl_onkeydown(int scancode) {
    const int game_key = translate_scancode(scancode);
    if (on_key_down_callback == NULL) {
        return;
    }
    on_key_down_callback(game_key);
}
/* Translates the SDL scancode and passes it to the key-up callback, if one is set. */
static void gfx_sdl_onkeyup(int scancode) {
    const int game_key = translate_scancode(scancode);
    if (on_key_up_callback == NULL) {
        return;
    }
    on_key_up_callback(game_key);
}
/*
 * Drains the SDL event queue:
 *  - key down/up are forwarded to the keyboard callbacks (not on TARGET_WEB),
 *    except F10 key-down, which toggles fullscreen instead;
 *  - a size-changed window event updates the cached window dimensions;
 *  - SDL_QUIT terminates the process with exit(0).
 */
static void gfx_sdl_handle_events(void) {
    SDL_Event ev;
    while (SDL_PollEvent(&ev)) {
#ifndef TARGET_WEB
        // Scancodes are broken in Emscripten SDL2: https://bugzilla.libsdl.org/show_bug.cgi?id=3259
        if (ev.type == SDL_KEYDOWN) {
            if (ev.key.keysym.sym == SDLK_F10) {
                set_fullscreen(!fullscreen_state, true);
            } else {
                gfx_sdl_onkeydown(ev.key.keysym.scancode);
            }
            continue;
        }
        if (ev.type == SDL_KEYUP) {
            gfx_sdl_onkeyup(ev.key.keysym.scancode);
            continue;
        }
#endif
        if (ev.type == SDL_WINDOWEVENT) {
            if (ev.window.event == SDL_WINDOWEVENT_SIZE_CHANGED) {
                window_width = ev.window.data1;
                window_height = ev.window.data2;
            }
        } else if (ev.type == SDL_QUIT) {
            exit(0);
        }
    }
}
/* Start-of-frame hook of the window-manager API; this backend always returns true. */
static bool gfx_sdl_start_frame(void) {
return true;
}
/*
 * Timer-based frame pacing used when VSync is unavailable: sleeps for whatever
 * remains of a 30 fps frame slot, then advances the slot start by one frame.
 */
static void sync_framerate_with_timer(void) {
    static Uint32 last_time;
    // Milliseconds per frame at 30 fps.
    const Uint32 frame_ms = 1000 / 30;

    const Uint32 now = SDL_GetTicks();
    const Uint32 elapsed = now - last_time;
    if (elapsed < frame_ms) {
        SDL_Delay(frame_ms - elapsed);
    }
    last_time += frame_ms;
}
/* Presents the frame; without working VSync the timer paces the frame first. */
static void gfx_sdl_swap_buffers_begin(void) {
    if (vsync_enabled == 0) {
        sync_framerate_with_timer();
    }

    SDL_GL_SwapWindow(wnd);
}
/* Second half of the swap; this backend does nothing here. */
static void gfx_sdl_swap_buffers_end(void) {
}
/* Debug time source of the window-manager API; this backend always returns 0.0. */
static double gfx_sdl_get_time(void) {
return 0.0;
}
struct GfxWindowManagerAPI gfx_sdl = {
gfx_sdl_init,
gfx_sdl_set_keyboard_callbacks,
gfx_sdl_set_fullscreen_changed_callback,
gfx_sdl_set_fullscreen,
gfx_sdl_main_loop,
gfx_sdl_get_dimensions,
gfx_sdl_handle_events,
gfx_sdl_start_frame,
gfx_sdl_swap_buffers_begin,
gfx_sdl_swap_buffers_end,
gfx_sdl_get_time
};
#endif

View file

@ -0,0 +1,21 @@
/*
 * Backend-neutral windowing interface: a table of function pointers that a
 * window/input backend fills in (gfx_sdl is the SDL2 instance).
 */
#ifndef GFX_WINDOW_MANAGER_API_H
#define GFX_WINDOW_MANAGER_API_H
#include <stdint.h>
#include <stdbool.h>
struct GfxWindowManagerAPI {
/* Creates the window; `game_name` is used for the title. */
void (*init)(const char *game_name, bool start_in_fullscreen);
void (*set_keyboard_callbacks)(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void));
void (*set_fullscreen_changed_callback)(void (*on_fullscreen_changed)(bool is_now_fullscreen));
void (*set_fullscreen)(bool enable);
/* Runs `run_one_game_iter` repeatedly. */
void (*main_loop)(void (*run_one_game_iter)(void));
/* Reports the current window size in pixels. */
void (*get_dimensions)(uint32_t *width, uint32_t *height);
void (*handle_events)(void);
bool (*start_frame)(void);
void (*swap_buffers_begin)(void);
void (*swap_buffers_end)(void);
double (*get_time)(void); // For debug
};
#endif

871
src/pc/mixer.c Normal file
View file

@ -0,0 +1,871 @@
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ultra64.h>
#ifdef __SSE4_1__
#include <immintrin.h>
#define HAS_SSE41 1
#define HAS_NEON 0
#elif __ARM_NEON
#include <arm_neon.h>
#define HAS_SSE41 0
#define HAS_NEON 1
#else
#define HAS_SSE41 0
#define HAS_NEON 0
#endif
#pragma GCC optimize ("unroll-loops")
#if HAS_SSE41
#define LOADLH(l, h) _mm_castpd_si128(_mm_loadh_pd(_mm_load_sd((const double *)(l)), (const double *)(h)))
#endif
/* Round `v` up to the next multiple of 32, 16 or 8 (add size-1, then clear the low bits). */
#define ROUND_UP_32(v) (((v) + 31) & ~31)
#define ROUND_UP_16(v) (((v) + 15) & ~15)
#define ROUND_UP_8(v) (((v) + 7) & ~7)
/*
 * State shared by the audio command implementations in this file.
 * Buffer "addresses" are byte offsets into `buf`.
 */
static struct {
/* Source offset, destination offset and byte count; set by aSetBufferImpl without A_AUX. */
uint16_t in;
uint16_t out;
uint16_t nbytes;
/* Set by aSetVolumeImpl with A_VOL: index 0 when A_LEFT is given, index 1 otherwise. */
int16_t vol[2];
/* Set by aSetBufferImpl with A_AUX, from its in/out/nbytes arguments respectively. */
uint16_t dry_right;
uint16_t wet_left;
uint16_t wet_right;
/* Set by aSetVolumeImpl with neither A_AUX nor A_VOL; same left/right indexing as vol. */
int16_t target[2];
int32_t rate[2];
/* Set by aSetVolumeImpl with A_AUX. */
int16_t vol_dry;
int16_t vol_wet;
/* Set by aSetLoopImpl; copied into the output by aADPCMdecImpl when A_LOOP is given. */
ADPCM_STATE *adpcm_loop_state;
/* Filled by aLoadADPCMImpl; aADPCMdecImpl selects adpcm_table[table_index]. */
int16_t adpcm_table[8][2][8];
/* Working memory, viewable as bytes or as 16-bit samples. */
union {
int16_t as_s16[2512 / sizeof(int16_t)];
uint8_t as_u8[2512];
} buf;
} rspa;
/*
 * 64 rows of four 16-bit coefficients. Row i read backwards equals row 63 - i,
 * so the table is mirror-symmetric. Presumably the 4-tap interpolation filter
 * for resampling, one row per fractional position -- confirm against the
 * resampler. Values >= 0x8000 are negative coefficients written as their
 * 16-bit two's-complement bit pattern.
 */
static int16_t resample_table[64][4] = {
{0x0c39, 0x66ad, 0x0d46, 0xffdf}, {0x0b39, 0x6696, 0x0e5f, 0xffd8},
{0x0a44, 0x6669, 0x0f83, 0xffd0}, {0x095a, 0x6626, 0x10b4, 0xffc8},
{0x087d, 0x65cd, 0x11f0, 0xffbf}, {0x07ab, 0x655e, 0x1338, 0xffb6},
{0x06e4, 0x64d9, 0x148c, 0xffac}, {0x0628, 0x643f, 0x15eb, 0xffa1},
{0x0577, 0x638f, 0x1756, 0xff96}, {0x04d1, 0x62cb, 0x18cb, 0xff8a},
{0x0435, 0x61f3, 0x1a4c, 0xff7e}, {0x03a4, 0x6106, 0x1bd7, 0xff71},
{0x031c, 0x6007, 0x1d6c, 0xff64}, {0x029f, 0x5ef5, 0x1f0b, 0xff56},
{0x022a, 0x5dd0, 0x20b3, 0xff48}, {0x01be, 0x5c9a, 0x2264, 0xff3a},
{0x015b, 0x5b53, 0x241e, 0xff2c}, {0x0101, 0x59fc, 0x25e0, 0xff1e},
{0x00ae, 0x5896, 0x27a9, 0xff10}, {0x0063, 0x5720, 0x297a, 0xff02},
{0x001f, 0x559d, 0x2b50, 0xfef4}, {0xffe2, 0x540d, 0x2d2c, 0xfee8},
{0xffac, 0x5270, 0x2f0d, 0xfedb}, {0xff7c, 0x50c7, 0x30f3, 0xfed0},
{0xff53, 0x4f14, 0x32dc, 0xfec6}, {0xff2e, 0x4d57, 0x34c8, 0xfebd},
{0xff0f, 0x4b91, 0x36b6, 0xfeb6}, {0xfef5, 0x49c2, 0x38a5, 0xfeb0},
{0xfedf, 0x47ed, 0x3a95, 0xfeac}, {0xfece, 0x4611, 0x3c85, 0xfeab},
{0xfec0, 0x4430, 0x3e74, 0xfeac}, {0xfeb6, 0x424a, 0x4060, 0xfeaf},
{0xfeaf, 0x4060, 0x424a, 0xfeb6}, {0xfeac, 0x3e74, 0x4430, 0xfec0},
{0xfeab, 0x3c85, 0x4611, 0xfece}, {0xfeac, 0x3a95, 0x47ed, 0xfedf},
{0xfeb0, 0x38a5, 0x49c2, 0xfef5}, {0xfeb6, 0x36b6, 0x4b91, 0xff0f},
{0xfebd, 0x34c8, 0x4d57, 0xff2e}, {0xfec6, 0x32dc, 0x4f14, 0xff53},
{0xfed0, 0x30f3, 0x50c7, 0xff7c}, {0xfedb, 0x2f0d, 0x5270, 0xffac},
{0xfee8, 0x2d2c, 0x540d, 0xffe2}, {0xfef4, 0x2b50, 0x559d, 0x001f},
{0xff02, 0x297a, 0x5720, 0x0063}, {0xff10, 0x27a9, 0x5896, 0x00ae},
{0xff1e, 0x25e0, 0x59fc, 0x0101}, {0xff2c, 0x241e, 0x5b53, 0x015b},
{0xff3a, 0x2264, 0x5c9a, 0x01be}, {0xff48, 0x20b3, 0x5dd0, 0x022a},
{0xff56, 0x1f0b, 0x5ef5, 0x029f}, {0xff64, 0x1d6c, 0x6007, 0x031c},
{0xff71, 0x1bd7, 0x6106, 0x03a4}, {0xff7e, 0x1a4c, 0x61f3, 0x0435},
{0xff8a, 0x18cb, 0x62cb, 0x04d1}, {0xff96, 0x1756, 0x638f, 0x0577},
{0xffa1, 0x15eb, 0x643f, 0x0628}, {0xffac, 0x148c, 0x64d9, 0x06e4},
{0xffb6, 0x1338, 0x655e, 0x07ab}, {0xffbf, 0x11f0, 0x65cd, 0x087d},
{0xffc8, 0x10b4, 0x6626, 0x095a}, {0xffd0, 0x0f83, 0x6669, 0x0a44},
{0xffd8, 0x0e5f, 0x6696, 0x0b39}, {0xffdf, 0x0d46, 0x66ad, 0x0c39}
};
/* Saturates a 32-bit value to the int16_t range [-0x8000, 0x7fff]. */
static inline int16_t clamp16(int32_t v) {
    const int32_t lowest = -0x8000;
    const int32_t highest = 0x7fff;
    const int32_t bounded = (v < lowest) ? lowest : ((v > highest) ? highest : v);
    return (int16_t)bounded;
}
/* Saturates a 64-bit value to the int32_t range. */
static inline int32_t clamp32(int64_t v) {
    const int64_t lowest = -0x7fffffff - 1;
    const int64_t highest = 0x7fffffff;
    const int64_t bounded = (v < lowest) ? lowest : ((v > highest) ? highest : v);
    return (int32_t)bounded;
}
/* Zeroes `nbytes`, rounded up to a multiple of 16, of working memory starting at byte offset `addr`. */
void aClearBufferImpl(uint16_t addr, int nbytes) {
    uint8_t *first = &rspa.buf.as_u8[addr];
    const int padded = ROUND_UP_16(nbytes);
    memset(first, 0, padded);
}
/* Copies rspa.nbytes (rounded up to a multiple of 8) from `source_addr` into working memory at offset rspa.in. */
void aLoadBufferImpl(const void *source_addr) {
    uint8_t *dest = &rspa.buf.as_u8[rspa.in];
    memcpy(dest, source_addr, ROUND_UP_8(rspa.nbytes));
}
/* Copies rspa.nbytes (rounded up to a multiple of 8) from working memory at offset rspa.out to `dest_addr`. */
void aSaveBufferImpl(int16_t *dest_addr) {
    const int16_t *src = &rspa.buf.as_s16[rspa.out / sizeof(int16_t)];
    memcpy(dest_addr, src, ROUND_UP_8(rspa.nbytes));
}
/*
 * Copies `num_entries_times_16` bytes of codebook data into rspa.adpcm_table.
 * NOTE(review): the count is not checked against sizeof(rspa.adpcm_table);
 * callers are presumably trusted -- confirm.
 */
void aLoadADPCMImpl(int num_entries_times_16, const int16_t *book_source_addr) {
    void *table = rspa.adpcm_table;
    memcpy(table, book_source_addr, num_entries_times_16);
}
/*
 * Stores buffer parameters. With A_AUX the three values go to
 * dry_right / wet_left / wet_right; otherwise to in / out / nbytes.
 */
void aSetBufferImpl(uint8_t flags, uint16_t in, uint16_t out, uint16_t nbytes) {
    if (!(flags & A_AUX)) {
        rspa.in = in;
        rspa.out = out;
        rspa.nbytes = nbytes;
        return;
    }
    rspa.dry_right = in;
    rspa.wet_left = out;
    rspa.wet_right = nbytes;
}
/*
 * Stores volume parameters according to `flags`:
 *  - A_AUX:          vol_dry = v, vol_wet = r;
 *  - A_VOL:          vol[ch] = v;
 *  - neither:        target[ch] = v and rate[ch] = (t << 16) | r, with t and r
 *                    taken as unsigned 16-bit halves;
 * where ch is 0 when A_LEFT is set and 1 otherwise.
 *
 * The rate is assembled in uint32_t: the original shifted a uint16_t that had
 * been promoted to int, which overflows (undefined behaviour) when t < 0.
 */
void aSetVolumeImpl(uint8_t flags, int16_t v, int16_t t, int16_t r) {
    if (flags & A_AUX) {
        rspa.vol_dry = v;
        rspa.vol_wet = r;
        return;
    }

    const int ch = (flags & A_LEFT) ? 0 : 1;
    if (flags & A_VOL) {
        rspa.vol[ch] = v;
    } else {
        const uint32_t packed = ((uint32_t)(uint16_t)t << 16) | (uint32_t)(uint16_t)r;
        rspa.target[ch] = v;
        rspa.rate[ch] = (int32_t)packed;
    }
}
/*
 * Interleaves two mono sample runs (byte offsets `left` and `right` in working
 * memory) into L,R,L,R... at offset rspa.out. Samples are handled eight per
 * channel at a time, for rspa.nbytes rounded up to a multiple of 16; each
 * batch is read in full before any of it is written.
 */
void aInterleaveImpl(uint16_t left, uint16_t right) {
    int batches = ROUND_UP_16(rspa.nbytes) / sizeof(int16_t) / 8;
    int16_t *src_left = rspa.buf.as_s16 + left / sizeof(int16_t);
    int16_t *src_right = rspa.buf.as_s16 + right / sizeof(int16_t);
    int16_t *dst = rspa.buf.as_s16 + rspa.out / sizeof(int16_t);

    for (; batches > 0; --batches) {
        int16_t left_batch[8];
        int16_t right_batch[8];

        for (int k = 0; k < 8; k++) {
            left_batch[k] = *src_left++;
        }
        for (int k = 0; k < 8; k++) {
            right_batch[k] = *src_right++;
        }
        for (int k = 0; k < 8; k++) {
            *dst++ = left_batch[k];
            *dst++ = right_batch[k];
        }
    }
}
/* Moves `nbytes` (rounded up to a multiple of 16) within working memory; the two ranges may overlap. */
void aDMEMMoveImpl(uint16_t in_addr, uint16_t out_addr, int nbytes) {
    const uint8_t *src = &rspa.buf.as_u8[in_addr];
    uint8_t *dst = &rspa.buf.as_u8[out_addr];
    const int padded = ROUND_UP_16(nbytes);
    memmove(dst, src, padded);
}
/* Remembers the loop-state pointer that aADPCMdecImpl copies from when A_LOOP is set. */
void aSetLoopImpl(ADPCM_STATE *adpcm_loop_state) {
    ADPCM_STATE *const loop_state = adpcm_loop_state;
    rspa.adpcm_loop_state = loop_state;
}
/*
 * Decodes ADPCM-compressed audio from rspa.buf (byte offset rspa.in) into
 * 16-bit samples in rspa.buf (byte offset rspa.out).
 *
 * The first 16 output samples are a history prefix: zeros when A_INIT is set,
 * rspa.adpcm_loop_state when A_LOOP is set, otherwise the caller's `state`.
 * Every following input frame is 9 bytes (1 header byte + 8 data bytes holding
 * sixteen 4-bit values) and produces 16 samples. rspa.nbytes is rounded up to
 * a multiple of 32 bytes, i.e. whole frames are always decoded.
 * On return, the last 16 samples written are copied back into `state`.
 *
 * One of three implementations is selected at compile time: SSE4.1
 * (HAS_SSE41), NEON (HAS_NEON), or the portable scalar fallback.
 */
void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) {
#if HAS_SSE41
// Byte-shuffle mask: reverses 16-bit lanes 0..6 and zeroes lane 7 (-1 selects zero).
const __m128i tblrev = _mm_setr_epi8(12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, -1, -1);
// Shuffle masks that place each input byte into the high byte of two adjacent
// 16-bit lanes (bytes 0..3 for pos0, bytes 4..7 for pos1).
const __m128i pos0 = _mm_set_epi8(3, -1, 3, -1, 2, -1, 2, -1, 1, -1, 1, -1, 0, -1, 0, -1);
const __m128i pos1 = _mm_set_epi8(7, -1, 7, -1, 6, -1, 6, -1, 5, -1, 5, -1, 4, -1, 4, -1);
// Even lanes keep the byte as is (high nibble on top), odd lanes are multiplied
// by 0x10 so the low nibble moves to the top; `mask` then keeps the top nibble only.
const __m128i mult = _mm_set_epi16(0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01);
const __m128i mask = _mm_set1_epi16((int16_t)0xf000);
#elif HAS_NEON
// NEON equivalents of the SSE constants above.
static const int8_t pos0_data[] = {-1, 0, -1, 0, -1, 1, -1, 1, -1, 2, -1, 2, -1, 3, -1, 3};
static const int8_t pos1_data[] = {-1, 4, -1, 4, -1, 5, -1, 5, -1, 6, -1, 6, -1, 7, -1, 7};
static const int16_t mult_data[] = {0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10};
// Seven zeros followed by 1 << 11; shifted into the table vectors below.
static const int16_t table_prefix_data[] = {0, 0, 0, 0, 0, 0, 0, 1 << 11};
const int8x16_t pos0 = vld1q_s8(pos0_data);
const int8x16_t pos1 = vld1q_s8(pos1_data);
const int16x8_t mult = vld1q_s16(mult_data);
const int16x8_t mask = vdupq_n_s16((int16_t)0xf000);
const int16x8_t table_prefix = vld1q_s16(table_prefix_data);
#endif
uint8_t *in = rspa.buf.as_u8 + rspa.in;
int16_t *out = rspa.buf.as_s16 + rspa.out / sizeof(int16_t);
int nbytes = ROUND_UP_32(rspa.nbytes);
// Seed the 16-sample history prefix at the start of the output.
if (flags & A_INIT) {
memset(out, 0, 16 * sizeof(int16_t));
} else if (flags & A_LOOP) {
memcpy(out, rspa.adpcm_loop_state, 16 * sizeof(int16_t));
} else {
memcpy(out, state, 16 * sizeof(int16_t));
}
out += 16;
#if HAS_SSE41
// The two most recent samples (out[-2], out[-1]) packed into every 32-bit lane.
__m128i prev_interleaved = _mm_set1_epi32((uint16_t)out[-2] | ((uint16_t)out[-1] << 16));
//__m128i prev_interleaved = _mm_shuffle_epi32(_mm_loadu_si32(out - 2), 0); // GCC misses this?
#elif HAS_NEON
// The 8 most recent samples; lanes 6 and 7 are used as the predictor history.
int16x8_t result = vld1q_s16(out - 8);
#endif
// One iteration per 9-byte frame, producing 16 samples (32 output bytes).
while (nbytes > 0) {
// Frame header: high nibble is the scale shift, low nibble selects the table.
int shift = *in >> 4; // should be in 0..12
int table_index = *in++ & 0xf; // should be in 0..7
int16_t (*tbl)[8] = rspa.adpcm_table[table_index];
int i;
#if HAS_SSE41
// The _mm_loadu_si64 instruction was added in GCC 9, and results in the same
// asm as the following instructions, so better be compatible with old GCC.
//__m128i inv = _mm_loadu_si64(in);
uint64_t v; memcpy(&v, in, 8);
__m128i inv = _mm_set_epi64x(0, v);
__m128i invec[2] = {_mm_shuffle_epi8(inv, pos0), _mm_shuffle_epi8(inv, pos1)};
__m128i tblvec0 = _mm_loadu_si128((const __m128i *)tbl[0]);
__m128i tblvec1 = _mm_loadu_si128((const __m128i *)(tbl[1]));
// Interleave both table rows so one madd against prev_interleaved yields
// tbl[0][j] * prev2 + tbl[1][j] * prev1 per 32-bit lane.
__m128i tbllo = _mm_unpacklo_epi16(tblvec0, tblvec1);
__m128i tblhi = _mm_unpackhi_epi16(tblvec0, tblvec1);
__m128i shiftcount = _mm_set_epi64x(0, 12 - shift); // _mm_cvtsi64_si128 does not exist on 32-bit x86
// Second table row reversed with 1 << 11 in the last lane, then byte-shifted
// right by 0..7 lanes: one vector per output sample of a group of 8.
__m128i tblvec1_rev[8];
tblvec1_rev[0] = _mm_insert_epi16(_mm_shuffle_epi8(tblvec1, tblrev), 1 << 11, 7);
tblvec1_rev[1] = _mm_bsrli_si128(tblvec1_rev[0], 2);
tblvec1_rev[2] = _mm_bsrli_si128(tblvec1_rev[0], 4);
tblvec1_rev[3] = _mm_bsrli_si128(tblvec1_rev[0], 6);
tblvec1_rev[4] = _mm_bsrli_si128(tblvec1_rev[0], 8);
tblvec1_rev[5] = _mm_bsrli_si128(tblvec1_rev[0], 10);
tblvec1_rev[6] = _mm_bsrli_si128(tblvec1_rev[0], 12);
tblvec1_rev[7] = _mm_bsrli_si128(tblvec1_rev[0], 14);
in += 8;
// Two groups of 8 samples per frame.
for (i = 0; i < 2; i++) {
__m128i acc0 = _mm_madd_epi16(prev_interleaved, tbllo);
__m128i acc1 = _mm_madd_epi16(prev_interleaved, tblhi);
__m128i muls[8];
__m128i result;
// Isolate each nibble in the top 4 bits, then arithmetic-shift right by
// (12 - shift): sign-extends the nibble and applies the frame's scale.
invec[i] = _mm_sra_epi16(_mm_and_si128(_mm_mullo_epi16(invec[i], mult), mask), shiftcount);
muls[7] = _mm_madd_epi16(tblvec1_rev[0], invec[i]);
muls[6] = _mm_madd_epi16(tblvec1_rev[1], invec[i]);
muls[5] = _mm_madd_epi16(tblvec1_rev[2], invec[i]);
muls[4] = _mm_madd_epi16(tblvec1_rev[3], invec[i]);
muls[3] = _mm_madd_epi16(tblvec1_rev[4], invec[i]);
muls[2] = _mm_madd_epi16(tblvec1_rev[5], invec[i]);
muls[1] = _mm_madd_epi16(tblvec1_rev[6], invec[i]);
muls[0] = _mm_madd_epi16(tblvec1_rev[7], invec[i]);
// Horizontal adds collapse each muls[j] into the single sum for sample j.
acc0 = _mm_add_epi32(acc0, _mm_hadd_epi32(_mm_hadd_epi32(muls[0], muls[1]), _mm_hadd_epi32(muls[2], muls[3])));
acc1 = _mm_add_epi32(acc1, _mm_hadd_epi32(_mm_hadd_epi32(muls[4], muls[5]), _mm_hadd_epi32(muls[6], muls[7])));
acc0 = _mm_srai_epi32(acc0, 11);
acc1 = _mm_srai_epi32(acc1, 11);
// Pack to 16 bits with signed saturation.
result = _mm_packs_epi32(acc0, acc1);
_mm_storeu_si128((__m128i *)out, result);
out += 8;
// Broadcast the last two samples of this group as history for the next one.
prev_interleaved = _mm_shuffle_epi32(result, _MM_SHUFFLE(3, 3, 3, 3));
}
#elif HAS_NEON
int8x8_t inv = vld1_s8((int8_t *)in);
int16x8_t tblvec[2] = {vld1q_s16(tbl[0]), vld1q_s16(tbl[1])};
int16x8_t invec[2] = {vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(inv, vget_low_s8(pos0)),
vtbl1_s8(inv, vget_high_s8(pos0)))),
vreinterpretq_s16_s8(vcombine_s8(vtbl1_s8(inv, vget_low_s8(pos1)),
vtbl1_s8(inv, vget_high_s8(pos1))))};
int16x8_t shiftcount = vdupq_n_s16(shift - 12); // negative means right shift
// tblvec1[k]: table_prefix followed by the second table row, windowed so the
// 1 << 11 entry sits in lane k.
int16x8_t tblvec1[8];
in += 8;
// Table windowing and nibble extraction are interleaved statement by statement.
tblvec1[0] = vextq_s16(table_prefix, tblvec[1], 7);
invec[0] = vmulq_s16(invec[0], mult);
tblvec1[1] = vextq_s16(table_prefix, tblvec[1], 6);
invec[1] = vmulq_s16(invec[1], mult);
tblvec1[2] = vextq_s16(table_prefix, tblvec[1], 5);
tblvec1[3] = vextq_s16(table_prefix, tblvec[1], 4);
invec[0] = vandq_s16(invec[0], mask);
tblvec1[4] = vextq_s16(table_prefix, tblvec[1], 3);
invec[1] = vandq_s16(invec[1], mask);
tblvec1[5] = vextq_s16(table_prefix, tblvec[1], 2);
tblvec1[6] = vextq_s16(table_prefix, tblvec[1], 1);
invec[0] = vqshlq_s16(invec[0], shiftcount);
invec[1] = vqshlq_s16(invec[1], shiftcount);
tblvec1[7] = table_prefix;
// Two groups of 8 samples per frame.
for (i = 0; i < 2; i++) {
int32x4_t acc0;
int32x4_t acc1;
// History terms: lanes 6 and 7 of the previous result times table rows 0 and 1.
acc1 = vmull_lane_s16(vget_high_s16(tblvec[0]), vget_high_s16(result), 2);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec[1]), vget_high_s16(result), 3);
acc0 = vmull_lane_s16(vget_low_s16(tblvec[0]), vget_high_s16(result), 2);
acc0 = vmlal_lane_s16(acc0, vget_low_s16(tblvec[1]), vget_high_s16(result), 3);
// Input terms: inputs 0..3 feed both halves, inputs 4..7 only the upper half.
acc0 = vmlal_lane_s16(acc0, vget_low_s16(tblvec1[0]), vget_low_s16(invec[i]), 0);
acc0 = vmlal_lane_s16(acc0, vget_low_s16(tblvec1[1]), vget_low_s16(invec[i]), 1);
acc0 = vmlal_lane_s16(acc0, vget_low_s16(tblvec1[2]), vget_low_s16(invec[i]), 2);
acc0 = vmlal_lane_s16(acc0, vget_low_s16(tblvec1[3]), vget_low_s16(invec[i]), 3);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[0]), vget_low_s16(invec[i]), 0);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[1]), vget_low_s16(invec[i]), 1);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[2]), vget_low_s16(invec[i]), 2);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[3]), vget_low_s16(invec[i]), 3);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[4]), vget_high_s16(invec[i]), 0);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[5]), vget_high_s16(invec[i]), 1);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[6]), vget_high_s16(invec[i]), 2);
acc1 = vmlal_lane_s16(acc1, vget_high_s16(tblvec1[7]), vget_high_s16(invec[i]), 3);
// Shift right by 11 and narrow to 16 bits with saturation.
result = vcombine_s16(vqshrn_n_s32(acc0, 11), vqshrn_n_s32(acc1, 11));
vst1q_s16(out, result);
out += 8;
}
#else
// Portable scalar implementation: two groups of 8 samples per frame.
for (i = 0; i < 2; i++) {
int16_t ins[8];
int16_t prev1 = out[-1];
int16_t prev2 = out[-2];
int j, k;
// Unpack 4 bytes into 8 nibbles; the <<28 >>28 pair sign-extends each
// 4-bit value before it is scaled by `shift`.
for (j = 0; j < 4; j++) {
ins[j * 2] = (((*in >> 4) << 28) >> 28) << shift;
ins[j * 2 + 1] = (((*in++ & 0xf) << 28) >> 28) << shift;
}
for (j = 0; j < 8; j++) {
// Prediction from the two samples preceding this group plus the
// current input in the same fixed-point scale (<< 11) ...
int32_t acc = tbl[0][j] * prev2 + tbl[1][j] * prev1 + (ins[j] << 11);
// ... plus the contribution of every earlier input of this group.
for (k = 0; k < j; k++) {
acc += tbl[1][((j - k) - 1)] * ins[k];
}
acc >>= 11;
*out++ = clamp16(acc);
}
}
#endif
nbytes -= 16 * sizeof(int16_t);
}
// Save the last 16 samples written so the next call can continue from them.
memcpy(state, out - 16, 16 * sizeof(int16_t));
}
/*
 * Resamples the 16-bit samples at rspa.buf + rspa.in into rspa.buf + rspa.out
 * using a 4-tap filter taken from resample_table.
 *
 * flags: A_INIT starts from a zeroed history; otherwise the history is loaded
 *        from `state`. Bit value 2 additionally restores 8 samples in front of
 *        the input and rewinds the input pointer by the byte offset saved in
 *        state[5].
 * pitch: step per output sample; the accumulator advances by (pitch << 1) and
 *        its upper 16 bits move the input pointer, so 0x8000 advances exactly
 *        one input sample per output sample.
 * state: 16 int16 values. [0..3] are the 4 input samples at the final read
 *        position, [4] is the fractional accumulator, [5] is the offset used
 *        by the flag-2 path, [8..15] are 8 input samples saved for that path.
 *
 * rspa.nbytes is rounded up to a multiple of 16 bytes; output is produced in
 * groups of 8 samples and at least one group is always written.
 */
void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) {
    int16_t tmp[16];
    int16_t *in_initial = rspa.buf.as_s16 + rspa.in / sizeof(int16_t);
    int16_t *in = in_initial;
    int16_t *out = rspa.buf.as_s16 + rspa.out / sizeof(int16_t);
    int nbytes = ROUND_UP_16(rspa.nbytes);
    uint32_t pitch_accumulator;
    int i;
#if !HAS_SSE41 && !HAS_NEON
    int16_t *tbl;
    int32_t sample;
#endif

    if (flags & A_INIT) {
        // Zero the whole history: the flag-2 path below reads tmp[5] and
        // tmp[8..15], which must not be left uninitialized.
        memset(tmp, 0, sizeof(tmp));
    } else {
        memcpy(tmp, state, 16 * sizeof(int16_t));
    }
    if (flags & 2) {
        memcpy(in - 8, tmp + 8, 8 * sizeof(int16_t));
        in -= tmp[5] / sizeof(int16_t);
    }
    // The filter reads in[0..3]; place the 4 saved samples right before the input.
    in -= 4;
    pitch_accumulator = (uint16_t)tmp[4];
    memcpy(in, tmp, 4 * sizeof(int16_t));

#if HAS_SSE41
    // acc_a / acc_b hold the 32-bit position (integer part in the upper 16
    // bits, fraction in the lower 16) of output samples 0..3 and 4..7.
    __m128i multiples = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
    __m128i pitchvec = _mm_set1_epi16((int16_t)pitch);
    __m128i pitchvec_8_steps = _mm_set1_epi32((pitch << 1) * 8);
    __m128i pitchacclo_vec = _mm_set1_epi32((uint16_t)pitch_accumulator);
    __m128i pl = _mm_mullo_epi16(multiples, pitchvec);
    __m128i ph = _mm_mulhi_epu16(multiples, pitchvec);
    __m128i acc_a = _mm_add_epi32(_mm_unpacklo_epi16(pl, ph), pitchacclo_vec);
    __m128i acc_b = _mm_add_epi32(_mm_unpackhi_epi16(pl, ph), pitchacclo_vec);

    do {
        // Top 6 bits of each fraction select the filter row.
        __m128i tbl_positions = _mm_srli_epi16(_mm_packus_epi32(
            _mm_and_si128(acc_a, _mm_set1_epi32(0xffff)),
            _mm_and_si128(acc_b, _mm_set1_epi32(0xffff))), 10);
        __m128i in_positions = _mm_packus_epi32(_mm_srli_epi32(acc_a, 16), _mm_srli_epi32(acc_b, 16));
        __m128i tbl_entries[4];
        __m128i samples[4];

        // Each vector holds the 4 taps / 4 input samples of two output samples.
        tbl_entries[0] = LOADLH(resample_table[_mm_extract_epi16(tbl_positions, 0)], resample_table[_mm_extract_epi16(tbl_positions, 1)]);
        tbl_entries[1] = LOADLH(resample_table[_mm_extract_epi16(tbl_positions, 2)], resample_table[_mm_extract_epi16(tbl_positions, 3)]);
        tbl_entries[2] = LOADLH(resample_table[_mm_extract_epi16(tbl_positions, 4)], resample_table[_mm_extract_epi16(tbl_positions, 5)]);
        tbl_entries[3] = LOADLH(resample_table[_mm_extract_epi16(tbl_positions, 6)], resample_table[_mm_extract_epi16(tbl_positions, 7)]);
        samples[0] = LOADLH(&in[_mm_extract_epi16(in_positions, 0)], &in[_mm_extract_epi16(in_positions, 1)]);
        samples[1] = LOADLH(&in[_mm_extract_epi16(in_positions, 2)], &in[_mm_extract_epi16(in_positions, 3)]);
        samples[2] = LOADLH(&in[_mm_extract_epi16(in_positions, 4)], &in[_mm_extract_epi16(in_positions, 5)]);
        samples[3] = LOADLH(&in[_mm_extract_epi16(in_positions, 6)], &in[_mm_extract_epi16(in_positions, 7)]);
        samples[0] = _mm_mulhrs_epi16(samples[0], tbl_entries[0]);
        samples[1] = _mm_mulhrs_epi16(samples[1], tbl_entries[1]);
        samples[2] = _mm_mulhrs_epi16(samples[2], tbl_entries[2]);
        samples[3] = _mm_mulhrs_epi16(samples[3], tbl_entries[3]);
        // Saturating horizontal adds sum the 4 taps of every output sample.
        _mm_storeu_si128((__m128i *)out, _mm_hadds_epi16(_mm_hadds_epi16(samples[0], samples[1]), _mm_hadds_epi16(samples[2], samples[3])));

        acc_a = _mm_add_epi32(acc_a, pitchvec_8_steps);
        acc_b = _mm_add_epi32(acc_b, pitchvec_8_steps);
        out += 8;
        nbytes -= 8 * sizeof(int16_t);
    } while (nbytes > 0);
    // Lane 0 of acc_a is the position of the next output sample.
    in += (uint16_t)_mm_extract_epi16(acc_a, 1);
    pitch_accumulator = (uint16_t)_mm_extract_epi16(acc_a, 0);
#elif HAS_NEON
    static const uint16_t multiples_data[8] = {0, 2, 4, 6, 8, 10, 12, 14};
    uint16x8_t multiples = vld1q_u16(multiples_data);
    uint32x4_t pitchvec_8_steps = vdupq_n_u32((pitch << 1) * 8);
    uint32x4_t pitchacclo_vec = vdupq_n_u32((uint16_t)pitch_accumulator);
    uint32x4_t acc_a = vmlal_n_u16(pitchacclo_vec, vget_low_u16(multiples), pitch);
    uint32x4_t acc_b = vmlal_n_u16(pitchacclo_vec, vget_high_u16(multiples), pitch);

    do {
        // Split the 32-bit positions into fraction (val[0]) and integer (val[1]) halves.
        uint16x8x2_t unzipped = vuzpq_u16(vreinterpretq_u16_u32(acc_a), vreinterpretq_u16_u32(acc_b));
        uint16x8_t tbl_positions = vshrq_n_u16(unzipped.val[0], 10);
        uint16x8_t in_positions = unzipped.val[1];
        int16x8_t tbl_entries[4];
        int16x8_t samples[4];
        int16x8x2_t unzipped1;
        int16x8x2_t unzipped2;

        tbl_entries[0] = vcombine_s16(vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 0)]), vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 1)]));
        tbl_entries[1] = vcombine_s16(vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 2)]), vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 3)]));
        tbl_entries[2] = vcombine_s16(vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 4)]), vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 5)]));
        tbl_entries[3] = vcombine_s16(vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 6)]), vld1_s16(resample_table[vgetq_lane_u16(tbl_positions, 7)]));
        samples[0] = vcombine_s16(vld1_s16(&in[vgetq_lane_u16(in_positions, 0)]), vld1_s16(&in[vgetq_lane_u16(in_positions, 1)]));
        samples[1] = vcombine_s16(vld1_s16(&in[vgetq_lane_u16(in_positions, 2)]), vld1_s16(&in[vgetq_lane_u16(in_positions, 3)]));
        samples[2] = vcombine_s16(vld1_s16(&in[vgetq_lane_u16(in_positions, 4)]), vld1_s16(&in[vgetq_lane_u16(in_positions, 5)]));
        samples[3] = vcombine_s16(vld1_s16(&in[vgetq_lane_u16(in_positions, 6)]), vld1_s16(&in[vgetq_lane_u16(in_positions, 7)]));
        samples[0] = vqrdmulhq_s16(samples[0], tbl_entries[0]);
        samples[1] = vqrdmulhq_s16(samples[1], tbl_entries[1]);
        samples[2] = vqrdmulhq_s16(samples[2], tbl_entries[2]);
        samples[3] = vqrdmulhq_s16(samples[3], tbl_entries[3]);
        // Two rounds of unzip + saturating add sum the 4 taps of every output sample.
        unzipped1 = vuzpq_s16(samples[0], samples[1]);
        unzipped2 = vuzpq_s16(samples[2], samples[3]);
        samples[0] = vqaddq_s16(unzipped1.val[0], unzipped1.val[1]);
        samples[1] = vqaddq_s16(unzipped2.val[0], unzipped2.val[1]);
        unzipped1 = vuzpq_s16(samples[0], samples[1]);
        samples[0] = vqaddq_s16(unzipped1.val[0], unzipped1.val[1]);
        vst1q_s16(out, samples[0]);

        acc_a = vaddq_u32(acc_a, pitchvec_8_steps);
        acc_b = vaddq_u32(acc_b, pitchvec_8_steps);
        out += 8;
        nbytes -= 8 * sizeof(int16_t);
    } while (nbytes > 0);
    in += vgetq_lane_u16(vreinterpretq_u16_u32(acc_a), 1);
    pitch_accumulator = vgetq_lane_u16(vreinterpretq_u16_u32(acc_a), 0);
#else
    do {
        for (i = 0; i < 8; i++) {
            // Top 6 bits of the 16-bit fraction select the filter row.
            tbl = resample_table[pitch_accumulator * 64 >> 16];
            // Each tap is rounded (+0x4000) and scaled down by 15 bits before summing.
            sample = ((in[0] * tbl[0] + 0x4000) >> 15) +
                     ((in[1] * tbl[1] + 0x4000) >> 15) +
                     ((in[2] * tbl[2] + 0x4000) >> 15) +
                     ((in[3] * tbl[3] + 0x4000) >> 15);
            *out++ = clamp16(sample);

            pitch_accumulator += (pitch << 1);
            in += pitch_accumulator >> 16;
            pitch_accumulator %= 0x10000;
        }
        nbytes -= 8 * sizeof(int16_t);
    } while (nbytes > 0);
#endif

    // Persist the fractional position and the 4 samples at the read position.
    state[4] = (int16_t)pitch_accumulator;
    memcpy(state, in, 4 * sizeof(int16_t));
    // Step back to the preceding 8-sample boundary (relative to in_initial - 4)
    // and save that group together with the offset for the flag-2 path.
    i = (in - in_initial + 4) & 7;
    in -= i;
    if (i != 0) {
        i = -8 - i;
    }
    state[5] = i;
    memcpy(state + 8, in, 8 * sizeof(int16_t));
}
/*
 * Envelope mixer: adds the input at rspa.in, scaled by a ramping per-channel
 * volume, into the two dry buffers (rspa.out and rspa.dry_right) and, when
 * A_AUX is set, into the two wet buffers (rspa.wet_left / rspa.wet_right).
 *
 * Samples are processed in groups of 8 (rspa.nbytes rounded up to 16 bytes,
 * at least one group). Each of the 8 lanes keeps its own volume, which is
 * clamped against the channel's target and then multiplied by the channel's
 * rate after every group. With A_INIT the lanes start at
 * vol + vol * (rate - 1) * (lane + 1) / 8, and target/rate/dry/wet are taken
 * from rspa; otherwise everything is restored from `state`.
 *
 * `state` (40 int16 values) layout depends on the implementation:
 *   SSE4.1 / NEON: [0..31] lane volumes as floats, [32..33] and [34..35] the
 *                  two rates as floats, [36],[37] targets, [38] dry, [39] wet.
 *   scalar:        [0..31] lane volumes as int32 (16.16 fixed point),
 *                  [32] target0, [33..34] rate0, [35] target1, [36..37] rate1,
 *                  [38] dry, [39] wet.
 */
void aEnvMixerImpl(uint8_t flags, ENVMIX_STATE state) {
    int16_t *in = rspa.buf.as_s16 + rspa.in / sizeof(int16_t);
    int16_t *dry[2] = {rspa.buf.as_s16 + rspa.out / sizeof(int16_t), rspa.buf.as_s16 + rspa.dry_right / sizeof(int16_t)};
    int16_t *wet[2] = {rspa.buf.as_s16 + rspa.wet_left / sizeof(int16_t), rspa.buf.as_s16 + rspa.wet_right / sizeof(int16_t)};
    int nbytes = ROUND_UP_16(rspa.nbytes);

#if HAS_SSE41
    __m128 vols[2][2];
    __m128i dry_factor;
    __m128i wet_factor;
    __m128 target[2];
    __m128 rate[2];
    __m128i in_loaded;
    __m128i vol_s16;
    bool increasing[2];
    int c;

    if (flags & A_INIT) {
        float vol_init[2] = {rspa.vol[0], rspa.vol[1]};
        // rspa.rate is 16.16 fixed point; convert to a float multiplier.
        float rate_float[2] = {(float)rspa.rate[0] * (1.0f / 65536.0f), (float)rspa.rate[1] * (1.0f / 65536.0f)};
        float step_diff[2] = {vol_init[0] * (rate_float[0] - 1.0f), vol_init[1] * (rate_float[1] - 1.0f)};

        for (c = 0; c < 2; c++) {
            vols[c][0] = _mm_add_ps(
                _mm_set_ps1(vol_init[c]),
                _mm_mul_ps(_mm_set1_ps(step_diff[c]), _mm_setr_ps(1.0f / 8.0f, 2.0f / 8.0f, 3.0f / 8.0f, 4.0f / 8.0f)));
            vols[c][1] = _mm_add_ps(
                _mm_set_ps1(vol_init[c]),
                _mm_mul_ps(_mm_set1_ps(step_diff[c]), _mm_setr_ps(5.0f / 8.0f, 6.0f / 8.0f, 7.0f / 8.0f, 8.0f / 8.0f)));

            increasing[c] = rate_float[c] >= 1.0f;
            target[c] = _mm_set1_ps(rspa.target[c]);
            rate[c] = _mm_set1_ps(rate_float[c]);
        }

        dry_factor = _mm_set1_epi16(rspa.vol_dry);
        wet_factor = _mm_set1_epi16(rspa.vol_wet);

        // The constant parameters are only written to `state` on A_INIT.
        memcpy(state + 32, &rate_float[0], 4);
        memcpy(state + 34, &rate_float[1], 4);
        state[36] = rspa.target[0];
        state[37] = rspa.target[1];
        state[38] = rspa.vol_dry;
        state[39] = rspa.vol_wet;
    } else {
        float floats[2];
        vols[0][0] = _mm_loadu_ps((const float *)state);
        vols[0][1] = _mm_loadu_ps((const float *)(state + 8));
        vols[1][0] = _mm_loadu_ps((const float *)(state + 16));
        vols[1][1] = _mm_loadu_ps((const float *)(state + 24));
        memcpy(floats, state + 32, 8);
        rate[0] = _mm_set1_ps(floats[0]);
        rate[1] = _mm_set1_ps(floats[1]);
        increasing[0] = floats[0] >= 1.0f;
        increasing[1] = floats[1] >= 1.0f;
        target[0] = _mm_set1_ps(state[36]);
        target[1] = _mm_set1_ps(state[37]);
        dry_factor = _mm_set1_epi16(state[38]);
        wet_factor = _mm_set1_epi16(state[39]);
    }
    do {
        in_loaded = _mm_loadu_si128((const __m128i *)in);
        in += 8;
        for (c = 0; c < 2; c++) {
            // Do not let the ramp run past its target.
            if (increasing[c]) {
                vols[c][0] = _mm_min_ps(vols[c][0], target[c]);
                vols[c][1] = _mm_min_ps(vols[c][1], target[c]);
            } else {
                vols[c][0] = _mm_max_ps(vols[c][0], target[c]);
                vols[c][1] = _mm_max_ps(vols[c][1], target[c]);
            }

            vol_s16 = _mm_packs_epi32(_mm_cvtps_epi32(vols[c][0]), _mm_cvtps_epi32(vols[c][1]));
            _mm_storeu_si128((__m128i *)dry[c],
                _mm_adds_epi16(
                    _mm_loadu_si128((const __m128i *)dry[c]),
                    _mm_mulhrs_epi16(in_loaded, _mm_mulhrs_epi16(vol_s16, dry_factor))));
            dry[c] += 8;

            if (flags & A_AUX) {
                _mm_storeu_si128((__m128i *)wet[c],
                    _mm_adds_epi16(
                        _mm_loadu_si128((const __m128i *)wet[c]),
                        _mm_mulhrs_epi16(in_loaded, _mm_mulhrs_epi16(vol_s16, wet_factor))));
                wet[c] += 8;
            }

            vols[c][0] = _mm_mul_ps(vols[c][0], rate[c]);
            vols[c][1] = _mm_mul_ps(vols[c][1], rate[c]);
        }

        nbytes -= 8 * sizeof(int16_t);
    } while (nbytes > 0);

    _mm_storeu_ps((float *)state, vols[0][0]);
    _mm_storeu_ps((float *)(state + 8), vols[0][1]);
    _mm_storeu_ps((float *)(state + 16), vols[1][0]);
    _mm_storeu_ps((float *)(state + 24), vols[1][1]);
#elif HAS_NEON
    float32x4_t vols[2][2];
    int16_t dry_factor;
    int16_t wet_factor;
    float32x4_t target[2];
    float rate[2];
    int16x8_t in_loaded;
    int16x8_t vol_s16;
    bool increasing[2];
    int c;

    if (flags & A_INIT) {
        float vol_init[2] = {rspa.vol[0], rspa.vol[1]};
        // rspa.rate is 16.16 fixed point; convert to a float multiplier.
        float rate_float[2] = {(float)rspa.rate[0] * (1.0f / 65536.0f), (float)rspa.rate[1] * (1.0f / 65536.0f)};
        float step_diff[2] = {vol_init[0] * (rate_float[0] - 1.0f), vol_init[1] * (rate_float[1] - 1.0f)};
        static const float step_dividers_data[2][4] = {{1.0f / 8.0f, 2.0f / 8.0f, 3.0f / 8.0f, 4.0f / 8.0f},
                                                       {5.0f / 8.0f, 6.0f / 8.0f, 7.0f / 8.0f, 8.0f / 8.0f}};
        float32x4_t step_dividers[2] = {vld1q_f32(step_dividers_data[0]), vld1q_f32(step_dividers_data[1])};

        for (c = 0; c < 2; c++) {
            vols[c][0] = vaddq_f32(vdupq_n_f32(vol_init[c]), vmulq_n_f32(step_dividers[0], step_diff[c]));
            vols[c][1] = vaddq_f32(vdupq_n_f32(vol_init[c]), vmulq_n_f32(step_dividers[1], step_diff[c]));
            increasing[c] = rate_float[c] >= 1.0f;
            target[c] = vdupq_n_f32(rspa.target[c]);
            rate[c] = rate_float[c];
        }

        dry_factor = rspa.vol_dry;
        wet_factor = rspa.vol_wet;

        // The constant parameters are only written to `state` on A_INIT.
        memcpy(state + 32, &rate_float[0], 4);
        memcpy(state + 34, &rate_float[1], 4);
        state[36] = rspa.target[0];
        state[37] = rspa.target[1];
        state[38] = rspa.vol_dry;
        state[39] = rspa.vol_wet;
    } else {
        vols[0][0] = vreinterpretq_f32_s16(vld1q_s16(state));
        vols[0][1] = vreinterpretq_f32_s16(vld1q_s16(state + 8));
        vols[1][0] = vreinterpretq_f32_s16(vld1q_s16(state + 16));
        vols[1][1] = vreinterpretq_f32_s16(vld1q_s16(state + 24));
        memcpy(&rate[0], state + 32, 4);
        memcpy(&rate[1], state + 34, 4);
        increasing[0] = rate[0] >= 1.0f;
        increasing[1] = rate[1] >= 1.0f;
        target[0] = vdupq_n_f32(state[36]);
        target[1] = vdupq_n_f32(state[37]);
        dry_factor = state[38];
        wet_factor = state[39];
    }
    do {
        in_loaded = vld1q_s16(in);
        in += 8;
        for (c = 0; c < 2; c++) {
            // Do not let the ramp run past its target.
            if (increasing[c]) {
                vols[c][0] = vminq_f32(vols[c][0], target[c]);
                vols[c][1] = vminq_f32(vols[c][1], target[c]);
            } else {
                vols[c][0] = vmaxq_f32(vols[c][0], target[c]);
                vols[c][1] = vmaxq_f32(vols[c][1], target[c]);
            }

            vol_s16 = vcombine_s16(vqmovn_s32(vcvtq_s32_f32(vols[c][0])), vqmovn_s32(vcvtq_s32_f32(vols[c][1])));
            vst1q_s16(dry[c], vqaddq_s16(vld1q_s16(dry[c]), vqrdmulhq_s16(in_loaded, vqrdmulhq_n_s16(vol_s16, dry_factor))));
            dry[c] += 8;
            if (flags & A_AUX) {
                vst1q_s16(wet[c], vqaddq_s16(vld1q_s16(wet[c]), vqrdmulhq_s16(in_loaded, vqrdmulhq_n_s16(vol_s16, wet_factor))));
                wet[c] += 8;
            }
            vols[c][0] = vmulq_n_f32(vols[c][0], rate[c]);
            vols[c][1] = vmulq_n_f32(vols[c][1], rate[c]);
        }

        nbytes -= 8 * sizeof(int16_t);
    } while (nbytes > 0);

    vst1q_s16(state, vreinterpretq_s16_f32(vols[0][0]));
    vst1q_s16(state + 8, vreinterpretq_s16_f32(vols[0][1]));
    vst1q_s16(state + 16, vreinterpretq_s16_f32(vols[1][0]));
    vst1q_s16(state + 24, vreinterpretq_s16_f32(vols[1][1]));
#else
    int16_t target[2];
    int32_t rate[2];
    int16_t vol_dry, vol_wet;
    int32_t step_diff[2];
    int32_t vols[2][8];
    int c, i;

    if (flags & A_INIT) {
        target[0] = rspa.target[0];
        target[1] = rspa.target[1];
        rate[0] = rspa.rate[0];
        rate[1] = rspa.rate[1];
        vol_dry = rspa.vol_dry;
        vol_wet = rspa.vol_wet;
        // Per-lane step of the first group; each channel uses its own start
        // volume (the second channel must use rspa.vol[1], matching the SIMD paths).
        step_diff[0] = rspa.vol[0] * (rate[0] - 0x10000) / 8;
        step_diff[1] = rspa.vol[1] * (rate[1] - 0x10000) / 8;

        for (i = 0; i < 8; i++) {
            vols[0][i] = clamp32((int64_t)(rspa.vol[0] << 16) + step_diff[0] * (i + 1));
            vols[1][i] = clamp32((int64_t)(rspa.vol[1] << 16) + step_diff[1] * (i + 1));
        }
    } else {
        memcpy(vols[0], state, 32);
        memcpy(vols[1], state + 16, 32);
        target[0] = state[32];
        target[1] = state[35];
        rate[0] = (state[33] << 16) | (uint16_t)state[34];
        rate[1] = (state[36] << 16) | (uint16_t)state[37];
        vol_dry = state[38];
        vol_wet = state[39];
    }

    do {
        for (c = 0; c < 2; c++) {
            for (i = 0; i < 8; i++) {
                if ((rate[c] >> 16) > 0) {
                    // Increasing volume
                    if ((vols[c][i] >> 16) > target[c]) {
                        vols[c][i] = target[c] << 16;
                    }
                } else {
                    // Decreasing volume
                    if ((vols[c][i] >> 16) < target[c]) {
                        vols[c][i] = target[c] << 16;
                    }
                }
                // Existing buffer content is kept at gain 0x7fff; the input is
                // scaled by (volume * dry/wet factor), each product rounded.
                dry[c][i] = clamp16((dry[c][i] * 0x7fff + in[i] * (((vols[c][i] >> 16) * vol_dry + 0x4000) >> 15) + 0x4000) >> 15);
                if (flags & A_AUX) {
                    wet[c][i] = clamp16((wet[c][i] * 0x7fff + in[i] * (((vols[c][i] >> 16) * vol_wet + 0x4000) >> 15) + 0x4000) >> 15);
                }
                vols[c][i] = clamp32((int64_t)vols[c][i] * rate[c] >> 16);
            }

            dry[c] += 8;
            if (flags & A_AUX) {
                wet[c] += 8;
            }
        }

        nbytes -= 16;
        in += 8;
    } while (nbytes > 0);

    memcpy(state, vols[0], 32);
    memcpy(state + 16, vols[1], 32);
    state[32] = target[0];
    state[35] = target[1];
    state[33] = (int16_t)(rate[0] >> 16);
    state[34] = (int16_t)rate[0];
    state[36] = (int16_t)(rate[1] >> 16);
    state[37] = (int16_t)rate[1];
    state[38] = vol_dry;
    state[39] = vol_wet;
#endif
}
/*
 * Mixes rspa.buf + in_addr into rspa.buf + out_addr with the given gain:
 * out = saturate(out + in * gain / 0x8000). rspa.nbytes is rounded up to a
 * multiple of 32 bytes and processed 16 samples at a time.
 *
 * A gain of exactly -0x8000 is handled as a plain saturating subtraction in
 * the SSE4.1 and scalar builds; the NEON build has no such special case.
 */
void aMixImpl(int16_t gain, uint16_t in_addr, uint16_t out_addr) {
    int remaining = ROUND_UP_32(rspa.nbytes);
    int16_t *src = rspa.buf.as_s16 + in_addr / sizeof(int16_t);
    int16_t *dst = rspa.buf.as_s16 + out_addr / sizeof(int16_t);
#if HAS_SSE41
    __m128i gain_vec = _mm_set1_epi16(gain);
#endif

#if !HAS_NEON
    if (gain == -0x8000) {
        // out -= in, 16 samples per iteration.
        for (; remaining > 0; remaining -= 16 * sizeof(int16_t)) {
#if HAS_SSE41
            __m128i dst_lo = _mm_loadu_si128((const __m128i *)dst);
            __m128i dst_hi = _mm_loadu_si128((const __m128i *)(dst + 8));
            __m128i src_lo = _mm_loadu_si128((const __m128i *)src);
            __m128i src_hi = _mm_loadu_si128((const __m128i *)(src + 8));

            dst_lo = _mm_subs_epi16(dst_lo, src_lo);
            dst_hi = _mm_subs_epi16(dst_hi, src_hi);
            _mm_storeu_si128((__m128i *)dst, dst_lo);
            _mm_storeu_si128((__m128i *)(dst + 8), dst_hi);
#else
            int k;
            for (k = 0; k < 16; k++) {
                int32_t diff = dst[k] - src[k];
                dst[k] = clamp16(diff);
            }
#endif
            dst += 16;
            src += 16;
        }
        // Everything has been consumed; the general loop would not run anyway.
        return;
    }
#endif

    // General case: out += in * gain (Q15), 16 samples per iteration.
    for (; remaining > 0; remaining -= 16 * sizeof(int16_t)) {
#if HAS_SSE41
        __m128i dst_lo = _mm_loadu_si128((const __m128i *)dst);
        __m128i dst_hi = _mm_loadu_si128((const __m128i *)(dst + 8));
        __m128i src_lo = _mm_loadu_si128((const __m128i *)src);
        __m128i src_hi = _mm_loadu_si128((const __m128i *)(src + 8));

        dst_lo = _mm_adds_epi16(dst_lo, _mm_mulhrs_epi16(src_lo, gain_vec));
        dst_hi = _mm_adds_epi16(dst_hi, _mm_mulhrs_epi16(src_hi, gain_vec));
        _mm_storeu_si128((__m128i *)dst, dst_lo);
        _mm_storeu_si128((__m128i *)(dst + 8), dst_hi);
#elif HAS_NEON
        int16x8_t dst_lo = vld1q_s16(dst);
        int16x8_t dst_hi = vld1q_s16(dst + 8);
        int16x8_t src_lo = vld1q_s16(src);
        int16x8_t src_hi = vld1q_s16(src + 8);

        dst_lo = vqaddq_s16(dst_lo, vqrdmulhq_n_s16(src_lo, gain));
        dst_hi = vqaddq_s16(dst_hi, vqrdmulhq_n_s16(src_hi, gain));
        vst1q_s16(dst, dst_lo);
        vst1q_s16(dst + 8, dst_hi);
#else
        int k;
        for (k = 0; k < 16; k++) {
            int32_t mixed = ((dst[k] * 0x7fff + src[k] * gain) + 0x4000) >> 15;
            dst[k] = clamp16(mixed);
        }
#endif
        dst += 16;
        src += 16;
    }
}

53
src/pc/mixer.h Normal file
View file

@ -0,0 +1,53 @@
/*
 * Replaces the audio command-list macros (aMix, aResample, ...) with direct
 * calls to the *Impl functions declared below, so that code written against
 * those macros executes the operation immediately.
 */
#ifndef MIXER_H
#define MIXER_H
#include <stdint.h>
#include <ultra64.h>
/* Drop any earlier definitions of these macros (presumably the ones pulled in
 * via ultra64.h -- TODO confirm) before redefining them at the bottom. */
#undef aSegment
#undef aClearBuffer
#undef aSetBuffer
#undef aLoadBuffer
#undef aSaveBuffer
#undef aDMEMMove
#undef aMix
#undef aEnvMixer
#undef aResample
#undef aInterleave
#undef aSetVolume
#undef aSetVolume32
#undef aSetLoop
#undef aLoadADPCM
#undef aADPCMdec
/* Implementations of the individual audio commands. */
void aClearBufferImpl(uint16_t addr, int nbytes);
void aLoadBufferImpl(const void *source_addr);
void aSaveBufferImpl(int16_t *dest_addr);
void aLoadADPCMImpl(int num_entries_times_16, const int16_t *book_source_addr);
void aSetBufferImpl(uint8_t flags, uint16_t in, uint16_t out, uint16_t nbytes);
void aSetVolumeImpl(uint8_t flags, int16_t v, int16_t t, int16_t r);
void aInterleaveImpl(uint16_t left, uint16_t right);
void aDMEMMoveImpl(uint16_t in_addr, uint16_t out_addr, int nbytes);
void aSetLoopImpl(ADPCM_STATE *adpcm_loop_state);
void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state);
void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state);
void aEnvMixerImpl(uint8_t flags, ENVMIX_STATE state);
void aMixImpl(int16_t gain, uint16_t in_addr, uint16_t out_addr);
/* Macro front ends. The `pkt` argument is accepted for source compatibility
 * and ignored. */
/* aSegment expands to an empty statement. */
#define aSegment(pkt, s, b) do { } while(0)
#define aClearBuffer(pkt, d, c) aClearBufferImpl(d, c)
#define aLoadBuffer(pkt, s) aLoadBufferImpl(s)
#define aSaveBuffer(pkt, s) aSaveBufferImpl(s)
#define aLoadADPCM(pkt, c, d) aLoadADPCMImpl(c, d)
#define aSetBuffer(pkt, f, i, o, c) aSetBufferImpl(f, i, o, c)
#define aSetVolume(pkt, f, v, t, r) aSetVolumeImpl(f, v, t, r)
/* Splits the 32-bit `tr` into its upper and lower 16 bits for aSetVolume. */
#define aSetVolume32(pkt, f, v, tr) aSetVolume(pkt, f, v, (int16_t)((tr) >> 16), (int16_t)(tr))
#define aInterleave(pkt, l, r) aInterleaveImpl(l, r)
#define aDMEMMove(pkt, i, o, c) aDMEMMoveImpl(i, o, c)
#define aSetLoop(pkt, a) aSetLoopImpl(a)
#define aADPCMdec(pkt, f, s) aADPCMdecImpl(f, s)
#define aResample(pkt, f, p, s) aResampleImpl(f, p, s)
#define aEnvMixer(pkt, f, s) aEnvMixerImpl(f, s)
/* The flags argument `f` of aMix is dropped; only gain and addresses are used. */
#define aMix(pkt, f, g, i, o) aMixImpl(g, i, o)
#endif

225
src/pc/pc_main.c Normal file
View file

@ -0,0 +1,225 @@
#include <stdlib.h>
#ifdef TARGET_WEB
#include <emscripten.h>
#include <emscripten/html5.h>
#endif
#include "sm64.h"
#include "game/memory.h"
#include "audio/external.h"
#include "gfx/gfx_pc.h"
#include "gfx/gfx_opengl.h"
#include "gfx/gfx_direct3d11.h"
#include "gfx/gfx_direct3d12.h"
#include "gfx/gfx_dxgi.h"
#include "gfx/gfx_glx.h"
#include "gfx/gfx_sdl.h"
#include "audio/audio_api.h"
#include "audio/audio_wasapi.h"
#include "audio/audio_pulse.h"
#include "audio/audio_alsa.h"
#include "audio/audio_sdl.h"
#include "audio/audio_null.h"
#include "controller/controller_keyboard.h"
#include "configfile.h"
/* File the configuration is loaded from at startup and saved to at exit. */
#define CONFIG_FILE "sm64config.txt"
/* Globals defined here for the PC build; their users are outside this file.
 * NOTE(review): presumably these replace definitions from the console-only
 * main file -- confirm. */
OSMesg D_80339BEC;
OSMesgQueue gSIEventMesgQueue;
s8 gResetTimer;
s8 D_8032C648;
s8 gDebugLevelSelect;
s8 gShowProfiler;
s8 gShowDebugText;
/* Backends selected in main_func(). */
static struct AudioAPI *audio_api;
static struct GfxWindowManagerAPI *wm_api;
static struct GfxRenderingAPI *rendering_api;
/* Entry points implemented elsewhere in the project. */
extern void gfx_run(Gfx *commands);
extern void thread5_game_loop(void *arg);
extern void create_next_audio_buffer(s16 *samples, u32 num_samples);
void game_loop_one_iteration(void);
/* No-op in this build: the audio task argument is ignored. */
void dispatch_audio_sptask(UNUSED struct SPTask *spTask) {
}
/* No-op in this build: vblank handlers are not registered anywhere. */
void set_vblank_handler(UNUSED s32 index, UNUSED struct VblankHandler *handler, UNUSED OSMesgQueue *queue, UNUSED OSMesg *msg) {
}
/* Set to 1 by main_func() once startup has finished; until then
 * send_display_list() drops every display list. */
static uint8_t inited = 0;
#include "game/game_init.h" // for gGlobalTimer
void send_display_list(struct SPTask *spTask) {
if (!inited) {
return;
}
gfx_run((Gfx *)spTask->task.t.data_ptr);
}
/* Turns every later printf call in this file into a plain parenthesized
 * expression with no effect, i.e. disables debug output.
 * NOTE(review): redefining a standard library name is fragile -- consider a
 * dedicated debug macro instead. */
#define printf
/* Number of sample frames generated per audio update: SAMPLES_HIGH when the
 * backend has less buffered than it wants, SAMPLES_LOW otherwise
 * (see produce_one_frame). */
#ifdef VERSION_EU
#define SAMPLES_HIGH 656
#define SAMPLES_LOW 640
#else
#define SAMPLES_HIGH 544
#define SAMPLES_LOW 528
#endif
/*
 * Runs one game frame: starts the graphics frame, advances the game by one
 * iteration, synthesizes two audio updates and submits them to the audio
 * backend, then finishes the graphics frame.
 *
 * The size of each audio update is SAMPLES_HIGH when the backend reports
 * fewer buffered samples than it wants, and SAMPLES_LOW otherwise.
 */
void produce_one_frame(void) {
    // Room for two updates of up to SAMPLES_HIGH stereo frames.
    s16 audio_buffer[SAMPLES_HIGH * 2 * 2];
    s16 *write_pos = audio_buffer;
    u32 num_audio_samples;
    int samples_left;
    int update;

    gfx_start_frame();
    game_loop_one_iteration();

    samples_left = audio_api->buffered();
    if (samples_left < audio_api->get_desired_buffered()) {
        num_audio_samples = SAMPLES_HIGH;
    } else {
        num_audio_samples = SAMPLES_LOW;
    }

    for (update = 0; update < 2; update++) {
        create_next_audio_buffer(write_pos, num_audio_samples);
        // Each sample frame occupies two s16 values.
        write_pos += num_audio_samples * 2;
    }

    // Two updates, 4 bytes per sample frame.
    audio_api->play((u8 *)audio_buffer, 2 * num_audio_samples * 4);

    gfx_end_frame();
}
#ifdef TARGET_WEB
/* Empty callback passed to emscripten_set_main_loop() in main_func(); frames
 * are produced from on_anim_frame() instead. */
static void em_main_loop(void) {
}
/* Asks the browser to call `func` with the frame timestamp on the next
 * animation frame. The function pointer is passed to JavaScript as $0 and
 * invoked through dynCall with the "vd" signature. */
static void request_anim_frame(void (*func)(double time)) {
EM_ASM(requestAnimationFrame(function(time) {
dynCall("vd", $0, [time]);
}), func);
}
/*
 * Animation-frame callback: produces the game frames that are due at `time`
 * (milliseconds) and re-arms itself for the next animation frame.
 */
static void on_anim_frame(double time) {
    static double target_time;
    // Milliseconds to frame count (33.333 ms -> 1).
    const double now = time * 0.03;
    int produced;

    if (now >= target_time + 10.0) {
        // More than 10 frames behind, probably after a period of inactivity:
        // resynchronize, with a small margin to avoid potential jitter later.
        target_time = now - 0.010;
    }

    // At most two frames per callback, in case the refresh rate is
    // 15 Hz or something similarly low.
    for (produced = 0; produced < 2 && now >= target_time; produced++) {
        produce_one_frame();
        target_time += 1.0;
    }

    request_anim_frame(on_anim_frame);
}
#endif
/* Writes the current configuration to CONFIG_FILE; registered with atexit()
 * in main_func(). */
static void save_config(void) {
configfile_save(CONFIG_FILE);
}
/* Window-manager callback: records the new fullscreen state in the
 * configuration variable so it is saved with the config file. */
static void on_fullscreen_changed(bool is_now_fullscreen) {
configFullscreen = is_now_fullscreen;
}
/*
 * Program start-up: sets up the memory pools, loads the configuration,
 * selects and initializes the graphics and audio backends, initializes the
 * game, and then (outside the web build) runs the main loop forever.
 */
void main_func(void) {
// Backing storage for the main pool. The element count is scaled by
// sizeof(void *) / 4 -- presumably to leave room for wider pointers on
// 64-bit hosts; TODO confirm.
static u64 pool[0x165000/8 / 4 * sizeof(void *)];
main_pool_init(pool, pool + sizeof(pool) / sizeof(pool[0]));
gEffectsMemoryPool = mem_pool_init(0x4000, MEMORY_POOL_LEFT);
configfile_load(CONFIG_FILE);
// Persist the configuration on normal process exit.
atexit(save_config);
#ifdef TARGET_WEB
emscripten_set_main_loop(em_main_loop, 0, 0);
request_anim_frame(on_anim_frame);
#endif
// Pick the rendering and window-manager backends at compile time.
// NOTE(review): if none of ENABLE_DX12 / ENABLE_DX11 / ENABLE_OPENGL is
// defined, both pointers stay NULL and are dereferenced below.
#if defined(ENABLE_DX12)
rendering_api = &gfx_direct3d12_api;
wm_api = &gfx_dxgi_api;
#elif defined(ENABLE_DX11)
rendering_api = &gfx_direct3d11_api;
wm_api = &gfx_dxgi_api;
#elif defined(ENABLE_OPENGL)
rendering_api = &gfx_opengl_api;
#if defined(__linux__)
wm_api = &gfx_glx;
#else
wm_api = &gfx_sdl;
#endif
#endif
gfx_init(wm_api, rendering_api, "Super Mario 64 PC-Port", configFullscreen);
wm_api->set_fullscreen_changed_callback(on_fullscreen_changed);
wm_api->set_keyboard_callbacks(keyboard_on_key_down, keyboard_on_key_up, keyboard_on_all_keys_up);
// Audio backends are tried in this order; the first one whose init()
// succeeds is used.
#if HAVE_WASAPI
if (audio_api == NULL && audio_wasapi.init()) {
audio_api = &audio_wasapi;
}
#endif
#if HAVE_PULSE_AUDIO
if (audio_api == NULL && audio_pulse.init()) {
audio_api = &audio_pulse;
}
#endif
#if HAVE_ALSA
if (audio_api == NULL && audio_alsa.init()) {
audio_api = &audio_alsa;
}
#endif
#ifdef TARGET_WEB
if (audio_api == NULL && audio_sdl.init()) {
audio_api = &audio_sdl;
}
#endif
// Fall back to the null backend when nothing else is available.
if (audio_api == NULL) {
audio_api = &audio_null;
}
audio_init();
sound_init();
// NOTE(review): execution continues after this call, so in this build
// thread5_game_loop() must return after its setup work -- confirm.
thread5_game_loop(NULL);
#ifdef TARGET_WEB
/*for (int i = 0; i < atoi(argv[1]); i++) {
game_loop_one_iteration();
}*/
// From here on send_display_list() forwards display lists; frames are
// driven by on_anim_frame().
inited = 1;
#else
inited = 1;
// Hand control to the window manager, which calls produce_one_frame().
while (1) {
wm_api->main_loop(produce_one_frame);
}
#endif
}
/* Process entry point: WinMain on Windows, main elsewhere. Both ignore their
 * arguments, call main_func() and return 0. */
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
int WINAPI WinMain(UNUSED HINSTANCE hInstance, UNUSED HINSTANCE hPrevInstance, UNUSED LPSTR pCmdLine, UNUSED int nCmdShow) {
main_func();
return 0;
}
#else
int main(UNUSED int argc, UNUSED char *argv[]) {
main_func();
return 0;
}
#endif

View file

@ -0,0 +1,187 @@
#include <stdio.h>
#include <string.h>
#include "lib/src/libultra_internal.h"
#include "macros.h"
#ifdef TARGET_WEB
#include <emscripten.h>
#endif
/* Defined elsewhere; not referenced in the part of the file shown here. */
extern OSMgrArgs piMgrArgs;
/* Clock rate constant (62.5 million). NOTE(review): presumably ticks per
 * second of the emulated counter -- confirm against users of osClockRate. */
u64 osClockRate = 62500000;
/*
 * Replacement for the DMA transfer: copies nbytes from the address stored in
 * devAddr to vAddr synchronously with memcpy. The message block, priority,
 * direction and message queue are ignored. Always returns 0.
 */
s32 osPiStartDma(UNUSED OSIoMesg *mb, UNUSED s32 priority, UNUSED s32 direction,
                 uintptr_t devAddr, void *vAddr, size_t nbytes,
                 UNUSED OSMesgQueue *mq) {
    const void *source = (const void *) devAddr;

    memcpy(vAddr, source, nbytes);
    return 0;
}
/*
 * Initializes an empty message queue that stores up to `count` messages in
 * the caller-provided array `msgBuf`.
 */
void osCreateMesgQueue(OSMesgQueue *mq, OSMesg *msgBuf, s32 count) {
    /* Storage and capacity. */
    mq->msg = msgBuf;
    mq->msgCount = count;

    /* The queue starts out empty, reading from slot 0. */
    mq->first = 0;
    mq->validCount = 0;
}
/* No-op: event messages are not registered in this implementation. */
void osSetEventMesg(UNUSED OSEvent e, UNUSED OSMesgQueue *mq, UNUSED OSMesg msg) {
}
/* Stub: nothing is queued; always returns 0. */
s32 osJamMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
return 0;
}
/*
 * Appends `msg` to the queue. Only implemented when VERSION_EU is defined;
 * in other builds the call does nothing and returns 0.
 *
 * Returns 0 on success, or -1 when the queue is already full (EU only).
 * The `flag` argument is ignored.
 */
s32 osSendMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED s32 flag) {
#ifdef VERSION_EU
    if (mq->validCount >= mq->msgCount) {
        return -1;
    }

    /* Write into the first free slot of the ring buffer. */
    mq->msg[(mq->first + mq->validCount) % mq->msgCount] = msg;
    mq->validCount++;
#endif
    return 0;
}
/*
 * Removes the oldest message from the queue. Only implemented when
 * VERSION_EU is defined; in other builds the call does nothing and returns 0.
 *
 * mq:   queue to read from.
 * msg:  receives the message; may be NULL to discard it.
 * flag: ignored.
 *
 * Returns 0 on success, or -1 when the queue is empty (EU only).
 */
s32 osRecvMesg(UNUSED OSMesgQueue *mq, UNUSED OSMesg *msg, UNUSED s32 flag) {
/* Use #ifdef like osSendMesg(): the two must be enabled together, and #if
 * would fail to preprocess if VERSION_EU were defined without a value. */
#ifdef VERSION_EU
    if (mq->validCount == 0) {
        return -1;
    }
    if (msg != NULL) {
        *msg = mq->msg[mq->first];
    }
    /* Advance the read position around the ring buffer. */
    mq->first = (mq->first + 1) % mq->msgCount;
    mq->validCount--;
#endif
    return 0;
}
/*
 * Returns the numeric value of `addr` unchanged: this implementation applies
 * no address translation.
 */
uintptr_t osVirtualToPhysical(void *addr) {
    uintptr_t physical = (uintptr_t) addr;

    return physical;
}
/* No-op: no video interface manager is created here. */
void osCreateViManager(UNUSED OSPri pri) {
}
/* No-op: the requested video mode is ignored. */
void osViSetMode(UNUSED OSViMode *mode) {
}
/* No-op: no retrace event is registered. */
void osViSetEvent(UNUSED OSMesgQueue *mq, UNUSED OSMesg msg, UNUSED u32 retraceCount) {
}
/* No-op: screen blanking requests are ignored. */
void osViBlack(UNUSED u8 active) {
}
/* No-op: special video features are ignored. */
void osViSetSpecialFeatures(UNUSED u32 func) {
}
/* No-op: the framebuffer address is ignored. */
void osViSwapBuffer(UNUSED void *vaddr) {
}
/* Stub: always reports time 0. */
OSTime osGetTime(void) {
return 0;
}
/* No-op: no cache maintenance is performed here. */
void osWritebackDCacheAll(void) {
}
/* No-op: no cache maintenance is performed here. */
void osWritebackDCache(UNUSED void *a, UNUSED size_t b) {
}
/* No-op: no cache maintenance is performed here. */
void osInvalDCache(UNUSED void *a, UNUSED size_t b) {
}
/*
 * Returns a value that increases by one on every call, starting at 0.
 * It is a call counter, not a measurement of elapsed time.
 */
u32 osGetCount(void) {
    static u32 counter;
    u32 current = counter;

    counter = current + 1;
    return current;
}
/*
 * Computes the playback frequency that results from requesting `freq`.
 *
 * The request is converted into an integer divider of a fixed clock constant
 * (rounded to nearest); the returned value is that clock divided by the
 * divider, so it can differ slightly from `freq`.
 *
 * Returns -1 when `freq` is 0 or so high that the divider would fall below
 * 0x84. No hardware is programmed.
 */
s32 osAiSetFrequency(u32 freq) {
    u32 clock_rate;
    u32 divider;

#ifdef VERSION_EU
    clock_rate = 0x02E6025C;
#else
    clock_rate = 0x02E6D354;
#endif

    /* A zero frequency would divide by zero and convert an infinite float to
     * an integer, which is undefined behavior. */
    if (freq == 0) {
        return -1;
    }

    divider = clock_rate / (float) freq + .5f;
    if (divider < 0x84) {
        return -1;
    }

    return clock_rate / (s32) divider;
}
/* Stub: always returns 1. NOTE(review): presumably this signals that a save
 * EEPROM is present -- confirm against callers. */
s32 osEepromProbe(UNUSED OSMesgQueue *mq) {
return 1;
}
/*
 * Reads `nbytes` bytes of the 512-byte save image, starting at byte offset
 * address * 8, into `buffer`.
 *
 * The image is stored in the file "sm64_save_file.bin", or, in the web build,
 * base64-encoded in localStorage.sm64_save_file.
 *
 * Returns 0 on success, or -1 when the requested range does not fit inside
 * the image or when no complete 512-byte image could be loaded. `buffer` is
 * left untouched on failure.
 */
s32 osEepromLongRead(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
    u8 content[512];
    s32 ret = -1;
    size_t offset = (size_t) address * 8;

    /* Reject ranges outside the image instead of reading past `content`. */
    if (nbytes < 0 || offset > sizeof(content) || (size_t) nbytes > sizeof(content) - offset) {
        return -1;
    }

#ifdef TARGET_WEB
    /* 684 is the base64 length of 512 bytes. */
    if (EM_ASM_INT({
        var s = localStorage.sm64_save_file;
        if (s && s.length === 684) {
            try {
                var binary = atob(s);
                if (binary.length === 512) {
                    for (var i = 0; i < 512; i++) {
                        HEAPU8[$0 + i] = binary.charCodeAt(i);
                    }
                    return 1;
                }
            } catch (e) {
            }
        }
        return 0;
    }, content)) {
        memcpy(buffer, content + offset, nbytes);
        ret = 0;
    }
#else
    FILE *fp = fopen("sm64_save_file.bin", "rb");
    if (fp == NULL) {
        return -1;
    }
    /* Only a complete image counts as a successful read. */
    if (fread(content, 1, sizeof(content), fp) == sizeof(content)) {
        memcpy(buffer, content + offset, nbytes);
        ret = 0;
    }
    fclose(fp);
#endif
    return ret;
}
/*
 * Writes `nbytes` bytes from `buffer` into the 512-byte save image at byte
 * offset address * 8 and stores the whole image.
 *
 * For a partial write the existing image is loaded first so the remaining
 * bytes are preserved; if no image can be loaded, those bytes are zero.
 * The image is stored in the file "sm64_save_file.bin", or, in the web build,
 * base64-encoded in localStorage.sm64_save_file.
 *
 * Returns 0 on success, or -1 when the requested range does not fit inside
 * the image or the file could not be written completely.
 */
s32 osEepromLongWrite(UNUSED OSMesgQueue *mq, u8 address, u8 *buffer, int nbytes) {
    u8 content[512] = {0};
    size_t offset = (size_t) address * 8;
    s32 ret;

    /* Reject ranges outside the image instead of overflowing `content`. */
    if (nbytes < 0 || offset > sizeof(content) || (size_t) nbytes > sizeof(content) - offset) {
        return -1;
    }

    if (address != 0 || nbytes != 512) {
        /* Best effort: a failed read leaves `content` zeroed. */
        osEepromLongRead(mq, 0, content, 512);
    }
    memcpy(content + offset, buffer, nbytes);

#ifdef TARGET_WEB
    EM_ASM({
        var str = "";
        for (var i = 0; i < 512; i++) {
            str += String.fromCharCode(HEAPU8[$0 + i]);
        }
        localStorage.sm64_save_file = btoa(str);
    }, content);
    ret = 0;
#else
    FILE *fp = fopen("sm64_save_file.bin", "wb");
    if (fp == NULL) {
        return -1;
    }
    ret = fwrite(content, 1, sizeof(content), fp) == sizeof(content) ? 0 : -1;
    /* fclose flushes buffered data; a failure there means the save is incomplete. */
    if (fclose(fp) != 0) {
        ret = -1;
    }
#endif
    return ret;
}