commit 610c515256a76f252f906644506767047a2a6894 Author: Joshua Goins Date: Tue Mar 15 15:33:57 2022 -0400 Add initial files diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..98e384e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +project(libxiv) + +add_library(libxiv STATIC + src/fiinparser.cpp + src/indexparser.cpp + src/crc32.cpp + src/gamedata.cpp + src/compression.cpp) +target_include_directories(libxiv PUBLIC include PRIVATE src) \ No newline at end of file diff --git a/include/compression.h b/include/compression.h new file mode 100644 index 0000000..8f5b58b --- /dev/null +++ b/include/compression.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +namespace zlib { + void no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size); +} \ No newline at end of file diff --git a/include/crc32.h b/include/crc32.h new file mode 100644 index 0000000..956644b --- /dev/null +++ b/include/crc32.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +// adapted from https://gist.github.com/timepp/1f678e200d9e0f2a043a9ec6b3690635 +namespace CRC32 { + void generate_table(uint32_t(&table)[256]); + uint32_t update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len); +} \ No newline at end of file diff --git a/include/fiinparser.h b/include/fiinparser.h new file mode 100644 index 0000000..8e9a939 --- /dev/null +++ b/include/fiinparser.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + +// these are methods dedicated to parsing "fiin" files, commonly shown as "fileinfo.fiin" + +// header is 1024 bytes +// for some reason, they store unknown1 and unknown2 in this weird format, +// unknown1 is capped at 256 (in decimal) and will overflow into unknown2 +// for example, 1 entry is equal to unknown1 = 96 and unknown2 = 0 +// 96 / 96 == 1 +// if you have say, 14 entries, then unknown1 = 64 and unknown2 = 5 +// 5 (unknown2) * 256 = 1280 + 64 (unknown1) = 1344 +// 1344 / 96 = 14 +// i could've made a 
mistake and this is actually really common but i don't know +struct FileInfoHeader { + char magic[9]; + uint8_t dummy1[16]; + uint8_t unknown; // version? always seems to be 4 + uint8_t dummy2[2]; + uint8_t unknown1; + uint8_t unknown2; + uint8_t dummy[994]; +}; + +// each entry is 96 bytes +struct FileInfoEntry { + uint8_t dummy[8]; // length of file name in some format + char str[64]; // simple \0-terminated string + uint8_t dummy2[24]; // sha1 +}; + +struct FileInfo { + FileInfoHeader header; + std::vector entries; +}; + +FileInfo readFileInfo(const std::string_view path); \ No newline at end of file diff --git a/include/gamedata.h b/include/gamedata.h new file mode 100644 index 0000000..e23022c --- /dev/null +++ b/include/gamedata.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +/* + * This handles reading/extracting the raw data from game data packs, such as dat0, index and index2 files. + * This is not local to "one" repository or sqpack, but oversees operation over all of them. + * + * This will "lazy-load" index and dat files as needed for now. + * + * This is definitely not the final name of this class :-p + */ +class GameData { +public: + /* + * Initializes the game data manager; this should point to the parent directory of the ex1/ex2/ffxiv directory. + */ + explicit GameData(std::string_view dataDirectory); + + /* + * This extracts the raw file from dataFilePath to outPath. + */ + void extractFile(std::string_view dataFilePath, std::string_view outPath); + +private: + /* + * This returns a proper SQEX-style filename for index, index2, and dat files. + * filenames are in the format of {category}{expansion}{chunk}.{platform}.{type} + */ + std::string calculateFilename(int category, int expansion, int chunk, std::string_view platform, std::string_view type); + + /* + * Returns the repository, category for a given game path - respectively. 
+ */ + std::tuple calculateRepositoryCategory(std::string_view path); + + /* + * Calculates a uint64 hash from a given game path. + */ + uint64_t calculateHash(std::string_view path); + + std::string dataDirectory; +}; \ No newline at end of file diff --git a/include/indexparser.h b/include/indexparser.h new file mode 100644 index 0000000..6c506e5 --- /dev/null +++ b/include/indexparser.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + +// these are methods dedicated to reading ".index" and ".index2" files +// major thanks to xiv.dev for providing the struct definitions + +enum PlatformId : uint8_t +{ + Win32, + PS3, + PS4 +}; + +// https://github.com/SapphireServer/Sapphire/blob/develop/deps/datReader/SqPack.cpp#L5 +struct SqPackHeader +{ + char magic[0x8]; + PlatformId platformId; + uint8_t padding0[3]; + uint32_t size; + uint32_t version; + uint32_t type; +}; + +struct SqPackIndexHeader +{ + uint32_t size; + uint32_t type; + uint32_t indexDataOffset; + uint32_t indexDataSize; +}; + +struct IndexHashTableEntry +{ + uint64_t hash; + uint32_t unknown : 1; + uint32_t dataFileId : 3; + uint32_t offset : 28; + uint32_t _padding; +}; + +struct Index2HashTableEntry +{ + uint32_t hash; + uint32_t unknown : 1; + uint32_t dataFileId : 3; + uint32_t offset : 28; +}; + +template +struct IndexFile { + SqPackHeader packHeader; + SqPackIndexHeader indexHeader; + + std::vector entries; +}; + +IndexFile readIndexFile(const std::string_view path); +IndexFile readIndex2File(const std::string_view path); \ No newline at end of file diff --git a/include/string_utils.h b/include/string_utils.h new file mode 100644 index 0000000..5c179c1 --- /dev/null +++ b/include/string_utils.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +std::vector tokenize(const std::string_view string, const std::string_view& delimiters) { + std::vector tokens; + + const size_t length = string.length(); + size_t lastPos = 0; + + while(lastPos < length + 1) { + size_t pos = 
string.find_first_of(delimiters, lastPos); + if(pos == std::string_view::npos) + pos = length; + + if(pos != lastPos) + tokens.emplace_back(string.data() + lastPos, pos - lastPos); + + lastPos = pos + 1; + } + + return tokens; +} + +bool stringContains(const std::string_view a, const std::string_view b) { + return a.find(b) != std::string::npos; +} + +std::string toLowercase(std::string str) { + std::transform(str.begin(), str.end(), str.begin(), + [](unsigned char c){ return std::tolower(c); }); + + return str; +} \ No newline at end of file diff --git a/src/compression.cpp b/src/compression.cpp new file mode 100644 index 0000000..0f7e00b --- /dev/null +++ b/src/compression.cpp @@ -0,0 +1,30 @@ +#include "compression.h" + +#include +#include + +// adapted from https://github.com/ahom/ffxiv_reverse/blob/312a0af8b58929fab48438aceae8da587be9407f/xiv/utils/src/zlib.cpp#L31 +void zlib::no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size) { + z_stream strm = {}; + strm.avail_in = in_size; + + // Init with windowBits = -15 because this compressed data has no zlib header (raw deflate) + auto ret = inflateInit2(&strm, -15); + if (ret != Z_OK) { + throw std::runtime_error("Error at zlib init: " + std::to_string(ret)); + } + + // Set pointers to the right addresses + strm.next_in = in; + strm.avail_out = out_size; + strm.next_out = out; + + // Actually decompress the data + ret = inflate(&strm, Z_NO_FLUSH); + if (ret != Z_STREAM_END) { + throw std::runtime_error("Error at zlib inflate: " + std::to_string(ret)); + } + + // Clean up + inflateEnd(&strm); +} \ No newline at end of file diff --git a/src/crc32.cpp b/src/crc32.cpp new file mode 100644 index 0000000..bda444b --- /dev/null +++ b/src/crc32.cpp @@ -0,0 +1,27 @@ +#include "crc32.h" + +void CRC32::generate_table(uint32_t(&table)[256]) { + uint32_t polynomial = 0xEDB88320; + for (uint32_t i = 0; i < 256; i++) { + uint32_t c = i; + for (size_t j = 0; j < 8; j++) { + if (c & 1) { + c = polynomial ^ (c >> 1); + } + 
else { + c >>= 1; + } + } + table[i] = c; + } +} + +uint32_t CRC32::update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len) { + uint32_t c = initial ^ 0xFFFFFFFF; + const auto* u = static_cast(buf); + for (size_t i = 0; i < len; ++i) { + c = table[(c ^ u[i]) & 0xFF] ^ (c >> 8); + } + + return c ^ 0xFFFFFFFF; +} \ No newline at end of file diff --git a/src/fiinparser.cpp b/src/fiinparser.cpp new file mode 100644 index 0000000..92f673b --- /dev/null +++ b/src/fiinparser.cpp @@ -0,0 +1,38 @@ +#include "fiinparser.h" + +#include +#include +#include + +FileInfo readFileInfo(const std::string_view path) { + FILE* file = fopen(path.data(), "rb"); + if(!file) { + throw std::runtime_error("Failed to read file info from " + std::string(path.data())); + } + + FileInfo info; + fread(&info.header, sizeof info.header, 1, file); + + char magic[9] = "FileInfo"; + if(strcmp(info.header.magic, magic) != 0) { + throw std::runtime_error("Invalid fileinfo magic!"); + } + + int overflow = info.header.unknown2; + int extra = overflow * 256; + int first = info.header.unknown1 / 96; + int first2 = extra / 96; + int actualEntries = first + first2 + 1; // is this 1 really needed? lol + + int numEntries = actualEntries; + for(int i = 0; i < numEntries; i++) { + FileInfoEntry entry; + fread(&entry, sizeof entry, 1, file); + + info.entries.push_back(entry); + } + + fclose(file); + + return info; +} \ No newline at end of file diff --git a/src/gamedata.cpp b/src/gamedata.cpp new file mode 100644 index 0000000..61e0dea --- /dev/null +++ b/src/gamedata.cpp @@ -0,0 +1,183 @@ +#include "gamedata.h" +#include "indexparser.h" +#include "crc32.h" +#include "compression.h" +#include "string_utils.h" + +#include +#include +#include + +// TODO: should be enum? 
+// taken from https://xiv.dev/data-files/sqpack#categories +std::unordered_map categoryToID = { + {"common", 0}, + {"bgcommon", 1}, + {"bg", 2}, + {"cut", 3}, + {"chara", 4}, + {"shader", 5}, + {"ui", 6}, + {"sound", 7}, + {"vfx", 8}, + {"ui_script", 9}, + {"exd", 10}, + {"game_script", 11}, + {"music", 12}, + {"sqpack_test", 13}, + {"debug", 14}, +}; + +GameData::GameData(const std::string_view dataDirectory) { + this->dataDirectory = dataDirectory; +} + +uint64_t GameData::calculateHash(const std::string_view path) { + std::string data = toLowercase(path.data()); + + auto lastSeperator = data.find_last_of('/'); + const std::string filename = data.substr(lastSeperator + 1, data.length()); + const std::string directory = data.substr(0, lastSeperator); + + uint32_t table[256] = {}; + CRC32::generate_table(table); + + // we actually want JAMCRC, which is just the bitwise not of a regular crc32 hash + const uint32_t directoryCrc = ~CRC32::update(table, 0, directory.data(), directory.size()); + const uint32_t filenameCrc = ~CRC32::update(table, 0, filename.data(), filename.size()); + + return static_cast(directoryCrc) << 32 | filenameCrc; +} + +std::tuple GameData::calculateRepositoryCategory(std::string_view path) { + std::string repository, category; + + auto tokens = tokenize(path, "/"); + if(stringContains(tokens[1], "ex") && !stringContains(tokens[0], "exd")) { + repository = tokens[1]; + } else { + repository = "ffxiv"; + } + + category = tokens[0]; + + return {repository, category}; +} + +int getExpansionID(std::string_view repositoryName) { + if(repositoryName == "ffxiv") + return 0; + + return std::stoi(std::string(repositoryName.substr(2, 2))); +} + +std::string GameData::calculateFilename(const int category, const int expansion, const int chunk, const std::string_view platform, const std::string_view type) { + return fmt::sprintf("%02x%02x%02x.%s.%s", category, expansion, chunk, platform, type); +} + +void GameData::extractFile(std::string_view 
dataFilePath, std::string_view outPath) { + const uint64_t hash = calculateHash(dataFilePath); + auto [repository, category] = calculateRepositoryCategory(dataFilePath); + + // TODO: handle platforms other than win32 + auto indexFilename = calculateFilename(categoryToID[category], getExpansionID(repository), 0, "win32", "index"); + + // TODO: handle hashes in index2 files (we can read them but it's not set up yet.) + auto indexFile = readIndexFile(dataDirectory + "/" + repository + "/" + indexFilename); + + for(const auto entry : indexFile.entries) { + if(entry.hash == hash) { + auto dataFilename = calculateFilename(categoryToID[category], getExpansionID(repository), entry.dataFileId, "win32", "dat0"); + + FILE* file = fopen((dataDirectory + "/" + repository + "/" + dataFilename).c_str(), "rb"); + if(file == nullptr) { + throw std::runtime_error("Failed to open data file: " + dataFilename); + } + + const size_t offset = entry.offset * 0x80; + fseek(file, offset, SEEK_SET); + + enum FileType : int32_t { + Empty = 1, + Standard = 2, + Model = 3, + Texture = 4 + }; + + struct FileInfo { + uint32_t size; + FileType fileType; + int32_t fileSize; + uint32_t dummy[2]; + uint32_t numBlocks; + } info; + + fread(&info, sizeof(FileInfo), 1, file); + + if(info.fileType != FileType::Standard) { + throw std::runtime_error("File type is not handled yet for " + std::string(dataFilePath)); + } + + struct Block { + int32_t offset; + int16_t dummy; + int16_t dummy2; + }; + + std::vector blocks; + + for(int i = 0; i < info.numBlocks; i++) { + Block block; + fread(&block, sizeof(Block), 1, file); + + blocks.push_back(block); + } + + std::vector data; + + const size_t startingPos = offset + info.size; + for(auto block : blocks) { + struct BlockHeader { + int32_t size; + int32_t dummy; + int32_t compressedLength; // < 32000 means compressed data; otherwise the block is stored uncompressed + int32_t decompressedLength; + } header; + + fseek(file, startingPos + block.offset, SEEK_SET); + + fread(&header, sizeof(BlockHeader), 1, file); 
+ + std::vector localdata; + + bool isCompressed = header.compressedLength < 32000; + if(isCompressed) { + localdata.resize(header.decompressedLength); + + std::vector compressed_data; + compressed_data.resize(header.compressedLength); + fread(compressed_data.data(), header.compressedLength, 1, file); + + zlib::no_header_decompress(reinterpret_cast(compressed_data.data()), + compressed_data.size(), + reinterpret_cast(localdata.data()), + header.decompressedLength); + } else { + localdata.resize(header.decompressedLength); + + fread(localdata.data(), header.decompressedLength, 1, file); + } + + data.insert(data.end(), localdata.begin(), localdata.end()); + } + + fclose(file); + + FILE* newFile = fopen(outPath.data(), "w"); + fwrite(data.data(), data.size(), 1, newFile); + fclose(newFile); + } + } + + fmt::print("Extracted {} to {}", dataFilePath, outPath); +} diff --git a/src/indexparser.cpp b/src/indexparser.cpp new file mode 100644 index 0000000..45d4ad1 --- /dev/null +++ b/src/indexparser.cpp @@ -0,0 +1,65 @@ +#include "indexparser.h" + +#include +#include +#include + +template +void commonParseSqPack(FILE* file, IndexFile& index) { + fread(&index.packHeader, sizeof(SqPackHeader), 1, file); + + if(strcmp(index.packHeader.magic, "SqPack") != 0) { + throw std::runtime_error("Invalid sqpack magic."); + } + + // data starts at size + fseek(file, index.packHeader.size, SEEK_SET); + + // read index header + fread(&index.indexHeader, sizeof(SqPackIndexHeader), 1, file); + + if(index.packHeader.version != 1) { + throw std::runtime_error("Invalid sqpack version."); + } + + fseek(file, index.indexHeader.indexDataOffset, SEEK_SET); +} + +IndexFile readIndexFile(const std::string_view path) { + FILE* file = fopen(path.data(), "rb"); + if(!file) { + throw std::runtime_error("Failed to read index file from " + std::string(path.data())); + } + + IndexFile index; + commonParseSqPack(file, index); + + uint32_t numEntries = index.indexHeader.indexDataSize / 
sizeof(IndexHashTableEntry); + for(uint32_t i = 0; i < numEntries; i++) { + IndexHashTableEntry entry = {}; + fread(&entry, sizeof(IndexHashTableEntry), 1, file); + + index.entries.push_back(entry); + } + + return index; +} + +IndexFile readIndex2File(const std::string_view path) { + FILE* file = fopen(path.data(), "rb"); + if(!file) { + throw std::runtime_error("Failed to read index2 file from " + std::string(path.data())); + } + + IndexFile index; + commonParseSqPack(file, index); + + for(int i = 0; i < index.indexHeader.indexDataSize; i++) { + Index2HashTableEntry entry = {}; + fread(&entry, sizeof entry, 1, file); + + index.entries.push_back(entry); + } + + return index; +} \ No newline at end of file