Add initial files

2022-03-15 15:33:57 -04:00 · 2022-03-15 15:33:57 -04:00 · 610c515256
commit 610c515256
12 changed files with 549 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,9 @@
 # Builds libxiv as a static library from the parsers (fiin, index), the CRC32 and
 # compression helpers, and the game data extractor.
 # NOTE(review): there is no cmake_minimum_required() call and no target_link_libraries();
 # the sources include <zlib.h> and fmt headers, so presumably a parent project supplies
 # those dependencies - confirm.
 project(libxiv)
 add_library(libxiv STATIC
        src/fiinparser.cpp
        src/indexparser.cpp
        src/crc32.cpp
        src/gamedata.cpp
        src/compression.cpp)
 # "include" is exported to consumers; "src" is only visible while building the library itself
 target_include_directories(libxiv PUBLIC include PRIVATE src)
--- a/include/compression.h
+++ b/include/compression.h
@ -0,0 +1,7 @@
 #pragma once
 #include <cstdint>
 namespace zlib {
    /*
     * Inflates a deflate stream that has no zlib header from `in` into `out`.
     *
     * in / in_size: compressed input buffer and its length in bytes.
     * out / out_size: destination buffer and its capacity in bytes.
     *
     * Throws std::runtime_error if zlib cannot be initialised or if the stream
     * does not reach its end within the given buffers.
     */
    void no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size);
 }
--- a/include/crc32.h
+++ b/include/crc32.h
@ -0,0 +1,10 @@
 #pragma once
 #include <cstdint>
 #include <cstddef>
 // adapted from https://gist.github.com/timepp/1f678e200d9e0f2a043a9ec6b3690635
 namespace CRC32 {
    // Fills `table` with the 256-entry lookup table that update() consumes.
    void generate_table(uint32_t(&table)[256]);
    // Returns the CRC32 of the `len` bytes at `buf`, starting from `initial`
    // (callers in this library pass 0 to begin a fresh checksum).
    // `table` must already have been filled by generate_table().
    uint32_t update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len);
 }
--- a/include/fiinparser.h
+++ b/include/fiinparser.h
@ -0,0 +1,40 @@
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <string_view>
 // these are methods dedicated to parsing "fiin" files, commonly shown as "fileinfo.fiin"
 // the header is 1024 bytes
 // for some reason, they store unknown1 and unknown2 in this weird format:
 // unknown1 is a single byte, so it wraps at 256 (in decimal) and overflows into unknown2
 // for example, 1 entry is equal to unknown1 = 96 and unknown2 = 0
 // 96 / 96 == 1
 // if you have, say, 14 entries, then unknown1 = 64 and unknown2 = 5
 // 5 (unknown2) * 256 = 1280 + 64 (unknown1) = 1344
 // 1344 / 96 = 14
 // i could've made a mistake and this is actually really common but i don't know
 // On-disk header found at the start of a fiin file; the members add up to 1024 bytes.
 // readFileInfo reads it with a single fread, so the member order and sizes must not change.
 struct FileInfoHeader {
    char magic[9]; // must equal "FileInfo" (including the trailing \0) or readFileInfo rejects the file
    uint8_t dummy1[16];
    uint8_t unknown; // version? always seems to be 4
    uint8_t dummy2[2];
    uint8_t unknown1; // used by readFileInfo, together with unknown2, to work out the entry count
    uint8_t unknown2; // counts how many times unknown1 overflowed past 255
    uint8_t dummy[994];
 };
 // each entry is 96 bytes (8 + 64 + 24); readFileInfo reads entries one after another
 // directly after the header, so the layout must not change
 struct FileInfoEntry {
    uint8_t dummy[8]; // length of file name in some format
    char str[64]; // simple \0 encoded string
    uint8_t dummy2[24]; // sha1
 };
 // In-memory result of readFileInfo: the raw header plus every entry that followed it.
 struct FileInfo {
    FileInfoHeader header;
    std::vector<FileInfoEntry> entries;
 };
 // Reads the fiin file at `path` and returns its header and entries.
 // Throws std::runtime_error if the file cannot be opened or its magic is not "FileInfo".
 FileInfo readFileInfo(const std::string_view path);
--- a/include/gamedata.h
+++ b/include/gamedata.h
@ -0,0 +1,44 @@
 #pragma once
 #include <string_view>
 #include <string>
 /*
 * This handles reading/extracting the raw data from game data packs, such as dat0, index and index2 files.
 * This is not local to "one" repository or sqpack, but oversees operation over all of them.
 *
 * This will "lazy-load" index and dat files as needed for now.
 *
 * This is definitely not the final name of this class :-p
 */
 // NOTE(review): this class uses std::tuple and uint64_t, but the header only includes
 // <string_view> and <string>; it presumably relies on transitive includes - confirm.
 class GameData {
 public:
    /*
     * Initializes the game data manager. dataDirectory should point to the parent directory
     * of the ex1/ex2/ffxiv directories.
     */
    explicit GameData(std::string_view dataDirectory);
    /*
     * Extracts the raw file at the game path dataFilePath and writes it to outPath.
     * Throws std::runtime_error if the data file cannot be opened or if the stored
     * file type is not handled yet.
     */
    void extractFile(std::string_view dataFilePath, std::string_view outPath);
 private:
    /*
     * This returns a proper SQEX-style filename for index, index2, and dat files.
     * Filenames are in the format of {category}{expansion}{chunk}.{platform}.{type},
     * where the three numbers are each written as two hex digits.
     */
    std::string calculateFilename(int category, int expansion, int chunk, std::string_view platform, std::string_view type);
    /*
     * Returns the (repository, category) pair for a given game path, in that order.
     */
    std::tuple<std::string, std::string> calculateRepositoryCategory(std::string_view path);
    /*
     * Calculates a uint64 hash from a given game path: the directory hash goes in the
     * upper 32 bits and the filename hash in the lower 32 bits.
     */
    uint64_t calculateHash(std::string_view path);
    // root directory handed to the constructor; repository directories are looked up below it
    std::string dataDirectory;
 };
--- a/include/indexparser.h
+++ b/include/indexparser.h
@ -0,0 +1,62 @@
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <string_view>
 // these are methods dedicated to reading ".index" and ".index2" files
 // major thanks to xiv.dev for providing the struct definitions
 // Platform tag stored as a single byte inside SqPackHeader.
 // Values are implicit: Win32 = 0, PS3 = 1, PS4 = 2.
 enum PlatformId : uint8_t
 {
    Win32,
    PS3,
    PS4
 };
 // https://github.com/SapphireServer/Sapphire/blob/develop/deps/datReader/SqPack.cpp#L5
 // Header read from the very start of an index file with a single fread,
 // so the member order and sizes must not change.
 struct SqPackHeader
 {
    char magic[0x8]; // must compare equal to "SqPack" or parsing is aborted
    PlatformId platformId;
    uint8_t padding0[3];
    uint32_t size; // the parser seeks to this file offset to find the SqPackIndexHeader
    uint32_t version; // the parser only accepts version 1
    uint32_t type;
 };
 // Second header of an index file, located at the offset given by SqPackHeader::size.
 struct SqPackIndexHeader
 {
    uint32_t size;
    uint32_t type;
    uint32_t indexDataOffset; // file offset the parser seeks to before reading hash table entries
    uint32_t indexDataSize; // readIndexFile divides this by the entry size to get the entry count
 };
 // One hash table entry of an ".index" file, read straight from disk.
 struct IndexHashTableEntry
 {
    uint64_t hash; // compared against GameData::calculateHash() to find a file
    // the following three bit-fields share one 32-bit word
    uint32_t unknown : 1;
    uint32_t dataFileId : 3; // identifies which data file holds the entry
    uint32_t offset : 28; // multiplied by 0x80 to get the byte offset inside the data file
    uint32_t _padding;
 };
 // One hash table entry of an ".index2" file; same packed word as IndexHashTableEntry
 // but with a 32-bit hash and no trailing padding.
 struct Index2HashTableEntry
 {
    uint32_t hash;
    // the following three bit-fields share one 32-bit word
    uint32_t unknown : 1;
    uint32_t dataFileId : 3;
    uint32_t offset : 28;
 };
 // Parsed contents of an index file: both headers plus every hash table entry.
 // Entry is IndexHashTableEntry for ".index" and Index2HashTableEntry for ".index2".
 template<class Entry>
 struct IndexFile {
    SqPackHeader packHeader;
    SqPackIndexHeader indexHeader;
    std::vector<Entry> entries;
 };
 // Both readers throw std::runtime_error if the file cannot be opened, the magic is not
 // "SqPack", or the version is not 1.
 // Reads an ".index" file (64-bit hashes).
 IndexFile<IndexHashTableEntry> readIndexFile(const std::string_view path);
 // Reads an ".index2" file (32-bit hashes).
 IndexFile<Index2HashTableEntry> readIndex2File(const std::string_view path);
--- a/include/string_utils.h
+++ b/include/string_utils.h
@ -0,0 +1,34 @@
 #pragma once
 #include <algorithm>
 #include <cctype>
 #include <cstddef>
 #include <string>
 #include <string_view>
 #include <vector>
 /*
  * Splits `string` at every character contained in `delimiters` and returns the
  * non-empty pieces in order. Consecutive, leading and trailing delimiters never
  * produce empty tokens, so an empty or delimiter-only input yields an empty vector.
  *
  * Marked inline because this definition lives in a header: without it, including
  * the header from two translation units is a multiple-definition error.
  */
 inline std::vector<std::string> tokenize(const std::string_view string, const std::string_view& delimiters) {
    std::vector<std::string> tokens;
    const size_t length = string.length();
    size_t tokenStart = 0;
    while(tokenStart < length) {
        size_t tokenEnd = string.find_first_of(delimiters, tokenStart);
        // no further delimiter: the final token runs to the end of the input
        if(tokenEnd == std::string_view::npos)
            tokenEnd = length;
        // skip the empty span between two adjacent delimiters
        if(tokenEnd != tokenStart)
            tokens.emplace_back(string.data() + tokenStart, tokenEnd - tokenStart);
        tokenStart = tokenEnd + 1;
    }
    return tokens;
 }
 /*
  * Returns true when `b` occurs anywhere inside `a` (an empty `b` always matches).
  * Marked inline because this definition lives in a header and would otherwise be
  * defined once per including translation unit.
  */
 inline bool stringContains(const std::string_view a, const std::string_view b) {
    return a.find(b) != std::string_view::npos;
 }
 /*
  * Returns a copy of `str` with every character passed through std::tolower.
  * Marked inline because this definition lives in a header and would otherwise be
  * defined once per including translation unit.
  */
 inline std::string toLowercase(std::string str) {
    // go through unsigned char: std::tolower is undefined for negative char values
    std::transform(str.begin(), str.end(), str.begin(),
                   [](unsigned char c){ return static_cast<char>(std::tolower(c)); });
    return str;
 }
--- a/src/compression.cpp
+++ b/src/compression.cpp
@ -0,0 +1,30 @@
 #include "compression.h"
 #include <zlib.h>
 #include <stdexcept>
 // adopted from https://github.com/ahom/ffxiv_reverse/blob/312a0af8b58929fab48438aceae8da587be9407f/xiv/utils/src/zlib.cpp#L31
 /*
  * Inflates `in_size` bytes of header-less deflate data from `in` into `out`,
  * which can hold `out_size` bytes.
  *
  * Throws std::runtime_error if zlib fails to initialise or if inflate() does not
  * report the end of the stream. The zlib state is released on both the success
  * and the failure path.
  */
 void zlib::no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size) {
    z_stream strm = {};
    strm.avail_in = in_size;
    // Init with -15 because we do not have header in this compressed data
    auto ret = inflateInit2(&strm, -15);
    if (ret != Z_OK) {
        throw std::runtime_error("Error at zlib init: " + std::to_string(ret));
    }
    // Set pointers to the right addresses
    strm.next_in = in;
    strm.avail_out = out_size;
    strm.next_out = out;
    // Effectively decompress data
    ret = inflate(&strm, Z_NO_FLUSH);
    // Clean up before inspecting the result so a failed inflate does not leak the stream state
    inflateEnd(&strm);
    if (ret != Z_STREAM_END) {
        throw std::runtime_error("Error at zlib inflate: " + std::to_string(ret));
    }
 }
--- a/src/crc32.cpp
+++ b/src/crc32.cpp
@ -0,0 +1,27 @@
 #include "crc32.h"
 // Fills `table` so that table[b] holds the result of pushing byte value b
 // through eight shift-and-conditionally-xor steps with the polynomial below.
 void CRC32::generate_table(uint32_t(&table)[256]) {
    const uint32_t kPolynomial = 0xEDB88320;
    for (uint32_t byteValue = 0; byteValue < 256; ++byteValue) {
        uint32_t remainder = byteValue;
        for (int bit = 0; bit < 8; ++bit) {
            // remember the bit that is about to be shifted out
            const bool lowBitSet = (remainder & 1) != 0;
            remainder >>= 1;
            if (lowBitSet) {
                remainder ^= kPolynomial;
            }
        }
        table[byteValue] = remainder;
    }
 }
 // Folds the `len` bytes at `buf` into the checksum `initial` using `table`
 // (as produced by generate_table) and returns the updated checksum.
 uint32_t CRC32::update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len) {
    const auto* bytes = static_cast<const uint8_t*>(buf);
    // the running value is kept bit-inverted while bytes are consumed
    uint32_t crc = initial ^ 0xFFFFFFFF;
    size_t remaining = len;
    while (remaining != 0) {
        const uint32_t tableIndex = (crc ^ *bytes) & 0xFF;
        crc = (crc >> 8) ^ table[tableIndex];
        ++bytes;
        --remaining;
    }
    return crc ^ 0xFFFFFFFF;
 }
--- a/src/fiinparser.cpp
+++ b/src/fiinparser.cpp
@ -0,0 +1,38 @@
 #include "fiinparser.h"
 #include <cstdio>
 #include <cstring>
 #include <fmt/format.h>
 /*
  * Reads the fiin file at `path`: the 1024-byte header followed by its 96-byte entries.
  *
  * The entry count comes from the header: unknown1 and unknown2 form a 16-bit
  * little-endian byte count (unknown2 * 256 + unknown1) which is divided by the
  * entry size. If the file ends early, only the entries actually present are returned.
  *
  * Throws std::runtime_error if the file cannot be opened, or if the header is
  * missing or does not carry the "FileInfo" magic.
  */
 FileInfo readFileInfo(const std::string_view path) {
    // a string_view is not guaranteed to be null-terminated, so copy it before handing it to fopen
    const std::string filePath(path);
    FILE* file = fopen(filePath.c_str(), "rb");
    if(!file) {
        throw std::runtime_error("Failed to read file info from " + filePath);
    }
    FileInfo info = {};
    const char magic[9] = "FileInfo";
    // bounded compare: the on-disk magic is not trusted to contain a terminator
    const bool headerOk = fread(&info.header, sizeof info.header, 1, file) == 1
                          && memcmp(info.header.magic, magic, sizeof magic) == 0;
    if(!headerOk) {
        fclose(file);
        throw std::runtime_error("Invalid fileinfo magic!");
    }
    // combine both bytes first, then divide once; dividing each part separately
    // loses the remainders (e.g. unknown1 = 96, unknown2 = 0 must give exactly 1 entry)
    const size_t entryBytes = static_cast<size_t>(info.header.unknown2) * 256 + info.header.unknown1;
    const size_t numEntries = entryBytes / sizeof(FileInfoEntry);
    info.entries.reserve(numEntries);
    for(size_t i = 0; i < numEntries; i++) {
        FileInfoEntry entry;
        // stop at a truncated file instead of storing an uninitialised entry
        if(fread(&entry, sizeof entry, 1, file) != 1) {
            break;
        }
        info.entries.push_back(entry);
    }
    fclose(file);
    return info;
 }
--- a/src/gamedata.cpp
+++ b/src/gamedata.cpp
@ -0,0 +1,183 @@
 #include "gamedata.h"
 #include "indexparser.h"
 #include "crc32.h"
 #include "compression.h"
 #include "string_utils.h"
 #include <string>
 #include <algorithm>
 #include <fmt/printf.h>
 // TODO: should be enum?
 // taken from https://xiv.dev/data-files/sqpack#categories
 // Maps the first component of a game path (the category name) to the numeric id
 // that forms the first two hex digits of index/dat filenames.
 // The keys are string literals, so the string_view keys stay valid for the whole program.
 // NOTE(review): the map is mutable and operator[] inserts missing keys; a lookup with
 // an unknown name would add an entry whose key views the caller's string - prefer find().
 std::unordered_map<std::string_view, int> categoryToID = {
        {"common", 0},
        {"bgcommon", 1},
        {"bg", 2},
        {"cut", 3},
        {"chara", 4},
        {"shader", 5},
        {"ui", 6},
        {"sound", 7},
        {"vfx", 8},
        {"ui_script", 9},
        {"exd", 10},
        {"game_script", 11},
        {"music", 12},
        {"sqpack_test", 13},
        {"debug", 14},
 };
 // Remembers the root directory that holds the repository directories.
 GameData::GameData(const std::string_view dataDirectory)
    : dataDirectory(dataDirectory) {
 }
 /*
  * Calculates the 64-bit index hash of a game path.
  *
  * The path is lowercased and split at its last '/'. The directory hash fills the
  * upper 32 bits and the filename hash the lower 32 bits. If the path contains no
  * '/', both halves are computed from the whole path.
  */
 uint64_t GameData::calculateHash(const std::string_view path) {
    // copy through std::string(path): a string_view is not guaranteed to be
    // null-terminated, so path.data() must not be treated as a C string
    const std::string data = toLowercase(std::string(path));
    const auto lastSeparator = data.find_last_of('/');
    const std::string filename = data.substr(lastSeparator + 1);
    const std::string directory = data.substr(0, lastSeparator);
    uint32_t table[256] = {};
    CRC32::generate_table(table);
    // we actually want JAMCRC, which is just the bitwise not of a regular crc32 hash
    const uint32_t directoryCrc = ~CRC32::update(table, 0, directory.data(), directory.size());
    const uint32_t filenameCrc = ~CRC32::update(table, 0, filename.data(), filename.size());
    return (static_cast<uint64_t>(directoryCrc) << 32) | filenameCrc;
 }
 /*
  * Returns the (repository, category) pair for a game path.
  *
  * The category is the first path component. The repository is the second
  * component when it contains "ex" and the category does not contain "exd";
  * otherwise it is "ffxiv". A path without a second component falls back to
  * "ffxiv", and an empty path yields an empty category.
  */
 std::tuple<std::string, std::string> GameData::calculateRepositoryCategory(std::string_view path) {
    std::string repository = "ffxiv";
    std::string category;
    const auto tokens = tokenize(path, "/");
    if(!tokens.empty()) {
        category = tokens[0];
        // only look at the second component when there is one; indexing past the end is undefined behaviour
        if(tokens.size() > 1 && stringContains(tokens[1], "ex") && !stringContains(category, "exd")) {
            repository = tokens[1];
        }
    }
    return {repository, category};
 }
 // Converts a repository name into its numeric expansion id: "ffxiv" is 0, anything
 // else is parsed from the (up to) two characters after its first two characters.
 int getExpansionID(std::string_view repositoryName) {
    const bool isBaseGame = repositoryName.compare("ffxiv") == 0;
    if(isBaseGame) {
        return 0;
    }
    // presumably names look like "ex1"/"ex2", so this skips the "ex" prefix
    const std::string_view digits = repositoryName.substr(2, 2);
    const std::string number(digits);
    return std::stoi(number);
 }
 // Builds "{category}{expansion}{chunk}.{platform}.{type}", where each of the three
 // numbers is written as at least two zero-padded lowercase hex digits.
 std::string GameData::calculateFilename(const int category, const int expansion, const int chunk, const std::string_view platform, const std::string_view type) {
    std::string filename = fmt::sprintf("%02x%02x%02x.%s.%s", category, expansion, chunk, platform, type);
    return filename;
 }
 void GameData::extractFile(std::string_view dataFilePath, std::string_view outPath) {
    const uint64_t hash = calculateHash(dataFilePath);
    auto [repository, category] = calculateRepositoryCategory(dataFilePath);
    // TODO: handle platforms other than win32
    auto indexFilename = calculateFilename(categoryToID[category], getExpansionID(repository), 0, "win32", "index");
    // TODO: handle hashes in index2 files (we can read them but it's not setup yet.)
    auto indexFile = readIndexFile(dataDirectory + "/" + repository + "/" + indexFilename);
    for(const auto entry : indexFile.entries) {
        if(entry.hash == hash) {
            auto dataFilename = calculateFilename(categoryToID[category], getExpansionID(repository), entry.dataFileId, "win32", "dat0");
            FILE* file = fopen((dataDirectory + "/" + repository + "/" + dataFilename).c_str(), "rb");
            if(file == nullptr) {
                throw std::runtime_error("Failed to open data file: " + dataFilename);
            }
            const size_t offset = entry.offset * 0x80;
            fseek(file, offset, SEEK_SET);
            enum FileType : int32_t {
                Empty = 1,
                Standard = 2,
                Model = 3,
                Texture = 4
            };
            struct FileInfo {
                uint32_t size;
                FileType fileType;
                int32_t fileSize;
                uint32_t dummy[2];
                uint32_t numBlocks;
            } info;
            fread(&info, sizeof(FileInfo), 1, file);
            if(info.fileType != FileType::Standard) {
                throw std::runtime_error("File type is not handled yet for " + std::string(dataFilePath));
            }
            struct Block {
                int32_t offset;
                int16_t dummy;
                int16_t dummy2;
            };
            std::vector<Block> blocks;
            for(int i = 0; i < info.numBlocks; i++) {
                Block block;
                fread(&block, sizeof(Block), 1, file);
                blocks.push_back(block);
            }
            std::vector<std::uint8_t> data;
            const size_t startingPos = offset + info.size;
            for(auto block : blocks) {
                struct BlockHeader {
                    int32_t size;
                    int32_t dummy;
                    int32_t compressedLength; // < 32000 is uncompressed data
                    int32_t decompressedLength;
                } header;
                fseek(file, startingPos + block.offset, SEEK_SET);
                fread(&header, sizeof(BlockHeader), 1, file);
                std::vector<uint8_t> localdata;
                bool isCompressed = header.compressedLength < 32000;
                if(isCompressed) {
                    localdata.resize(header.decompressedLength);
                    std::vector<uint8_t> compressed_data;
                    compressed_data.resize(header.compressedLength);
                    fread(compressed_data.data(), header.compressedLength, 1, file);
                    zlib::no_header_decompress(reinterpret_cast<uint8_t*>(compressed_data.data()),
                                         compressed_data.size(),
                                         reinterpret_cast<uint8_t*>(localdata.data()),
                                         header.decompressedLength);
                } else {
                    localdata.resize(header.decompressedLength);
                    fread(localdata.data(), header.decompressedLength, 1, file);
                }
                data.insert(data.end(), localdata.begin(), localdata.end());
            }
            fclose(file);
            FILE* newFile = fopen(outPath.data(), "w");
            fwrite(data.data(), data.size(), 1, newFile);
            fclose(newFile);
        }
    }
    fmt::print("Extracted {} to {}", dataFilePath, outPath);
 }
--- a/src/indexparser.cpp
+++ b/src/indexparser.cpp
@ -0,0 +1,65 @@
 #include "indexparser.h"
 #include <cstdio>
 #include <cstring>
 #include <stdexcept>
 /*
  * Reads the two headers shared by ".index" and ".index2" files into `index` and
  * leaves `file` positioned at the first hash table entry.
  *
  * Throws std::runtime_error if the pack header is missing or lacks the "SqPack"
  * magic, if the version is not 1, or if the index header cannot be reached or read.
  * The caller keeps ownership of `file`.
  */
 template<class T>
 void commonParseSqPack(FILE* file, IndexFile<T>& index) {
    // bounded compare: magic is a fixed 8-byte field that is not trusted to be terminated
    const bool packHeaderOk = fread(&index.packHeader, sizeof(SqPackHeader), 1, file) == 1
                              && strncmp(index.packHeader.magic, "SqPack", sizeof index.packHeader.magic) == 0;
    if(!packHeaderOk) {
        throw std::runtime_error("Invalid sqpack magic.");
    }
    if(index.packHeader.version != 1) {
        throw std::runtime_error("Invalid sqpack version.");
    }
    // data starts at size
    const bool indexHeaderOk = fseek(file, static_cast<long>(index.packHeader.size), SEEK_SET) == 0
                               && fread(&index.indexHeader, sizeof(SqPackIndexHeader), 1, file) == 1;
    if(!indexHeaderOk) {
        throw std::runtime_error("Failed to read sqpack index header.");
    }
    if(fseek(file, static_cast<long>(index.indexHeader.indexDataOffset), SEEK_SET) != 0) {
        throw std::runtime_error("Failed to seek to sqpack index data.");
    }
 }
 /*
  * Reads the ".index" file at `path`: both headers and every hash table entry.
  *
  * Throws std::runtime_error if the file cannot be opened, its headers are invalid,
  * or it ends before all announced entries were read. The file is closed on every path.
  */
 IndexFile<IndexHashTableEntry> readIndexFile(const std::string_view path) {
    // a string_view is not guaranteed to be null-terminated, so copy it before handing it to fopen
    const std::string filePath(path);
    FILE* file = fopen(filePath.c_str(), "rb");
    if(!file) {
        throw std::runtime_error("Failed to read index file from " + filePath);
    }
    IndexFile<IndexHashTableEntry> index;
    try {
        commonParseSqPack(file, index);
        // indexDataSize is a byte count
        const uint32_t numEntries = index.indexHeader.indexDataSize / sizeof(IndexHashTableEntry);
        for(uint32_t i = 0; i < numEntries; i++) {
            IndexHashTableEntry entry = {};
            if(fread(&entry, sizeof(IndexHashTableEntry), 1, file) != 1) {
                throw std::runtime_error("Unexpected end of index file " + filePath);
            }
            index.entries.push_back(entry);
        }
    } catch(...) {
        fclose(file);
        throw;
    }
    fclose(file);
    return index;
 }
 /*
  * Reads the ".index2" file at `path`: both headers and every hash table entry.
  *
  * Throws std::runtime_error if the file cannot be opened, its headers are invalid,
  * or it ends before all announced entries were read. The file is closed on every path.
  */
 IndexFile<Index2HashTableEntry> readIndex2File(const std::string_view path) {
    // a string_view is not guaranteed to be null-terminated, so copy it before handing it to fopen
    const std::string filePath(path);
    FILE* file = fopen(filePath.c_str(), "rb");
    if(!file) {
        throw std::runtime_error("Failed to read index2 file from " + filePath);
    }
    IndexFile<Index2HashTableEntry> index;
    try {
        commonParseSqPack(file, index);
        // indexDataSize is a byte count, so divide by the entry size as readIndexFile does
        const uint32_t numEntries = index.indexHeader.indexDataSize / sizeof(Index2HashTableEntry);
        for(uint32_t i = 0; i < numEntries; i++) {
            Index2HashTableEntry entry = {};
            if(fread(&entry, sizeof entry, 1, file) != 1) {
                throw std::runtime_error("Unexpected end of index2 file " + filePath);
            }
            index.entries.push_back(entry);
        }
    } catch(...) {
        fclose(file);
        throw;
    }
    fclose(file);
    return index;
 }