commit 610c515256a76f252f906644506767047a2a6894
Author: Joshua Goins <josh@redstrate.com>
Date:   Tue Mar 15 15:33:57 2022 -0400

    Add initial files

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..98e384e
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,9 @@
+# project() should be preceded by cmake_minimum_required() so policies are well defined.
+# 3.8 is the first release that understands the cxx_std_17 compile feature used below.
+cmake_minimum_required(VERSION 3.8)
+project(libxiv)
+
+# Static library containing the parsers and the game data extractor.
+# NOTE: the sources include <zlib.h> and <fmt/...>; this file does not link either
+# library, so the consuming project is expected to provide them.
+add_library(libxiv STATIC
+        src/fiinparser.cpp
+        src/indexparser.cpp
+        src/crc32.cpp
+        src/gamedata.cpp
+        src/compression.cpp)
+target_include_directories(libxiv PUBLIC include PRIVATE src)
+# The public headers use std::string_view, so consumers need C++17 as well.
+target_compile_features(libxiv PUBLIC cxx_std_17)
\ No newline at end of file
diff --git a/include/compression.h b/include/compression.h
new file mode 100644
index 0000000..8f5b58b
--- /dev/null
+++ b/include/compression.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <cstdint>
+
+// Helpers built on top of zlib's inflate API.
+namespace zlib {
+    // Decompresses a deflate stream that carries no zlib header.
+    //   in / in_size:   compressed input buffer and its length in bytes
+    //   out / out_size: destination buffer and its capacity in bytes
+    // The definition in src/compression.cpp throws std::runtime_error when zlib
+    // fails to initialise or the stream does not decompress to its end.
+    void no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size);
+}
\ No newline at end of file
diff --git a/include/crc32.h b/include/crc32.h
new file mode 100644
index 0000000..956644b
--- /dev/null
+++ b/include/crc32.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+
+// adapted from https://gist.github.com/timepp/1f678e200d9e0f2a043a9ec6b3690635
+namespace CRC32 {
+    // Fills `table` with the 256 per-byte lookup values used by update().
+    void generate_table(uint32_t(&table)[256]);
+
+    // Returns the CRC32 of the `len` bytes at `buf`, continuing from `initial`.
+    // `table` must have been filled by generate_table(); pass 0 as `initial` to start a new checksum.
+    uint32_t update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len);
+}
\ No newline at end of file
diff --git a/include/fiinparser.h b/include/fiinparser.h
new file mode 100644
index 0000000..8e9a939
--- /dev/null
+++ b/include/fiinparser.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <string_view>
+
+// These are methods dedicated to parsing "fiin" files, commonly seen as "fileinfo.fiin".
+
+// The header is 1024 bytes.
+// For some reason, they store unknown1 and unknown2 in this weird format:
+// unknown1 is capped at 256 (in decimal) and will overflow into unknown2.
+// For example, 1 entry is equal to unknown1 = 96 and unknown2 = 0:
+// 96 / 96 == 1
+// If you have, say, 14 entries, then unknown1 = 64 and unknown2 = 5:
+// 5 (unknown2) * 256 = 1280, and 1280 + 64 (unknown1) = 1344
+// 1344 / 96 = 14
+// I could've made a mistake and this may actually be a really common encoding, but I don't know.
+// Header of a "fiin" file. It is read from disk in a single fread, so the member
+// layout has to match the file byte for byte.
+struct FileInfoHeader {
+    char magic[9]; // expected to hold "FileInfo" followed by a terminating zero byte
+    uint8_t dummy1[16];
+    uint8_t unknown; // version? always seems to be 4
+    uint8_t dummy2[2];
+    uint8_t unknown1; // low part of the value used to derive the entry count
+    uint8_t unknown2; // overflow (multiples of 256) of unknown1
+    uint8_t dummy[994];
+};
+
+// All members are byte-sized, so the struct should carry no padding; fail the
+// build instead of silently misreading files if that ever stops being true.
+static_assert(sizeof(FileInfoHeader) == 1024, "FileInfoHeader must be exactly 1024 bytes");
+
+// each entry is 96 bytes
+// One record of a "fiin" file. Records are read from disk with fread, so the
+// member layout has to match the file byte for byte.
+struct FileInfoEntry {
+    uint8_t dummy[8]; // length of file name in some format
+    char str[64]; // simple \0 encoded string
+    uint8_t dummy2[24]; // sha1 (NOTE(review): a SHA-1 digest is 20 bytes, this field is 24 - confirm)
+};
+
+// All members are byte-sized, so the struct should carry no padding; fail the
+// build instead of silently misreading files if that ever stops being true.
+static_assert(sizeof(FileInfoEntry) == 96, "FileInfoEntry must be exactly 96 bytes");
+
+// In-memory form of a parsed "fiin" file: the header plus the entries read after it.
+struct FileInfo {
+    FileInfoHeader header;
+    std::vector<FileInfoEntry> entries;
+};
+
+// Parses the "fiin" file at `path` and returns its header and entries.
+// The definition in src/fiinparser.cpp throws std::runtime_error when the file
+// cannot be opened or its magic is not "FileInfo".
+FileInfo readFileInfo(const std::string_view path);
\ No newline at end of file
diff --git a/include/gamedata.h b/include/gamedata.h
new file mode 100644
index 0000000..e23022c
--- /dev/null
+++ b/include/gamedata.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <string_view>
+#include <string>
+
+/*
+ * This handles reading/extracting the raw data from game data packs, such as dat0, index and index2 files.
+ * This is not local to "one" repository or sqpack, but oversees operation over all of them.
+ *
+ * This will "lazy-load" index and dat files as needed for now.
+ *
+ * This is definitely not the final name of this class :-p
+ */
+class GameData {
+public:
+    /*
+     * Initializes the game data manager. dataDirectory should point to the parent directory
+     * of the ex1/ex2/ffxiv directories.
+     */
+    explicit GameData(std::string_view dataDirectory);
+
+    /*
+     * Extracts the raw file at the game path dataFilePath and writes it to outPath.
+     */
+    void extractFile(std::string_view dataFilePath, std::string_view outPath);
+
+private:
+    /*
+     * Returns a proper SQEX-style filename for index, index2, and dat files.
+     * Filenames are in the format of {category}{expansion}{chunk}.{platform}.{type}
+     */
+    std::string calculateFilename(int category, int expansion, int chunk, std::string_view platform, std::string_view type);
+
+    /*
+     * Returns the repository and the category (in that order) for a given game path.
+     */
+    std::tuple<std::string, std::string> calculateRepositoryCategory(std::string_view path);
+
+    /*
+     * Calculates a uint64 hash from a given game path.
+     */
+    uint64_t calculateHash(std::string_view path);
+
+    // Directory passed to the constructor; repository directories are resolved relative to it.
+    std::string dataDirectory;
+};
\ No newline at end of file
diff --git a/include/indexparser.h b/include/indexparser.h
new file mode 100644
index 0000000..6c506e5
--- /dev/null
+++ b/include/indexparser.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <string_view>
+
+// these are methods dedicated to reading ".index" and ".index2" files
+// major thanks to xiv.dev for providing the struct definitions
+
+// Platform tag stored as a single byte in the SqPack header.
+enum PlatformId : uint8_t {
+    Win32 = 0,
+    PS3 = 1,
+    PS4 = 2,
+};
+
+// https://github.com/SapphireServer/Sapphire/blob/develop/deps/datReader/SqPack.cpp#L5
+// Leading part of the header found at the start of every SqPack file. It is read
+// from disk with fread, so the member layout has to match the file byte for byte.
+struct SqPackHeader
+{
+    char magic[0x8]; // expected to start with "SqPack"
+    PlatformId platformId;
+    uint8_t padding0[3];
+    uint32_t size; // the parser seeks to this offset to find the data that follows the header
+    uint32_t version;
+    uint32_t type;
+};
+
+// Guard the on-disk layout: 8 + 1 + 3 + 4 + 4 + 4 bytes.
+static_assert(sizeof(SqPackHeader) == 24, "SqPackHeader must be exactly 24 bytes");
+
+// Header of the index section of an .index/.index2 file. It is read from disk
+// with fread, so the member layout has to match the file byte for byte.
+struct SqPackIndexHeader
+{
+    uint32_t size;
+    uint32_t type;
+    uint32_t indexDataOffset; // absolute file offset of the hash table
+    uint32_t indexDataSize; // size of the hash table in bytes
+};
+
+// Guard the on-disk layout: four 32-bit fields.
+static_assert(sizeof(SqPackIndexHeader) == 16, "SqPackIndexHeader must be exactly 16 bytes");
+
+// One hash table record of an .index file. Records are read from disk with
+// fread, so the member layout has to match the file byte for byte.
+struct IndexHashTableEntry
+{
+    uint64_t hash;
+    // The three bit-fields below share a single 32-bit word.
+    uint32_t unknown : 1;
+    uint32_t dataFileId : 3;
+    uint32_t offset : 28; // in units of 0x80 bytes
+    uint32_t _padding;
+};
+
+// Bit-field packing is implementation-defined, so verify that the compiler
+// produced the 16-byte record the file format uses.
+static_assert(sizeof(IndexHashTableEntry) == 16, "IndexHashTableEntry must be exactly 16 bytes");
+
+// One hash table record of an .index2 file. Records are read from disk with
+// fread, so the member layout has to match the file byte for byte.
+struct Index2HashTableEntry
+{
+    uint32_t hash;
+    // The three bit-fields below share a single 32-bit word.
+    uint32_t unknown : 1;
+    uint32_t dataFileId : 3;
+    uint32_t offset : 28;
+};
+
+// Bit-field packing is implementation-defined, so verify that the compiler
+// produced the 8-byte record the file format uses.
+static_assert(sizeof(Index2HashTableEntry) == 8, "Index2HashTableEntry must be exactly 8 bytes");
+
+// Parsed contents of an index file. Entry is the hash table record type:
+// IndexHashTableEntry for .index files, Index2HashTableEntry for .index2 files.
+template<class Entry>
+struct IndexFile {
+    SqPackHeader packHeader;
+    SqPackIndexHeader indexHeader;
+
+    // Hash table records in the order they were read from the file.
+    std::vector<Entry> entries;
+};
+
+// Parse the .index / .index2 file at `path`. The definitions in src/indexparser.cpp
+// throw std::runtime_error when the file cannot be opened or its SqPack header is invalid.
+IndexFile<IndexHashTableEntry> readIndexFile(const std::string_view path);
+IndexFile<Index2HashTableEntry> readIndex2File(const std::string_view path);
\ No newline at end of file
diff --git a/include/string_utils.h b/include/string_utils.h
new file mode 100644
index 0000000..5c179c1
--- /dev/null
+++ b/include/string_utils.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <string>
+#include <string_view>
+#include <vector>
+
+// Splits `string` at every character contained in `delimiters` and returns the
+// non-empty pieces in order. Consecutive, leading and trailing delimiters
+// produce no empty tokens.
+// Declared inline because this header defines the function: without it, including
+// the header from more than one translation unit causes multiple-definition errors.
+inline std::vector<std::string> tokenize(const std::string_view string, const std::string_view& delimiters) {
+    std::vector<std::string> tokens;
+
+    // Jump from the start of one token to the start of the next.
+    size_t tokenStart = string.find_first_not_of(delimiters);
+    while(tokenStart != std::string_view::npos) {
+        size_t tokenEnd = string.find_first_of(delimiters, tokenStart);
+        if(tokenEnd == std::string_view::npos)
+            tokenEnd = string.length();
+
+        tokens.emplace_back(string.substr(tokenStart, tokenEnd - tokenStart));
+
+        tokenStart = string.find_first_not_of(delimiters, tokenEnd);
+    }
+
+    return tokens;
+}
+
+// Returns true when `b` occurs somewhere inside `a` (an empty `b` always matches).
+// Declared inline because this header defines the function: without it, including
+// the header from more than one translation unit causes multiple-definition errors.
+inline bool stringContains(const std::string_view a, const std::string_view b) {
+    return a.find(b) != std::string_view::npos;
+}
+
+// Returns a copy of `str` with every character converted by std::tolower.
+// Declared inline because this header defines the function: without it, including
+// the header from more than one translation unit causes multiple-definition errors.
+inline std::string toLowercase(std::string str) {
+    // std::tolower must be given a value representable as unsigned char; its int
+    // result is narrowed back to char explicitly.
+    std::transform(str.begin(), str.end(), str.begin(),
+                   [](unsigned char c){ return static_cast<char>(std::tolower(c)); });
+
+    return str;
+}
\ No newline at end of file
diff --git a/src/compression.cpp b/src/compression.cpp
new file mode 100644
index 0000000..0f7e00b
--- /dev/null
+++ b/src/compression.cpp
@@ -0,0 +1,30 @@
+#include "compression.h"
+
+#include <zlib.h>
+#include <stdexcept>
+
+// adopted from https://github.com/ahom/ffxiv_reverse/blob/312a0af8b58929fab48438aceae8da587be9407f/xiv/utils/src/zlib.cpp#L31
+// Decompresses a deflate stream that carries no zlib header.
+//   in / in_size:   compressed input buffer and its length in bytes
+//   out / out_size: destination buffer and its capacity in bytes
+// Throws std::runtime_error when zlib cannot be initialised or when the stream
+// does not decompress to its end in a single inflate call.
+void zlib::no_header_decompress(uint8_t* in, uint32_t in_size, uint8_t* out, uint32_t out_size) {
+    z_stream strm = {};
+    strm.next_in = in;
+    strm.avail_in = in_size;
+
+    // Init with -15 because we do not have header in this compressed data
+    auto ret = inflateInit2(&strm, -15);
+    if (ret != Z_OK) {
+        throw std::runtime_error("Error at zlib init: " + std::to_string(ret));
+    }
+
+    // Set pointers to the right addresses
+    strm.next_out = out;
+    strm.avail_out = out_size;
+
+    // Effectively decompress data
+    ret = inflate(&strm, Z_NO_FLUSH);
+
+    // Release zlib's internal state before reporting any error, so a failed
+    // inflate does not leak it.
+    inflateEnd(&strm);
+
+    if (ret != Z_STREAM_END) {
+        throw std::runtime_error("Error at zlib inflate: " + std::to_string(ret));
+    }
+}
\ No newline at end of file
diff --git a/src/crc32.cpp b/src/crc32.cpp
new file mode 100644
index 0000000..bda444b
--- /dev/null
+++ b/src/crc32.cpp
@@ -0,0 +1,27 @@
+#include "crc32.h"
+
+// Fills `table` with the per-byte CRC32 lookup values for the polynomial 0xEDB88320.
+void CRC32::generate_table(uint32_t(&table)[256]) {
+    const uint32_t polynomial = 0xEDB88320;
+
+    for (uint32_t byte = 0; byte < 256; ++byte) {
+        uint32_t value = byte;
+
+        // Process the byte one bit at a time, applying the polynomial whenever
+        // the bit shifted out is set.
+        for (int bit = 0; bit < 8; ++bit) {
+            const bool lowBitSet = (value & 1) != 0;
+            value >>= 1;
+            if (lowBitSet) {
+                value ^= polynomial;
+            }
+        }
+
+        table[byte] = value;
+    }
+}
+
+// Returns the CRC32 of the `len` bytes at `buf`, continuing from `initial`.
+// `table` must have been filled by generate_table().
+uint32_t CRC32::update(uint32_t (&table)[256], uint32_t initial, const void* buf, size_t len) {
+    const auto* bytes = static_cast<const uint8_t*>(buf);
+
+    // The running value is kept bit-inverted while bytes are folded in.
+    uint32_t crc = ~initial;
+    size_t index = 0;
+    while (index < len) {
+        const uint32_t slot = (crc ^ bytes[index]) & 0xFF;
+        crc = (crc >> 8) ^ table[slot];
+        ++index;
+    }
+
+    return ~crc;
+}
\ No newline at end of file
diff --git a/src/fiinparser.cpp b/src/fiinparser.cpp
new file mode 100644
index 0000000..92f673b
--- /dev/null
+++ b/src/fiinparser.cpp
@@ -0,0 +1,38 @@
+#include "fiinparser.h"
+
+#include <cstdio>
+#include <cstring>
+#include <fmt/format.h>
+
+// Parses the "fiin" file at `path` and returns its header and entries.
+// Throws std::runtime_error when the file cannot be opened, the header cannot
+// be read completely, or the magic is not "FileInfo".
+FileInfo readFileInfo(const std::string_view path) {
+    // Closes the stream on every exit path, including the throws below.
+    struct FileCloser {
+        FILE* handle;
+        ~FileCloser() {
+            if(handle != nullptr)
+                fclose(handle);
+        }
+    };
+
+    // A string_view is not guaranteed to be null-terminated, so copy it before
+    // handing it to fopen.
+    const std::string filePath(path);
+
+    FileCloser closer{fopen(filePath.c_str(), "rb")};
+    FILE* file = closer.handle;
+    if(!file) {
+        throw std::runtime_error("Failed to read file info from " + filePath);
+    }
+
+    FileInfo info;
+    if(fread(&info.header, sizeof info.header, 1, file) != 1) {
+        throw std::runtime_error("Failed to read fileinfo header!");
+    }
+
+    // Compare all nine bytes (text plus terminator) without relying on the
+    // file's bytes being null-terminated.
+    const char magic[9] = "FileInfo";
+    if(memcmp(info.header.magic, magic, sizeof magic) != 0) {
+        throw std::runtime_error("Invalid fileinfo magic!");
+    }
+
+    // unknown2 counts multiples of 256 on top of unknown1; together they give the
+    // total size of the entries in bytes. Combine them before dividing: dividing
+    // the two parts separately loses the remainders and miscounts.
+    const size_t entriesSize = static_cast<size_t>(info.header.unknown2) * 256 + info.header.unknown1;
+    const size_t numEntries = entriesSize / sizeof(FileInfoEntry);
+
+    info.entries.reserve(numEntries);
+    for(size_t i = 0; i < numEntries; i++) {
+        FileInfoEntry entry;
+        // Stop at the end of the file instead of storing a partially read entry.
+        if(fread(&entry, sizeof entry, 1, file) != 1)
+            break;
+
+        info.entries.push_back(entry);
+    }
+
+    return info;
+}
\ No newline at end of file
diff --git a/src/gamedata.cpp b/src/gamedata.cpp
new file mode 100644
index 0000000..61e0dea
--- /dev/null
+++ b/src/gamedata.cpp
@@ -0,0 +1,183 @@
+#include "gamedata.h"
+#include "indexparser.h"
+#include "crc32.h"
+#include "compression.h"
+#include "string_utils.h"
+
+#include <string>
+#include <algorithm>
+#include <fmt/printf.h>
+
+// TODO: should be enum?
+// taken from https://xiv.dev/data-files/sqpack#categories
+// Maps a category name (the first component of a game path) to its numeric category ID.
+// The keys are views over the string literals below.
+// NOTE: operator[] on this map inserts missing keys, and an inserted key would only be a
+// view over the string used for the lookup - prefer find() when looking categories up.
+std::unordered_map<std::string_view, int> categoryToID = {
+        {"common", 0},
+        {"bgcommon", 1},
+        {"bg", 2},
+        {"cut", 3},
+        {"chara", 4},
+        {"shader", 5},
+        {"ui", 6},
+        {"sound", 7},
+        {"vfx", 8},
+        {"ui_script", 9},
+        {"exd", 10},
+        {"game_script", 11},
+        {"music", 12},
+        {"sqpack_test", 13},
+        {"debug", 14},
+};
+
+// Stores a copy of the directory that contains the repository directories.
+GameData::GameData(const std::string_view dataDirectory)
+    : dataDirectory(dataDirectory) {
+}
+
+// Calculates the 64-bit index hash of a game path: the JAMCRC of the lowercased
+// directory part in the upper 32 bits and the JAMCRC of the lowercased filename
+// part in the lower 32 bits.
+uint64_t GameData::calculateHash(const std::string_view path) {
+    // Build the string from the view itself: path.data() alone is not guaranteed
+    // to be null-terminated and would ignore the view's length.
+    const std::string data = toLowercase(std::string(path));
+
+    // Split at the last '/'. When the path has no '/', find_last_of returns npos,
+    // npos + 1 wraps to 0, and both parts end up being the whole string.
+    const auto lastSeperator = data.find_last_of('/');
+    const std::string filename = data.substr(lastSeperator + 1);
+    const std::string directory = data.substr(0, lastSeperator);
+
+    uint32_t table[256] = {};
+    CRC32::generate_table(table);
+
+    // we actually want JAMCRC, which is just the bitwise not of a regular crc32 hash
+    const uint32_t directoryCrc = ~CRC32::update(table, 0, directory.data(), directory.size());
+    const uint32_t filenameCrc = ~CRC32::update(table, 0, filename.data(), filename.size());
+
+    return static_cast<uint64_t>(directoryCrc) << 32 | filenameCrc;
+}
+
+// Returns {repository, category} for a game path.
+// The category is the first path component. The repository is the second
+// component when it names an expansion ("ex" followed only by digits, e.g. "ex1")
+// and the category is not an "exd" one; otherwise it is "ffxiv".
+// Throws std::runtime_error when the path contains no components at all.
+std::tuple<std::string, std::string> GameData::calculateRepositoryCategory(std::string_view path) {
+    const auto tokens = tokenize(path, "/");
+    if(tokens.empty()) {
+        throw std::runtime_error("Cannot determine the category of path: " + std::string(path));
+    }
+
+    const std::string& category = tokens[0];
+    std::string repository = "ffxiv";
+
+    // Only look at the second component when there is one.
+    if(tokens.size() > 1 && !stringContains(category, "exd")) {
+        const std::string& candidate = tokens[1];
+
+        // Require the "ex<digits>" shape: a plain substring test would also accept
+        // ordinary directory names that merely contain "ex" (such as "texture").
+        const bool isExpansion = candidate.size() > 2
+                && candidate.compare(0, 2, "ex") == 0
+                && std::all_of(candidate.begin() + 2, candidate.end(),
+                               [](char c){ return c >= '0' && c <= '9'; });
+        if(isExpansion) {
+            repository = candidate;
+        }
+    }
+
+    return {repository, category};
+}
+
+// Returns the numeric expansion ID for a repository name: 0 for "ffxiv",
+// otherwise the number parsed from (up to) two characters following the
+// two-character prefix, e.g. "ex1" gives 1.
+int getExpansionID(std::string_view repositoryName) {
+    const bool isBaseGame = (repositoryName == "ffxiv");
+    if(!isBaseGame) {
+        const std::string digits(repositoryName.substr(2, 2));
+        return std::stoi(digits);
+    }
+
+    return 0;
+}
+
+// Builds a pack filename: category, expansion and chunk as two-digit lowercase hex
+// values, followed by ".{platform}.{type}".
+std::string GameData::calculateFilename(const int category, const int expansion, const int chunk, const std::string_view platform, const std::string_view type) {
+    std::string filename = fmt::sprintf("%02x%02x%02x.%s.%s", category, expansion, chunk, platform, type);
+    return filename;
+}
+
+// Extracts the raw file stored under the game path dataFilePath and writes it to outPath.
+// The path is hashed and looked up in the win32 .index file of its repository and
+// category; the blocks of the matching data file entry are then read, decompressed
+// where needed, and concatenated into the output file.
+// Throws std::runtime_error when the category is unknown, the path is not present
+// in the index, a file cannot be opened, the data is truncated or malformed, or
+// the entry is not a "Standard" file.
+void GameData::extractFile(std::string_view dataFilePath, std::string_view outPath) {
+    // Closes the wrapped C stream on scope exit so no handle leaks when an error is thrown.
+    struct FileCloser {
+        FILE* handle;
+        ~FileCloser() {
+            if(handle != nullptr)
+                fclose(handle);
+        }
+    };
+
+    const uint64_t hash = calculateHash(dataFilePath);
+    auto [repository, category] = calculateRepositoryCategory(dataFilePath);
+
+    // Use find() rather than operator[]: operator[] would insert unknown categories
+    // into the global map, keyed by a view over a string local to this call.
+    const auto categoryEntry = categoryToID.find(category);
+    if(categoryEntry == categoryToID.end()) {
+        throw std::runtime_error("Unknown category in path " + std::string(dataFilePath));
+    }
+    const int categoryID = categoryEntry->second;
+    const int expansionID = getExpansionID(repository);
+    const std::string repositoryDirectory = dataDirectory + "/" + repository + "/";
+
+    // TODO: handle platforms other than win32
+    const auto indexFilename = calculateFilename(categoryID, expansionID, 0, "win32", "index");
+
+    // TODO: handle hashes in index2 files (we can read them but it's not setup yet.)
+    const auto indexFile = readIndexFile(repositoryDirectory + indexFilename);
+
+    for(const auto& entry : indexFile.entries) {
+        if(entry.hash != hash)
+            continue;
+
+        // The data file id selects the ".datN" file; it is not part of the chunk number.
+        const std::string dataType = "dat" + std::to_string(static_cast<unsigned>(entry.dataFileId));
+        const auto dataFilename = calculateFilename(categoryID, expansionID, 0, "win32", dataType);
+
+        FileCloser dataFile{fopen((repositoryDirectory + dataFilename).c_str(), "rb")};
+        FILE* file = dataFile.handle;
+        if(file == nullptr) {
+            throw std::runtime_error("Failed to open data file: " + dataFilename);
+        }
+
+        // Reads exactly `size` bytes or throws; a short read means the data file is truncated.
+        const auto readExact = [file, dataFilePath](void* destination, size_t size) {
+            if(size != 0 && fread(destination, size, 1, file) != 1) {
+                throw std::runtime_error("Unexpected end of data while extracting " + std::string(dataFilePath));
+            }
+        };
+
+        // NOTE: fseek takes a long, so positions beyond LONG_MAX cannot be reached here.
+        const auto seekTo = [file, dataFilePath](uint64_t position) {
+            if(fseek(file, static_cast<long>(position), SEEK_SET) != 0) {
+                throw std::runtime_error("Failed to seek while extracting " + std::string(dataFilePath));
+            }
+        };
+
+        // Widen before multiplying: the 28-bit field promotes to int, and
+        // multiplying that by 0x80 can overflow.
+        const uint64_t offset = static_cast<uint64_t>(entry.offset) * 0x80;
+        seekTo(offset);
+
+        enum FileType : int32_t {
+            Empty = 1,
+            Standard = 2,
+            Model = 3,
+            Texture = 4
+        };
+
+        struct FileInfo {
+            uint32_t size; // size of this header; the block data starts right after it
+            FileType fileType;
+            int32_t fileSize;
+            uint32_t dummy[2];
+            uint32_t numBlocks;
+        } info = {};
+
+        readExact(&info, sizeof(FileInfo));
+
+        if(info.fileType != FileType::Standard) {
+            throw std::runtime_error("File type is not handled yet for " + std::string(dataFilePath));
+        }
+
+        struct Block {
+            int32_t offset; // relative to the end of the file header
+            int16_t dummy;
+            int16_t dummy2;
+        };
+
+        // The block table follows the header directly; read it in one go.
+        std::vector<Block> blocks(info.numBlocks);
+        readExact(blocks.data(), blocks.size() * sizeof(Block));
+
+        std::vector<std::uint8_t> data;
+
+        const uint64_t startingPos = offset + info.size;
+        for(const auto& block : blocks) {
+            struct BlockHeader {
+                int32_t size;
+                int32_t dummy;
+                int32_t compressedLength; // values below 32000 are treated as compressed data
+                int32_t decompressedLength;
+            } header = {};
+
+            if(block.offset < 0) {
+                throw std::runtime_error("Invalid block offset in " + std::string(dataFilePath));
+            }
+
+            seekTo(startingPos + static_cast<uint64_t>(block.offset));
+            readExact(&header, sizeof(BlockHeader));
+
+            // Negative lengths would turn into enormous sizes once converted to size_t.
+            if(header.compressedLength < 0 || header.decompressedLength < 0) {
+                throw std::runtime_error("Invalid block length in " + std::string(dataFilePath));
+            }
+
+            // Grow the output and let this block fill the new tail directly.
+            const size_t blockStart = data.size();
+            data.resize(blockStart + static_cast<size_t>(header.decompressedLength));
+
+            const bool isCompressed = header.compressedLength < 32000;
+            if(isCompressed) {
+                std::vector<uint8_t> compressed_data(static_cast<size_t>(header.compressedLength));
+                readExact(compressed_data.data(), compressed_data.size());
+
+                zlib::no_header_decompress(compressed_data.data(),
+                                           static_cast<uint32_t>(compressed_data.size()),
+                                           data.data() + blockStart,
+                                           static_cast<uint32_t>(header.decompressedLength));
+            } else {
+                readExact(data.data() + blockStart, static_cast<size_t>(header.decompressedLength));
+            }
+        }
+
+        // Copy the view before passing it to fopen (it need not be null-terminated), and
+        // open in binary mode so the bytes are written unmodified on every platform.
+        const std::string outputPath(outPath);
+        FileCloser outputFile{fopen(outputPath.c_str(), "wb")};
+        if(outputFile.handle == nullptr) {
+            throw std::runtime_error("Failed to open output file: " + outputPath);
+        }
+
+        if(!data.empty() && fwrite(data.data(), data.size(), 1, outputFile.handle) != 1) {
+            throw std::runtime_error("Failed to write output file: " + outputPath);
+        }
+
+        // Close explicitly so a failed flush is reported instead of being ignored.
+        FILE* finished = outputFile.handle;
+        outputFile.handle = nullptr;
+        if(fclose(finished) != 0) {
+            throw std::runtime_error("Failed to write output file: " + outputPath);
+        }
+
+        fmt::print("Extracted {} to {}", dataFilePath, outPath);
+        return;
+    }
+
+    // Nothing in the index matched, so nothing was extracted.
+    throw std::runtime_error("File not found in index: " + std::string(dataFilePath));
+}
diff --git a/src/indexparser.cpp b/src/indexparser.cpp
new file mode 100644
index 0000000..45d4ad1
--- /dev/null
+++ b/src/indexparser.cpp
@@ -0,0 +1,65 @@
+#include "indexparser.h"
+
+#include <cstdio>
+#include <cstring>
+#include <stdexcept>
+
+// Reads the SqPack header and the index header from `file` into `index` and
+// leaves the stream positioned at the start of the hash table.
+// Throws std::runtime_error when a read or seek fails, the magic does not start
+// with "SqPack", or the version is not 1.
+template<class T>
+void commonParseSqPack(FILE* file, IndexFile<T>& index) {
+    if(fread(&index.packHeader, sizeof(SqPackHeader), 1, file) != 1) {
+        throw std::runtime_error("Failed to read sqpack header.");
+    }
+
+    // Bounded comparison: the magic is a fixed-size array that is not
+    // guaranteed to contain a terminator.
+    if(strncmp(index.packHeader.magic, "SqPack", sizeof index.packHeader.magic) != 0) {
+        throw std::runtime_error("Invalid sqpack magic.");
+    }
+
+    // Validate the version before trusting any other header field.
+    if(index.packHeader.version != 1) {
+        throw std::runtime_error("Invalid sqpack version.");
+    }
+
+    // data starts at size
+    if(fseek(file, index.packHeader.size, SEEK_SET) != 0) {
+        throw std::runtime_error("Failed to seek to sqpack index header.");
+    }
+
+    // read index header
+    if(fread(&index.indexHeader, sizeof(SqPackIndexHeader), 1, file) != 1) {
+        throw std::runtime_error("Failed to read sqpack index header.");
+    }
+
+    if(fseek(file, index.indexHeader.indexDataOffset, SEEK_SET) != 0) {
+        throw std::runtime_error("Failed to seek to sqpack index data.");
+    }
+}
+
+// Parses the .index file at `path` and returns its headers and hash table entries.
+// Throws std::runtime_error when the file cannot be opened, its headers are
+// invalid, or the hash table is shorter than the index header announces.
+IndexFile<IndexHashTableEntry> readIndexFile(const std::string_view path) {
+    // Closes the stream on every exit path; it was previously never closed.
+    struct FileCloser {
+        FILE* handle;
+        ~FileCloser() {
+            if(handle != nullptr)
+                fclose(handle);
+        }
+    };
+
+    // A string_view is not guaranteed to be null-terminated, so copy it before
+    // handing it to fopen.
+    const std::string filePath(path);
+
+    FileCloser closer{fopen(filePath.c_str(), "rb")};
+    FILE* file = closer.handle;
+    if(!file) {
+        throw std::runtime_error("Failed to read index file from " + filePath);
+    }
+
+    IndexFile<IndexHashTableEntry> index;
+    commonParseSqPack(file, index);
+
+    // indexDataSize is in bytes; read the whole table in one call.
+    const size_t numEntries = index.indexHeader.indexDataSize / sizeof(IndexHashTableEntry);
+    index.entries.resize(numEntries);
+    if(numEntries != 0 && fread(index.entries.data(), sizeof(IndexHashTableEntry), numEntries, file) != numEntries) {
+        throw std::runtime_error("Failed to read index entries from " + filePath);
+    }
+
+    return index;
+}
+
+// Parses the .index2 file at `path` and returns its headers and hash table entries.
+// Throws std::runtime_error when the file cannot be opened, its headers are
+// invalid, or the hash table is shorter than the index header announces.
+IndexFile<Index2HashTableEntry> readIndex2File(const std::string_view path) {
+    // Closes the stream on every exit path; it was previously never closed.
+    struct FileCloser {
+        FILE* handle;
+        ~FileCloser() {
+            if(handle != nullptr)
+                fclose(handle);
+        }
+    };
+
+    // A string_view is not guaranteed to be null-terminated, so copy it before
+    // handing it to fopen.
+    const std::string filePath(path);
+
+    FileCloser closer{fopen(filePath.c_str(), "rb")};
+    FILE* file = closer.handle;
+    if(!file) {
+        throw std::runtime_error("Failed to read index2 file from " + filePath);
+    }
+
+    IndexFile<Index2HashTableEntry> index;
+    commonParseSqPack(file, index);
+
+    // indexDataSize is a byte count, not an entry count: divide by the record
+    // size (as readIndexFile does) and read the whole table in one call.
+    const size_t numEntries = index.indexHeader.indexDataSize / sizeof(Index2HashTableEntry);
+    index.entries.resize(numEntries);
+    if(numEntries != 0 && fread(index.entries.data(), sizeof(Index2HashTableEntry), numEntries, file) != numEntries) {
+        throw std::runtime_error("Failed to read index2 entries from " + filePath);
+    }
+
+    return index;
+}
\ No newline at end of file