// SPDX-FileCopyrightText: 2024 Joshua Goins // SPDX-License-Identifier: GPL-3.0-or-later use std::collections::HashMap; use std::io::{Cursor, Seek, SeekFrom}; use binrw::{BinRead, BinReaderExt}; use binrw::binrw; use crate::ByteSpan; // Based off of https://github.com/Lotlab/ffxiv-vulgar-words-reader/ // Credit goes to Jim Kirisame for documenting this format // TODO: double check I'm reading everything correctly #[binrw] #[derive(Debug)] #[brw(little)] pub struct EntryItem { flag: u32, sibling: u32, child: u32, offset: u32 } #[binrw] #[derive(Debug)] #[brw(little)] struct DictionaryHeader { #[br(seek_before = SeekFrom::Start(0x8124))] #[br(count = 256)] chara_replace1: Vec, #[br(count = 256)] chara_replace2: Vec, #[br(count = 256)] chara_replace3: Vec, #[br(count = 5)] block_offsets: Vec, #[br(count = 5)] block_lengths: Vec, #[br(pad_before = 4)] #[br(count = 256)] chara_block: Vec, #[br(ignore)] begin_node: Vec, #[br(ignore)] inner_node: Vec, #[br(ignore)] chara: Vec, #[br(ignore)] word: Vec, #[br(ignore)] entries: Vec, } pub struct Dictionary { header: DictionaryHeader, pub words: Vec } impl Dictionary { /// Parses an existing dictionary file. pub fn from_existing(buffer: ByteSpan) -> Option { let mut cursor = Cursor::new(buffer); let mut dict = DictionaryHeader::read(&mut cursor).unwrap(); let map_start = 0x8750u32; let map_size = 0x200u32; // fix up offsets for offset in &mut dict.block_offsets { *offset = *offset + map_start + map_size; } for i in 0..dict.block_lengths[0] / 2 { let offset = dict.block_offsets[0] + i * 2; cursor.seek(SeekFrom::Start(offset as u64)).ok()?; dict.begin_node.push(cursor.read_le::().ok()?); } for i in 0..dict.block_lengths[1] / 2 { let offset = dict.block_offsets[1] + i * 2; cursor.seek(SeekFrom::Start(offset as u64)).ok()?; dict.inner_node.push(cursor.read_le::().ok()?); } for i in 0..dict.block_lengths[2] / 2 { let offset = dict.block_offsets[2] + i * 2; cursor.seek(SeekFrom::Start(offset as u64)).ok()?; dict.chara.push(cursor.read_le::().ok()?); } for i in 0..dict.block_lengths[3] / 2 { let offset = dict.block_offsets[3] + i * 2; cursor.seek(SeekFrom::Start(offset as u64)).ok()?; dict.word.push(cursor.read_le::().ok()?); } for i in 0..dict.block_lengths[4] / 16 { let offset = dict.block_offsets[4] + i * 16; cursor.seek(SeekFrom::Start(offset as u64)).ok()?; dict.entries.push(cursor.read_le::().ok()?); } let mut dict = Dictionary { header: dict, words: Vec::new() }; // TODO: lol dict.words = dict.list_words()?; Some(dict) } fn list_words(&self) -> Option> { let mut result = Vec::new(); let lut = self.generate_index_rune_lookup_table(); for (id, v) in self.header.begin_node.iter().enumerate() { if *v == 0 { continue; } let chara = Dictionary::index_to_rune(&lut, id as u32); self.dump_dict_node(&mut result, *v as i32, String::from(chara as u8 as char)) } Some(result) } fn generate_index_rune_lookup_table(&self) -> HashMap { let mut map = HashMap::new(); for i in 0..self.header.chara_block.len() { map.insert(self.header.chara_block[i] as u16, i as u16); } map } fn index_to_rune(lookup_table: &HashMap, index: u32) -> i32 { let higher = index >> 8; let lower = index & 0xFF; if higher == 0 { return 0; } return if let Some(new_val) = lookup_table.get(&(higher as u16)) { (((*new_val as u32) << 8) + lower) as i32 } else { 0 } } fn dump_dict_node(&self, vec: &mut Vec, entry_id: i32, prev: String) { let node = &self.header.entries[entry_id as usize]; for i in 0..node.sibling { let Some(current) = self.get_string(entry_id, i as i32) else { return; }; if node.child == 0 { vec.push(prev.clone() + ¤t); continue; } let value = self.header.inner_node[(node.child + i) as usize]; if value == 0 { vec.push(prev.clone() + ¤t); continue; } self.dump_dict_node(vec, value as i32, prev.clone() + ¤t); } } fn get_string(&self, entry_id: i32, sibling_id: i32) -> Option { if let Some(characters) = self.get_string_characters(entry_id, sibling_id) { return String::from_utf16(&characters).ok(); } None } fn get_string_characters(&self, entry_id: i32, sibling_id: i32) -> Option> { if entry_id as usize >= self.header.entries.len() { return None; } let entry = self.header.entries.get(entry_id as usize)?; if entry.flag == 0 { let pos = (entry.offset / 2) as i32 + sibling_id; if pos as usize > self.header.chara.len() { return None; } if self.header.chara[pos as usize] == 0 { return None; } return Some(vec![self.header.chara[pos as usize]]); } let begin = entry.offset / 2; let mut end = begin + 1; while (end as usize) < self.header.word.len() && self.header.word[end as usize] != 0 { end += 1; } Some(self.header.word[begin as usize..end as usize].to_vec()) } } #[cfg(test)] mod tests { use std::fs::read; use std::path::PathBuf; use super::*; #[test] fn test_invalid() { let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); d.push("resources/tests"); d.push("random"); // Feeding it invalid data should not panic Dictionary::from_existing(&read(d).unwrap()); } }