aboutsummaryrefslogtreecommitdiff
path: root/doc/HowTo/PakReader.md
diff options
context:
space:
mode:
Diffstat (limited to 'doc/HowTo/PakReader.md')
-rw-r--r--doc/HowTo/PakReader.md421
1 files changed, 421 insertions, 0 deletions
diff --git a/doc/HowTo/PakReader.md b/doc/HowTo/PakReader.md
new file mode 100644
index 0000000..e7071c3
--- /dev/null
+++ b/doc/HowTo/PakReader.md
@@ -0,0 +1,421 @@
+# HowTo: Reading PACK (pak) Archives
+
+## Overview
+
+In this guide, we will be creating a program called `PakReader`, which will be
+able to read PAK archives (list all files within the archive or extract a
+specific file from the archive).
+
+## Prerequisites
+
+We want to implement a simple command-line utility that can:
+
+- List the contents of the archive
+- Extract a specific file from the archive
+
+To extract the `Text.txt` file from the `sample.pak` archive to `output.txt`, we
+would run the following command:
+
+```sh
+./PakReader -i Text.txt -o output.txt sample.pak
+```
+
+To list files in the `sample.pak` archive, we would run the following command:
+
+```sh
+./PakReader -l sample.pak
+```
+
+## PACK Archive Format
+
+PACK archives are extremely simple. A PACK archive consists of three parts:
+
+- Header at the beginning of the file
+- File table with the name, offset, and size of each file
+- File data
+
+Header has the following structure:
+
+| Name | Type | Information |
+|--------|---------------|-------------------------------------|
+| id | 4 byte string | Always contains the word "PACK" |
+| offset | uint32 | Offset of the file table (in bytes) |
+| size | uint32 | Size of the file table (in bytes) |
+
+File table entry has the following structure:
+
+| Name | Type | Information |
+|--------|----------------|-------------------------------------|
+| name | 56 byte string | Full file path (null-terminated) |
+| offset | uint32 | Offset in the file (in bytes) |
+| size | uint32 | Size of the file (in bytes) |
+
+## Includes
+
+To implement this utility, we are going to need to include the following headers:
+
+- `BH/IO.h` to work with files (or input/output devices)
+- `BH/Util.h` to read integers with specific width and endianness
+- `BH/Args.h` to work with command-line arguments
+
+## Working with Files
+
+Working with files in BHLib is based around the IO device (called `BH_IO`).
+Firstly, you need to create an IO device with the `BH_FileNew` function.
+Secondly, you need to open the IO device with the `BH_IOOpen` function. While
+opening the IO device, you can specify in which mode it will work: reading
+(`BH_IO_READ`) or writing (`BH_IO_WRITE`). Additionally, you can specify whether
+the IO device (or in our case, the file) should exist before opening
+(`BH_IO_EXIST`), should be truncated before opening (`BH_IO_TRUNCATE`), should
+be created (`BH_IO_CREATE`), or should be opened in append mode (`BH_IO_APPEND`).
+
+Here is an example for opening an existing file in read-only mode:
+
+```c
+/* Create an IO device for the file; it is not opened yet */
+BH_IO *io = BH_FileNew("coolfile.dat");
+
+/* Open for reading; BH_IO_EXIST asks for the file to exist before opening */
+if (BH_IOOpen(io, BH_IO_READ | BH_IO_EXIST))
+{
+    /* A non-zero result is treated as a failure to open */
+    printf("Can't open file 'coolfile.dat'\n");
+    BH_IOFree(io);
+    return -1;
+}
+```
+
+## Reading Data from the File
+
+In general, it's best to read some data into a temporary buffer, then parse the
+data into variables/structure fields. This makes sure you will not run into
+issues caused by misalignment (slowdowns or exceptions/crashes).
+
+For reading or writing integers, it's recommended to use `BH_Read*` and
+`BH_Write*`, respectively. Suffixes `16`, `32`, `64` denote integers' width in
+bits. Suffixes `LE` and `BE` denote endianness (LittleEndian and BigEndian).
+Suffixes `u` and `s` denote unsigned or signed integers.
+
+For example, the function `BH_Read32LEs` reads a signed 32-bit integer that is
+stored in LittleEndian format.
+
+Below is an example for reading header data into the structure `header`:
+
+```c
+/* Raw header bytes; fields are decoded from this buffer, not read in place */
+char raw[HEADER_SIZE];
+size_t got;
+
+/* A failed or short read means there is no complete header */
+if (BH_IORead(io, raw, HEADER_SIZE, &got))
+    return BH_ERROR;
+if (got != HEADER_SIZE)
+    return BH_ERROR;
+
+/* The first four bytes must be the "PACK" signature */
+if (memcmp(raw, "PACK", 4) != 0)
+    return BH_ERROR;
+
+/* Decode the little-endian fields that follow the signature */
+memcpy(header->id, raw, 4);
+header->offset = BH_Read32LEu(raw + 4);
+header->size = BH_Read32LEu(raw + 8);
+```
+
+## Working with Command-Line Arguments
+
+Parsing command-line arguments is somewhat similar to using the `getopt_long`
+function (a GNU extension of POSIX `getopt`). Firstly, you need to define what
+options (or switches) are present in your program. To do this, create an array
+of `BH_ArgsOption`, filled with option definitions and one empty element at the
+end of the array.
+
+Each element consists of the following:
+
+- Key
+- Name
+- Flags
+- Description or documentation string
+
+`Key` is used to identify options while processing them. Key value can be an
+ASCII character or an integer. If key is an ASCII character, it becomes an
+option's short name (key 'd' becomes `-d`).
+
+`Name` is used to represent 'long' option names (name 'hello-world' becomes
+`--hello-world`). You can omit `Name`.
+
+`Flags` are used to specify whether an option requires a value (or whether the
+value is optional).
+
+`Description` is used in pair with the function `BH_ArgsHelp` to display
+information about the option.
+
+Then you need to define a `callback` function that will do something in response
+to the option being parsed.
+
+The array of options, the option callback, and ARGC/ARGV are then passed to the
+function `BH_ArgsParse`.
+
+Here is an example of using the options:
+
+```c
+/* Option table; each row is {key, long name, flags, description}.
+ * The all-zero row marks the end of the array. */
+static BH_ArgsOption options[] = {
+ {'h', "help", 0, "Display this help"},
+ {'l', "list", 0, "List files in the archive instead of reading"},
+ {'i', "input", BH_ARGS_VALUE, "Input file in archive"},
+ {'o', "output", BH_ARGS_VALUE, "Output file"},
+ {0, NULL, 0, NULL}
+};
+
+/* Option handler passed to BH_ArgsParse.
+ * `key` identifies the parsed item, `arg` carries its value (or the
+ * positional argument), and `data` is the Config given to BH_ArgsParse.
+ * Always returns BH_OK. */
+static int callback(int key, char *arg, void *data)
+{
+    Config *cfg = (Config *)data;
+
+    switch (key)
+    {
+    case 'h':
+        /* Help was requested: print it and leave immediately */
+        PrintHelp();
+        exit(0);
+
+    case 'l':
+        cfg->list = 1;
+        break;
+
+    case 'i':
+        cfg->input = arg;
+        break;
+
+    case 'o':
+        cfg->output = arg;
+        break;
+
+    case BH_ARGS_ARGUMENT:
+        /* Only the first positional argument is kept */
+        if (cfg->file == NULL)
+            cfg->file = arg;
+        break;
+
+    case BH_ARGS_UNKNOWN:
+    default:
+        /* Anything else is ignored */
+        break;
+    }
+
+    return BH_OK;
+}
+
+/* Example entry point: parse the command line into a Config. */
+int main(int argc, char **argv)
+{
+    Config config;
+
+    /* Start from an all-zero configuration so unset options are detectable */
+    memset(&config, 0, sizeof(config));
+
+    /* A parse failure or a missing positional file both end with the help */
+    if (BH_ArgsParse(argc, argv, options, callback, &config) != 0 ||
+        config.file == NULL)
+    {
+        PrintHelp();
+        return -1;
+    }
+
+    /* ... */
+    return 0;
+}
+```
+
+## Putting Everything Together
+
+Now, let's put everything together and implement `PakReader`.
+
+```c
+#include <BH/Args.h>
+#include <BH/IO.h>
+#include <BH/Util.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/* PAK header and entry structures and some constants.
+ * HEADER_SIZE and ENTRY_SIZE are the sizes, in bytes, of the raw header and
+ * of one raw file table entry as they are read from the archive. */
+#define HEADER_SIZE 12
+#define ENTRY_SIZE 64
+
+/* Decoded archive header (filled in by ParseHeader) */
+typedef struct PakHeader
+{
+ /* Signature bytes; ParseHeader only accepts "PACK" (not null-terminated) */
+ char id[4];
+ /* Offset of the file table, in bytes */
+ uint32_t offset;
+ /* Size of the file table, in bytes */
+ uint32_t size;
+} PakHeader;
+
+
+/* Decoded file table entry (filled in by ParseEntry) */
+typedef struct PakEntry
+{
+ /* Full file path; ParseEntry forces the last byte to be a terminator */
+ char name[56];
+ /* Offset of the file data within the archive, in bytes */
+ uint32_t offset;
+ /* Size of the file data, in bytes */
+ uint32_t size;
+} PakEntry;
+
+
+/* Read and decode the archive header from the current position of `io`.
+ * Returns BH_OK on success; BH_ERROR when the read fails, comes up short,
+ * or the data does not start with the "PACK" signature. */
+static int ParseHeader(BH_IO *io, PakHeader *header)
+{
+    char raw[HEADER_SIZE];
+    size_t got;
+
+    /* The whole header must be available */
+    if (BH_IORead(io, raw, HEADER_SIZE, &got))
+        return BH_ERROR;
+    if (got != HEADER_SIZE)
+        return BH_ERROR;
+
+    /* Reject anything that is not a PACK archive */
+    if (memcmp(raw, "PACK", 4) != 0)
+        return BH_ERROR;
+
+    /* Signature, then two little-endian 32-bit fields */
+    memcpy(header->id, raw, 4);
+    header->offset = BH_Read32LEu(raw + 4);
+    header->size = BH_Read32LEu(raw + 8);
+
+    return BH_OK;
+}
+
+
+/* Read and decode one file table entry from the current position of `io`.
+ * Returns BH_OK on success; BH_ERROR when the read fails or comes up short. */
+static int ParseEntry(BH_IO *io, PakEntry *entry)
+{
+    char raw[ENTRY_SIZE];
+    size_t got;
+    const size_t nameSize = sizeof(entry->name);
+
+    if (BH_IORead(io, raw, ENTRY_SIZE, &got))
+        return BH_ERROR;
+    if (got != ENTRY_SIZE)
+        return BH_ERROR;
+
+    /* Copy the name and force termination in case the stored one lacks it */
+    memcpy(entry->name, raw, nameSize);
+    entry->name[nameSize - 1] = 0;
+
+    /* Two little-endian 32-bit fields follow the name */
+    entry->offset = BH_Read32LEu(raw + nameSize);
+    entry->size = BH_Read32LEu(raw + nameSize + 4);
+
+    return BH_OK;
+}
+
+
+/* Configuration and options */
+/* Settings collected from the command line by OptionsCallback */
+typedef struct Config
+{
+ /* Archive to open: the first positional argument */
+ char *file;
+ /* Name of the file inside the archive to extract (-i / --input) */
+ char *input;
+ /* Path the extracted data is written to (-o / --output) */
+ char *output;
+ /* Non-zero when listing was requested (-l / --list) */
+ int list;
+} Config;
+
+
+/* Option table handed to BH_ArgsParse and BH_ArgsHelp.
+ * Each row is {key, long name, flags, description}; BH_ARGS_VALUE presumably
+ * marks options that take a value (confirm against BH/Args.h). The all-zero
+ * row marks the end of the array. */
+static BH_ArgsOption options[] = {
+ {'h', "help", 0, "Display this help"},
+ {'l', "list", 0, "List files in the archive instead of reading"},
+ {'i', "input", BH_ARGS_VALUE, "Input file in archive"},
+ {'o', "output", BH_ARGS_VALUE, "Output file"},
+ {0, NULL, 0, NULL}
+};
+
+
+/* Print the usage line, then the per-option help produced by BH_ArgsHelp */
+static void PrintHelp(void)
+{
+    fputs("Usage: PakReader [options...] <file>\n", stdout);
+    BH_ArgsHelp(options, 0);
+}
+
+
+/* Option handler passed to BH_ArgsParse.
+ * `key` identifies the parsed item, `arg` carries its value (or the
+ * positional argument), and `data` is the Config given to BH_ArgsParse.
+ * Always returns BH_OK. */
+static int OptionsCallback(int key, char *arg, void *data)
+{
+    Config *cfg = (Config *)data;
+
+    switch (key)
+    {
+    case 'h':
+        /* Help was requested: print it and leave immediately */
+        PrintHelp();
+        exit(0);
+
+    case 'l':
+        cfg->list = 1;
+        break;
+
+    case 'i':
+        cfg->input = arg;
+        break;
+
+    case 'o':
+        cfg->output = arg;
+        break;
+
+    case BH_ARGS_ARGUMENT:
+        /* Only the first positional argument is kept as the archive path */
+        if (cfg->file == NULL)
+            cfg->file = arg;
+        break;
+
+    case BH_ARGS_UNKNOWN:
+    default:
+        /* Anything else is ignored */
+        break;
+    }
+
+    return BH_OK;
+}
+
+
+/* Copy data between two IO */
+/*
+ * Copy data between two IO.
+ *
+ * Transfers exactly `size` bytes from the current position of `from` to
+ * `to`, in chunks no larger than the temporary buffer.
+ *
+ * Returns BH_OK when every byte was copied; BH_ERROR as soon as a read or
+ * write fails or transfers fewer bytes than requested.
+ */
+static int CopyData(BH_IO *from,
+                    BH_IO *to,
+                    size_t size)
+{
+    size_t length, actual;
+    char tmp[512];
+
+    /* Count the remaining bytes down instead of counting an index up, so
+     * the loop cannot wrap around for sizes close to SIZE_MAX */
+    while (size > 0)
+    {
+        length = size < sizeof(tmp) ? size : sizeof(tmp);
+
+        if (BH_IORead(from, tmp, length, &actual) || length != actual)
+            return BH_ERROR;
+
+        if (BH_IOWrite(to, tmp, length, &actual) || length != actual)
+            return BH_ERROR;
+
+        size -= length;
+    }
+
+    return BH_OK;
+}
+
+
+/* Process pack (list files or extract file) */
+/*
+ * Process pack (list files or extract file).
+ *
+ * Reads the header from `io`, seeks to the file table and walks its entries.
+ * In list mode the name and size of every entry are printed. Otherwise the
+ * first entry whose name equals config->input is copied into the file named
+ * by config->output.
+ *
+ * Returns BH_OK when listing finished or the requested file was extracted;
+ * BH_ERROR on any read/seek/open/copy failure or when no entry matches.
+ */
+static int ProcessPack(Config *config,
+                       BH_IO *io)
+{
+    PakHeader header;
+    PakEntry entry;
+    BH_IO *output;
+    size_t i;
+
+    /* Read header and seek to the beginning of the file table */
+    if (ParseHeader(io, &header))
+        return BH_ERROR;
+
+    if (BH_IOSeek(io, header.offset, BH_IO_SEEK_SET))
+        return BH_ERROR;
+
+    /* The table size is in bytes; each entry occupies ENTRY_SIZE of them */
+    for (i = header.size / ENTRY_SIZE; i; i--)
+    {
+        if (ParseEntry(io, &entry))
+            return BH_ERROR;
+
+        if (config->list)
+        {
+            /* entry.size is a uint32_t, so it must be printed as unsigned */
+            printf("%s %lu\n", entry.name, (unsigned long)entry.size);
+            continue;
+        }
+
+        /* Extraction mode: skip entries until the requested name is found */
+        if (strcmp(entry.name, config->input))
+            continue;
+
+        output = BH_FileNew(config->output);
+        if (BH_IOOpen(output, BH_IO_WRITE) ||
+            BH_IOSeek(io, entry.offset, BH_IO_SEEK_SET) ||
+            CopyData(io, output, entry.size))
+        {
+            BH_IOFree(output);
+            return BH_ERROR;
+        }
+
+        BH_IOFree(output);
+        return BH_OK;
+    }
+
+    /* Reaching the end of the table is a success only when listing;
+     * in extraction mode it means the requested file was not found */
+    if (config->list)
+        return BH_OK;
+    return BH_ERROR;
+}
+
+
+/* Main entry */
+/* Main entry: parse the command line, open the archive and process it.
+ * Returns -1 on usage or open errors, otherwise the result of ProcessPack. */
+int main(int argc, char **argv)
+{
+    Config config;
+    BH_IO *archive;
+    int status;
+
+    /* Start from an all-zero configuration so unset options are detectable */
+    memset(&config, 0, sizeof(config));
+
+    /* A parse failure or a missing archive path both end with the help text */
+    if (BH_ArgsParse(argc, argv, options, OptionsCallback, &config) != 0 ||
+        config.file == NULL)
+    {
+        PrintHelp();
+        return -1;
+    }
+
+    /* Extraction needs both the name inside the archive and a destination */
+    if (!config.list)
+    {
+        if (config.input == NULL || config.output == NULL)
+        {
+            printf("Specify input and output files\n");
+            PrintHelp();
+            return -1;
+        }
+    }
+
+    /* Open the archive read-only; it has to exist already */
+    archive = BH_FileNew(config.file);
+    if (BH_IOOpen(archive, BH_IO_READ | BH_IO_EXIST) != 0)
+    {
+        printf("Can't open file %s\n", config.file);
+        BH_IOFree(archive);
+        return -1;
+    }
+
+    status = ProcessPack(&config, archive);
+    BH_IOFree(archive);
+
+    return status;
+}
+```