/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* darray.c - Double-array trie structure
* Created: 2006-08-13
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "trie-private.h"
#include "darray.h"
#include "fileutils.h"
/*----------------------------------*
* INTERNAL TYPES DECLARATIONS *
*----------------------------------*/
/*
* Type for keeping intermediate values of TrieIndex.
* Must be bigger than TrieIndex, so that overflow can be easily detected.
*/
typedef int32 TrieIndexInt;
typedef struct _Symbols Symbols;
struct _Symbols {
short num_symbols;
TrieChar symbols[256];
};
static Symbols * symbols_new ();
static void symbols_free (Symbols *syms);
static void symbols_add (Symbols *syms, TrieChar c);
#define symbols_num(s) ((s)->num_symbols)
#define symbols_get(s,i) ((s)->symbols[i])
#define symbols_add_fast(s,c) ((s)->symbols[(s)->num_symbols++] = c)
/*-----------------------------------*
* PRIVATE METHODS DECLARATIONS *
*-----------------------------------*/
#define da_get_free_list(d) (1)
static Bool da_check_free_cell (DArray *d,
TrieIndexInt s);
static Bool da_has_children (DArray *d,
TrieIndex s);
static Symbols * da_output_symbols (DArray *d,
TrieIndex s);
static TrieChar * da_get_state_key (DArray *d,
TrieIndex state);
static TrieIndex da_find_free_base (DArray *d,
const Symbols *symbols);
static Bool da_fit_symbols (DArray *d,
TrieIndex base,
const Symbols *symbols);
static void da_relocate_base (DArray *d,
TrieIndex s,
TrieIndex new_base);
static Bool da_extend_pool (DArray *d,
TrieIndexInt to_index);
static void da_alloc_cell (DArray *d,
TrieIndex cell);
static void da_free_cell (DArray *d,
TrieIndex cell);
static Bool da_enumerate_recursive (DArray *d,
TrieIndex state,
DAEnumFunc enum_func,
void *user_data);
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
/*------------------------------------*
* INTERNAL TYPES IMPLEMENTATIONS *
*------------------------------------*/
static Symbols *
symbols_new ()
{
Symbols *syms;
syms = (Symbols *) malloc (sizeof (Symbols));
if (!syms)
return NULL;
syms->num_symbols = 0;
return syms;
}
static void
symbols_free (Symbols *syms)
{
free (syms);
}
static void
symbols_add (Symbols *syms, TrieChar c)
{
short lower, upper;
lower = 0;
upper = syms->num_symbols;
while (lower < upper) {
short middle;
middle = (lower + upper)/2;
if (c > syms->symbols[middle])
lower = middle + 1;
else if (c < syms->symbols[middle])
upper = middle;
else
return;
}
if (lower < syms->num_symbols) {
memmove (syms->symbols + lower + 1, syms->symbols + lower,
syms->num_symbols - lower);
}
syms->symbols[lower] = c;
syms->num_symbols++;
}
/*------------------------------*
* PRIVATE DATA DEFINITONS *
*------------------------------*/
typedef struct {
TrieIndex base;
TrieIndex check;
} DACell;
struct _DArray {
TrieIndex num_cells;
DACell *cells;
FILE *file;
Bool is_dirty;
};
/*-----------------------------*
* METHODS IMPLEMENTAIONS *
*-----------------------------*/
#define DA_SIGNATURE 0xDAFD
/* DA Header:
* - Cell 0: SIGNATURE, 1
* - Cell 1: free circular-list pointers
* - Cell 2: root node
* - Cell 3: DA pool begin
*/
#define DA_POOL_BEGIN 3
DArray *
da_open (const char *path, const char *name, TrieIOMode mode)
{
DArray *d;
TrieIndex i;
d = (DArray *) malloc (sizeof (DArray));
d->file = file_open (path, name, ".br", mode);
if (!d->file)
goto exit1;
/* init cells data */
d->num_cells = file_length (d->file) / 4;
if (0 == d->num_cells) {
d->num_cells = DA_POOL_BEGIN;
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
if (!d->cells)
goto exit2;
d->cells[0].base = DA_SIGNATURE;
d->cells[0].check = 1;
d->cells[1].base = -1;
d->cells[1].check = -1;
d->cells[2].base = DA_POOL_BEGIN;
d->cells[2].check = 0;
d->is_dirty = TRUE;
} else {
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
if (!d->cells)
goto exit2;
file_read_int16 (d->file, &d->cells[0].base);
file_read_int16 (d->file, &d->cells[0].check);
if (DA_SIGNATURE != (uint16) d->cells[0].base)
goto exit3;
for (i = 1; i < d->num_cells; i++) {
file_read_int16 (d->file, &d->cells[i].base);
file_read_int16 (d->file, &d->cells[i].check);
}
d->is_dirty = FALSE;
}
return d;
exit3:
free (d->cells);
exit2:
fclose (d->file);
exit1:
free (d);
return NULL;
}
int
da_close (DArray *d)
{
int ret;
if (0 != (ret = da_save (d)))
return ret;
if (0 != (ret = fclose (d->file)))
return ret;
free (d->cells);
free (d);
return 0;
}
int
da_save (DArray *d)
{
TrieIndex i;
if (!d->is_dirty)
return 0;
rewind (d->file);
for (i = 0; i < d->num_cells; i++) {
if (!file_write_int16 (d->file, d->cells[i].base) ||
!file_write_int16 (d->file, d->cells[i].check))
{
return -1;
}
}
d->is_dirty = FALSE;
return 0;
}
TrieIndex
da_get_root (const DArray *d)
{
/* can be calculated value for multi-index trie */
return 2;
}
TrieIndex
da_get_base (const DArray *d, TrieIndex s)
{
return (s < d->num_cells) ? d->cells[s].base : TRIE_INDEX_ERROR;
}
TrieIndex
da_get_check (const DArray *d, TrieIndex s)
{
return (s < d->num_cells) ? d->cells[s].check : TRIE_INDEX_ERROR;
}
void
da_set_base (DArray *d, TrieIndex s, TrieIndex val)
{
if (s < d->num_cells) {
d->cells[s].base = val;
d->is_dirty = TRUE;
}
}
void
da_set_check (DArray *d, TrieIndex s, TrieIndex val)
{
if (s < d->num_cells) {
d->cells[s].check = val;
d->is_dirty = TRUE;
}
}
Bool
da_walk (DArray *d, TrieIndex *s, TrieChar c)
{
TrieIndex next;
next = da_get_base (d, *s) + c;
if (da_get_check (d, next) == *s) {
*s = next;
return TRUE;
}
return FALSE;
}
TrieIndex
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
{
TrieIndexInt base, next;
base = da_get_base (d, s);
if (base > 0) {
next = da_get_base (d, s) + c;
/* if already there, do not actually insert */
if (da_get_check (d, next) == s)
return next;
if (!da_check_free_cell (d, next)) {
Symbols *symbols;
TrieIndex new_base;
/* relocate BASE[s] */
symbols = da_output_symbols (d, s);
symbols_add (symbols, c);
new_base = da_find_free_base (d, symbols);
symbols_free (symbols);
评论0