/*
*******************************************************************************
*
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: propsvec.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store bits (Unicode character properties) in bit set vectors.
*/
#include <stdlib.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "utrie.h"
#include "utrie2.h"
#include "uarrsort.h"
#include "propsvec.h"
#include "uassert.h"
struct UPropsVectors {
uint32_t *v;
int32_t columns; /* number of columns, plus two for start & limit values */
int32_t maxRows;
int32_t rows;
int32_t prevRow; /* search optimization: remember last row seen */
UBool isCompacted;
};
#define UPVEC_INITIAL_ROWS (1<<12)
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
UPropsVectors *pv;
uint32_t *v, *row;
uint32_t cp;
if(U_FAILURE(*pErrorCode)) {
return NULL;
}
if(columns<1) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
columns+=2; /* count range start and limit columns */
pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
if(pv==NULL || v==NULL) {
uprv_free(pv);
uprv_free(v);
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
uprv_memset(pv, 0, sizeof(UPropsVectors));
pv->v=v;
pv->columns=columns;
pv->maxRows=UPVEC_INITIAL_ROWS;
pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
/* set the all-Unicode row and the special-value rows */
row=pv->v;
uprv_memset(row, 0, pv->rows*columns*4);
row[0]=0;
row[1]=0x110000;
row+=columns;
for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
row[0]=cp;
row[1]=cp+1;
row+=columns;
}
return pv;
}
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv) {
if(pv!=NULL) {
uprv_free(pv->v);
uprv_free(pv);
}
}
static uint32_t *
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
uint32_t *row;
int32_t columns, i, start, limit, prevRow;
columns=pv->columns;
limit=pv->rows;
prevRow=pv->prevRow;
/* check the vicinity of the last-seen row (start searching with an unrolled loop) */
row=pv->v+prevRow*columns;
if(rangeStart>=(UChar32)row[0]) {
if(rangeStart<(UChar32)row[1]) {
/* same row as last seen */
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* next row after the last one */
pv->prevRow=prevRow+1;
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* second row after the last one */
pv->prevRow=prevRow+2;
return row;
} else if((rangeStart-(UChar32)row[1])<10) {
/* we are close, continue looping */
prevRow+=2;
do {
++prevRow;
row+=columns;
} while(rangeStart>=(UChar32)row[1]);
pv->prevRow=prevRow;
return row;
}
} else if(rangeStart<(UChar32)pv->v[1]) {
/* the very first row */
pv->prevRow=0;
return pv->v;
}
/* do a binary search for the start of the range */
start=0;
while(start<limit-1) {
i=(start+limit)/2;
row=pv->v+i*columns;
if(rangeStart<(UChar32)row[0]) {
limit=i;
} else if(rangeStart<(UChar32)row[1]) {
pv->prevRow=i;
return row;
} else {
start=i;
}
}
/* must be found because all ranges together always cover all of Unicode */
pv->prevRow=start;
return pv->v+start*columns;
}
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode) {
uint32_t *firstRow, *lastRow;
int32_t columns;
UChar32 limit;
UBool splitFirstRow, splitLastRow;
/* argument checking */
if(U_FAILURE(*pErrorCode)) {
return;
}
if( pv==NULL ||
start<0 || start>end || end>UPVEC_MAX_CP ||
column<0 || column>=(pv->columns-2)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pv->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
limit=end+1;
/* initialize */
columns=pv->columns;
column+=2; /* skip range start and limit columns */
value&=mask;
/* find the rows whose ranges overlap with the input range */
/* find the first and last rows, always successful */
firstRow=_findRow(pv, start);
lastRow=_findRow(pv, end);
/*
* Rows need to be split if they partially overlap with the
* input range (only possible for the first and last rows)
* and if their value differs from the input value.
*/
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
/* split first/last rows if necessary */
if(splitFirstRow || splitLastRow) {
int32_t count, rows;
rows=pv->rows;
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
uint32_t *newVectors;
int32_t newMaxRows;
if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
newMaxRows=UPVEC_MEDIUM_ROWS;
} else if(pv->maxRows<UPVEC_MAX_ROWS) {
newMaxRows=UPVEC_MAX_ROWS;
} else {
/* Implementation bug, or UPVEC_MAX_ROWS too low. */
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return;
}
newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
if(newVectors==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(newVectors, pv->v, rows*columns*4);
firstRow=newVectors+(firstRow-pv->v);
lastRow=newVectors+(lastRow-pv->v);
uprv_free(pv->v);
pv->v=newVectors;
pv->maxRows=newMaxRows;
}
/* count the number of row cells to move after the last row, and move them */
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
if(count>0) {
uprv_memmove(
lastRow+(1+splitFirstRow+splitLastRow)*columns,
lastRow+columns,
count*4);
}
pv->rows=rows+splitFirstRow+splitLastRow;
/* split the first row, and move the firstRow pointer to the second part */
if(splitFirstRow) {
/* copy all affected rows up one and move the lastRow pointer */
count = (int32_t)((lastRow-firstRow)+columns);
uprv_memmove(firstRow+columns, firstRow, count*4);
lastRow+=columns;
/* split the range and move the firstRow pointer */
firstRow[1]=firstRow[columns]=(uint32_t)start;
firstRow+=columns;
}
/* split the last row */
if(splitLastRow) {
/* copy the last row data */
uprv_memcpy(lastRow+columns, lastRow, columns*4);
/* split the range and move the firstRow pointer */
lastRow[1]=lastRow[columns]=(uint32_t)limit;
}
}
/* set the "row last seen" to the last row for the range */
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
/* set the input value in all remaining rows */
firstRow+=column;
lastRow+=column;
mask=~m