// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_UDF_UDF_H
#define DORIS_BE_UDF_UDF_H
#include <boost/cstdint.hpp>
#include <string.h>
// This is the only Doris header required to develop UDFs and UDAs. This header
// contains the types that need to be used and the FunctionContext object. The context
// object serves as the interface object between the UDF/UDA and the doris process.
namespace doris {
// Forward declaration only. FunctionContext::impl() hands out a pointer to this
// type; it is documented there as internal and not for use by UDF/UDA code.
class FunctionContextImpl;
}
namespace doris_udf {
// Every input and output value is one of the structs below. Each struct is a simple
// object that holds a boolean recording whether the value is NULL, plus the value
// itself. The value is unspecified when the NULL boolean is set.
// Only forward declarations appear here.
struct AnyVal;
struct BooleanVal;
struct TinyIntVal;
struct SmallIntVal;
struct IntVal;
struct BigIntVal;
struct StringVal;
struct DateTimeVal;
struct DecimalVal;
struct DecimalV2Val;
struct HllVal;
// The FunctionContext is passed to every UDF/UDA and is the interface for the UDF to the
// rest of the system. It contains APIs to examine the system state, report errors
// and manage memory.
class FunctionContext {
public:
// Version identifiers returned by version(). V2_0 is the only value declared.
enum DorisVersion {
    V2_0 = 0,
};
// Type tags stored in TypeDesc::type. The enumerators are consecutive starting at
// INVALID_TYPE == 0; each numeric value is written out so it can be read directly.
enum Type {
    INVALID_TYPE      = 0,
    TYPE_NULL         = 1,
    TYPE_BOOLEAN      = 2,
    TYPE_TINYINT      = 3,
    TYPE_SMALLINT     = 4,
    TYPE_INT          = 5,
    TYPE_BIGINT       = 6,
    TYPE_LARGEINT     = 7,
    TYPE_FLOAT        = 8,
    TYPE_DOUBLE       = 9,
    TYPE_DECIMAL      = 10,
    TYPE_DATE         = 11,
    TYPE_DATETIME     = 12,
    TYPE_CHAR         = 13,
    TYPE_VARCHAR      = 14,
    TYPE_HLL          = 15,
    TYPE_STRING       = 16,
    TYPE_FIXED_BUFFER = 17,
    TYPE_DECIMALV2    = 18,
    TYPE_OBJECT       = 19
};
// Describes a value type: a Type tag plus the parameters that only some tags use.
// get_return_type() and get_intermediate_type() return references to this struct.
struct TypeDesc {
// The type tag; determines which of the fields below are meaningful.
Type type;
/// Only valid if type == TYPE_DECIMAL
int precision;
// NOTE(review): no comment of its own in the original; presumably also valid only
// for TYPE_DECIMAL, like precision -- confirm.
int scale;
/// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR
int len;
};
// Identifier made of two 64-bit integers; this is the type returned by query_id().
// NOTE(review): hi/lo are presumably the high and low halves of one 128-bit id --
// confirm against the code that fills it in.
struct UniqueId {
int64_t hi;
int64_t lo;
};
// Selects which state slot set_function_state() / get_function_state() operate on.
enum FunctionStateScope {
    /// The function state for this FunctionContext's UDF is shared across the plan
    /// fragment. (A query is divided into multiple plan fragments, each responsible
    /// for part of the query execution.) Several instances of the UDF may execute
    /// concurrently within one fragment, with multiple FunctionContexts sharing this
    /// state, so the state must be thread-safe. The UDF's Prepare() function may be
    /// called with this scope concurrently on a single host if the UDF is evaluated
    /// in multiple plan fragments on that host. In general, read-only state that
    /// does not need to be recomputed for every UDF call should be fragment-local.
    /// TODO: not yet implemented
    FRAGMENT_LOCAL = 0,
    /// The function state is local to the execution thread and does not need to be
    /// thread-safe. It is initialized (via the Prepare() function) once for every
    /// execution thread, so fragment-local state should be preferred when possible
    /// for better performance. In general, inexpensive shared state that the UDF
    /// writes to (e.g. scratch space) should be thread-local.
    THREAD_LOCAL = 1,
};
// Returns the version of Doris that is currently running.
DorisVersion version() const;
// Returns the user that is running the query. Returns NULL if it is not
// available.
// NOTE(review): ownership/lifetime of the returned string is not stated here --
// confirm before holding on to the pointer.
const char* user() const;
// Returns the query_id for the current query, as a UniqueId (hi/lo pair).
UniqueId query_id() const;
// Sets an error for this UDF. Calling this will cause the query to fail.
// Note: when setting an error in a UDF that is used in Data Load, make sure
// the function's return value is null.
void set_error(const char* error_msg);
// Clears the error status and message. When this FunctionContext is reused, the
// previous error status and message may need to be cleared with this call.
void clear_error_msg();
// Adds a warning that is returned to the user. This can include things like
// overflow or other recoverable error conditions.
// Warnings are capped at a maximum number. Returns true if the warning was
// added and false if it was ignored due to the cap.
bool add_warning(const char* warning_msg);
// Returns true if an error has been set.
bool has_error() const;
// Returns the current error message. Returns NULL if there is no error.
const char* error_msg() const;
// Allocates memory for UDAs. All UDA allocations should use this if possible instead of
// malloc/new. The UDA is responsible for calling free() on all buffers returned
// by allocate().
// If this allocation causes the memory limit to be exceeded, the error will be set
// in this object, causing the query to fail.
// NOTE(review): the value returned in that failure case is not documented here --
// confirm before dereferencing the result unconditionally.
uint8_t* allocate(int byte_size);
// Reallocates 'ptr' to the new byte_size. If the current underlying allocation
// is big enough, the original ptr will be returned. If the allocation needs to
// grow, a new allocation is made that is at least 'byte_size' and the contents
// of 'ptr' will be copied into it.
// This should be used for buffers that constantly get appended to.
uint8_t* reallocate(uint8_t* ptr, int byte_size);
// Frees a buffer returned from allocate() or reallocate().
void free(uint8_t* buffer);
// For allocations that cannot use the allocate() API provided by this
// object, track_allocation()/free(int64_t) can be used to just keep count of the
// byte sizes. For each call to track_allocation(), the UDF/UDA must make
// the corresponding free(int64_t) call.
// Note that free() is overloaded on a pointer and on a byte count: pass a typed
// uint8_t* or int64_t argument, since an untyped literal such as 0 can be ambiguous
// or select the overload you did not intend.
void track_allocation(int64_t byte_size);
void free(int64_t byte_size);
// TODO: Do we need to add arbitrary key/value metadata. This would be plumbed
// through the query. E.g. "select UDA(col, 'sample=true') from tbl".
// const char* GetMetadata(const char*) const;
// TODO: Add mechanism for UDAs to update stats similar to runtime profile counters
// TODO: Add mechanism to query for table/column stats
// Returns the underlying opaque implementation object (the _impl member). The
// UDF/UDA should not use this; it is used internally.
doris::FunctionContextImpl* impl() {
return _impl;
}
/// Methods for maintaining state across UDF/UDA function calls. set_function_state()
/// can be used to store a pointer that can then be retrieved via get_function_state().
/// If get_function_state() is called when no pointer is set, it will return
/// NULL. set_function_state() does not take ownership of 'ptr'; it is up to the UDF/UDA
/// to clean up any function state if necessary.
/// 'scope' is a FunctionStateScope value (FRAGMENT_LOCAL or THREAD_LOCAL).
void set_function_state(FunctionStateScope scope, void* ptr);
void* get_function_state(FunctionStateScope scope) const;
// Returns the return type information of this function. For UDAs, this is the final
// return type of the UDA (e.g., the type returned by the finalize function).
const TypeDesc& get_return_type() const;
// Returns the intermediate type for UDAs, i.e., the one returned by
// update and merge functions. Returns INVALID_TYPE for UDFs.
// NOTE(review): the return type is a TypeDesc reference, so "INVALID_TYPE" presumably
// means a TypeDesc whose 'type' field is INVALID_TYPE -- confirm.
const TypeDesc& get_intermediate_type() const;
// (stray non-code text left over from page extraction was removed here)