tesseract-3.02.02-win32-lib-include-dirs.zip资源-CSDN文库

共33个文件

h：24个

lib：4个

vsprops：2个

tesseract

需积分: 9 84 浏览量 2017-10-17 09:57:41 上传评论收藏 28MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

tesseract-3.02.02-win32-lib-include-dirs.zip （33个子文件）

tesseract-3.02.02-win32-lib-include-dirs

include

tesseract_versionnumbers.vsprops 433B

tesseract

baseapi.h 30KB

fileerr.h 1KB

helpers.h 5KB

unicharset.h 35KB

unicharmap.h 3KB

ndminx.h 1KB

apitypes.h 1KB

params.h 10KB

capi.h 16KB

genericvector.h 25KB

platform.h 2KB

basedir.h 1KB

resultiterator.h 9KB

publictypes.h 11KB

tesscallback.h 32KB

serialis.h 2KB

errcode.h 3KB

ltrresultiterator.h 9KB

strngs.h 6KB

unichar.h 3KB

thresholder.h 8KB

memry.h 2KB

host.h 6KB

pageiterator.h 13KB

leptonica_versionnumbers.vsprops 660B

lib

libtesseract302-static.lib 14MB

libtesseract302d.dll 4.08MB

libtesseract302.lib 111KB

libtesseract302.exp 65KB

libtesseract302.dll 1.5MB

libtesseract302d.lib 111KB

libtesseract302-static-debug.lib 84.78MB

/////////////////////////////////////////////////////////////////////// // File: unicharset.h // Description: Unicode character/ligature set class. // Author: Thomas Kielbus // Created: Wed Jun 28 17:05:01 PDT 2006 // // (C) Copyright 2006, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifndef TESSERACT_CCUTIL_UNICHARSET_H__ #define TESSERACT_CCUTIL_UNICHARSET_H__ #include "errcode.h" #include "helpers.h" #include "strngs.h" #include "tesscallback.h" #include "unichar.h" #include "unicharmap.h" class CHAR_FRAGMENT { public: // Minimum number of characters used for fragment representation. static const int kMinLen = 6; // Maximum number of characters used for fragment representation. static const int kMaxLen = 3 + UNICHAR_LEN + 2; // Maximum number of fragments per character. static const int kMaxChunks = 5; // Setters and Getters. 
inline void set_all(const char *unichar, int pos, int total, bool natural) { set_unichar(unichar); set_pos(pos); set_total(total); set_natural(natural); } inline void set_unichar(const char *uch) { strncpy(this->unichar, uch, UNICHAR_LEN); this->unichar[UNICHAR_LEN] = '\0'; } inline void set_pos(int p) { this->pos = p; } inline void set_total(int t) { this->total = t; } inline const char* get_unichar() const { return this->unichar; } inline int get_pos() const { return this->pos; } inline int get_total() const { return this->total; } // Returns the string that represents a fragment // with the given unichar, pos and total. static STRING to_string(const char *unichar, int pos, int total, bool natural); // Returns the string that represents this fragment. STRING to_string() const { return to_string(unichar, pos, total, natural); } // Checks whether a fragment has the same unichar, // position and total as the given inputs. inline bool equals(const char *other_unichar, int other_pos, int other_total) const { return (strcmp(this->unichar, other_unichar) == 0 && this->pos == other_pos && this->total == other_total); } inline bool equals(const CHAR_FRAGMENT *other) const { return this->equals(other->get_unichar(), other->get_pos(), other->get_total()); } // Checks whether a given fragment is a continuation of this fragment. // Assumes that the given fragment pointer is not NULL. inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const { return (strcmp(this->unichar, fragment->get_unichar()) == 0 && this->total == fragment->get_total() && this->pos == fragment->get_pos() + 1); } // Returns true if this fragment is a beginning fragment. inline bool is_beginning() const { return this->pos == 0; } // Returns true if this fragment is an ending fragment. 
inline bool is_ending() const { return this->pos == this->total-1; } // Returns true if the fragment was a separate component to begin with, // ie did not need chopping to be isolated, but may have been separated // out from a multi-outline blob. inline bool is_natural() const { return natural; } void set_natural(bool value) { natural = value; } // Parses the string to see whether it represents a character fragment // (rather than a regular character). If so, allocates memory for a new // CHAR_FRAGMENT instance and fills it in with the corresponding fragment // information. Fragments are of the form: // |m|1|2, meaning chunk 1 of 2 of character m, or // |:|1n2, meaning chunk 1 of 2 of character :, and no chopping was needed // to divide the parts, as they were already separate connected components. // // If parsing succeeded returns the pointer to the allocated CHAR_FRAGMENT // instance, otherwise (if the string does not represent a fragment or it // looks like it does, but parsing it as a fragment fails) returns NULL. // // Note: The caller is responsible for deallocating memory // associated with the returned pointer. static CHAR_FRAGMENT *parse_from_string(const char *str); private: char unichar[UNICHAR_LEN + 1]; // True if the fragment was a separate component to begin with, // ie did not need chopping to be isolated, but may have been separated // out from a multi-outline blob. bool natural; inT16 pos; // fragment position in the character inT16 total; // total number of fragments in the character }; // The UNICHARSET class is an utility class for Tesseract that holds the // set of characters that are used by the engine. Each character is identified // by a unique number, from 0 to (size - 1). class UNICHARSET { public: // Custom list of characters and their ligature forms (UTF8) // These map to unicode values in the private use area (PUC) and are supported // by only few font families (eg. Wyld, Adobe Caslon Pro). 
static const char* kCustomLigatures[][2]; // ICU 2.0 UCharDirection enum (from third_party/icu/include/unicode/uchar.h) enum Direction { U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3, U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7, U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11, U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15, U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT }; // Create an empty UNICHARSET UNICHARSET(); ~UNICHARSET(); // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. const UNICHAR_ID unichar_to_id(const char* const unichar_repr) const; // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. Only the first length characters from unichar_repr are used. const UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const; // Return the minimum number of bytes that matches a legal UNICHAR_ID, // while leaving a legal UNICHAR_ID afterwards. In other words, if there // is both a short and a long match to the string, return the length that // ensures there is a legal match after it. int step(const char* str) const; // Return whether the given UTF-8 string is encodable with this UNICHARSET. // If not encodable, write the first byte offset which cannot be converted // into the second (return) argument. bool encodable_string(const char *str, int *first_bad_position) const; // Return the unichar representation corresponding to the given UNICHAR_ID // within the UNICHARSET. const char* const id_to_unichar(UNICHAR_ID id) const; // Return the UTF8 representation corresponding to the given UNICHAR_ID after // resolving any private encodings internal to Tesseract. 
This method is // preferable to id_to_unichar for outputting text that will be visible to // external applications. const char* const id_to_unichar

评论收藏

内容反馈