# encoding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import os
import re
import shutil
import threading
import warnings
import six
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.datetime_safe import datetime
from django.utils.encoding import force_str
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
from haystack.constants import DJANGO_CT, DJANGO_ID, ID
from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
from haystack.inputs import Clean, Exact, PythonData, Raw
from haystack.models import SearchResult
from haystack.utils import get_identifier, get_model_ct
from haystack.utils import log as logging
from haystack.utils.app_loading import haystack_get_model
from jieba.analyse import ChineseAnalyzer
from whoosh import index
from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
from whoosh.fields import ID as WHOOSH_ID
from whoosh.filedb.filestore import FileStorage, RamStorage
from whoosh.highlight import ContextFragmenter, HtmlFormatter
from whoosh.highlight import highlight as whoosh_highlight
from whoosh.qparser import QueryParser
from whoosh.searching import ResultsPage
from whoosh.writing import AsyncWriter
# Verify that Whoosh is importable and recent enough before using it.
# NOTE(review): the ``from whoosh ...`` imports earlier in this module execute
# before this guard, so a missing Whoosh install presumably surfaces there as
# a plain ImportError rather than as MissingDependency -- confirm whether the
# guard should be moved above those imports.
try:
    import whoosh
except ImportError:
    raise MissingDependency(
        "The 'whoosh' backend requires the installation of 'Whoosh'. "
        "Please refer to the documentation.")

# Enforce the minimum supported release: reject modules that expose no
# ``__version__`` as well as anything older than 2.5.0.
if not (hasattr(whoosh, '__version__') and whoosh.__version__ >= (2, 5, 0)):
    raise MissingDependency(
        "The 'whoosh' backend requires version 2.5.0 or greater.")
# Bubble up the correct error.
# Matches timestamps shaped like ``YYYY-MM-DDTHH:MM:SS`` with an optional
# fractional-seconds suffix of 3 to 6 digits, itself optionally followed by
# ``Z``. Written as a raw string so that ``\d`` and ``\.`` reach the regex
# engine untouched instead of being treated as (invalid) string escapes.
DATETIME_REGEX = re.compile(
    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T'
    r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')

# Thread-local holder for the in-memory storage object. ``RAM_STORE`` starts
# out as None here (for the importing thread) and is filled in on demand.
LOCALS = threading.local()
LOCALS.RAM_STORE = None
class WhooshHtmlFormatter(HtmlFormatter):
    """
    A pared-down variant of Whoosh's own ``HtmlFormatter``.

    It exists so that highlighted output looks the same regardless of the
    search backend in use: the Solr, Xapian and Elasticsearch backends use
    this same formatting.
    """

    # Wrap each highlighted term in a bare tag, with no extra attributes.
    template = '<%(tag)s>%(t)s</%(tag)s>'
class WhooshSearchBackend(BaseSearchBackend):
# Words that Whoosh reserves for special use.
RESERVED_WORDS = ('AND', 'NOT', 'OR', 'TO')

# Characters that Whoosh reserves for special use.
# The backslash is listed first on purpose: it has to be handled before the
# others so that it does not overwrite the other slash replacements.
RESERVED_CHARACTERS = (
    '\\',
    '+', '-', '&&', '||', '!',
    '(', ')', '{', '}', '[', ']',
    '^', '"', '~', '*', '?', ':', '.',
)
def __init__(self, connection_alias, **connection_options):
    """
    Configure the backend from the settings of one connection.

    ``connection_options`` keys read here:

    * ``PATH`` -- location of the index; mandatory when file storage is used.
    * ``STORAGE`` -- ``'file'`` (the default) selects file storage; any
      other value turns file storage off.
    * ``POST_LIMIT`` -- stored on ``self.post_limit``; defaults to
      ``128 * 1024 * 1024`` when not supplied.

    Raises ``ImproperlyConfigured`` when file storage is selected but no
    ``PATH`` was given.
    """
    super(WhooshSearchBackend, self).__init__(
        connection_alias, **connection_options)

    # Index loading is deferred; this flag records that it has not run yet.
    self.setup_complete = False

    # ``connection_options`` is a dict of keyword arguments, so its entries
    # must be read with ``.get()``. ``getattr()`` looks for an *attribute*
    # on the dict, never finds one, and would always yield the default --
    # silently ignoring a configured POST_LIMIT.
    self.post_limit = connection_options.get(
        'POST_LIMIT', 128 * 1024 * 1024)
    self.path = connection_options.get('PATH')
    self.use_file_storage = connection_options.get('STORAGE', 'file') == 'file'

    if self.use_file_storage and not self.path:
        raise ImproperlyConfigured(
            "You must specify a 'PATH' in your settings for connection '%s'." %
            connection_alias)

    self.log = logging.getLogger('haystack')
def setup(self):
    """
    Open (or create) the Whoosh index on first use.

    Picks the storage (a ``FileStorage`` at ``self.path`` or the
    thread-local ``RamStorage``), builds the schema and query parser from
    the connection's unified index, then opens the index -- creating it
    when the directory was just made or when the existing one is empty.
    Sets ``self.setup_complete`` once done.

    Raises ``IOError`` if file storage is used and ``self.path`` is not
    writable.
    """
    # Imported here rather than at module level, as in the original code.
    from haystack import connections

    directory_created = False

    if self.use_file_storage:
        # Make sure the index directory is there.
        if not os.path.exists(self.path):
            os.makedirs(self.path)
            directory_created = True

        if not os.access(self.path, os.W_OK):
            raise IOError(
                "The path to your Whoosh index '%s' is not writable for the current user/group." %
                self.path)

        self.storage = FileStorage(self.path)
    else:
        # LOCALS is a threading.local, so each thread gets its own
        # RamStorage, created the first time that thread needs it.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE

    unified_index = connections[self.connection_alias].get_unified_index()
    self.content_field_name, self.schema = self.build_schema(
        unified_index.all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if directory_created:
        # A directory we just made cannot contain an index yet.
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            # The storage exists but holds no index; start a fresh one.
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
def build_schema(self, fields):
schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True),
}
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(
stored=True, field_boost=field_class.boost)
else:
schema_fields[field_class.index_fieldname] = KEYWORD(
stored=True, commas=True, scorable=True, field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']:
schema_fields[field_class.index_fieldname] = DATETIME(
stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer':
schema_fields[field_class.index_fieldname] = NUMERIC(
stored=field_class.stored, numtype=int, field_boost=field_class.boost)
elif field_class.field_type == 'float':
schema_fields[field_class.index_fieldname] = NUMERIC(
stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2.
schema_fields[field_class.index_fieldname] = BOOLEAN(
stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(
minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
stored=field_class.stored,
field_boost=field_class.boost)
else:
# schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
schema_fields[field_class.index_fieldname] = TEXT(
stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than re
django公开项目博客
需积分: 0 81 浏览量
更新于2023-08-16
1
收藏 3.17MB RAR 举报
主要功能: Key Features:
文章,页面,分类目录,标签的添加,删除,编辑等。文章、评论及页面支持Markdown,支持代码高亮。
Adding, deleting, and editing articles, pages, categories, and tags. Articles, comments, and pages support Markdown, with code highlighting.
支持文章全文搜索。 Supports full-text search of articles.
完整的评论功能，包括发表回复评论，以及评论的邮件提醒，支持Markdown。 Full comment functionality, including posting and replying to comments and email notifications for comments, with Markdown support.
侧边栏功能,最新文章,最多阅读,标签云等。 Sidebar features, latest articles, most read, tag cloud, etc.
支持Oauth登陆
测试小郭
- 粉丝: 21
- 资源: 2
最新资源
- 基于opensees 平台建立的单柱墩模型 考虑了滑移粘接的捏缩效应 内容包括有 1.墩柱模型建模全过程及源代码 2.钢筋混凝土之间的粘接滑移 3.基于位移控制的滞回分析代码
- 车用驱动电机原理与控制基础-P144公式(6-54)
- 群智能算法优化bp:将思维进化算法结合两层bp,对数据进行预测回归,对多层bp神经网络有兴趣的朋友可以借鉴,有意咨询,非诚勿扰 思维进化优化算法(Memetic Evolutionary Algor
- 纸箱封装包装机sw22可编辑全套技术资料100%好用.zip
- 小清新教学通用模板.pptx
- stm32 永磁同步电机pcb,原理图 利用stm32f4xx制作的pmsm 控制器电路原理图,pcb,还有pmsm simulink模型 以及simulink模型代码自动生成来设计电机控制算法资料
- JavaEE-图书管理系统源码+数据库+文档说明
- 机器视觉,OpenCV,Qt,工业相机采集,图像采集,图像处理,卡尺工具,找线,找圆,颜色检测,模板匹配,形状匹配,海康工业相机采集+基于形状的模板匹配界面,提前说明,形状匹配算法封装成dll直接调用
- 模拟IC设计,buck型dcdc设计,smic.18工艺,aot自适应导通模式,输出0.6v,最大负载电流1.2A,纹波30mv附近,可实现pwm和pfm的切,可以直接导入到cadence仿真查看,比
- MATLAB代码:面向削峰填谷的电动汽车多目标优化调度策略 关键词:电动汽车 削峰填谷 多目标 充放电优化 参考文档:自己整理的说明文档,公式、约束、数据齐全,可联系我查看 仿真平台:MATLAB Y
- 整车控制器VCU模型,控制策略,说明书,接口定义文档
- PyTorch入门案例-手写数字图像去噪
- 中宝磨牛设备sw18可编辑全套技术资料100%好用.zip
- 重负载平移机sw15可编辑全套技术资料100%好用.zip
- 汇川easy523+HMI. 电子凸轮双轴绕线 绕线的例程 主轴周期360度 一层为来回一圈,自动计算圈数,绕线完成后输出完成信号,可与其他取料机对接,进行自动放转子,自动取绕线完成产品A1431
- 永磁同步电机模型预测电流控制Simulink仿真