문서에서 언급된 "FileIndex"는 haystack.indexes.SearchIndex를 상속한 가상의(예시용) 하위 클래스입니다. 예를 들면 다음과 같습니다.
from django.template import loader
from haystack import indexes

from myapp.models import MyFile
class FileIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index for ``MyFile`` that indexes extracted file contents.

    The ``text`` document field is re-rendered in :meth:`prepare` so that the
    template can see both the model instance and the data extracted from the
    attached file.
    """

    # Primary document field; its final value is produced in ``prepare``.
    text = indexes.CharField(document=True, use_template=True)
    # Populated from the model's ``title`` attribute.
    title = indexes.CharField(model_attr='title')
    # Populated by following ``owner`` to its ``name`` attribute.
    owner = indexes.CharField(model_attr='owner__name')

    def get_model(self):
        """Return the model class covered by this index."""
        return MyFile

    def index_queryset(self, using=None):
        """Return all ``MyFile`` objects.

        ``using`` is accepted for interface compatibility but is not consulted
        here.
        """
        return self.get_model().objects.all()

    def prepare(self, obj):
        """Build the indexed data for ``obj``, including extracted file contents.

        Args:
            obj: The ``MyFile`` instance being indexed; it must expose a
                ``the_file`` attribute whose ``open()`` returns a file-like
                object.

        Returns:
            The prepared data from the parent class, with ``'text'`` replaced
            by the rendered ``search/indexes/myapp/myfile_text.txt`` template.
        """
        data = super(FileIndex, self).prepare(obj)

        # This could also be a regular Python open() call, a StringIO instance
        # or the result of opening a URL. Note that due to a library limitation
        # file_obj must have a .name attribute even if you need to set one
        # manually before calling extract_file_contents:
        file_obj = obj.the_file.open()
        try:
            # NOTE(review): ``self.backend`` is not defined in this class;
            # confirm the installed Haystack version provides it (some
            # releases reportedly expose ``get_backend()`` instead).
            extracted_data = self.backend.extract_file_contents(file_obj)
        finally:
            # Always release the handle, even if extraction raises; otherwise
            # a full reindex leaks one open file per object. try/finally
            # (rather than ``with``) only requires a ``close()`` method.
            file_obj.close()

        # Now we'll finally perform the template processing to render the
        # text field with *all* of our metadata visible for templating.
        # A plain dict is passed because templates returned by
        # ``loader.select_template`` expect a dict context, not a ``Context``.
        t = loader.select_template(('search/indexes/myapp/myfile_text.txt',))
        data['text'] = t.render({
            'object': obj,
            'extracted': extracted_data,
        })
        return data
따라서 extracted_data는 PDF/DOCX 등의 내용을 추출하기 위해 여러분이 직접 마련한 처리 과정의 결과로 대체하면 됩니다(위 코드가 그 예시입니다). 그런 다음 추출한 데이터가 포함되도록 템플릿을 업데이트하십시오.