I'm getting this error
UnicodeDecodeError at /select_text 'utf-8' codec can't decode byte 0xe7 in position 92: invalid continuation byte Request Method: POST Request URL: http://agata.pgie.ufrgs.br/select_text Django Version: 2.0.1 Exception Type: UnicodeDecodeError Exception Value: 'utf-8' codec can't decode byte 0xe7 in position 92: invalid continuation byte Exception Location: /home/metis/public_html/AGATA/agataenv/lib/python3.4/codecs.py in decode, line 319 Python Executable: /usr/bin/python3 Python Version: 3.4.3 Python Path: ['/home/metis/public_html/AGATA', '/home/metis/public_html/AGATA/agataenv/lib/python3.4', '/home/metis/public_html/AGATA/agataenv/lib/python3.4/plat-x86_64-linux-gnu', '/home/metis/public_html/AGATA/agataenv/lib/python3.4/lib-dynload', '/usr/lib/python3.4', '/usr/lib/python3.4/plat-x86_64-linux-gnu', '/home/metis/public_html/AGATA/agataenv/lib/python3.4/site-packages'] Server time: Thu, 22 Feb 2018 12:29:51 +0000 Unicode error hint The string that could not be encoded/decoded was: Varia��es nvironment:
Request Method: POST Request URL: http://agata.pgie.ufrgs.br/select_text Django Version: 2.0.1 Python Version: 3.4.3 Installed Applications: ['django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'textMining', 'bootstrapform'] Installed Middleware: ['django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware'] Traceback: File "/home/metis/public_html/AGATA/agataenv/lib/python3.4/site-packages/django/core/handlers/exception.py" in inner 35. response = get_response(request) File "/home/metis/public_html/AGATA/agataenv/lib/python3.4/site-packages/django/core/handlers/base.py" in _get_response 128. response = self.process_exception_by_middleware(e, request) File "/home/metis/public_html/AGATA/agataenv/lib/python3.4/site-packages/django/core/handlers/base.py" in _get_response 126. response = wrapped_callback(request, *callback_args, **callback_kwargs) File "/home/metis/public_html/AGATA/textMining/views.py" in select_text 59. text_mining = TextMining(file_path, keywords) File "/home/metis/public_html/AGATA/textMining/TextMining.py" in __init__ 15. self.separete_file_sentences() File "/home/metis/public_html/AGATA/textMining/TextMining.py" in separete_file_sentences 31. file_text = text_file.read().decode('string-escape').decode("utf-8") File "/home/metis/public_html/AGATA/agataenv/lib/python3.4/codecs.py" in decode 319. (result, consumed) = self._buffer_decode(data, self.errors, final) Exception Type: UnicodeDecodeError at /select_text Exception Value: 'utf-8' codec can't decode byte 0xe7 in position 92: invalid continuation byte on my Django app, already on Apache.., can't figure out what's the problem here, since I'm dealing with encoding (At least I think so..)
My code(following the sequence):
def select_text(request): book_file = request.FILES['book'] fs = FileSystemStorage() file_name = fs.save(book_file.name, book_file) uploaded_file_url = fs.url(file_name) print(uploaded_file_url) keywords = [ request.POST['keyword_1'], request.POST['keyword_2'], request.POST['keyword_3'], ] blank_optional_keywords = { 'keyword_2' : False, 'keyword_3' : False } if keywords[1] == "": blank_optional_keywords['keyword_2'] = True if keywords[2] == "": blank_optional_keywords['keyword_3'] = True request.session["blank_optional_keywords"] = blank_optional_keywords #file_name = "LivroMA4_P1_formatado(1).txt" #file_path = get_file_path(file_name, 'text') file_path = get_file_path(uploaded_file_url, 'upload') text_mining = TextMining(file_path, keywords) text_mining.get_keywords_sentences() sentences = text_mining._keyword_sentences sentences_info = generate_sentences_info(sentences) request.session["sentences_info"] = sentences_info return render(request, 'textMining/select_text.html', {'sentences_info': sentences_info}) The TextMining class functions:
class TextMining(object): def __init__(self, file_path, keywords): self._file_path = file_path self._keywords = keywords self._sentences = list() self._keyword_sentences = dict() self.lower_keywords() self.separete_file_sentences() ... def separete_file_sentences(self): with open(self._file_path, "r", encoding='utf-8') as text_file: file_text = text_file.read() sentences = nltk.tokenize.sent_tokenize(file_text) for i in range(len(sentences)): if(len(sentences[i]) > 0): self._sentences.append(sentences[i]) I've been dealing with this for a few days now, tried a lot of things, but nothing works..
urls.py (TextMining app)
urlpatterns = [ url(r'^$', views.index, name='index'), url(r'^select_text', views.select_text, name = 'select_text'), url(r'^edit_text', views.edit_text, name = 'edit_text'), url(r'^generate_aiml', views.generate_aiml, name = 'generate_aiml'), ] urls.py (TextMiningProject)
urlpatterns = [ url(r'^admin/', admin.site.urls), url(r'^', include('textMining.urls')), ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) if settings.DEBUG is True: urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)