Skip to content
This repository was archived by the owner on Apr 19, 2022. It is now read-only.

Commit dc64292

Browse files
committed Mar 22, 2021
Get attachment headers asynchronously and improve test data
1 parent 5569389 commit dc64292

7 files changed

+71
-38
lines changed
 

‎check.py

+48-21
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
1+
import asyncio
2+
import httpx
13
import re
2-
import read
3-
4-
import os
54
import requests
6-
from urllib.parse import urlparse
5+
import os
6+
7+
import read
78

89
"""
910
Consistency check in the files
1011
"""
1112

1213
# TODO: check whether the text needs further checks, e.g. strip()
13-
# TODO: migliora la gestione degli attachments
1414

15-
def validity(csv, doc):
15+
async def validity(csv, doc):
1616

1717
# 'oggetto' and 'pec' are required
1818
valid, msg = valid_csv(csv)
@@ -28,7 +28,7 @@ def validity(csv, doc):
2828
return msg
2929

3030
# check and download the attachments
31-
valid, msg = valid_attachments(csv)
31+
valid, msg = await valid_attachments(csv)
3232
if not valid:
3333
print(msg['text'])
3434
return msg
@@ -80,9 +80,10 @@ def valid_consistency(csv, doc):
8080
return valid, msg
8181

8282

83-
def valid_attachments(csv):
83+
async def valid_attachments(csv):
8484
print('\nCheck if the attachments are valid')
8585
msg = {}
86+
msg['text'] = ''
8687
valid = True
8788

8889
# Get all the attachments
@@ -97,19 +98,45 @@ def valid_attachments(csv):
9798
attachments[line] = line_attachments
9899
line+=1
99100

100-
# Check and download all the attachments
101-
text_error = ''
102-
for k, v in attachments.items():
103-
urls = [u for u in v if 'http://' in u or 'https://' in u]
104-
for url in urls:
105-
url_text = urlparse(url)
106-
name = os.path.basename(url_text.path)
107-
108-
# Won't be able to identify the name file
109-
if len(url) > 0 and name == '':
101+
# Check all the attachments
102+
for row, urls in attachments.items():
103+
104+
# No empty 'allegato' fields
105+
valid_urls = [i.strip() for i in urls if i.strip() != '']
106+
107+
# Get headers (async way) and process
108+
results = await task(valid_urls)
109+
for r in results:
110+
if type(r['response']) == httpx.Headers:
111+
url = r['url']
112+
ct = r['response']['content-type'].lower()
113+
if 'text' in ct or 'html' in ct:
114+
valid = False
115+
msg['field'] = 'xlsx'
116+
msg['text'] += """Controlla l\'allegato {u} alla riga {r}. Il link non contiene un file.\n""".format(u=url, r=int(row)+1)
117+
else:
118+
url = r['url']
110119
valid = False
111-
mgs['field'] = 'xlsx'
112-
text_error += """Controlla l\'allegato {u} alla riga {k}. Non sembra un file valido.\n""".format(u=url, k=k)
113-
msg['text'] = text_error
120+
msg['field'] = 'xlsx'
121+
msg['text'] += """Controlla l\'allegato {u} alla riga {r}. Non sembra un link valido.\n""".format(u=url, r=int(row)+1)
114122

115123
return valid, msg
124+
125+
126+
async def request_header(client, url):
    """Fetch the headers of *url* with a HEAD request.

    Args:
        client: object exposing an awaitable ``head(url)`` whose result has
            a ``headers`` attribute (``task`` passes an ``httpx.AsyncClient``).
        url: address of the attachment to probe.

    Returns:
        A dict ``{'url': url, 'response': headers}``. When the request raises
        any ``Exception``, ``'response'`` holds the string ``'Errore'``
        instead of the headers.
    """
    print(' Get attachment header: %s' % url)
    try:
        headers = (await client.head(url)).headers
    except Exception:
        # Best effort: any failure is reported to the caller through the
        # sentinel string rather than propagated.
        headers = 'Errore'

    return {'url': url, 'response': headers}
136+
137+
138+
async def task(urls):
    """Request the headers of every url in *urls* concurrently.

    A single ``httpx.AsyncClient`` is opened for the whole batch and one
    ``request_header`` coroutine per url is awaited through
    ``asyncio.gather``.

    Returns:
        The list produced by ``asyncio.gather``: one ``request_header``
        result per url, in the same order as *urls*.
    """
    async with httpx.AsyncClient() as client:
        pending = (request_header(client, url) for url in urls)
        return await asyncio.gather(*pending)

‎create.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,16 @@ def mails(csv, templ):
1717
# Recipient
1818
m['recipient'] = row['pec']
1919

20-
# Mail text
20+
# Body
2121
doc = mail_text(row, templ)
2222
text_doc = get_text(doc)
2323
m['body'] = text_doc
2424

2525
# Attachments
2626
m['attachments'] = []
27-
2827
for k,v in row.items():
2928
if 'allegato' in k and v != '':
30-
url_text = urlparse(v)
31-
name = os.path.basename(url_text.path)
32-
m['attachments'].append(name)
29+
m['attachments'].append(v)
3330

3431
all_mails.append(m)
3532

‎preview.html

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
<div class="row">
2-
<div class="col-sm-1"></div>
3-
<div class="col-sm-10">
4-
<p class="fw-bolder fs-1 text"> {{ subject }} </p>
2+
<div class="col-sm-2"></div>
3+
<div class="col-sm-8">
4+
<p class="fw-bolder fs-1 text"> Oggetto: {{ subject }} </p>
55
<p class="fs-2 text"> Destinatario: {{ recipient }} </p>
66
<div style="white-space: pre-line">
77
<p class="font-monospace lh-lg fw-normal">{{ body }}</p>
88
</div>
9+
<p class="fs-3 text"> Allegati: </p>
10+
<ul>
11+
{%- for item in attachments -%}
12+
<li><a href="{{ item }}">{{ item }}</a></li>
13+
{%- endfor -%}
14+
</ul>
915
<form id="send">
1016
<button class="btn btn-primary" type="submit" formaction='/send'>Invia le mail</button>
1117
<button class="btn btn-primary" type="submit" formaction='/'>Modifica i file caricati</button>
1218
</form>
1319
</div>
14-
<div class="col-sm-1"></div>
20+
<div class="col-sm-2"></div>
1521
</div>

‎server.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,13 @@ async def load_files(request: Request,
3535
data.xlsx['content-type'] = xlsx_file.content_type
3636

3737
# Process and manage the results
38-
result, docx, xlsx = process_inputs(xlsx_byte, xlsx_file.content_type, docx_byte)
38+
result, docx, xlsx = await process_inputs(xlsx_byte, xlsx_file.content_type, docx_byte)
3939

4040
if result == 'OK':
4141
data.mails = create.mails(xlsx, docx)
4242
data.docx['file'] = docx
43-
data.xlsx['file'] = xlsx
44-
43+
data.xlsx['file'] = xlsx
44+
4545
return result
4646

4747

@@ -52,7 +52,8 @@ def prepare_preview(request: Request):
5252
'request': request,
5353
'subject': data.mails[0]['subject'],
5454
'recipient' : data.mails[0]['recipient'],
55-
'body': data.mails[0]['body']
55+
'body': data.mails[0]['body'],
56+
'attachments': data.mails[0]['attachments']
5657
}
5758

5859
return templates.TemplateResponse('preview.html', context=context)
@@ -70,14 +71,14 @@ async def massive_send(request: Request):
7071
return templates.TemplateResponse('results.html', context=context)
7172

7273

73-
def process_inputs(xlsx_byte, xlsx_content_type, docx_byte):
74+
async def process_inputs(xlsx_byte, xlsx_content_type, docx_byte):
7475
if xlsx_content_type == 'text/csv':
7576
csv_reader = read.read_csv(xlsx_byte)
7677
elif xlsx_content_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
7778
csv_reader = read.read_xlsx(xlsx_byte)
7879

7980
xlsx = read.read_csv_reader(csv_reader)
8081
docx = read.read_docx(docx_byte)
81-
res = check.validity(xlsx, docx)
82+
res = await check.validity(xlsx, docx)
8283

8384
return res, docx, xlsx

‎test/file-sample_100kB.doc

-98 KB
Binary file not shown.

‎test/test.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
destinatario,oggetto,pec,nome_progetto,firma,allegato1,allegato2,allegato3
2-
Caruso,Oggetto della mail per Caruso,giuseppe.futia@gmail.com,Progetto di Caruso,Wikimedia Italia,https://file-examples-com.github.io/uploads/2017/02/file-sample_100kB.doc,,
1+
destinatario,oggetto,pec,nome_progetto,firma,allegato1,allegato2,allegato3,allegato4
2+
Caruso,Oggetto della mail per Caruso,giuseppe.futia@gmail.com,Progetto di Caruso,Wikimedia Italia,https://file-examples-com.github.io/uploads/2017/02/file-sample_100kB.doc,,,

‎test/test_bad_attachments.csv

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
destinatario,oggetto,pec,nome_progetto,firma,allegato1,allegato2,allegato3,allegato4
2+
Caruso,Oggetto della mail per Caruso,giuseppe.futia@gmail.com,Progetto di Caruso,Wikimedia Italia,https://file-examples-com.github.io/uploads/2017/02/file-sample_100kB.doc,https://www.googsdasdasle.it,ciao,http://www.google.it

0 commit comments

Comments
 (0)
This repository has been archived.