@@ -277,13 +277,11 @@ def pipeline_api(
277
277
# This will raise if the file is encrypted
278
278
pdf .metadata
279
279
except pypdf .errors .EmptyFileError :
280
- raise HTTPException (
281
- status_code = 400 , detail = f"File does not appear to be a valid PDF"
282
- )
280
+ raise HTTPException (status_code = 400 , detail = "File does not appear to be a valid PDF" )
283
281
except pypdf .errors .FileNotDecryptedError :
284
282
raise HTTPException (
285
283
status_code = 400 ,
286
- detail = f "File is encrypted. Please decrypt it with password." ,
284
+ detail = "File is encrypted. Please decrypt it with password." ,
287
285
)
288
286
289
287
strategy = (m_strategy [0 ] if len (m_strategy ) else "auto" ).lower ()
@@ -332,19 +330,30 @@ def pipeline_api(
332
330
m_skip_infer_table_types [0 ] if len (m_skip_infer_table_types ) else ["pdf" , "jpg" , "png" ]
333
331
)
334
332
335
- chunking_strategy = ( m_chunking_strategy [0 ].lower () if len (m_chunking_strategy ) else None )
333
+ chunking_strategy = m_chunking_strategy [0 ].lower () if len (m_chunking_strategy ) else None
336
334
chunk_strategies = ["by_title" ]
337
335
if chunking_strategy and (chunking_strategy not in chunk_strategies ):
338
336
raise HTTPException (
339
- status_code = 400 , detail = f"Invalid chunking strategy: { chunking_strategy } . Must be one of { chunk_strategies } "
337
+ status_code = 400 ,
338
+ detail = f"Invalid chunking strategy: { chunking_strategy } . Must be one of { chunk_strategies } " ,
340
339
)
341
-
342
- multipage_sections_str = (m_multipage_sections [0 ] if len (m_multipage_sections ) else "false" ).lower ()
340
+
341
+ multipage_sections_str = (
342
+ m_multipage_sections [0 ] if len (m_multipage_sections ) else "false"
343
+ ).lower ()
343
344
multipage_sections = multipage_sections_str == "true"
344
345
345
- combine_under_n_chars = (int (m_combine_under_n_chars [0 ]) if m_combine_under_n_chars and m_combine_under_n_chars [0 ].isdigit () else 500 )
346
+ combine_under_n_chars = (
347
+ int (m_combine_under_n_chars [0 ])
348
+ if m_combine_under_n_chars and m_combine_under_n_chars [0 ].isdigit ()
349
+ else 500
350
+ )
346
351
347
- new_after_n_chars = (int (m_new_after_n_chars [0 ]) if m_new_after_n_chars and m_new_after_n_chars [0 ].isdigit () else 1500 )
352
+ new_after_n_chars = (
353
+ int (m_new_after_n_chars [0 ])
354
+ if m_new_after_n_chars and m_new_after_n_chars [0 ].isdigit ()
355
+ else 1500
356
+ )
348
357
349
358
try :
350
359
logger .debug (
@@ -477,9 +486,7 @@ def get_validated_mimetype(file):
477
486
if content_type not in allowed_mimetypes :
478
487
raise HTTPException (
479
488
status_code = 400 ,
480
- detail = (
481
- f"File type { content_type } is not supported."
482
- ),
489
+ detail = (f"File type { content_type } is not supported." ),
483
490
)
484
491
485
492
return content_type
0 commit comments