|
|
|
@ -22,7 +22,10 @@ def scan_document_folder():
|
|
|
|
|
source_path = os.path.join(ocr_settings["path_to_input"], file_name)
|
|
|
|
|
img = Image.open(source_path)
|
|
|
|
|
# Extract text from the image via Tesseract OCR
|
|
|
|
|
text = pytesseract.image_to_string(img)
|
|
|
|
|
language = "deu"
|
|
|
|
|
pageseg_mode = "3" # Auto-Segmentation
|
|
|
|
|
options = f"-l {language} --psm {pageseg_mode}"
|
|
|
|
|
text = pytesseract.image_to_string(img,config=options)
|
|
|
|
|
|
|
|
|
|
# Prep target folder + files
|
|
|
|
|
file,file_ext = os.path.splitext(file_name)
|
|
|
|
|