diff --git a/ocr_scan.py b/ocr_scan.py index c9d3147..62b51e5 100644 --- a/ocr_scan.py +++ b/ocr_scan.py @@ -22,7 +22,10 @@ def scan_document_folder(): source_path = os.path.join(ocr_settings["path_to_input"], file_name) img = Image.open(source_path) #Extract text from image - text = pytesseract.image_to_string(img) + language = "deu" + pageseg_mode = "3" # Auto-Segmentation + options = f"-l {language} --psm {pageseg_mode}" + text = pytesseract.image_to_string(img,config=options) # Prep target folder + files file,file_ext = os.path.splitext(file_name)