import os
from configparser import ConfigParser

from PIL import Image
from pytesseract import pytesseract


def scan_document_folder() -> None:
    """Run OCR on every file under the configured input folder and print the text.

    Settings are read from the ``[ocr]`` section of ``ocr_config.ini``
    (resolved relative to the current working directory):

    * ``path_to_tesseract`` -- path to the tesseract executable.
    * ``path_to_input`` -- folder that is walked (recursively) for images.

    Raises:
        KeyError: If the ``[ocr]`` section or one of the keys above is
            missing (this also happens when the config file is absent,
            because ``ConfigParser.read`` silently skips unreadable files).
    """
    ocr_config = ConfigParser()
    ocr_config.read("ocr_config.ini")
    ocr_settings = ocr_config['ocr']

    # Tell pytesseract which tesseract binary to invoke.
    pytesseract.tesseract_cmd = ocr_settings["path_to_tesseract"]

    # os.walk descends into sub-directories, so each file must be joined
    # with the directory it was found in (`root`), not the top-level input
    # path; os.path.join also supplies the path separator when the
    # configured folder has no trailing slash.
    for root, _dirs, file_names in os.walk(ocr_settings["path_to_input"]):
        for file_name in file_names:
            image_path = os.path.join(root, file_name)
            # The context manager closes the underlying file handle once
            # the text has been extracted.
            with Image.open(image_path) as img:
                text = pytesseract.image_to_string(img)
            print(text)


if __name__ == "__main__":
    scan_document_folder()