from PIL import Image
from pytesseract import pytesseract
import os
from configparser import ConfigParser


def scan_document_folder():
    """Run OCR on every file under the configured input folder and print the text.

    Settings are read from the ``[ocr]`` section of ``ocr_config.ini``
    (resolved relative to the current working directory):

    * ``path_to_tesseract`` -- assigned to ``pytesseract.tesseract_cmd``.
    * ``path_to_input`` -- folder that is walked recursively with ``os.walk``.

    Every file found is opened with PIL and passed to
    ``pytesseract.image_to_string``; the extracted text is printed to stdout.
    Files that PIL cannot open are not skipped -- the error from
    ``Image.open`` propagates to the caller.

    Raises:
        KeyError: If the ``[ocr]`` section or one of the keys above is
            missing. ``ConfigParser.read`` silently ignores a missing file,
            so an absent ``ocr_config.ini`` also surfaces as this error.
    """
    ocr_config = ConfigParser()
    ocr_config.read("ocr_config.ini")

    ocr_settings = ocr_config["ocr"]
    pytesseract.tesseract_cmd = ocr_settings["path_to_tesseract"]
    input_dir = ocr_settings["path_to_input"]

    for root, _dirs, file_names in os.walk(input_dir):
        for file_name in file_names:
            # Join against ``root`` (the directory currently being walked),
            # not the top-level folder: files in subdirectories would
            # otherwise resolve to a path that does not exist, and a
            # configured folder without a trailing separator would be glued
            # straight onto the file name.
            image_path = os.path.join(root, file_name)

            # The context manager closes the underlying file handle once OCR
            # is done, so scanning a large folder does not leak descriptors.
            with Image.open(image_path) as img:
                text = pytesseract.image_to_string(img)

            print(text)


# Run the folder scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    scan_document_folder()