You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

29 lines
839 B
Python

from PIL import Image
from pytesseract import pytesseract
import os
from configparser import ConfigParser
def scan_document_folder():
    """Run Tesseract OCR on every file under the configured input folder.

    Settings are read from the ``[ocr]`` section of ``ocr_config.ini`` in the
    current working directory:

    * ``path_to_tesseract`` -- location of the Tesseract executable.
    * ``path_to_input`` -- folder that is walked recursively for input files.

    The text extracted from each file is printed to standard output.

    Raises:
        KeyError: if ``ocr_config.ini`` is missing (``ConfigParser.read``
            silently skips unreadable files) or lacks the ``[ocr]`` section
            or one of the keys above.
    """
    ocr_config = ConfigParser()
    ocr_config.read("ocr_config.ini")
    ocr_settings = ocr_config["ocr"]

    # Tell pytesseract which Tesseract binary to invoke.
    pytesseract.tesseract_cmd = ocr_settings["path_to_tesseract"]

    # os.walk descends into subfolders, so each file must be resolved against
    # the directory it was found in (root), not against the top-level folder.
    for root, _dirs, file_names in os.walk(ocr_settings["path_to_input"]):
        for file_name in file_names:
            image_path = os.path.join(root, file_name)
            # The context manager closes the underlying file handle as soon
            # as the text has been extracted.
            with Image.open(image_path) as img:
                text = pytesseract.image_to_string(img)
            print(text)
# Script entry point: scan the folder only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    scan_document_folder()