You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

27 lines
780 B
Python

from PIL import Image
from pytesseract import pytesseract
import os
# Batch OCR script: walk the input folder, run Tesseract on every file found,
# and print the recognized text to stdout.

# Location of the Tesseract executable (Windows default install path; adjust
# for other operating systems).
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Folder that is scanned for images.
path_to_input = r'input/'
# Output folder; not used anywhere in the visible part of this script.
path_to_output = r'output/'

# Tell pytesseract which Tesseract binary to invoke.
pytesseract.tesseract_cmd = path_to_tesseract

# os.walk descends into subdirectories, so each file's path has to be built
# from the directory currently being visited (root), not from the top-level
# input folder -- otherwise files in nested folders cannot be found.
for root, _dirs, file_names in os.walk(path_to_input):
    for file_name in file_names:
        image_path = os.path.join(root, file_name)
        # The context manager closes the underlying file handle as soon as
        # OCR is done, instead of leaving one open handle per image.
        with Image.open(image_path) as img:
            # Extract the text from the image.
            text = pytesseract.image_to_string(img)
        print(text)