ocr_document_scanner/ocr_scan.py

from PIL import Image
from pytesseract import pytesseract
import os

# Location of the Tesseract executable. This is a Windows install path; point
# it at the equivalent binary when running on another operating system.
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Folder that is scanned for images, and folder reserved for results.
# NOTE(review): path_to_output is defined but not used by this script yet.
path_to_input = r'input/'
path_to_output = r'output/'

# Only override pytesseract's command when the configured executable exists;
# otherwise keep its default so a Tesseract found on PATH still works.
if os.path.isfile(path_to_tesseract):
    pytesseract.tesseract_cmd = path_to_tesseract

# Walk the input folder (os.walk also descends into sub-folders).
for root, dirs, file_names in os.walk(path_to_input):
    for file_name in file_names:
        # Join with `root`, not `path_to_input`: files found in sub-folders
        # live under the directory currently being walked.
        image_path = os.path.join(root, file_name)

        # The context manager closes the image file once the text is read.
        with Image.open(image_path) as img:
            text = pytesseract.image_to_string(img)

        print(text)
Add basis ocr tesseract example 2 years ago			`from PIL import Image`
			`from pytesseract import pytesseract`
			`import os`

			`#Define path to tessaract.exe #or equivalent on os`
			`path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'`

			`#Define path to image`
			`#path_to_image = 'input/sampletext1-ocr.png'`
			`#Define path to images folder`
			`path_to_input = r'input/'`
			`path_to_output = r'output/'`

			`#Point tessaract_cmd to tessaract.exe`
			`pytesseract.tesseract_cmd = path_to_tesseract`

			`#Get the file names in the directory`
			`for root, dirs, file_names in os.walk(path_to_input):`
			`#Iterate over each file name in the folder`
			`for file_name in file_names:`
			`#Open image with PIL`
			`img = Image.open(path_to_input + file_name)`

			`#Extract text from image`
			`text = pytesseract.image_to_string(img)`

			`print(text)`