Add basic OCR Tesseract example

2 years ago · c165c8787e
parent 1bc10faa84
commit c165c8787e
1 changed files with 27 additions and 0 deletions
--- a/ocr_scan.py
+++ b/ocr_scan.py
@ -0,0 +1,27 @@
 import os
 import sys

 from PIL import Image
 from pytesseract import pytesseract
 # Batch OCR script: walk the input folder, run Tesseract on every image
 # found, and print the recognised text to stdout.

 # Location of the Tesseract executable on a default Windows install.
 path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
 # Folder that is scanned for images.
 path_to_input = r'input/'
 # Output folder; defined here but not used by the code below.
 path_to_output = r'output/'
 # Only override the command when the executable is really at that location.
 # Otherwise keep pytesseract's default command, which resolves `tesseract`
 # through PATH, so the script also works on non-Windows systems.
 if os.path.isfile(path_to_tesseract):
     pytesseract.tesseract_cmd = path_to_tesseract
 # os.walk also descends into sub-folders, so `root` changes per iteration.
 for root, dirs, file_names in os.walk(path_to_input):
     for file_name in file_names:
         # Build the path from `root`, not from the top-level input folder;
         # otherwise files inside sub-folders cannot be found.
         image_path = os.path.join(root, file_name)
         try:
             img = Image.open(image_path)
         except OSError as err:
             # Unreadable or non-image files (Pillow reports these as OSError
             # subclasses) must not abort the whole batch.
             print('Skipping {}: {}'.format(image_path, err), file=sys.stderr)
             continue
         # The context manager closes the underlying file handle once the
         # text has been extracted.
         with img:
             text = pytesseract.image_to_string(img)
         print(text)