Add basic OCR Tesseract example
parent
1bc10faa84
commit
c165c8787e
@ -0,0 +1,27 @@
|
|||||||
|
from PIL import Image
|
||||||
|
from pytesseract import pytesseract
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Location of the Tesseract executable (tesseract.exe on Windows; use the
# equivalent binary path on other operating systems).
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Tell pytesseract which Tesseract binary to invoke.
pytesseract.tesseract_cmd = path_to_tesseract

# Folder containing the images to run OCR on. For a single image, a path such
# as 'input/sampletext1-ocr.png' could be used instead.
path_to_input = r'input/'

# Output folder (not referenced by the code shown here).
path_to_output = r'output/'
|
||||||
|
|
||||||
|
# Walk the input folder (os.walk also descends into sub-folders) and print the
# text Tesseract extracts from every file found.
for root, _dirs, file_names in os.walk(path_to_input):
    # Iterate over each file name in the folder currently being visited
    for file_name in file_names:
        # Join with `root`, not `path_to_input`: files yielded from nested
        # directories only resolve relative to the directory being walked.
        image_path = os.path.join(root, file_name)

        # Open image with PIL; the context manager closes the underlying file
        # handle as soon as OCR is done instead of leaking one per image.
        with Image.open(image_path) as img:
            # Extract text from image
            text = pytesseract.image_to_string(img)

        print(text)
|
Loading…
Reference in New Issue