From c165c8787ec0d9cf2d8cddc2b6e076b007c6c653 Mon Sep 17 00:00:00 2001
From: dev_alex
Date: Tue, 25 Jul 2023 22:04:39 +0200
Subject: [PATCH] Add basis ocr tesseract example

---
 ocr_scan.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 ocr_scan.py

diff --git a/ocr_scan.py b/ocr_scan.py
new file mode 100644
--- /dev/null
+++ b/ocr_scan.py
@@ -0,0 +1,36 @@
+"""Print the text Tesseract OCR finds in every image under the input folder."""
+
+import os
+import sys
+
+from PIL import Image
+from pytesseract import pytesseract
+
+# Default Windows install location of tesseract.exe. On other systems the
+# executable is expected to be reachable through PATH instead.
+path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+
+# Folder scanned for images (sub-folders included) and folder for results.
+path_to_input = r'input/'
+path_to_output = r'output/'  # not used yet: recognised text is only printed
+
+# Override pytesseract's default command ("tesseract" on PATH) only when the
+# explicit executable exists, so the script also runs outside Windows.
+if os.path.isfile(path_to_tesseract):
+    pytesseract.tesseract_cmd = path_to_tesseract
+
+# os.walk descends into sub-folders, so each name is joined with the folder
+# it was found in (root) rather than with the top-level input folder.
+for root, _dirs, file_names in os.walk(path_to_input):
+    for file_name in file_names:
+        image_path = os.path.join(root, file_name)
+        try:
+            img = Image.open(image_path)
+        except OSError as err:
+            # Not an image or unreadable: report it and keep going.
+            print(f'Skipping {image_path}: {err}', file=sys.stderr)
+            continue
+
+        # The context manager closes the underlying file once OCR is done.
+        with img:
+            print(pytesseract.image_to_string(img))