From c165c8787ec0d9cf2d8cddc2b6e076b007c6c653 Mon Sep 17 00:00:00 2001
From: dev_alex <dev_alex@192.168.178.37>
Date: Tue, 25 Jul 2023 22:04:39 +0200
Subject: [PATCH] Add basic ocr tesseract example

---
 ocr_scan.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 ocr_scan.py

diff --git a/ocr_scan.py b/ocr_scan.py
new file mode 100644
index 0000000..0aac941
--- /dev/null
+++ b/ocr_scan.py
@@ -0,0 +1,27 @@
+"""Run Tesseract OCR on every file found under input/ and print the text."""
+
+from PIL import Image
+from pytesseract import pytesseract
+import os
+
+# Path to the Tesseract executable (adjust to the local OS/installation).
+path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+
+# Folder that is scanned, including its subfolders, for input images.
+path_to_input = r'input/'
+# Output folder (not referenced anywhere else in this script).
+path_to_output = r'output/'
+
+# Point pytesseract at the Tesseract executable.
+pytesseract.tesseract_cmd = path_to_tesseract
+
+# os.walk descends into subfolders, so each file path has to be built from
+# the folder currently being visited (root) rather than from path_to_input.
+for root, _dirs, file_names in os.walk(path_to_input):
+    for file_name in file_names:
+        # The with-block closes the image file once OCR has finished.
+        with Image.open(os.path.join(root, file_name)) as img:
+            # Extract the text from the image.
+            text = pytesseract.image_to_string(img)
+
+        print(text)
\ No newline at end of file