Finalize Script + Add export to mount fn

main
dev_alex 1 year ago
parent 618f33094a
commit e6c7549bff

@ -5,13 +5,14 @@ from configparser import ConfigParser
import shutil import shutil
def scan_document_folder(): def scan_document_folder() -> None:
"""Scan Input Odner für OCR Analyse und Verarbeitung der Bilder
"""
# Read Config, Setup # Read Config, Setup
ocr_config = ConfigParser() ocr_config = ConfigParser()
ocr_config.read("ocr_config.ini") ocr_config.read("ocr_config.ini")
ocr_settings = ocr_config['ocr'] ocr_settings = ocr_config['ocr']
path_to_tesseract = ocr_settings["path_to_tesseract"] path_to_tesseract = ocr_settings["path_to_tesseract"]
# TODO Checken warum hier keine Umlaute erkannt werden -> Sprachmodell wechseln /config
pytesseract.tesseract_cmd = path_to_tesseract pytesseract.tesseract_cmd = path_to_tesseract
#Get the file names in the directory #Get the file names in the directory
@ -35,9 +36,9 @@ def scan_document_folder():
original_file = os.path.join(os.path.dirname(os.path.abspath(__file__)) , ocr_settings["path_to_output"] , file,"") original_file = os.path.join(os.path.dirname(os.path.abspath(__file__)) , ocr_settings["path_to_output"] , file,"")
shutil.copy2(source_path,original_file) shutil.copy2(source_path,original_file)
os.remove(source_path)
extracted_file = os.path.join(os.path.dirname(os.path.abspath(__file__)) , ocr_settings["path_to_output"] , file, file + ".txt") extracted_file = os.path.join(os.path.dirname(os.path.abspath(__file__)) , ocr_settings["path_to_output"] , file, file + ".txt")
#print(text)
with open(extracted_file, "w") as text_file: with open(extracted_file, "w") as text_file:
text_file.write(text) text_file.write(text)
@ -45,5 +46,29 @@ def scan_document_folder():
img.save(pdf_file, "PDF" ,resolution=100.0, save_all=True) img.save(pdf_file, "PDF" ,resolution=100.0, save_all=True)
def export_to_mount(symlinks=False, ignore=None) -> None:
    """Export the OCR output folder to an external folder (e.g. a NAS mount).

    Reads ``ocr_config.ini`` (a relative path, so it is resolved against the
    current working directory) and copies every entry found in the
    ``path_to_output`` folder (joined onto the directory of this file) into
    the ``path_to_mount`` folder. Sub-folders are copied recursively,
    plain files are copied with ``shutil.copy2``.

    Args:
        symlinks (bool, optional): Forwarded to ``shutil.copytree``; when
            true, symbolic links are copied as links instead of copying
            the files they point to. Defaults to False.
        ignore (callable, optional): Forwarded to ``shutil.copytree``; a
            callable such as the result of ``shutil.ignore_patterns(...)``
            that selects the names to skip. Defaults to None.

    Raises:
        KeyError: If the ``ocr`` section or one of the path options is
            missing (``ConfigParser.read`` silently skips a config file it
            cannot find, so a missing file surfaces here as well).
        OSError: If the output folder cannot be listed or a copy fails.
    """
    # Read Config, Setup
    ocr_config = ConfigParser()
    ocr_config.read("ocr_config.ini")
    ocr_settings = ocr_config['ocr']

    script_dir = os.path.dirname(os.path.abspath(__file__))
    src = os.path.join(script_dir, ocr_settings["path_to_output"])
    mount_path = ocr_settings["path_to_mount"]

    for item in os.listdir(src):
        source_item = os.path.join(src, item)
        target_item = os.path.join(mount_path, item)
        if os.path.isdir(source_item):
            # The output folder keeps growing between runs, so folders from
            # an earlier export already exist on the mount. Without
            # dirs_exist_ok=True (Python 3.8+) copytree raises
            # FileExistsError on every export after the first one.
            shutil.copytree(
                source_item,
                target_item,
                symlinks=symlinks,
                ignore=ignore,
                dirs_exist_ok=True,
            )
        else:
            shutil.copy2(source_item, target_item)
if __name__ == "__main__": if __name__ == "__main__":
scan_document_folder() scan_document_folder()
export_to_mount()
Loading…
Cancel
Save