You can create your own script to label images. Here is some sample code that does this; you can customize it as needed.
import os
import sys

# Lower-case file extensions (including the leading dot) treated as images.
# You can add more formats here.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def isImage(filepath) -> bool:
    """Return True if *filepath* has one of the known image extensions.

    The comparison is case-insensitive and requires the dot, so ``a.JPG``
    matches while a file merely ending in the letters ``jpg`` does not.
    Only the name is inspected; the file is never opened.
    """
    return filepath.lower().endswith(_IMAGE_EXTENSIONS)


def getPaths(imgdir, condition=lambda x: True):
    """List the entries of *imgdir* as full paths, filtered by *condition*.

    :param imgdir: path to the folder to scan (not recursive)
    :param condition: predicate called with each full path; only paths for
        which it returns a truthy value are kept (default keeps everything)
    :return: list of matching paths, sorted by file name
    """
    # os.listdir returns names in arbitrary order; sort so that the labeling
    # order is the same on every run.
    candidates = (os.path.join(imgdir, name) for name in sorted(os.listdir(imgdir)))
    return [path for path in candidates if condition(path)]


def labelingProcess(imgdir):
    """Interactively label every not-yet-labeled image in *imgdir*.

    Each image is shown in an OpenCV window and the annotation is read from
    the terminal. A non-empty answer is written to ``<image stem>.gt.txt``
    next to the image; an empty answer skips the image. Images that already
    have a ``.gt.txt`` file are not shown again. Ctrl+C (or end of input)
    stops the session cleanly.

    :param imgdir: path to the folder that contains the images
    """
    # Imported here rather than at module level so that isImage/getPaths stay
    # importable on machines without OpenCV installed.
    import cv2

    print("Welcome to the labeling tool")
    print("if you want to stop labeling just close the program or press ctrl+C")

    # Check for images before creating the window so nothing is left open
    # when there is no work to do.
    pathsToImages = getPaths(imgdir, isImage)
    if not pathsToImages:
        print("couldn't find any images")
        return

    WIDTH = 640
    HEIGHT = 480
    WINDOWNAME = "frame"
    cv2.namedWindow(WINDOWNAME, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(WINDOWNAME, WIDTH, HEIGHT)
    cv2.moveWindow(WINDOWNAME, 10, 10)

    try:
        for pathtoimage in pathsToImages:
            # The label file has the same name as the image with the
            # extension replaced by .gt.txt. splitext only removes the final
            # extension, so dots inside the name (scan.001.jpg) are kept.
            stem = os.path.splitext(os.path.basename(pathtoimage))[0]
            labelPath = os.path.join(imgdir, stem + '.gt.txt')

            # skip labeled images
            if os.path.exists(labelPath):
                continue

            image = cv2.imread(pathtoimage)
            if image is None:
                print("couldn't open the image")
                continue

            # Resize to a fixed height, keeping the aspect ratio (only for
            # visualization). Clamp the width to 1 px so that extremely tall
            # images do not produce a zero-width target.
            h, w = image.shape[:2]
            hnew = HEIGHT
            wnew = max(1, int(w * hnew / h))
            image = cv2.resize(image, (wnew, hnew))

            cv2.imshow(WINDOWNAME, image)
            # waitKey gives the GUI event loop a chance to draw the image
            # before we block on terminal input.
            cv2.waitKey(1)

            print("enter what is written on the image or press enter to skip")
            label = input()
            if not label:
                continue

            # Write as UTF-8 explicitly so non-ASCII annotations do not
            # depend on the platform's default encoding.
            with open(labelPath, 'w', encoding='utf-8') as labelfile:
                labelfile.write(label)
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C is the documented way to stop; exit without a traceback.
        print("\nlabeling stopped")
    finally:
        cv2.destroyAllWindows()


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: python3 labelingtool.py <path to your folder with images>")
    labelingProcess(sys.argv[1])
The only requirement for this particular script is OpenCV (the `cv2` Python package).
usage:
python3 labelingtool.py <path to your folder with images>
It will read the images from your folder and create a corresponding .gt.txt file with the annotation for each one. During the labeling process you type each annotation in the terminal.
To then train your own model you can use, for example, this repo: https://github.com/thongvm/ocrd-train
It needs the dataset to consist of pairs of an image and its corresponding annotation file:
image1.tif image1.gt.txt image2.tif image2.gt.txt ...
To convert your images to .tif you can use, for example, mogrify (part of ImageMagick).
This command will convert all jpg files in the current directory to tif files:
mogrify -format tif *.jpg