analyze HTR system

CWBluejackets · Jan 3, 2019 · 3968a52 · 3968a52
1 parent 4119ac8
commit 3968a52
Show file tree

Hide file tree

Showing 6 changed files with 138 additions and 2 deletions.
diff --git a/data/analyze.png b/data/analyze.png
diff --git a/data/pixelRelevance.npy b/data/pixelRelevance.npy
diff --git a/data/translationInvariance.npy b/data/translationInvariance.npy
diff --git a/doc/analyze.png b/doc/analyze.png
diff --git a/src/Model.py b/src/Model.py
@@ -211,7 +211,7 @@ def trainBatch(self, batch):
 		return lossVal
 
 
-	def inferBatch(self, batch, calcProbability=False):
+	def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False):
 		"feed a batch into the NN to recngnize the texts"
 
 		# decode, optionally save RNN output
@@ -225,7 +225,7 @@ def inferBatch(self, batch, calcProbability=False):
 		# feed RNN output and recognized text into CTC loss to compute labeling probability
 		probs = None
 		if calcProbability:
-			sparse = self.toSparse(texts)
+			sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts)
 			ctcInput = evalRes[1]
 			evalList = self.lossPerElement
 			feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements}

diff --git a/src/analyze.py b/src/analyze.py
@@ -0,0 +1,136 @@
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import math
+import copy
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+from DataLoader import Batch
+from Model import Model, DecoderType
+from SamplePreprocessor import preprocess
+
+
+class FilePaths:
+	"""filenames and paths to data
+
+	All paths are relative, so they are resolved against the current working directory.
+	"""
+	# text file whose content is handed to the Model constructor
+	fnCharList = '../model/charList.txt'
+	# image that is analyzed
+	fnAnalyze = '../data/analyze.png'
+	# result file of analyzePixelRelevance()
+	fnPixelRelevance = '../data/pixelRelevance.npy'
+	# result file of analyzeTranslationInvariance()
+	fnTranslationInvariance = '../data/translationInvariance.npy'
+
+
+def odds(val):
+	"return the odds val / (1 - val) that belong to the probability val"
+	return val / (1 - val)
+
+
+def weightOfEvidence(origProb, margProb, eps=1e-9):
+	"""return log2(odds(origProb)) - log2(odds(margProb))
+
+	origProb: probability computed for the original input
+	margProb: probability computed after marginalizing
+	eps: both probabilities are clamped to the range [eps, 1-eps] before they are used
+
+	The result is positive if origProb is larger than margProb and negative if it is smaller.
+	"""
+	# without clamping, a probability of exactly 0 ends in log2(0) and a probability
+	# of exactly 1 ends in a division by zero inside odds()
+	origProb = min(max(float(origProb), eps), 1 - eps)
+	margProb = min(max(float(margProb), eps), 1 - eps)
+	return math.log2(odds(origProb)) - math.log2(odds(margProb))
+
+
+def analyzePixelRelevance():
+	"""simplified implementation of paper: Zintgraf et al - Visualizing Deep Neural Network Decisions: Prediction Difference Analysis
+
+	Each pixel of the image FilePaths.fnAnalyze is set to a handful of gray values, the
+	probability of the ground-truth text is computed for every variant and averaged, and the
+	weight of evidence between the original and the averaged probability is stored for that
+	pixel. The resulting array has the shape of the image and is saved to
+	FilePaths.fnPixelRelevance.
+
+	Raises IOError if the image can not be read and ValueError if its number of rows
+	differs from Model.imgSize[1].
+	"""
+
+	# setup model (the with-statement closes the file right after reading it)
+	with open(FilePaths.fnCharList) as f:
+		charList = f.read()
+	model = Model(charList, DecoderType.BestPath, mustRestore=True)
+
+	# read image and specify ground-truth text
+	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	if img is None:
+		# cv2.imread does not raise on failure, it returns None
+		raise IOError('Could not read image: ' + FilePaths.fnAnalyze)
+
+	# the shape of a grayscale image is (rows, cols)
+	(numRows, numCols) = img.shape
+	if Model.imgSize[1] != numRows:
+		raise ValueError('Image must have %d rows, but has %d' % (Model.imgSize[1], numRows))
+	gt = 'are'
+
+	# compute probability of gt text in original image
+	batch = Batch([gt], [preprocess(img, Model.imgSize)])
+	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+	origProb = probs[0]
+
+	# subset of possible grayvalues which is tried for each pixel
+	grayValues = [0, 63, 127, 191, 255]
+
+	# iterate over all pixels in image
+	pixelRelevance = np.zeros(img.shape, np.float32)
+	for row in range(numRows):
+		for col in range(numCols):
+
+			# one modified copy of the image per grayvalue of pixel (row, col)
+			imgsMarginalized = []
+			for g in grayValues:
+				imgChanged = img.copy()
+				imgChanged[row, col] = g
+				imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))
+
+			# put them all into one batch
+			batch = Batch([gt]*len(imgsMarginalized), imgsMarginalized)
+
+			# compute probabilities
+			(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+
+			# marginalize over pixel value (assume uniform distribution)
+			margProb = sum(probs)/len(probs)
+
+			pixelRelevance[row, col] = weightOfEvidence(origProb, margProb)
+
+			print(row, col, pixelRelevance[row, col], origProb, margProb)
+
+	np.save(FilePaths.fnPixelRelevance, pixelRelevance)
+
+
+
+def analyzeTranslationInvariance():
+	"""compute the probability of the ground-truth text for shifted versions of the image
+
+	The image FilePaths.fnAnalyze is pasted at every possible column offset into a canvas
+	filled with the value 255 and of shape (Model.imgSize[1], Model.imgSize[0]). The
+	probabilities computed for all offsets are saved to FilePaths.fnTranslationInvariance.
+
+	Raises IOError if the image can not be read and ValueError if the image does not fit
+	into the canvas.
+	"""
+
+	# setup model (the with-statement closes the file right after reading it)
+	with open(FilePaths.fnCharList) as f:
+		charList = f.read()
+	model = Model(charList, DecoderType.BestPath, mustRestore=True)
+
+	# read image and specify ground-truth text
+	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	if img is None:
+		# cv2.imread does not raise on failure, it returns None
+		raise IOError('Could not read image: ' + FilePaths.fnAnalyze)
+
+	# the shape of a grayscale image is (rows, cols)
+	(numRows, numCols) = img.shape
+	if Model.imgSize[1] != numRows:
+		raise ValueError('Image must have %d rows, but has %d' % (Model.imgSize[1], numRows))
+	if numCols > Model.imgSize[0]:
+		# otherwise there is no valid offset and the batch would be empty
+		raise ValueError('Image must have at most %d columns, but has %d' % (Model.imgSize[0], numCols))
+	gt = 'are'
+
+	# one canvas per column offset dx
+	imgList = []
+	for dx in range(Model.imgSize[0]-numCols+1):
+		targetImg = np.ones((Model.imgSize[1], Model.imgSize[0])) * 255
+		targetImg[:, dx:numCols+dx] = img
+		imgList.append(preprocess(targetImg, Model.imgSize))
+
+	# put images and gt texts into batch
+	batch = Batch([gt]*len(imgList), imgList)
+
+	# compute probabilities
+	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+	np.save(FilePaths.fnTranslationInvariance, probs)
+
+
+def showResults():
+	"plot the stored results of both analyses, each in its own figure"
+
+	# figure 1: relevance map with its colorbar ...
+	plt.figure('Pixel relevance')
+	relevanceMap = np.load(FilePaths.fnPixelRelevance)
+	plt.imshow(relevanceMap, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
+	plt.colorbar()
+
+	# ... and the analyzed image drawn semi-transparently on top of it
+	overlay = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	plt.imshow(overlay, cmap=plt.cm.gray, alpha=0.4)
+
+	# figure 2: text probability per translation
+	plt.figure('Translation invariance')
+	textProbs = np.load(FilePaths.fnTranslationInvariance)
+	plt.plot(textProbs, 'o-')
+	plt.xlabel('horizontal translation')
+	plt.ylabel('text probability')
+
+	# open both windows at once
+	plt.show()
+
+
+if __name__ == '__main__':
+	# without an argument the stored results are shown,
+	# otherwise the first argument selects the analysis to run
+	if len(sys.argv) > 1:
+		option = sys.argv[1]
+		if option == '--relevance':
+			print('Analyze pixel relevance')
+			analyzePixelRelevance()
+		elif option == '--invariance':
+			print('Analyze translation invariance')
+			analyzeTranslationInvariance()
+		else:
+			# reject an unknown option instead of silently doing nothing
+			print('Unknown option: ' + option)
+			print('Usage: python analyze.py [--relevance | --invariance]')
+			sys.exit(1)
+	else:
+		print('Show results')
+		showResults()
+