Skip to content

Commit

Permalink
analyze HTR system
Browse files Browse the repository at this point in the history
  • Loading branch information
githubharald committed Jan 3, 2019
1 parent 4119ac8 commit 3968a52
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 2 deletions.
Binary file added data/analyze.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/pixelRelevance.npy
Binary file not shown.
Binary file added data/translationInvariance.npy
Binary file not shown.
Binary file added doc/analyze.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions src/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def trainBatch(self, batch):
return lossVal


def inferBatch(self, batch, calcProbability=False):
def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False):
"feed a batch into the NN to recngnize the texts"

# decode, optionally save RNN output
Expand All @@ -225,7 +225,7 @@ def inferBatch(self, batch, calcProbability=False):
# feed RNN output and recognized text into CTC loss to compute labeling probability
probs = None
if calcProbability:
sparse = self.toSparse(texts)
sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts)
ctcInput = evalRes[1]
evalList = self.lossPerElement
feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements}
Expand Down
136 changes: 136 additions & 0 deletions src/analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from __future__ import division
from __future__ import print_function

import sys
import math
import copy
import numpy as np
import cv2
import matplotlib.pyplot as plt
from DataLoader import Batch
from Model import Model, DecoderType
from SamplePreprocessor import preprocess


class FilePaths:
    """Filenames and paths to data (all relative to the working directory)."""

    # character list that is read and handed to the Model constructor
    fnCharList = '../model/charList.txt'

    # image that gets analyzed
    fnAnalyze = '../data/analyze.png'

    # saved results of the two analyses
    fnPixelRelevance = '../data/pixelRelevance.npy'
    fnTranslationInvariance = '../data/translationInvariance.npy'


def odds(val):
    """Return the odds val / (1 - val) of a probability.

    The result is undefined for val == 1: a Python float raises
    ZeroDivisionError there.
    """
    return val / (1 - val)


def weightOfEvidence(origProb, margProb, eps=1e-12):
    """Return the weight of evidence log2(odds(origProb)) - log2(odds(margProb)).

    A positive value means origProb is larger than margProb, a negative
    value means it is smaller.

    Both probabilities are clamped to [eps, 1 - eps] before the log-odds are
    taken. Without the clamp a probability of exactly 0 leads to log2(0)
    (math domain error) and a probability of exactly 1 to a division by zero.
    Values strictly inside that range are passed through unchanged.

    Args:
        origProb: probability for the original input.
        margProb: probability after marginalizing the feature of interest.
        eps: smallest distance a probability may have from 0 and from 1.
    """
    origProb = min(max(origProb, eps), 1 - eps)
    margProb = min(max(margProb, eps), 1 - eps)
    return math.log2(odds(origProb)) - math.log2(odds(margProb))


def analyzePixelRelevance():
    """Compute a per-pixel relevance map for the analyzed image and save it.

    Simplified implementation of paper: Zintgraf et al - Visualizing Deep
    Neural Network Decisions: Prediction Difference Analysis.

    For each pixel, the pixel is set to a few different gray values, the
    probability of the ground-truth text is averaged over these variants
    (uniform distribution assumed) and compared with the probability for the
    unmodified image via weightOfEvidence(). The resulting array has the
    shape of the image and is written to FilePaths.fnPixelRelevance.

    Raises:
        IOError: if the image FilePaths.fnAnalyze cannot be read.
    """

    # setup model (the char list file is closed again right after reading)
    with open(FilePaths.fnCharList) as f:
        charList = f.read()
    model = Model(charList, DecoderType.BestPath, mustRestore=True)

    # read image and specify ground-truth text
    img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError('Could not read image: ' + FilePaths.fnAnalyze)
    (numRows, numCols) = img.shape  # first and second array axis
    assert Model.imgSize[1] == numRows
    gt = 'are'

    # compute probability of gt text in original image
    batch = Batch([gt], [preprocess(img, Model.imgSize)])
    (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
    origProb = probs[0]

    # subset of possible gray values tried for every pixel
    grayValues = [0, 63, 127, 191, 255]

    # iterate over all pixels in image
    pixelRelevance = np.zeros(img.shape, np.float32)
    for row in range(numRows):
        for col in range(numCols):

            # one modified copy of the image per gray value of pixel (row, col)
            imgsMarginalized = []
            for g in grayValues:
                imgChanged = img.copy()
                imgChanged[row, col] = g
                imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))

            # put them all into one batch
            batch = Batch([gt] * len(imgsMarginalized), imgsMarginalized)

            # compute probabilities
            (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)

            # marginalize over pixel value (assume uniform distribution)
            margProb = sum(probs) / len(probs)

            pixelRelevance[row, col] = weightOfEvidence(origProb, margProb)

            # progress output: one line per pixel
            print(row, col, pixelRelevance[row, col], origProb, margProb)

    np.save(FilePaths.fnPixelRelevance, pixelRelevance)



def analyzeTranslationInvariance():
    """Compute the ground-truth text probability for shifted copies of the image.

    The analyzed image is pasted into a canvas of the model input size at
    every offset along the second array axis at which it still fits. All
    shifted images are fed to the model as one batch and the resulting
    probabilities are written to FilePaths.fnTranslationInvariance.

    Raises:
        IOError: if the image FilePaths.fnAnalyze cannot be read.
    """

    # setup model (the char list file is closed again right after reading)
    with open(FilePaths.fnCharList) as f:
        charList = f.read()
    model = Model(charList, DecoderType.BestPath, mustRestore=True)

    # read image and specify ground-truth text
    img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError('Could not read image: ' + FilePaths.fnAnalyze)
    (numRows, numCols) = img.shape  # first and second array axis
    assert Model.imgSize[1] == numRows
    gt = 'are'

    # create one shifted copy of the image per possible offset
    imgList = []
    for dy in range(Model.imgSize[0] - numCols + 1):
        # canvas filled with 255, image inserted at column offset dy
        targetImg = np.ones((Model.imgSize[1], Model.imgSize[0])) * 255
        targetImg[:, dy:numCols + dy] = img
        imgList.append(preprocess(targetImg, Model.imgSize))

    # put images and gt texts into batch
    batch = Batch([gt] * len(imgList), imgList)

    # compute probabilities
    (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
    np.save(FilePaths.fnTranslationInvariance, probs)


def showResults():
    """Plot the saved results of both analyses, each in its own figure."""

    # figure 1: relevance heatmap with the analyzed image blended on top
    relevanceMap = np.load(FilePaths.fnPixelRelevance)
    heatmapStyle = dict(cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
    plt.figure('Pixel relevance')
    plt.imshow(relevanceMap, **heatmapStyle)
    plt.colorbar()

    analyzedImg = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
    plt.imshow(analyzedImg, cmap=plt.cm.gray, alpha=0.4)

    # figure 2: text probability for each translated copy of the image
    translationProbs = np.load(FilePaths.fnTranslationInvariance)
    plt.figure('Translation invariance')
    plt.plot(translationProbs, 'o-')
    plt.xlabel('horizontal translation')
    plt.ylabel('text probability')

    # display both figures
    plt.show()


if __name__ == '__main__':
    # Command line interface:
    #   (no argument)   show the previously saved results
    #   --relevance     run the pixel relevance analysis
    #   --invariance    run the translation invariance analysis
    # Only the first argument is looked at.
    args = sys.argv[1:]
    if not args:
        print('Show results')
        showResults()
    elif args[0] == '--relevance':
        print('Analyze pixel relevance')
        analyzePixelRelevance()
    elif args[0] == '--invariance':
        print('Analyze translation invariance')
        analyzeTranslationInvariance()
    else:
        # an unrecognised option used to do nothing without any feedback
        print('Unknown option: ' + args[0] + ' (expected --relevance or --invariance)', file=sys.stderr)

0 comments on commit 3968a52

Please sign in to comment.