From f73a033045c68c3fa03f2d3bdb1fbe5171cb783c Mon Sep 17 00:00:00 2001
From: Andrei Pradan <andrei.pradan@3pillarglobal.com>
Date: Wed, 27 Jul 2016 17:29:38 +0300
Subject: [PATCH] Python3 compatibility tweaks

---
 docs/conf.py                             |   16 +-
 pycaption/base.py                        |   58 +-
 pycaption/dfxp/__init__.py               |    2 -
 pycaption/dfxp/base.py                   |  312 ++--
 pycaption/dfxp/extras.py                 |  117 +-
 pycaption/geometry.py                    |  135 +-
 pycaption/sami.py                        |  292 ++--
 pycaption/scc/__init__.py                |  105 +-
 pycaption/scc/constants.py               | 1780 +++++++++++-----------
 pycaption/scc/specialized_collections.py |   41 +-
 pycaption/scc/state_machines.py          |    2 +-
 pycaption/srt.py                         |   50 +-
 pycaption/transcript.py                  |   10 +-
 pycaption/webvtt.py                      |  157 +-
 tests/mixins.py                          |   32 +-
 tests/samples/dfxp.py                    |   11 +-
 tests/samples/sami.py                    |   36 +-
 tests/samples/scc.py                     |   26 +-
 tests/samples/srt.py                     |   12 +-
 tests/samples/webvtt.py                  |   32 +-
 tests/test_dfxp.py                       |   35 +-
 tests/test_dfxp_conversion.py            |   46 +-
 tests/test_sami.py                       |   30 +-
 tests/test_sami_conversion.py            |    6 +-
 tests/test_scc.py                        |   62 +-
 tests/test_srt.py                        |   10 +-
 tests/test_webvtt.py                     |  102 +-
 tox.ini                                  |    2 +-
 28 files changed, 1750 insertions(+), 1769 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 4c3ba16d..842817ef 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -43,8 +43,8 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'pycaption'
-copyright = u'2012, PBS.org (available under the Apache License, Version 2.0)'
+project = 'pycaption'
+copyright = '2012, PBS.org (available under the Apache License, Version 2.0)'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -199,8 +199,8 @@
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-  ('index', 'pycaption.tex', u'pycaption Documentation',
-   u'PBS', 'manual'),
+  ('index', 'pycaption.tex', 'pycaption Documentation',
+   'PBS', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -229,8 +229,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'pycaption', u'pycaption Documentation',
-     [u'PBS'], 1)
+    ('index', 'pycaption', 'pycaption Documentation',
+     ['PBS'], 1)
 ]
 
 # If true, show URL addresses after external links.
@@ -243,8 +243,8 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-  ('index', 'pycaption', u'pycaption Documentation',
-   u'PBS', 'pycaption', 'One line description of project.',
+  ('index', 'pycaption', 'pycaption Documentation',
+   'PBS', 'pycaption', 'One line description of project.',
    'Miscellaneous'),
 ]
 
diff --git a/pycaption/base.py b/pycaption/base.py
index abae4804..b5e30d2a 100644
--- a/pycaption/base.py
+++ b/pycaption/base.py
@@ -4,14 +4,14 @@
 
 from .exceptions import CaptionReadError, CaptionReadTimingError
 
-DEFAULT_LANGUAGE_CODE = u'en-US'
+DEFAULT_LANGUAGE_CODE = 'en-US'
 
 
 def force_byte_string(content):
     try:
-        return content.encode(u'UTF-8')
+        return content.encode('UTF-8')
     except UnicodeEncodeError:
-        raise RuntimeError(u'Invalid content encoding')
+        raise RuntimeError('Invalid content encoding')
     except UnicodeDecodeError:
         return content
 
@@ -64,10 +64,11 @@ def __init__(self, relativize=True, video_width=None, video_height=None,
             converted were made. This is necessary for relativization.
         :param video_height: The height of the video for which the captions
             being converted were made. This is necessary for relativization.
-        :param fit_to_screen: If extent is not set or if origin + extent > 100%,
-            (re)calculate it based on origin. It is a pycaption fix for caption
-            files that are technically valid but contains inconsistent settings
-            that may cause long captions to be cut out of the screen.
+        :param fit_to_screen: If extent is not set or
+            if origin + extent > 100%, (re)calculate it based on origin.
+            It is a pycaption fix for caption files that are technically valid
+            but contain inconsistent settings that may cause long captions to
+            be cut out of the screen.
         """
         self.relativize = relativize
         self.video_width = video_width
@@ -130,11 +131,11 @@ def __repr__(self):
         if t == CaptionNode.TEXT:
             return repr(self.content)
         elif t == CaptionNode.BREAK:
-            return repr(u'BREAK')
+            return repr('BREAK')
         elif t == CaptionNode.STYLE:
-            return repr(u'STYLE: %s %s' % (self.start, self.content))
+            return repr('STYLE: %s %s' % (self.start, self.content))
         else:
-            raise RuntimeError(u'Unknown node type: ' + unicode(t))
+            raise RuntimeError('Unknown node type: ' + str(t))
 
     @staticmethod
     def create_text(text, layout_info=None):
@@ -175,13 +176,13 @@ def __init__(self, start, end, nodes, style={}, layout_info=None):
         :type layout_info: Layout
         """
         if not isinstance(start, Number):
-            raise CaptionReadTimingError(u"Captions must be initialized with a"
-                                         u" valid start time")
+            raise CaptionReadTimingError("Captions must be initialized with a"
+                                         " valid start time")
         if not isinstance(end, Number):
-            raise CaptionReadTimingError(u"Captions must be initialized with a"
-                                         u" valid end time")
+            raise CaptionReadTimingError("Captions must be initialized with a"
+                                         " valid end time")
         if not nodes:
-            raise CaptionReadError(u"Node list cannot be empty")
+            raise CaptionReadError("Node list cannot be empty")
         self.start = start
         self.end = end
         self.nodes = nodes
@@ -208,7 +209,7 @@ def format_end(self, msec_separator=None):
 
     def __repr__(self):
         return repr(
-            u'{start} --> {end}\n{text}'.format(
+            '{start} --> {end}\n{text}'.format(
                 start=self.format_start(),
                 end=self.format_end(),
                 text=self.get_text()
@@ -223,29 +224,29 @@ def get_text_for_node(node):
             if node.type_ == CaptionNode.TEXT:
                 return node.content
             if node.type_ == CaptionNode.BREAK:
-                return u'\n'
-            return u''
+                return '\n'
+            return ''
         text_nodes = [get_text_for_node(node) for node in self.nodes]
-        return u''.join(text_nodes).strip()
+        return ''.join(text_nodes).strip()
 
     def _format_timestamp(self, value, msec_separator=None):
         datetime_value = timedelta(milliseconds=(int(value / 1000)))
 
         str_value = text_type(datetime_value)[:11]
         if not datetime_value.microseconds:
-            str_value += u'.000'
+            str_value += '.000'
 
         if msec_separator is not None:
-            str_value = str_value.replace(u".", msec_separator)
+            str_value = str_value.replace(".", msec_separator)
 
-        return u'0' + str_value
+        return '0' + str_value
 
 
 class CaptionList(list):
     """ A list of captions with a layout object attached to it """
     def __init__(self, iterable=None, layout_info=None):
         """
-        :param iterator: An iterator used to populate the caption list
+        :param iterable: An iterable used to populate the caption list
         :param Layout layout_info: A Layout object with the positioning info
         """
         self.layout_info = layout_info
@@ -258,10 +259,9 @@ def __getslice__(self, i, j):
 
     def __getitem__(self, y):
         item = list.__getitem__(self, y)
-        if isinstance(item, Caption) :
+        if isinstance(item, Caption):
             return item
-        return CaptionList(item
-            , layout_info=self.layout_info)
+        return CaptionList(item, layout_info=self.layout_info)
 
     def __add__(self, other):
         add_is_safe = (
@@ -305,7 +305,7 @@ def set_captions(self, lang, captions):
         self._captions[lang] = captions
 
     def get_languages(self):
-        return self._captions.keys()
+        return list(self._captions.keys())
 
     def get_captions(self, lang):
         return self._captions.get(lang, [])
@@ -334,7 +334,7 @@ def set_styles(self, styles):
 
     def is_empty(self):
         return all(
-            [len(captions) == 0 for captions in self._captions.values()]
+            [len(captions) == 0 for captions in list(self._captions.values())]
         )
 
     def set_layout_info(self, lang, layout_info):
@@ -364,6 +364,7 @@ def adjust_caption_timing(self, offset=0, rate_skew=1.0):
                     out_captions.append(caption)
             self.set_captions(lang, out_captions)
 
+
 # Functions
 def merge_concurrent_captions(caption_set):
     """Merge captions that have the same start and end times"""
@@ -391,6 +392,7 @@ def merge_concurrent_captions(caption_set):
             caption_set.set_captions(lang, merged_captions)
     return caption_set
 
+
 def merge(captions):
     """
     Merge list of captions into one caption. The start/end times from the first
diff --git a/pycaption/dfxp/__init__.py b/pycaption/dfxp/__init__.py
index a30993e2..0a6ea04f 100644
--- a/pycaption/dfxp/__init__.py
+++ b/pycaption/dfxp/__init__.py
@@ -1,4 +1,2 @@
-from __future__ import absolute_import
-
 from .base import *
 from .extras import SinglePositioningDFXPWriter, LegacyDFXPWriter
diff --git a/pycaption/dfxp/base.py b/pycaption/dfxp/base.py
index d75ddf1b..a8ab7cc2 100644
--- a/pycaption/dfxp/base.py
+++ b/pycaption/dfxp/base.py
@@ -1,10 +1,7 @@
 import re
-from builtins import str
 from copy import deepcopy
-
-from bs4 import BeautifulSoup, NavigableString
 from xml.sax.saxutils import escape
-import six
+from bs4 import BeautifulSoup, NavigableString
 
 from ..base import (
     BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
@@ -21,7 +18,7 @@
     'DFXP_DEFAULT_REGION_ID', 'DFXPReader', 'DFXPWriter', 'DFXP_DEFAULT_REGION'
 ]
 
-DFXP_BASE_MARKUP = u'''
+DFXP_BASE_MARKUP = '''
 <tt xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts="http://www.w3.org/ns/ttml#styling">
     <head>
@@ -33,9 +30,9 @@
 '''
 
 DFXP_DEFAULT_STYLE = {
-    u'color': u'white',
-    u'font-family': u'monospace',
-    u'font-size': u'1c',
+    'color': 'white',
+    'font-family': 'monospace',
+    'font-size': '1c',
 }
 
 DFXP_DEFAULT_REGION = Layout(
@@ -43,13 +40,12 @@
         HorizontalAlignmentEnum.CENTER, VerticalAlignmentEnum.BOTTOM)
 )
 
-DFXP_DEFAULT_STYLE_ID = u'default'
-DFXP_DEFAULT_REGION_ID = u'bottom'
+DFXP_DEFAULT_STYLE_ID = 'default'
+DFXP_DEFAULT_REGION_ID = 'bottom'
 
 
 class DFXPReader(BaseReader):
 
-
     def __init__(self, *args, **kw):
         super(DFXPReader, self).__init__(*args, **kw)
         self.read_invalid_positioning = (
@@ -57,14 +53,14 @@ def __init__(self, *args, **kw):
         self.nodes = []
 
     def detect(self, content):
-        if u'</tt>' in content.lower():
+        if '</tt>' in content.lower():
             return True
         else:
             return False
 
     def read(self, content):
-        if type(content) != six.text_type:
-            raise InvalidInputError(u'The content is not a unicode string.')
+        if type(content) != str:
+            raise InvalidInputError('The content is not a unicode string.')
 
         dfxp_document = self._get_dfxp_parser_class()(
             content, read_invalid_positioning=self.read_invalid_positioning)
@@ -73,25 +69,25 @@ def read(self, content):
         style_dict = {}
 
         # Each div represents all the captions for a single language.
-        for div in dfxp_document.find_all(u'div'):
-            lang = div.attrs.get(u'xml:lang', DEFAULT_LANGUAGE_CODE)
+        for div in dfxp_document.find_all('div'):
+            lang = div.attrs.get('xml:lang', DEFAULT_LANGUAGE_CODE)
 
             caption_dict[lang] = self._translate_div(div)
 
-        for style in dfxp_document.find_all(u'style'):
-            id_ = style.attrs.get(u'xml:id') or style.attrs.get(u'id')
+        for style in dfxp_document.find_all('style'):
+            id_ = style.attrs.get('xml:id') or style.attrs.get('id')
             if id_:
                 # Don't create document styles for those styles that are
                 # descendants of <region> tags. See link:
                 # http://www.w3.org/TR/ttaf1-dfxp/#styling-vocabulary-style
-                if u'region' not in [
+                if 'region' not in [
                         parent_.name for parent_ in style.parents]:
                     style_dict[id_] = self._translate_style(style)
 
         caption_set = CaptionSet(caption_dict, styles=style_dict)
 
         if caption_set.is_empty():
-            raise CaptionReadNoCaptions(u"empty caption file")
+            raise CaptionReadNoCaptions("empty caption file")
 
         return caption_set
 
@@ -103,7 +99,7 @@ def _get_dfxp_parser_class():
 
     def _translate_div(self, div):
         return CaptionList(
-            [self._translate_p_tag(p_tag) for p_tag in div.find_all(u'p')],
+            [self._translate_p_tag(p_tag) for p_tag in div.find_all('p')],
             div.layout_info
         )
 
@@ -117,26 +113,26 @@ def _translate_p_tag(self, p_tag):
             start, end, self.nodes, style=styles, layout_info=p_tag.layout_info)
 
     def _find_times(self, p_tag):
-        start = self._translate_time(p_tag[u'begin'])
+        start = self._translate_time(p_tag['begin'])
 
         try:
-            end = self._translate_time(p_tag[u'end'])
+            end = self._translate_time(p_tag['end'])
         except KeyError:
-            dur = self._translate_time(p_tag[u'dur'])
+            dur = self._translate_time(p_tag['dur'])
             end = start + dur
 
         return start, end
 
     def _translate_time(self, stamp):
         if stamp[-1].isdigit():
-            timesplit = stamp.split(u':')
-            if u'.' not in timesplit[2]:
-                timesplit[2] = timesplit[2] + u'.000'
-            secsplit = timesplit[2].split(u'.')
+            timesplit = stamp.split(':')
+            if '.' not in timesplit[2]:
+                timesplit[2] += '.000'
+            secsplit = timesplit[2].split('.')
             if len(timesplit) > 3:
                 secsplit.append((int(timesplit[3]) / 30) * 100)
             while len(secsplit[1]) < 3:
-                secsplit[1] += u'0'
+                secsplit[1] += '0'
             microseconds = (int(timesplit[0]) * 3600000000 +
                             int(timesplit[1]) * 60000000 +
                             int(secsplit[0]) * 1000000 +
@@ -147,16 +143,16 @@ def _translate_time(self, stamp):
             m = re.search('^([0-9.]+)([a-z]+)$', stamp)
             value = float(m.group(1))
             metric = m.group(2)
-            if metric == u"h":
+            if metric == "h":
                 microseconds = value * 60 * 60 * 1000000
-            elif metric == u"m":
+            elif metric == "m":
                 microseconds = value * 60 * 1000000
-            elif metric == u"s":
+            elif metric == "s":
                 microseconds = value * 1000000
-            elif metric == u"ms":
+            elif metric == "ms":
                 microseconds = value * 1000
             else:
-                raise InvalidInputError(u"Unsupported offset-time metric " + metric)
+                raise InvalidInputError("Unsupported offset-time metric " + metric)
 
             return int(microseconds)
 
@@ -164,7 +160,7 @@ def _translate_tag(self, tag):
         # convert text
         if isinstance(tag, NavigableString):
             # strips indentation whitespace only
-            pattern = re.compile(u"^(?:[\n\r]+\s*)?(.+)")
+            pattern = re.compile("^(?:[\n\r]+\s*)?(.+)")
             result = pattern.search(tag)
             if result:
                 # Escaping/unescaping xml entities is the responsibility of the
@@ -177,16 +173,16 @@ def _translate_tag(self, tag):
                     tag_text, layout_info=tag.layout_info)
                 self.nodes.append(node)
         # convert line breaks
-        elif tag.name == u'br':
+        elif tag.name == 'br':
             self.nodes.append(
                 CaptionNode.create_break(layout_info=tag.layout_info))
         # convert italics
-        elif tag.name == u'span':
+        elif tag.name == 'span':
             # convert span
             self._translate_span(tag)
-        elif tag.name == u'p' and not tag.contents:
+        elif tag.name == 'p' and not tag.contents:
             node = CaptionNode.create_text(
-                u'', layout_info=tag.layout_info)
+                '', layout_info=tag.layout_info)
             self.nodes.append(node)
         else:
             # recursively call function for any children elements
@@ -200,7 +196,7 @@ def _translate_span(self, tag):
         # TODO - this is an obvious very old bug. args will be a dictionary.
         # but since nobody complained, I'll leave it like that.
         # Happy investigating!
-        if args != u'':
+        if args != '':
             node = CaptionNode.create_style(
                 True, args, layout_info=tag.layout_info)
             node.start = True
@@ -235,38 +231,38 @@ def _translate_style(self, tag):
         attrs = {}
         dfxp_attrs = tag.attrs
         for arg in dfxp_attrs:
-            if arg.lower() == u"style":
+            if arg.lower() == "style":
                 # Support multiple classes per tag
-                attrs[u'classes'] = dfxp_attrs[arg].strip().split(u' ')
+                attrs['classes'] = dfxp_attrs[arg].strip().split(' ')
                 # Save old class attribute for compatibility
-                attrs[u'class'] = dfxp_attrs[arg]
-            elif arg.lower() == u"tts:fontstyle" and dfxp_attrs[arg] == u"italic":
-                attrs[u'italics'] = True
-            elif arg.lower() == u"tts:fontweight" and dfxp_attrs[arg] == u"bold":
-                attrs[u'bold'] = True
-            elif arg.lower() == u"tts:textdecoration" and u"underline" in dfxp_attrs[arg].strip().split(u" "):
-                attrs[u'underline'] = True
-            elif arg.lower() == u"tts:textalign":
-                attrs[u'text-align'] = dfxp_attrs[arg]
-            elif arg.lower() == u"tts:fontfamily":
-                attrs[u'font-family'] = dfxp_attrs[arg]
-            elif arg.lower() == u"tts:fontsize":
-                attrs[u'font-size'] = dfxp_attrs[arg]
-            elif arg.lower() == u"tts:color":
-                attrs[u'color'] = dfxp_attrs[arg]
+                attrs['class'] = dfxp_attrs[arg]
+            elif arg.lower() == "tts:fontstyle" and dfxp_attrs[arg] == "italic":
+                attrs['italics'] = True
+            elif arg.lower() == "tts:fontweight" and dfxp_attrs[arg] == "bold":
+                attrs['bold'] = True
+            elif arg.lower() == "tts:textdecoration" and "underline" in dfxp_attrs[arg].strip().split(" "):
+                attrs['underline'] = True
+            elif arg.lower() == "tts:textalign":
+                attrs['text-align'] = dfxp_attrs[arg]
+            elif arg.lower() == "tts:fontfamily":
+                attrs['font-family'] = dfxp_attrs[arg]
+            elif arg.lower() == "tts:fontsize":
+                attrs['font-size'] = dfxp_attrs[arg]
+            elif arg.lower() == "tts:color":
+                attrs['color'] = dfxp_attrs[arg]
         return attrs
 
 
 class DFXPWriter(BaseWriter):
     def __init__(self, *args, **kwargs):
         self.write_inline_positioning = kwargs.pop(
-            u'write_inline_positioning', False)
+            'write_inline_positioning', False)
         self.p_style = False
         self.open_span = False
         self.region_creator = None
         super(DFXPWriter, self).__init__(*args, **kwargs)
 
-    def write(self, caption_set, force=u''):
+    def write(self, caption_set, force=''):
         """Converts a CaptionSet into an equivalent corresponding DFXP file
 
         :type caption_set: pycaption.base.CaptionSet
@@ -274,8 +270,8 @@ def write(self, caption_set, force=u''):
 
         :rtype: unicode
         """
-        dfxp = BeautifulSoup(DFXP_BASE_MARKUP, u'lxml-xml')
-        dfxp.find(u'tt')[u'xml:lang'] = u"en"
+        dfxp = BeautifulSoup(DFXP_BASE_MARKUP, 'lxml-xml')
+        dfxp.find('tt')['xml:lang'] = "en"
 
         langs = caption_set.get_languages()
         if force in langs:
@@ -304,18 +300,18 @@ def write(self, caption_set, force=u''):
         self.region_creator = self._get_region_creator_class()(dfxp, caption_set)
         self.region_creator.create_document_regions()
 
-        body = dfxp.find(u'body')
+        body = dfxp.find('body')
 
         for lang in langs:
-            div = dfxp.new_tag(u'div')
-            div[u'xml:lang'] = six.text_type(lang)
+            div = dfxp.new_tag('div')
+            div['xml:lang'] = str(lang)
             self._assign_positioning_data(div, lang, caption_set)
 
             for caption in caption_set.get_captions(lang):
                 if caption.style:
                     caption_style = caption.style
                 else:
-                    caption_style = {u'class': DFXP_DEFAULT_STYLE_ID}
+                    caption_style = {'class': DFXP_DEFAULT_STYLE_ID}
 
                 p = self._recreate_p_tag(
                     caption, caption_style, dfxp, caption_set, lang)
@@ -349,7 +345,7 @@ def _assign_positioning_data(self, tag, lang, caption_set=None,
             lang, caption_set, caption, caption_node)
 
         if assigned_id:
-            tag[u'region'] = assigned_id
+            tag['region'] = assigned_id
 
             # Write non-standard positioning information
             if self.write_inline_positioning:
@@ -357,16 +353,16 @@ def _assign_positioning_data(self, tag, lang, caption_set=None,
 
     def _recreate_styling_tag(self, style, content, dfxp):
         # TODO - should be drastically simplified: if attributes : append
-        dfxp_style = dfxp.new_tag(u'style')
-        dfxp_style.attrs.update({u'xml:id': style})
+        dfxp_style = dfxp.new_tag('style')
+        dfxp_style.attrs.update({'xml:id': style})
 
         attributes = _recreate_style(content, dfxp)
         dfxp_style.attrs.update(attributes)
 
-        new_tag = dfxp.new_tag(u'style')
-        new_tag.attrs.update({u'xml:id': style})
+        new_tag = dfxp.new_tag('style')
+        new_tag.attrs.update({'xml:id': style})
         if dfxp_style != new_tag:
-            dfxp.find(u'styling').append(dfxp_style)
+            dfxp.find('styling').append(dfxp_style)
 
         return dfxp
 
@@ -374,25 +370,25 @@ def _recreate_p_tag(self, caption, caption_style, dfxp, caption_set=None,
                         lang=None):
         start = caption.format_start()
         end = caption.format_end()
-        p = dfxp.new_tag(u"p", begin=start, end=end)
+        p = dfxp.new_tag("p", begin=start, end=end)
         p.string = self._recreate_text(caption, dfxp, caption_set, lang)
 
-        if dfxp.find(u"style", {u"xml:id": u"p"}):
-            p[u'style'] = u'p'
+        if dfxp.find("style", {"xml:id": "p"}):
+            p['style'] = 'p'
 
         p.attrs.update(_recreate_style(caption_style, dfxp))
 
         return p
 
     def _recreate_text(self, caption, dfxp, caption_set=None, lang=None):
-        line = u''
+        line = ''
 
         for node in caption.nodes:
             if node.type_ == CaptionNode.TEXT:
                 line += self._encode(node.content)
 
             elif node.type_ == CaptionNode.BREAK:
-                line = line.rstrip() + u'<br/>\n    '
+                line = line.rstrip() + '<br/>\n    '
 
             elif node.type_ == CaptionNode.STYLE:
                 line = self._recreate_span(
@@ -408,34 +404,34 @@ def _recreate_span(self, line, node, dfxp, caption_set=None, caption=None,
         # We are left with creating tags manually, which is hard to understand
         # and harder to maintain
         if node.start:
-            styles = u''
+            styles = ''
 
             content_with_style = _recreate_style(node.content, dfxp)
-            for style, value in content_with_style.items():
-                styles += u' %s="%s"' % (style, value)
+            for style, value in list(content_with_style.items()):
+                styles += ' %s="%s"' % (style, value)
             if node.layout_info:
                 region_id, region_attribs = (
                     self.region_creator.get_positioning_info(
                         lang, caption_set, caption, node
                     ))
-                styles += u' region="{region_id}"'.format(
+                styles += ' region="{region_id}"'.format(
                     region_id=region_id)
                 if self.write_inline_positioning:
-                    styles += u' ' + u' '.join(
+                    styles += ' ' + ' '.join(
                         [
-                            u'{key}="{val}"'.format(key=k_, val=v_)
-                            for k_, v_ in region_attribs.items()
+                            '{key}="{val}"'.format(key=k_, val=v_)
+                            for k_, v_ in list(region_attribs.items())
                         ]
                     )
 
             if styles:
                 if self.open_span:
-                    line = line.rstrip() + u'</span> '
-                line += u'<span%s>' % styles
+                    line = line.rstrip() + '</span> '
+                line += '<span%s>' % styles
                 self.open_span = True
 
         elif self.open_span:
-            line = line.rstrip() + u'</span> '
+            line = line.rstrip() + '</span> '
             self.open_span = False
 
         return line
@@ -468,7 +464,7 @@ class LayoutAwareDFXPParser(BeautifulSoup):
     # to save memory
     NO_POSITIONING_INFO = None
 
-    def __init__(self, markup=u"", features=u"html.parser", builder=None,
+    def __init__(self, markup="", features="html.parser", builder=None,
                  parse_only=None, from_encoding=None,
                  read_invalid_positioning=False, **kwargs):
         """The `features` param determines the parser to be used. The parsers
@@ -477,9 +473,9 @@ def __init__(self, markup=u"", features=u"html.parser", builder=None,
         one because even though the docs say it's slower, it's very forgiving
         (it allows unescaped `<` characters, for example). It doesn't support
         the `&apos;` entity, however, since it respects the HTML4 and not HTML5
-        syntax. Since this is valid XML 1.0, as a workaround we have to manually
-        replace the every occurance of this entity in the string before using
-        the parser.
+        syntax. Since this is valid XML 1.0, as a workaround we have to
+        manually replace every occurrence of this entity in the string
+        before using the parser.
 
         The reason why we haven't used the 'xml' parser is that it destroys
         characters such as < or & (even the escaped ones).
@@ -502,14 +498,14 @@ def __init__(self, markup=u"", features=u"html.parser", builder=None,
         """
 
         # Work around for lack of '&apos;' support in html.parser
-        markup = markup.replace(u"&apos;", "'")
+        markup = markup.replace("&apos;", "'")
 
         super(LayoutAwareDFXPParser, self).__init__(
             markup, features, builder, parse_only, from_encoding, **kwargs)
 
         self.read_invalid_positioning = read_invalid_positioning
 
-        for div in self.find_all(u'div'):
+        for div in self.find_all('div'):
             self._pre_order_visit(div)
 
     def _pre_order_visit(self, element, inherit_from=None):
@@ -544,7 +540,7 @@ def _get_region_from_ancestors(element):
         region_id = None
         parent = element.parent
         while parent:
-            region_id = parent.get(u'region')
+            region_id = parent.get('region')
             if region_id:
                 break
             parent = parent.parent
@@ -564,7 +560,7 @@ def _get_region_from_descendants(element):
 
         region_id = None
         child_region_ids = {
-            child.get(u'region') for child in element.findChildren()
+            child.get('region') for child in element.findChildren()
         }
         if len(child_region_ids) > 1:
             raise LookupError
@@ -588,8 +584,8 @@ def _determine_region_id(cls, element):
         # element could be a NavigableString. Those are dumb.
         region_id = None
 
-        if hasattr(element, u'get'):
-            region_id = element.get(u'region')
+        if hasattr(element, 'get'):
+            region_id = element.get('region')
 
         if not region_id:
             region_id = cls._get_region_from_ancestors(element)
@@ -616,7 +612,7 @@ def _extract_positioning_information(self, region_id, element):
         region_tag = None
 
         if region_id is not None:
-            region_tag = self.find(u'region', {u'xml:id': region_id})
+            region_tag = self.find('region', {'xml:id': region_id})
 
         region_scraper = (
             self._get_layout_info_scraper_class()(self, region_tag))
@@ -656,13 +652,13 @@ def __init__(self, document, region=None):
         :param region: the region tag
         """
         self.region = region
-        self._styling_section = document.findChild(u'styling')
+        self._styling_section = document.findChild('styling')
         if region:
             self.region_styles = self._get_style_sources(
                 self._styling_section, region)
         else:
             self.region_styles = []
-        self.root_element = document.find(u'tt')
+        self.root_element = document.find('tt')
 
     @classmethod
     def _get_style_sources(cls, styling_section, element):
@@ -685,7 +681,7 @@ def _get_style_sources(cls, styling_section, element):
         styling
         """
         # If we're analyzing a NavigableString, just quit
-        if not hasattr(element, u'findAll'):
+        if not hasattr(element, 'findAll'):
             return ()
 
         nested_styles = []
@@ -696,19 +692,19 @@ def _get_style_sources(cls, styling_section, element):
         # if the parent is a <div> tag. Technically, this step shouldn't be
         # skipped, but it would make the reader read in O(n^2) (half an hour
         # for 1500 timed captions)
-        if element.name not in (u'div', u'body', u'tt'):
+        if element.name not in ('div', 'body', 'tt'):
             for style in element.contents:
-                if getattr(style, u'name', None) == u'style':
+                if getattr(style, 'name', None) == 'style':
                     nested_styles.extend(
                         cls._get_style_reference_chain(style, styling_section)
                     )
 
-        referenced_style_id = element.get(u'style')
+        referenced_style_id = element.get('style')
 
         referenced_styles = []
         if referenced_style_id and styling_section:
             referenced_style = styling_section.findChild(
-                u'style', {u'xml:id': referenced_style_id}
+                'style', {'xml:id': referenced_style_id}
             )
 
             referenced_styles = (
@@ -736,11 +732,11 @@ def _get_style_reference_chain(cls, style, styling_tag):
         if not styling_tag:
             return result
 
-        reference = style.get(u'style')
+        reference = style.get('style')
 
         if reference:
             referenced_styles = styling_tag.findChildren(
-                u'style', {u'xml:id': reference}
+                'style', {'xml:id': reference}
             )
 
             if len(referenced_styles) == 1:
@@ -749,8 +745,8 @@ def _get_style_reference_chain(cls, style, styling_tag):
                 )
             elif len(referenced_styles) > 1:
                 raise CaptionReadSyntaxError(
-                    u"Invalid caption file. "
-                    u"More than 1 style with 'xml:id': {id}"
+                    "Invalid caption file. "
+                    "More than 1 style with 'xml:id': {id}"
                     .format(id=reference)
                 )
 
@@ -779,36 +775,36 @@ def scrape_positioning_info(self, element=None, even_invalid=False):
         usable_elem = element if even_invalid else None
 
         origin = self._find_attribute(
-            usable_elem, u'tts:origin', Point.from_xml_attribute, [u'auto']
+            usable_elem, 'tts:origin', Point.from_xml_attribute, ['auto']
         ) or DFXP_DEFAULT_REGION.origin
 
         extent = self._find_attribute(
-            usable_elem, u'tts:extent', Stretch.from_xml_attribute, [u'auto'])
+            usable_elem, 'tts:extent', Stretch.from_xml_attribute, ['auto'])
 
         if not extent:
             extent = self._find_root_extent() or DFXP_DEFAULT_REGION.extent
 
         padding = self._find_attribute(
-            usable_elem, u'tts:padding', Padding.from_xml_attribute
+            usable_elem, 'tts:padding', Padding.from_xml_attribute
         ) or DFXP_DEFAULT_REGION.padding
 
         # tts:textAlign is a special attribute, which can not be ignored when
         # specified on the element itself (only <p> nodes matter)
         # On elements like <span> it is also read, because this was legacy
         # behavior.
-        if getattr(element, u'name', None) in (u'span', u'p'):
+        if getattr(element, 'name', None) in ('span', 'p'):
             text_align_source = element
         else:
             text_align_source = None
 
         text_align = (
-            self._find_attribute(text_align_source, u'tts:textAlign')
+            self._find_attribute(text_align_source, 'tts:textAlign')
             or _create_external_horizontal_alignment(
                 DFXP_DEFAULT_REGION.alignment.horizontal
             )
         )
         display_align = (
-            self._find_attribute(usable_elem, u'tts:displayAlign')
+            self._find_attribute(usable_elem, 'tts:displayAlign')
             or _create_external_vertical_alignment(
                 DFXP_DEFAULT_REGION.alignment.vertical
             )
@@ -903,16 +899,16 @@ def _find_root_extent(self):
         if extent is None:
             root = self.root_element
             extent = _get_object_from_attribute(
-                root, u'tts:extent', Stretch.from_xml_attribute
+                root, 'tts:extent', Stretch.from_xml_attribute
             )
 
             if extent is not None:
                 if not extent.is_measured_in(UnitEnum.PIXEL):
                     raise CaptionReadSyntaxError(
-                        u"The base <tt> element attribute 'tts:extent' should "
-                        u"only be specified in pixels. Check the docs: "
-                        u"http://www.w3.org/TR/ttaf1-dfxp/"
-                        u"#style-attribute-extent"
+                        "The base <tt> element attribute 'tts:extent' should "
+                        "only be specified in pixels. Check the docs: "
+                        "http://www.w3.org/TR/ttaf1-dfxp/"
+                        "#style-attribute-extent"
                     )
         return extent
 
@@ -997,16 +993,16 @@ def _create_unique_regions(unique_layouts, dfxp, id_factory):
         :rtype: dict
         """
         region_map = {}
-        layout_section = dfxp.find(u'layout')
+        layout_section = dfxp.find('layout')
 
         for region_spec in unique_layouts:
             if (
                     region_spec.origin or region_spec.extent or
                     region_spec.padding or region_spec.alignment):
 
-                new_region = dfxp.new_tag(u'region')
+                new_region = dfxp.new_tag('region')
                 new_id = id_factory()
-                new_region[u'xml:id'] = new_id
+                new_region['xml:id'] = new_id
 
                 region_map[region_spec] = new_id
                 region_attribs = _convert_layout_to_attributes(region_spec)
@@ -1034,12 +1030,12 @@ def create_document_regions(self):
 
         self._region_map.update(default_region_map)
 
-    def _get_new_id(self, prefix=u'r'):
+    def _get_new_id(self, prefix='r'):
         """Return new, unique ids (use an internal counter).
 
         :type prefix: unicode
         """
-        new_id = six.text_type((prefix or u'') + six.text_type(self._id_seed))
+        new_id = str((prefix or '') + str(self._id_seed))
         self._id_seed += 1
         return new_id
 
@@ -1099,37 +1095,37 @@ def get_positioning_info(
     def cleanup_regions(self):
         """Remove the unused regions from the output file
         """
-        layout_tag = self._dfxp.find(u'layout')
+        layout_tag = self._dfxp.find('layout')
         if not layout_tag:
             return
 
-        regions = layout_tag.findChildren(u'region')
+        regions = layout_tag.findChildren('region')
         if not regions:
             return
 
         for region in regions:
-            if region.attrs.get(u'xml:id') not in self._assigned_region_ids:
+            if region.attrs.get('xml:id') not in self._assigned_region_ids:
                 region.extract()
 
 
 def _recreate_style(content, dfxp):
     dfxp_style = {}
 
-    if u'class' in content:
-        if dfxp.find(u"style", {u"xml:id": content[u'class']}):
-            dfxp_style[u'style'] = content[u'class']
-    if u'text-align' in content:
-        dfxp_style[u'tts:textAlign'] = content[u'text-align']
-    if u'italics' in content:
-        dfxp_style[u'tts:fontStyle'] = u'italic'
-    if u'font-family' in content:
-        dfxp_style[u'tts:fontFamily'] = content[u'font-family']
-    if u'font-size' in content:
-        dfxp_style[u'tts:fontSize'] = content[u'font-size']
-    if u'color' in content:
-        dfxp_style[u'tts:color'] = content[u'color']
-    if u'display-align' in content:
-        dfxp_style[u'tts:displayAlign'] = content[u'display-align']
+    if 'class' in content:
+        if dfxp.find("style", {"xml:id": content['class']}):
+            dfxp_style['style'] = content['class']
+    if 'text-align' in content:
+        dfxp_style['tts:textAlign'] = content['text-align']
+    if 'italics' in content:
+        dfxp_style['tts:fontStyle'] = 'italic'
+    if 'font-family' in content:
+        dfxp_style['tts:fontFamily'] = content['font-family']
+    if 'font-size' in content:
+        dfxp_style['tts:fontSize'] = content['font-size']
+    if 'color' in content:
+        dfxp_style['tts:color'] = content['color']
+    if 'display-align' in content:
+        dfxp_style['tts:displayAlign'] = content['display-align']
 
     return dfxp_style
 
@@ -1170,15 +1166,15 @@ def _create_external_horizontal_alignment(horizontal_component):
     result = None
 
     if horizontal_component == HorizontalAlignmentEnum.LEFT:
-        result = u'left'
+        result = 'left'
     if horizontal_component == HorizontalAlignmentEnum.CENTER:
-        result = u'center'
+        result = 'center'
     if horizontal_component == HorizontalAlignmentEnum.RIGHT:
-        result = u'right'
+        result = 'right'
     if horizontal_component == HorizontalAlignmentEnum.START:
-        result = u'start'
+        result = 'start'
     if horizontal_component == HorizontalAlignmentEnum.END:
-        result = u'end'
+        result = 'end'
 
     return result
 
@@ -1193,11 +1189,11 @@ def _create_external_vertical_alignment(vertical_component):
     result = None
 
     if vertical_component == VerticalAlignmentEnum.TOP:
-        result = u'before'
+        result = 'before'
     if vertical_component == VerticalAlignmentEnum.CENTER:
-        result = u'center'
+        result = 'center'
     if vertical_component == VerticalAlignmentEnum.BOTTOM:
-        result = u'after'
+        result = 'after'
 
     return result
 
@@ -1220,12 +1216,12 @@ def _create_external_alignment(alignment):
     horizontal_alignment = _create_external_horizontal_alignment(
         alignment.horizontal)
     if horizontal_alignment:
-        result[u'tts:textAlign'] = horizontal_alignment
+        result['tts:textAlign'] = horizontal_alignment
 
     vertical_alignment = _create_external_vertical_alignment(
         alignment.vertical)
     if vertical_alignment:
-        result[u'tts:displayAlign'] = vertical_alignment
+        result['tts:displayAlign'] = vertical_alignment
 
     return result
 
@@ -1243,7 +1239,7 @@ def _get_object_from_attribute(tag, attr_name, factory,
     :param ignore_vals: iterable of attribute values to ignore
     :raise CaptionReadSyntaxError: if the attribute has some crazy value
     """
-    if not hasattr(tag, u'has_attr'):
+    if not hasattr(tag, 'has_attr'):
         return
 
     attr_value = None
@@ -1279,18 +1275,18 @@ def _convert_layout_to_attributes(layout):
     result = {}
     if not layout:
         # TODO - change this to actually use the DFXP_DEFAULT_REGION
-        result[u'tts:textAlign'] = HorizontalAlignmentEnum.CENTER
-        result[u'tts:displayAlign'] = VerticalAlignmentEnum.BOTTOM
+        result['tts:textAlign'] = HorizontalAlignmentEnum.CENTER
+        result['tts:displayAlign'] = VerticalAlignmentEnum.BOTTOM
         return result
 
     if layout.origin:
-        result[u'tts:origin'] = layout.origin.to_xml_attribute()
+        result['tts:origin'] = layout.origin.to_xml_attribute()
 
     if layout.extent:
-        result[u'tts:extent'] = layout.extent.to_xml_attribute()
+        result['tts:extent'] = layout.extent.to_xml_attribute()
 
     if layout.padding:
-        result[u'tts:padding'] = layout.padding.to_xml_attribute()
+        result['tts:padding'] = layout.padding.to_xml_attribute()
 
     if layout.alignment:
         result.update(_create_external_alignment(layout.alignment))
diff --git a/pycaption/dfxp/extras.py b/pycaption/dfxp/extras.py
index 779f1de6..48855559 100644
--- a/pycaption/dfxp/extras.py
+++ b/pycaption/dfxp/extras.py
@@ -9,7 +9,7 @@
 from xml.sax.saxutils import escape
 from bs4 import BeautifulSoup
 
-LEGACY_DFXP_BASE_MARKUP = u'''
+LEGACY_DFXP_BASE_MARKUP = '''
 <tt xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts="http://www.w3.org/ns/ttml#styling">
     <head>
@@ -21,17 +21,17 @@
 '''
 
 LEGACY_DFXP_DEFAULT_STYLE = {
-    u'color': u'white',
-    u'font-family': u'monospace',
-    u'font-size': u'1c',
+    'color': 'white',
+    'font-family': 'monospace',
+    'font-size': '1c',
 }
 
-LEGACY_DFXP_DEFAULT_STYLE_ID = u'default'
-LEGACY_DFXP_DEFAULT_REGION_ID = u'bottom'
+LEGACY_DFXP_DEFAULT_STYLE_ID = 'default'
+LEGACY_DFXP_DEFAULT_REGION_ID = 'bottom'
 
 LEGACY_DFXP_DEFAULT_REGION = {
-    u'text-align': u'center',
-    u'display-align': u'after'
+    'text-align': 'center',
+    'display-align': 'after'
 }
 
 
@@ -43,7 +43,7 @@ def __init__(self, default_positioning=DFXP_DEFAULT_REGION,
         super(SinglePositioningDFXPWriter, self).__init__(*args, **kwargs)
         self.default_positioning = default_positioning
 
-    def write(self, captions_set, force=u''):
+    def write(self, captions_set, force=''):
         """Writes a DFXP file using the positioning provided in the initializer
 
         :type captions_set: pycaption.base.CaptionSet
@@ -88,18 +88,19 @@ def _create_single_positioning_caption_set(caption_set, positioning):
 
         return caption_set
 
+
 class LegacyDFXPWriter(BaseWriter):
     """Ported the legacy DFXPWriter from 0.4.5"""
     def __init__(self, *args, **kw):
         self.p_style = False
         self.open_span = False
 
-    def write(self, caption_set, force=u''):
+    def write(self, caption_set, force=''):
         caption_set = deepcopy(caption_set)
         caption_set = merge_concurrent_captions(caption_set)
 
-        dfxp = BeautifulSoup(LEGACY_DFXP_BASE_MARKUP, u'lxml-xml')
-        dfxp.find(u'tt')[u'xml:lang'] = u"en"
+        dfxp = BeautifulSoup(LEGACY_DFXP_BASE_MARKUP, 'lxml-xml')
+        dfxp.find('tt')['xml:lang'] = "en"
 
         for style_id, style in caption_set.get_styles():
             if style != {}:
@@ -113,7 +114,7 @@ def write(self, caption_set, force=u''):
         dfxp = self._recreate_region_tag(
             LEGACY_DFXP_DEFAULT_REGION_ID, LEGACY_DFXP_DEFAULT_REGION, dfxp)
 
-        body = dfxp.find(u'body')
+        body = dfxp.find('body')
 
         if force:
             langs = [self._force_language(force, caption_set.get_languages())]
@@ -121,16 +122,16 @@ def write(self, caption_set, force=u''):
             langs = caption_set.get_languages()
 
         for lang in langs:
-            div = dfxp.new_tag(u'div')
-            div[u'xml:lang'] = u'%s' % lang
+            div = dfxp.new_tag('div')
+            div['xml:lang'] = '%s' % lang
 
             for caption in caption_set.get_captions(lang):
                 if caption.style:
                     caption_style = caption.style
-                    caption_style.update({u'region': LEGACY_DFXP_DEFAULT_REGION_ID})
+                    caption_style.update({'region': LEGACY_DFXP_DEFAULT_REGION_ID})
                 else:
-                    caption_style = {u'class': LEGACY_DFXP_DEFAULT_STYLE_ID,
-                                     u'region': LEGACY_DFXP_DEFAULT_REGION_ID}
+                    caption_style = {'class': LEGACY_DFXP_DEFAULT_STYLE_ID,
+                                     'region': LEGACY_DFXP_DEFAULT_REGION_ID}
                 p = self._recreate_p_tag(caption, caption_style, dfxp)
                 div.append(p)
 
@@ -148,54 +149,54 @@ def _force_language(self, force, langs):
         return langs[-1]
 
     def _recreate_region_tag(self, region_id, styling, dfxp):
-        dfxp_region = dfxp.new_tag(u'region')
-        dfxp_region.attrs.update({u'xml:id': region_id})
+        dfxp_region = dfxp.new_tag('region')
+        dfxp_region.attrs.update({'xml:id': region_id})
 
         attributes = self._recreate_style(styling, dfxp)
         dfxp_region.attrs.update(attributes)
 
-        new_tag = dfxp.new_tag(u'region')
-        new_tag.attrs.update({u'xml:id': region_id})
+        new_tag = dfxp.new_tag('region')
+        new_tag.attrs.update({'xml:id': region_id})
         if dfxp_region != new_tag:
-            dfxp.find(u'layout').append(dfxp_region)
+            dfxp.find('layout').append(dfxp_region)
         return dfxp
 
     def _recreate_styling_tag(self, style, content, dfxp):
-        dfxp_style = dfxp.new_tag(u'style')
-        dfxp_style.attrs.update({u'xml:id': style})
+        dfxp_style = dfxp.new_tag('style')
+        dfxp_style.attrs.update({'xml:id': style})
 
         attributes = self._recreate_style(content, dfxp)
         dfxp_style.attrs.update(attributes)
 
-        new_tag = dfxp.new_tag(u'style')
-        new_tag.attrs.update({u'xml:id': style})
+        new_tag = dfxp.new_tag('style')
+        new_tag.attrs.update({'xml:id': style})
         if dfxp_style != new_tag:
-            dfxp.find(u'styling').append(dfxp_style)
+            dfxp.find('styling').append(dfxp_style)
 
         return dfxp
 
     def _recreate_p_tag(self, caption, caption_style, dfxp):
         start = caption.format_start()
         end = caption.format_end()
-        p = dfxp.new_tag(u"p", begin=start, end=end)
+        p = dfxp.new_tag("p", begin=start, end=end)
         p.string = self._recreate_text(caption, dfxp)
 
-        if dfxp.find(u"style", {u"xml:id": u"p"}):
-            p[u'style'] = u'p'
+        if dfxp.find("style", {"xml:id": "p"}):
+            p['style'] = 'p'
 
         p.attrs.update(self._recreate_style(caption_style, dfxp))
 
         return p
 
     def _recreate_text(self, caption, dfxp):
-        line = u''
+        line = ''
 
         for node in caption.nodes:
             if node.type_ == CaptionNode.TEXT:
-                line += escape(node.content) + u' '
+                line += escape(node.content) + ' '
 
             elif node.type_ == CaptionNode.BREAK:
-                line = line.rstrip() + u'<br/>\n    '
+                line = line.rstrip() + '<br/>\n    '
 
             elif node.type_ == CaptionNode.STYLE:
                 line = self._recreate_span(line, node, dfxp)
@@ -204,20 +205,20 @@ def _recreate_text(self, caption, dfxp):
 
     def _recreate_span(self, line, node, dfxp):
         if node.start:
-            styles = u''
+            styles = ''
 
             content_with_style = self._recreate_style(node.content, dfxp)
-            for style, value in content_with_style.items():
-                styles += u' %s="%s"' % (style, value)
+            for style, value in list(content_with_style.items()):
+                styles += ' %s="%s"' % (style, value)
 
             if styles:
                 if self.open_span:
-                    line = line.rstrip() + u'</span> '
-                line += u'<span%s>' % styles
+                    line = line.rstrip() + '</span> '
+                line += '<span%s>' % styles
                 self.open_span = True
 
         elif self.open_span:
-            line = line.rstrip() + u'</span> '
+            line = line.rstrip() + '</span> '
             self.open_span = False
 
         return line
@@ -225,23 +226,23 @@ def _recreate_span(self, line, node, dfxp):
     def _recreate_style(self, content, dfxp):
         dfxp_style = {}
 
-        if u'region' in content:
-            if dfxp.find(u'region', {u'xml:id': content[u'region']}):
-                dfxp_style[u'region'] = content[u'region']
-        if u'class' in content:
-            if dfxp.find(u"style", {u"xml:id": content[u'class']}):
-                dfxp_style[u'style'] = content[u'class']
-        if u'text-align' in content:
-            dfxp_style[u'tts:textAlign'] = content[u'text-align']
-        if u'italics' in content:
-            dfxp_style[u'tts:fontStyle'] = u'italic'
-        if u'font-family' in content:
-            dfxp_style[u'tts:fontFamily'] = content[u'font-family']
-        if u'font-size' in content:
-            dfxp_style[u'tts:fontSize'] = content[u'font-size']
-        if u'color' in content:
-            dfxp_style[u'tts:color'] = content[u'color']
-        if u'display-align' in content:
-            dfxp_style[u'tts:displayAlign'] = content[u'display-align']
+        if 'region' in content:
+            if dfxp.find('region', {'xml:id': content['region']}):
+                dfxp_style['region'] = content['region']
+        if 'class' in content:
+            if dfxp.find("style", {"xml:id": content['class']}):
+                dfxp_style['style'] = content['class']
+        if 'text-align' in content:
+            dfxp_style['tts:textAlign'] = content['text-align']
+        if 'italics' in content:
+            dfxp_style['tts:fontStyle'] = 'italic'
+        if 'font-family' in content:
+            dfxp_style['tts:fontFamily'] = content['font-family']
+        if 'font-size' in content:
+            dfxp_style['tts:fontSize'] = content['font-size']
+        if 'color' in content:
+            dfxp_style['tts:color'] = content['color']
+        if 'display-align' in content:
+            dfxp_style['tts:displayAlign'] = content['display-align']
 
         return dfxp_style
diff --git a/pycaption/geometry.py b/pycaption/geometry.py
index 8fab090f..a39e9b1b 100644
--- a/pycaption/geometry.py
+++ b/pycaption/geometry.py
@@ -9,8 +9,8 @@
 """
 import six
 
-from .exceptions import RelativizationError
 from enum import Enum
+from .exceptions import RelativizationError
 
 
 class UnitEnum(Enum):
@@ -22,12 +22,11 @@ class UnitEnum(Enum):
         if unit == UnitEnum.CELL :
             ...
     """
-    PIXEL = u'px'
-    EM = u'em'
-    PERCENT = u'%'
-    CELL = u'c'
-    PT = u'pt'
-
+    PIXEL = 'px'
+    EM = 'em'
+    PERCENT = '%'
+    CELL = 'c'
+    PT = 'pt'
 
 
 class VerticalAlignmentEnum(Enum):
@@ -38,19 +37,19 @@ class VerticalAlignmentEnum(Enum):
         if alignment == VerticalAlignmentEnum.BOTTOM:
             ...
     """
-    TOP = u'top'
-    CENTER = u'center'
-    BOTTOM = u'bottom'
+    TOP = 'top'
+    CENTER = 'center'
+    BOTTOM = 'bottom'
 
 
 class HorizontalAlignmentEnum(Enum):
     """Enumeration object specifying the horizontal alignment preferences
     """
-    LEFT = u'left'
-    CENTER = u'center'
-    RIGHT = u'right'
-    START = u'start'
-    END = u'end'
+    LEFT = 'left'
+    CENTER = 'center'
+    RIGHT = 'right'
+    START = 'start'
+    END = 'end'
 
 
 class Alignment(object):
@@ -80,7 +79,7 @@ def __eq__(self, other):
         )
 
     def __repr__(self):
-        return u"<Alignment ({horizontal} {vertical})>".format(
+        return "<Alignment ({horizontal} {vertical})>".format(
             horizontal=self.horizontal, vertical=self.vertical
         )
 
@@ -95,22 +94,22 @@ def from_horizontal_and_vertical_align(cls, text_align=None,
         horizontal_obj = None
         vertical_obj = None
 
-        if text_align == u'left':
+        if text_align == 'left':
             horizontal_obj = HorizontalAlignmentEnum.LEFT
-        if text_align == u'start':
+        if text_align == 'start':
             horizontal_obj = HorizontalAlignmentEnum.START
-        if text_align == u'center':
+        if text_align == 'center':
             horizontal_obj = HorizontalAlignmentEnum.CENTER
-        if text_align == u'right':
+        if text_align == 'right':
             horizontal_obj = HorizontalAlignmentEnum.RIGHT
-        if text_align == u'end':
+        if text_align == 'end':
             horizontal_obj = HorizontalAlignmentEnum.END
 
-        if display_align == u'before':
+        if display_align == 'before':
             vertical_obj = VerticalAlignmentEnum.TOP
-        if display_align == u'center':
+        if display_align == 'center':
             vertical_obj = VerticalAlignmentEnum.CENTER
-        if display_align == u'after':
+        if display_align == 'after':
             vertical_obj = VerticalAlignmentEnum.BOTTOM
 
         if not any([horizontal_obj, vertical_obj]):
@@ -130,7 +129,7 @@ def from_xml_attribute(cls, attribute):
 
         :type attribute: unicode
         """
-        horizontal, vertical = six.text_type(attribute).split(u' ')
+        horizontal, vertical = six.text_type(attribute).split(' ')
         horizontal = Size.from_string(horizontal)
         vertical = Size.from_string(vertical)
 
@@ -150,8 +149,8 @@ def __init__(self, horizontal, vertical):
         """
         for parameter in [horizontal, vertical]:
             if not isinstance(parameter, Size):
-                raise ValueError(u"Stretch must be initialized with two valid "
-                                 u"Size objects.")
+                raise ValueError("Stretch must be initialized with two valid "
+                                 "Size objects.")
         self.horizontal = horizontal
         self.vertical = vertical
 
@@ -167,7 +166,7 @@ def is_measured_in(self, measure_unit):
         )
 
     def __repr__(self):
-        return u'<Stretch ({horizontal}, {vertical})>'.format(
+        return '<Stretch ({horizontal}, {vertical})>'.format(
             horizontal=self.horizontal, vertical=self.vertical
         )
 
@@ -193,13 +192,13 @@ def __hash__(self):
             67
         )
 
-    def __nonzero__(self):
+    def __bool__(self):
         return True if self.horizontal or self.vertical else False
 
     def to_xml_attribute(self, **kwargs):
         """Returns a unicode representation of this object as an xml attribute
         """
-        return u'{horizontal} {vertical}'.format(
+        return '{horizontal} {vertical}'.format(
             horizontal=self.horizontal.to_xml_attribute(),
             vertical=self.vertical.to_xml_attribute()
         )
@@ -314,8 +313,8 @@ def __init__(self, x, y):
         """
         for parameter in [x, y]:
             if not isinstance(parameter, Size):
-                raise ValueError(u"Point must be initialized with two valid "
-                                 u"Size objects.")
+                raise ValueError("Point must be initialized with two valid "
+                                 "Size objects.")
         self.x = x
         self.y = y
 
@@ -368,7 +367,7 @@ def align_from_origin(cls, p1, p2):
                     Point(max(p1.x, p2.x), max(p1.y, p2.y)))
 
     def __repr__(self):
-        return u'<Point ({x}, {y})>'.format(
+        return '<Point ({x}, {y})>'.format(
             x=self.x, y=self.y
         )
 
@@ -395,13 +394,13 @@ def __hash__(self):
             57
         )
 
-    def __nonzero__(self):
+    def __bool__(self):
         return True if self.x or self.y else False
 
     def to_xml_attribute(self, **kwargs):
         """Returns a unicode representation of this object as an xml attribute
         """
-        return u'{x} {y}'.format(
+        return '{x} {y}'.format(
             x=self.x.to_xml_attribute(), y=self.y.to_xml_attribute())
 
 
@@ -417,9 +416,9 @@ def __init__(self, value, unit):
         :param unit: A UnitEnum member
         """
         if value is None:
-            raise ValueError(u"Size must be initialized with a value.")
+            raise ValueError("Size must be initialized with a value.")
         if not isinstance(unit,UnitEnum):
-            raise ValueError(u"Size must be initialized with a valid unit.")
+            raise ValueError("Size must be initialized with a valid unit.")
 
         self.value = float(value)
         self.unit = unit
@@ -428,16 +427,17 @@ def __sub__(self, other):
         if self.unit == other.unit:
             return Size(self.value - other.value, self.unit)
         else:
-            raise ValueError(u"The sizes should have the same measure units.")
+            raise ValueError("The sizes should have the same measure units.")
 
     def __abs__(self):
         return Size(abs(self.value), self.unit)
 
     def __cmp__(self, other):
         if self.unit == other.unit:
-            return cmp(self.value, other.value)
+            # python3 does not have cmp
+            return (self.value > other.value) - (self.value < other.value)
         else:
-            raise ValueError(u"The sizes should have the same measure units.")
+            raise ValueError("The sizes should have the same measure units.")
 
     def __lt__(self, other):
         return self.value < other.value
@@ -447,7 +447,7 @@ def __add__(self, other):
         if self.unit == other.unit:
             return Size(self.value + other.value, self.unit)
         else:
-            raise ValueError(u"The sizes should have the same measure units.")
+            raise ValueError("The sizes should have the same measure units.")
 
     def is_relative(self):
         """
@@ -469,10 +469,10 @@ def as_percentage_of(self, video_width=None, video_height=None):
         # The input must be valid so that any conversion can be done
         if not (video_width or video_height):
             raise RelativizationError(
-                u"Either video width or height must be given as a reference")
+                "Either video width or height must be given as a reference")
         elif video_width and video_height:
             raise RelativizationError(
-                u"Only video width or height can be given as reference")
+                "Only video width or height can be given as reference")
 
         if unit == UnitEnum.EM:
             # TODO: Implement proper conversion of em in function of font-size
@@ -533,31 +533,31 @@ def from_string(cls, string):
 
             if value is None:
                 raise ValueError(
-                    u"""Couldn't recognize the value "{value}" as a number"""
+                    """Couldn't recognize the value "{value}" as a number"""
                     .format(value=raw_number)
                 )
             instance = cls(value, unit)
             return instance
         else:
             raise ValueError(
-                u"The specified value is not valid because its unit "
-                u"is not recognized: {value}. "
-                u"The only supported units are: {supported}"
-                .format(value=raw_number, supported=u', '.join(UnitEnum._member_map_))
+                "The specified value is not valid because its unit "
+                "is not recognized: {value}. "
+                "The only supported units are: {supported}"
+                .format(value=raw_number, supported=', '.join(UnitEnum._member_map_))
             )
 
     def __repr__(self):
-        return u'<Size ({value} {unit})>'.format(
+        return '<Size ({value} {unit})>'.format(
             value=self.value, unit=self.unit.value
         )
 
     def __str__(self):
         value = round(self.value, 2)
         if value.is_integer():
-            s = u"{}".format(int(value))
+            s = "{}".format(int(value))
         else:
-            s = u"{:.2f}".format(value).rstrip('0').rstrip('.')
-        return u"{}{}".format(s, self.unit.value)
+            s = "{:.2f}".format(value).rstrip('0').rstrip('.')
+        return "{}{}".format(s, self.unit.value)
 
     def to_xml_attribute(self, **kwargs):
         """Returns a unicode representation of this object, as an xml attribute
@@ -583,7 +583,7 @@ def __hash__(self):
             47
         )
 
-    def __nonzero__(self):
+    def __bool__(self):
         return self.unit in UnitEnum and self.value is not None
 
 
@@ -628,7 +628,7 @@ def from_xml_attribute(cls, attribute):
         :param attribute: a string like object, representing a dfxp attr. value
         :return: a Padding object
         """
-        values_list = six.text_type(attribute).split(u' ')
+        values_list = six.text_type(attribute).split(' ')
         sizes = []
 
         for value in values_list:
@@ -643,16 +643,16 @@ def from_xml_attribute(cls, attribute):
         elif len(sizes) == 4:
             return cls(sizes[0], sizes[2], sizes[3], sizes[1])
         else:
-            raise ValueError(u'The provided value "{value}" could not be '
-                             u"parsed into the a padding. Check out "
-                             u"http://www.w3.org/TR/ttaf1-dfxp/"
-                             u"#style-attribute-padding for the definition "
-                             u"and examples".format(value=attribute))
+            raise ValueError('The provided value "{value}" could not be '
+                             "parsed into the a padding. Check out "
+                             "http://www.w3.org/TR/ttaf1-dfxp/"
+                             "#style-attribute-padding for the definition "
+                             "and examples".format(value=attribute))
 
     def __repr__(self):
         return (
-            u"<Padding (before: {before}, after: {after}, start: {start}, "
-            u"end: {end})>".format(
+            "<Padding (before: {before}, after: {after}, start: {start}, "
+            "end: {end})>".format(
                 before=self.before, after=self.after, start=self.start,
                 end=self.end
             )
@@ -688,7 +688,7 @@ def __hash__(self):
         )
 
     def to_xml_attribute(
-            self, attribute_order=(u'before', u'end', u'after', u'start'),
+            self, attribute_order=('before', 'end', 'after', 'start'),
             **kwargs):
         """Returns a unicode representation of this object as an xml attribute
 
@@ -709,9 +709,9 @@ def to_xml_attribute(
             # A Padding object with attributes set to None is considered
             # invalid. All four possible paddings must be set. If one of them
             # is not, this error is raised.
-            raise ValueError(u"The attribute order specified is invalid.")
+            raise ValueError("The attribute order specified is invalid.")
 
-        return u' '.join(string_list)
+        return ' '.join(string_list)
 
     def as_percentage_of(self, video_width, video_height):
         return Padding(
@@ -733,6 +733,7 @@ def is_relative(self):
             is_relative &= self.end.is_relative()
         return is_relative
 
+
 class Layout(object):
     """Should encapsulate all the information needed to determine (as correctly
     as possible) the layout (positioning) of elements on the screen.
@@ -780,7 +781,7 @@ def __init__(self, origin=None, extent=None, padding=None, alignment=None,
                 if not attr:
                     setattr(self, attr_name, getattr(inherit_from, attr_name))
 
-    def __nonzero__(self):
+    def __bool__(self):
         return any([
             self.origin, self.extent, self.padding, self.alignment,
             self.webvtt_positioning
@@ -788,8 +789,8 @@ def __nonzero__(self):
 
     def __repr__(self):
         return (
-            u"<Layout (origin: {origin}, extent: {extent}, "
-            u"padding: {padding}, alignment: {alignment})>".format(
+            "<Layout (origin: {origin}, extent: {extent}, "
+            "padding: {padding}, alignment: {alignment})>".format(
                 origin=self.origin, extent=self.extent, padding=self.padding,
                 alignment=self.alignment
             )
diff --git a/pycaption/sami.py b/pycaption/sami.py
index 3840a3ef..4911e771 100644
--- a/pycaption/sami.py
+++ b/pycaption/sami.py
@@ -36,42 +36,33 @@
 
 """
 import re
-
-from collections import deque
 import six
-
-try:
-    from htmlentitydefs import name2codepoint
-except:
-    from html.entities import name2codepoint
-
-try:
-    from HTMLParser import HTMLParser, HTMLParseError
-except:
-    from html.parser import HTMLParser
-
 from logging import FATAL
+from collections import deque
+from copy import deepcopy
+from future.backports.html.parser import HTMLParseError
+
+from html.parser import HTMLParser
+from html.entities import name2codepoint
 from xml.sax.saxutils import escape
-from copy import deepcopy, copy
 
-from cssutils import parseString, log, css as cssutils_css
+
 from bs4 import BeautifulSoup, NavigableString
+from cssutils import parseString, log, css as cssutils_css
 
 from .base import (
     BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
     DEFAULT_LANGUAGE_CODE)
 from .exceptions import (
     CaptionReadNoCaptions, CaptionReadSyntaxError, InvalidInputError)
-from .geometry import (
-    Layout, Alignment, Padding, Size
-)
+from .geometry import Layout, Alignment, Padding, Size
 
 
 # change cssutils default logging
 log.setLevel(FATAL)
 
 
-SAMI_BASE_MARKUP = u'''
+SAMI_BASE_MARKUP = '''
 <sami>
     <head>
         <style type="text/css"/>
@@ -88,7 +79,7 @@ def __init__(self, *args, **kw):
         self.first_alignment = None
 
     def detect(self, content):
-        if u'<sami' in content.lower():
+        if '<sami' in content.lower():
             return True
         else:
             return False
@@ -107,9 +98,9 @@ def read(self, content):
         caption_dict = {}
         for language in doc_langs:
             lang_layout = None
-            for target, styling in doc_styles.items():
-                if target not in [u'p', u'sync', u'span']:
-                    if styling.get(u'lang', None) == language:
+            for target, styling in list(doc_styles.items()):
+                if target not in ['p', 'sync', 'span']:
+                    if styling.get('lang', None) == language:
                         lang_layout = self._build_layout(
                             doc_styles.get(target, {}),
                             inherit_from=global_layout
@@ -127,13 +118,13 @@ def read(self, content):
         )
 
         # Convert styles from CSS to internal representation
-        for style in doc_styles.items():
+        for style in list(doc_styles.items()):
             style = (style[0], self._translate_parsed_style(style[1]))
 
         caption_set.set_styles(doc_styles)
 
         if caption_set.is_empty():
-            raise CaptionReadNoCaptions(u"empty caption file")
+            raise CaptionReadNoCaptions("empty caption file")
 
         return caption_set
 
@@ -203,8 +194,8 @@ def _translate_lang(self, language, sami_soup, parent_layout):
         captions = CaptionList(layout_info=parent_layout)
         milliseconds = 0
 
-        for p in sami_soup.select(u'p[lang|=%s]' % language):
-            milliseconds = int(float(p.parent[u'start']))
+        for p in sami_soup.select('p[lang|=%s]' % language):
+            milliseconds = int(float(p.parent['start']))
             start = milliseconds * 1000
             end = 0
 
@@ -240,12 +231,12 @@ def _translate_lang(self, language, sami_soup, parent_layout):
         return captions
 
     def _get_style_name_from_tag(self, tag):
-        if tag == u'i':
-            return u'italics'
-        elif tag == u'b':
-            return u'bold'
-        elif tag == u'u':
-            return u'underline'
+        if tag == 'i':
+            return 'italics'
+        elif tag == 'b':
+            return 'bold'
+        elif tag == 'u':
+            return 'underline'
         else:
             raise RuntimeError("Unknown style tag")
 
@@ -260,17 +251,17 @@ def _translate_tag(self, tag, inherit_from=None):
             # (e.g. &amp;) automatically. The following variable, therefore,
             # should contain a plain unicode string.
             # strips indentation whitespace only
-            pattern = re.compile(u"^(?:[\n\r]+\s*)?(.+)")
+            pattern = re.compile("^(?:[\n\r]+\s*)?(.+)")
             result = pattern.search(tag)
             if not result:
                 return
             tag_text = result.groups()[0]
             self.line.append(CaptionNode.create_text(tag_text, inherit_from))
         # convert line breaks
-        elif tag.name == u'br':
+        elif tag.name == 'br':
             self.line.append(CaptionNode.create_break(inherit_from))
         # convert italics, bold, and underline
-        elif tag.name == u'i' or tag.name == u'b' or tag.name == u'u':
+        elif tag.name == 'i' or tag.name == 'b' or tag.name == 'u':
             style_name = self._get_style_name_from_tag(tag.name)
             self.line.append(
                 CaptionNode.create_style(True, {style_name: True})
@@ -280,7 +271,7 @@ def _translate_tag(self, tag, inherit_from=None):
                 self._translate_tag(a, inherit_from)
             self.line.append(
                 CaptionNode.create_style(False, {style_name: True}))
-        elif tag.name == u'span':
+        elif tag.name == 'span':
             self._translate_span(tag, inherit_from)
         else:
             # recursively call function for any children elements
@@ -312,12 +303,12 @@ def _translate_attrs(self, tag):
         attrs = {}
         css_attrs = tag.attrs
 
-        if u'class' in css_attrs:
-            attrs[u'class'] = css_attrs[u'class'][0].lower()
-        if u'id' in css_attrs:
-            attrs[u'class'] = css_attrs[u'id'].lower()
-        if u'style' in css_attrs:
-            styles = css_attrs[u'style'].split(u';')
+        if 'class' in css_attrs:
+            attrs['class'] = css_attrs['class'][0].lower()
+        if 'id' in css_attrs:
+            attrs['class'] = css_attrs['id'].lower()
+        if 'style' in css_attrs:
+            styles = css_attrs['style'].split(';')
             attrs.update(self._translate_style(attrs, styles))
 
         return attrs
@@ -325,12 +316,12 @@ def _translate_attrs(self, tag):
     # convert attributes from inline CSS
     def _translate_style(self, attrs, styles):
         for style in styles:
-            style = style.split(u':')
+            style = style.split(':')
             if len(style) == 2:
                 css_property, value = style
             else:
                 continue
-            if css_property == u'text-align':
+            if css_property == 'text-align':
                 self._save_first_alignment(value.strip())
             else:
                 self._translate_css_property(attrs, css_property, value)
@@ -347,20 +338,20 @@ def _translate_parsed_style(self, styles):
         return attrs
 
     def _translate_css_property(self, attrs, css_property, value):
-        if css_property == u'font-family':
-            attrs[u'font-family'] = value.strip()
-        elif css_property == u'font-size':
-            attrs[u'font-size'] = value.strip()
-        elif css_property == u'font-style' and value.strip() == u'italic':
-            attrs[u'italics'] = True
-        elif css_property == u'text-decoration' and value.strip() == u'underline':
-            attrs[u'underline'] = True
-        elif css_property == u'font-weight' and value.strip() == u'bold':
-            attrs[u'bold'] = True
-        elif css_property == u'lang':
-            attrs[u'lang'] = value.strip()
-        elif css_property == u'color':
-            attrs[u'color'] = value.strip()
+        if css_property == 'font-family':
+            attrs['font-family'] = value.strip()
+        elif css_property == 'font-size':
+            attrs['font-size'] = value.strip()
+        elif css_property == 'font-style' and value.strip() == 'italic':
+            attrs['italics'] = True
+        elif css_property == 'text-decoration' and value.strip() == 'underline':
+            attrs['underline'] = True
+        elif css_property == 'font-weight' and value.strip() == 'bold':
+            attrs['bold'] = True
+        elif css_property == 'lang':
+            attrs['lang'] = value.strip()
+        elif css_property == 'color':
+            attrs['color'] = value.strip()
 
     def _save_first_alignment(self, align):
         """
@@ -394,7 +385,7 @@ def __init__(self, *args, **kwargs):
 
     def write(self, caption_set):
         caption_set = deepcopy(caption_set)
-        sami = BeautifulSoup(SAMI_BASE_MARKUP, u"lxml-xml")
+        sami = BeautifulSoup(SAMI_BASE_MARKUP, "lxml-xml")
 
         caption_set.layout_info = self._relativize_and_fit_to_screen(
             caption_set.layout_info)
@@ -424,10 +415,10 @@ def write(self, caption_set):
                     caption, sami, lang, primary, caption_set)
 
         stylesheet = self._recreate_stylesheet(caption_set)
-        sami.find(u'style').append(stylesheet)
+        sami.find('style').append(stylesheet)
 
-        a = sami.prettify(formatter=None).split(u'\n')
-        caption_content = u'\n'.join(a[1:])
+        a = sami.prettify(formatter=None).split('\n')
+        caption_content = '\n'.join(a[1:])
         return caption_content
 
     def _recreate_p_tag(self, caption, sami, lang, primary, captions):
@@ -453,15 +444,15 @@ def _recreate_p_tag(self, caption, sami, lang, primary, captions):
 
         sami, sync = self._recreate_sync(sami, lang, primary, time)
 
-        p = sami.new_tag(u"p")
+        p = sami.new_tag("p")
 
-        p_style = u''
-        for attr, value in self._recreate_style(caption.style).items():
-            p_style += u'%s:%s;' % (attr, value)
+        p_style = ''
+        for attr, value in list(self._recreate_style(caption.style).items()):
+            p_style += '%s:%s;' % (attr, value)
         if p_style:
-            p[u'p_style'] = p_style
+            p['p_style'] = p_style
 
-        p[u'class'] = self._recreate_p_lang(caption, lang, captions)
+        p['class'] = self._recreate_p_lang(caption, lang, captions)
         p.string = self._recreate_text(caption.nodes)
 
         sync.append(p)
@@ -482,26 +473,26 @@ def _recreate_sync(self, sami, lang, primary, time):
         :rtype: BeautifulSoup
         """
         if lang == primary:
-            sync = sami.new_tag(u"sync", start=u"%d" % time)
+            sync = sami.new_tag("sync", start="%d" % time)
             sami.body.append(sync)
         else:
-            sync = sami.find(u"sync", start=u"%d" % time)
+            sync = sami.find("sync", start="%d" % time)
             if sync is None:
                 sami, sync = self._find_closest_sync(sami, time)
 
         return sami, sync
 
     def _find_closest_sync(self, sami, time):
-        sync = sami.new_tag(u"sync", start=u"%d" % time)
+        sync = sami.new_tag("sync", start="%d" % time)
 
-        earlier = sami.find_all(u"sync", start=lambda x: int(x) < time)
+        earlier = sami.find_all("sync", start=lambda x: int(x) < time)
         if earlier:
             last_sync = earlier[-1]
             last_sync.insert_after(sync)
         else:
             def later_syncs(start):
                 return int(start) > time
-            later = sami.find_all(u"sync", start=later_syncs)
+            later = sami.find_all("sync", start=later_syncs)
             if later:
                 last_sync = later[0]
                 last_sync.insert_before(sync)
@@ -510,9 +501,9 @@ def later_syncs(start):
     def _recreate_blank_tag(self, sami, caption, lang, primary, captions):
         sami, sync = self._recreate_sync(sami, lang, primary, self.last_time)
 
-        p = sami.new_tag(u"p")
-        p[u'class'] = self._recreate_p_lang(caption, lang, captions)
-        p.string = u'&nbsp;'
+        p = sami.new_tag("p")
+        p['class'] = self._recreate_p_lang(caption, lang, captions)
+        p.string = '&nbsp;'
 
         sync.append(p)
 
@@ -520,14 +511,14 @@ def _recreate_blank_tag(self, sami, caption, lang, primary, captions):
 
     def _recreate_p_lang(self, caption, lang, captions):
         try:
-            if u'lang' in captions.get_style(caption.style[u'class']):
-                return caption.style[u'class']
+            if 'lang' in captions.get_style(caption.style['class']):
+                return caption.style['class']
         except KeyError:
             pass
         return lang
 
     def _recreate_stylesheet(self, caption_set):
-        stylesheet = u'<!--'
+        stylesheet = '<!--'
 
         for attr, value in caption_set.get_styles():
             if value != {}:
@@ -535,12 +526,12 @@ def _recreate_stylesheet(self, caption_set):
                     attr, value, caption_set.layout_info)
 
         for lang in caption_set.get_languages():
-            lang_string = u'lang: {}'.format(lang)
+            lang_string = 'lang: {}'.format(lang)
             if lang_string not in stylesheet:
                 stylesheet += self._recreate_style_block(
-                    lang, {u'lang': lang}, caption_set.get_layout_info(lang))
+                    lang, {'lang': lang}, caption_set.get_layout_info(lang))
 
-        return stylesheet + u'   -->'
+        return stylesheet + '   -->'
 
     def _recreate_style_block(self, target, rules, layout_info):
         """
@@ -551,13 +542,13 @@ def _recreate_style_block(self, target, rules, layout_info):
         :param layout_info: A Layout object providing positioning information
             to be converted to CSS
         """
-        if target not in [u'p', u'sync', u'span']:
+        if target not in ['p', 'sync', 'span']:
             # If it's not a valid SAMI element, then it's a custom class name
-            selector = u'.{}'.format(target)
+            selector = '.{}'.format(target)
         else:
             selector = target
 
-        sami_style = u'\n    {} {{\n    '.format(selector)
+        sami_style = '\n    {} {{\n    '.format(selector)
 
         if layout_info and layout_info.padding:
             rules.update({
@@ -568,18 +559,18 @@ def _recreate_style_block(self, target, rules, layout_info):
             })
 
         for attr, value in sorted(self._recreate_style(rules).items()):
-            sami_style += u' {}: {};\n    '.format(attr, value)
+            sami_style += ' {}: {};\n    '.format(attr, value)
 
-        return sami_style + u'}\n'
+        return sami_style + '}\n'
 
     def _recreate_text(self, caption):
-        line = u''
+        line = ''
 
         for node in caption:
             if node.type_ == CaptionNode.TEXT:
-                line += self._encode(node.content) + u' '
+                line += self._encode(node.content) + ' '
             elif node.type_ == CaptionNode.BREAK:
-                line = line.rstrip() + u'<br/>\n    '
+                line = line.rstrip() + '<br/>\n    '
             elif node.type_ == CaptionNode.STYLE:
                 line = self._recreate_line_style(line, node)
 
@@ -588,28 +579,28 @@ def _recreate_text(self, caption):
     def _recreate_line_style(self, line, node):
         if node.start:
             if self.open_span:
-                line = line.rstrip() + u'</span> '
+                line = line.rstrip() + '</span> '
             line = self._recreate_span(line, node.content)
         else:
             if self.open_span:
-                line = line.rstrip() + u'</span> '
+                line = line.rstrip() + '</span> '
                 self.open_span = False
 
         return line
 
     def _recreate_span(self, line, content):
-        style = u''
-        klass = u''
-        if u'class' in content:
-            klass += u' class="%s"' % content[u'class']
+        style = ''
+        klass = ''
+        if 'class' in content:
+            klass += ' class="%s"' % content['class']
 
-        for attr, value in self._recreate_style(content).items():
-            style += u'%s:%s;' % (attr, value)
+        for attr, value in list(self._recreate_style(content).items()):
+            style += '%s:%s;' % (attr, value)
 
         if style or klass:
             if style:
-                style = u' style="%s"' % style
-            line += u'<span%s%s>' % (klass, style)
+                style = ' style="%s"' % style
+            line += '<span%s%s>' % (klass, style)
             self.open_span = True
 
         return line
@@ -620,14 +611,14 @@ def _recreate_style(self, rules):
         """
         sami_style = {}
 
-        for key, value in rules.items():
+        for key, value in list(rules.items()):
             # Recreate original CSS rules from internal style
-            if key == u'italics' and value == True:
-                sami_style[u'font-style'] = u'italic'
-            elif key == u'bold' and value == True:
-                sami_style[u'font-weight'] = u'bold'
-            elif key == u'underline' and value == True:
-                sami_style[u'text-decoration'] = u'underline'
+            if key == 'italics' and value == True:
+                sami_style['font-style'] = 'italic'
+            elif key == 'bold' and value == True:
+                sami_style['font-weight'] = 'bold'
+            elif key == 'underline' and value == True:
+                sami_style['text-decoration'] = 'underline'
             else:
                 sami_style[key] = value
 
@@ -645,14 +636,14 @@ def _encode(self, s):
 class SAMIParser(HTMLParser):
     def __init__(self, *args, **kw):
         HTMLParser.__init__(self, *args, **kw)
-        self.sami = u''
-        self.line = u''
+        self.sami = ''
+        self.line = ''
         self.styles = {}
         self.queue = deque()
         self.langs = set()
-        self.last_element = u''
+        self.last_element = ''
         self.name2codepoint = name2codepoint.copy()
-        self.name2codepoint[u'apos'] = 0x0027
+        self.name2codepoint['apos'] = 0x0027
         self.convert_charrefs = False
 
     def handle_starttag(self, tag, attrs):
@@ -664,73 +655,70 @@ def handle_starttag(self, tag, attrs):
         self.last_element = tag
 
         # treat divs as spans
-        if tag == u'div':
-            tag = u'span'
+        if tag == 'div':
+            tag = 'span'
 
         # figure out the caption language of P tags
-        if tag == u'p':
+        if tag == 'p':
             lang = self._find_lang(attrs)
 
             # if no language detected, set it as the default
             lang = lang or DEFAULT_LANGUAGE_CODE
-            attrs.append((u'lang', lang))
+            attrs.append(('lang', lang))
             self.langs.add(lang)
 
         # clean-up line breaks
-        if tag == u'br':
-            self.sami += u"<br/>"
+        if tag == 'br':
+            self.sami += "<br/>"
         # add tag to queue
         else:
             # if already in queue, first close tags off in LIFO order
             while tag in self.queue:
                 closer = self.queue.pop()
-                self.sami = self.sami + u"</%s>" % closer
+                self.sami += "</%s>" % closer
             # open new tag in queue
             self.queue.append(tag)
             # add tag with attributes
             for attr, value in attrs:
-                tag += u' %s="%s"' % (attr.lower(), value)
-            self.sami += u"<%s>" % tag
+                tag += ' %s="%s"' % (attr.lower(), value)
+            self.sami += "<%s>" % tag
 
     # override the parser's handling of endtags
     def handle_endtag(self, tag):
         # treat divs as spans
-        if tag == u'div':
-            tag = u'span'
+        if tag == 'div':
+            tag = 'span'
 
         # handle incorrectly formatted sync/p tags
-        if tag in [u'p', u'sync'] and tag == self.last_element:
+        if tag in ['p', 'sync'] and tag == self.last_element:
             return
 
         # close off tags in LIFO order, if matching starting tag in queue
         while tag in self.queue:
             closing_tag = self.queue.pop()
-            self.sami += u"</%s>" % closing_tag
+            self.sami += "</%s>" % closing_tag
 
     def handle_entityref(self, name):
-        if name in [u'gt', u'lt']:
-            self.sami += u'&%s;' % name
+        if name in ['gt', 'lt']:
+            self.sami += '&%s;' % name
         else:
             try:
-                self.sami += six.unichr(self.name2codepoint[name])
+                self.sami += chr(self.name2codepoint[name])
             except (KeyError, ValueError):
-                self.sami += u'&%s' % name
-
-        self.last_element = u''
-
-
+                self.sami += '&%s' % name
 
+        self.last_element = ''
 
     def handle_charref(self, name):
-        if name[0] == u'x':
-            self.sami += six.unichr(int(name[1:], 16))
+        if name[0] == 'x':
+            self.sami += chr(int(name[1:], 16))
         else:
-            self.sami += six.unichr(int(name))
+            self.sami += chr(int(name))
 
     # override the parser's handling of data
     def handle_data(self, data):
         self.sami += data
-        self.last_element = u''
+        self.last_element = ''
 
     # override the parser's feed function
     def feed(self, data):
@@ -738,29 +726,29 @@ def feed(self, data):
         :param data: Raw SAMI unicode string
         :returns: tuple (unicode, dict, set)
         """
-        no_cc = u'no closed captioning available'
+        no_cc = 'no closed captioning available'
 
-        if u'<html' in data.lower():
+        if '<html' in data.lower():
             raise CaptionReadSyntaxError(
-                u'SAMI File seems to be an HTML file.')
+                'SAMI File seems to be an HTML file.')
         elif no_cc in data.lower():
-            raise CaptionReadSyntaxError(u'SAMI File contains "%s"' % no_cc)
+            raise CaptionReadSyntaxError('SAMI File contains "%s"' % no_cc)
 
         # try to find style tag in SAMI
         try:
             # prevent BS4 error with huge SAMI files with unclosed tags
-            index = data.lower().find(u"</head>")
+            index = data.lower().find("</head>")
 
             self.styles = self._css_parse(
-                BeautifulSoup(data[:index], "lxml").find(u'style').get_text())
+                BeautifulSoup(data[:index], "lxml").find('style').get_text())
         except AttributeError:
             self.styles = {}
 
         # fix erroneous italics tags
-        data = data.replace(u'<i/>', u'<i>')
+        data = data.replace('<i/>', '<i>')
 
         # fix awkward tags found in some SAMIs
-        data = data.replace(u';>', u'>')
+        data = data.replace(';>', '>')
         try:
             HTMLParser.feed(self, data)
         except HTMLParseError as e:
@@ -769,7 +757,7 @@ def feed(self, data):
         # close any tags that remain in the queue
         while self.queue != deque([]):
             closing_tag = self.queue.pop()
-            self.sami += u"</%s>" % closing_tag
+            self.sami += "</%s>" % closing_tag
 
         return self.sami, self.styles, self.langs
 
@@ -785,15 +773,15 @@ def _css_parse(self, css):
         for rule in sheet:
             new_style = {}
             selector = rule.selectorText.lower()
-            if selector[0] in [u'#', u'.']:
+            if selector[0] in ['#', '.']:
                 selector = selector[1:]
             # keep any style attributes that are needed
             for prop in rule.style:
-                if prop.name == u'color':
+                if prop.name == 'color':
                     cv = cssutils_css.ColorValue(prop.value)
                     # Code for RGB to hex conversion comes from
                     # http://bit.ly/1kwfBnQ
-                    new_style[u'color'] = u"#%02x%02x%02x" % (
+                    new_style['color'] = "#%02x%02x%02x" % (
                         cv.red, cv.green, cv.blue)
                 else:
                     new_style[prop.name] = prop.value
@@ -805,12 +793,12 @@ def _css_parse(self, css):
     def _find_lang(self, attrs):
         for attr, value in attrs:
             # if lang is an attribute of the tag
-            if attr.lower() == u'lang':
+            if attr.lower() == 'lang':
                 return value[:2]
             # if the P tag has a class, try and find the language
-            if attr.lower() == u'class':
+            if attr.lower() == 'class':
                 try:
-                    return self.styles[value.lower()][u'lang']
+                    return self.styles[value.lower()]['lang']
                 except KeyError:
                     pass
 
diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index 9af4b27d..3eb08e7c 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -146,7 +146,6 @@ def get_corrected_end_time(caption):
     return caption.start + 4 * 1000 * 1000
 
 
-
 class SCCReader(BaseReader):
     """Converts a given unicode string to a CaptionSet.
 
@@ -160,17 +159,17 @@ def __init__(self, *args, **kw):
             DefaultProvidingPositionTracker()
         )
 
-        self.last_command = u''
+        self.last_command = ''
 
         self.buffer_dict = NotifyingDict()
 
-        self.buffer_dict[u'pop'] = self.node_creator_factory.new_creator()
-        self.buffer_dict[u'paint'] = self.node_creator_factory.new_creator()
-        self.buffer_dict[u'roll'] = self.node_creator_factory.new_creator()
+        self.buffer_dict['pop'] = self.node_creator_factory.new_creator()
+        self.buffer_dict['paint'] = self.node_creator_factory.new_creator()
+        self.buffer_dict['roll'] = self.node_creator_factory.new_creator()
 
         # Call this method when the active key changes
         self.buffer_dict.add_change_observer(self._flush_implicit_buffers)
-        self.buffer_dict.set_active(u'pop')
+        self.buffer_dict.set_active('pop')
 
         self.roll_rows = []
         self.roll_rows_expected = 0
@@ -191,7 +190,7 @@ def detect(self, content):
         else:
             return False
 
-    def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
+    def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
         """Converts the unicode string into a CaptionSet
 
         :type content: six.text_type
@@ -211,7 +210,7 @@ def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
         :rtype: CaptionSet
         """
         if type(content) != six.text_type:
-            raise InvalidInputError(u'The content is not a unicode string.')
+            raise InvalidInputError('The content is not a unicode string.')
 
         self.simulate_roll_up = simulate_roll_up
         self.time_translator.offset = offset * 1000000
@@ -227,7 +226,7 @@ def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
         captions = CaptionSet({lang: self.caption_stash.get_all()})
 
         if captions.is_empty():
-            raise CaptionReadNoCaptions(u"empty caption file")
+            raise CaptionReadNoCaptions("empty caption file")
         else:
             last_caption = captions.get_captions(lang)[-1]
             last_caption.end = get_corrected_end_time(last_caption)
@@ -266,22 +265,22 @@ def _flush_implicit_buffers(self, old_key=None, *args):
         If they're on the last row however, or if the caption type is changing,
         we make sure to convert the buffers to text, so we don't lose any info.
         """
-        if old_key == u'pop':
+        if old_key == 'pop':
             return
 
-        elif old_key is None or old_key == u'roll':
+        elif old_key is None or old_key == 'roll':
             if not self.buffer.is_empty():
                 self._roll_up()
 
-        elif old_key is None or old_key == u'paint':
+        elif old_key is None or old_key == 'paint':
             # xxx - perhaps the self.buffer property is sufficient
-            if not self.buffer_dict[u'paint'].is_empty():
+            if not self.buffer_dict['paint'].is_empty():
                 self.caption_stash.create_and_store(
-                    self.buffer_dict[u'paint'], self.time)
+                    self.buffer_dict['paint'], self.time)
 
     def _translate_line(self, line):
         # ignore blank lines
-        if line.strip() == u'':
+        if line.strip() == '':
             return
 
         # split line in timestamp and words
@@ -289,15 +288,15 @@ def _translate_line(self, line):
         parts = r.findall(line.lower())
 
         # XXX!!!!!! THESE 2 LINES ARE A HACK
-        if parts[0][2].strip() == u'942f':
+        if parts[0][2].strip() == '942f':
             self._fix_last_timing(timing=parts[0][0])
 
         self.time_translator.start_at(parts[0][0])
 
         # loop through each word
-        for word in parts[0][2].split(u' '):
+        for word in parts[0][2].split(' '):
             # ignore empty results
-            if word.strip() != u'':
+            if word.strip() != '':
                 self._translate_word(word)
 
     def _translate_word(self, word):
@@ -324,7 +323,7 @@ def _translate_word(self, word):
     def _handle_double_command(self, word):
         # ensure we don't accidentally use the same command twice
         if word == self.last_command:
-            self.last_command = u''
+            self.last_command = ''
             return True
         else:
             self.last_command = word
@@ -352,12 +351,12 @@ def _translate_command(self, word):
             return
 
         # if command is pop_up
-        if word == u'9420':
-            self.buffer_dict.set_active(u'pop')
+        if word == '9420':
+            self.buffer_dict.set_active('pop')
 
         # command is paint_on [Resume Direct Captioning]
-        elif word == u'9429':
-            self.buffer_dict.set_active(u'paint')
+        elif word == '9429':
+            self.buffer_dict.set_active('paint')
 
             self.roll_rows_expected = 1
             if not self.buffer.is_empty():
@@ -369,15 +368,15 @@ def _translate_command(self, word):
             self.time = self.time_translator.get_time()
 
         # if command is roll_up 2, 3 or 4 rows
-        elif word in (u'9425', u'9426', u'94a7'):
-            self.buffer_dict.set_active(u'roll')
+        elif word in ('9425', '9426', '94a7'):
+            self.buffer_dict.set_active('roll')
 
             # count how many lines are expected
-            if word == u'9425':
+            if word == '9425':
                 self.roll_rows_expected = 2
-            elif word == u'9426':
+            elif word == '9426':
                 self.roll_rows_expected = 3
-            elif word == u'94a7':
+            elif word == '94a7':
                 self.roll_rows_expected = 4
 
             # if content is in the queue, turn it into a caption
@@ -391,23 +390,23 @@ def _translate_command(self, word):
             self.time = self.time_translator.get_time()
 
         # clear pop_on buffer
-        elif word == u'94ae':
+        elif word == '94ae':
             self.buffer = self.node_creator_factory.new_creator()
 
         # display pop_on buffer [End Of Caption]
-        elif word == u'942f':
+        elif word == '942f':
             self.time = self.time_translator.get_time()
             self.caption_stash.create_and_store(self.buffer, self.time)
             self.buffer = self.node_creator_factory.new_creator()
 
         # roll up captions [Carriage Return]
-        elif word == u'94ad':
+        elif word == '94ad':
             # display roll-up buffer
             if not self.buffer.is_empty():
                 self._roll_up()
 
         # clear screen
-        elif word == u'942c':
+        elif word == '942c':
             self.roll_rows = []
 
             # XXX - The 942c command has nothing to do with paint-ons
@@ -415,9 +414,9 @@ def _translate_command(self, word):
             # the proper buffer (self.buffer) is used.
             # Most likely using `self.buffer` instead of the paint buffer
             # is the right thing to do, but this needs some further attention.
-            if not self.buffer_dict[u'paint'].is_empty():
+            if not self.buffer_dict['paint'].is_empty():
                 self.caption_stash.create_and_store(
-                    self.buffer_dict[u'paint'], self.time)
+                    self.buffer_dict['paint'], self.time)
                 self.buffer = self.node_creator_factory.new_creator()
 
             # attempt to add proper end time to last caption(s)
@@ -485,7 +484,7 @@ def __init__(self, *args, **kw):
         super(SCCWriter, self).__init__(*args, **kw)
 
     def write(self, caption_set):
-        output = HEADER + u'\n\n'
+        output = HEADER + '\n\n'
 
         if caption_set.is_empty():
             return output
@@ -517,12 +516,12 @@ def write(self, caption_set):
         # PASS 3:
         # Write captions.
         for (code, start, end) in codes:
-            output += (u'%s\t' % self._format_timestamp(start))
-            output += u'94ae 94ae 9420 9420 '
+            output += ('%s\t' % self._format_timestamp(start))
+            output += '94ae 94ae 9420 9420 '
             output += code
-            output += u'942c 942c 942f 942f\n\n'
+            output += '942c 942c 942f 942f\n\n'
             if end is not None:
-                output += u'%s\t942c 942c\n\n' % self._format_timestamp(end)
+                output += '%s\t942c 942c\n\n' % self._format_timestamp(end)
 
         return output
 
@@ -533,24 +532,24 @@ def caption_node_to_text(caption_node):
             if caption_node.type_ == CaptionNode.TEXT:
                 return six.text_type(caption_node.content)
             elif caption_node.type_ == CaptionNode.BREAK:
-                return u'\n'
-        caption_text = u''.join(
+                return '\n'
+        caption_text = ''.join(
             [caption_node_to_text(node) for node in caption.nodes])
-        inner_lines = caption_text.split( u'\n')
+        inner_lines = caption_text.split('\n')
         inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
-        return u'\n'.join(inner_lines_laid_out)
+        return '\n'.join(inner_lines_laid_out)
 
     @staticmethod
     def _maybe_align(code):
         # Finish a half-word with a no-op so we can move to a full word
         if len(code) % 5 == 2:
-            code += u'80 '
+            code += '80 '
         return code
 
     @staticmethod
     def _maybe_space(code):
         if len(code) % 5 == 4:
-            code += u' '
+            code += ' '
         return code
 
     def _print_character(self, code, char):
@@ -560,7 +559,7 @@ def _print_character(self, code, char):
             try:
                 char_code = SPECIAL_OR_EXTENDED_CHAR_TO_CODE[char]
             except KeyError:
-                char_code = u'91b6'  # Use £ as "unknown character" symbol
+                char_code = '91b6'  # Use £ as "unknown character" symbol
 
         if len(char_code) == 2:
             return code + char_code
@@ -571,14 +570,14 @@ def _print_character(self, code, char):
             return code
 
     def _text_to_code(self, s):
-        code = u''
-        lines = self._layout_line(s).split( u'\n')
+        code = ''
+        lines = self._layout_line(s).split('\n')
         for row, line in enumerate(lines):
             row += 16 - len(lines)
             # Move cursor to column 0 of the destination row
             for _ in range(2):
-                code += (u'%s%s ' % (PAC_HIGH_BYTE_BY_ROW[row],
-                                     PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]))
+                code += ('%s%s ' % (PAC_HIGH_BYTE_BY_ROW[row],
+                                    PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]))
             # Print the line using the SCC encoding
             for char in line:
                 code = self._print_character(code, char)
@@ -598,7 +597,7 @@ def _format_timestamp(microseconds):
         seconds = math.floor(seconds_float)
         seconds_float -= seconds
         frames = math.floor(seconds_float * 30)
-        return u'%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frames)
+        return '%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frames)
 
 
 class _SccTimeTranslator(object):
@@ -631,7 +630,7 @@ def _translate_time(stamp, offset):
             Helpful for when the captions are off by some time interval.
         :rtype: int
         """
-        if u';' in stamp:
+        if ';' in stamp:
             # Drop-frame timebase runs at the same rate as wall clock
             seconds_per_timestamp_second = 1.0
         else:
@@ -639,7 +638,7 @@ def _translate_time(stamp, offset):
             # 1 second of timecode is longer than an actual second (1.001s)
             seconds_per_timestamp_second = 1001.0 / 1000.0
 
-        time_split = stamp.replace(u';', u':').split(u':')
+        time_split = stamp.replace(';', ':').split(':')
 
         timestamp_seconds = (int(time_split[0]) * 3600 +
                              int(time_split[1]) * 60 +
diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py
index 1a7c9522..ba7ca2a2 100644
--- a/pycaption/scc/constants.py
+++ b/pycaption/scc/constants.py
@@ -4,775 +4,775 @@
 from future.utils import viewitems
 
 COMMANDS = {
-    u'9420': u'',
-    u'9429': u'',
-    u'9425': u'',
-    u'9426': u'',
-    u'94a7': u'',
-    u'942a': u'',
-    u'94ab': u'',
-    u'942c': u'',
-    u'94ae': u'',
-    u'942f': u'',
-    u'9779': u'<$>{break}<$>',
-    u'9775': u'<$>{break}<$>',
-    u'9776': u'<$>{break}<$>',
-    u'9770': u'<$>{break}<$>',
-    u'9773': u'<$>{break}<$>',
-    u'10c8': u'<$>{break}<$>',
-    u'10c2': u'<$>{break}<$>',
-    u'166e': u'<$>{break}<$>{italic}<$>',
-    u'166d': u'<$>{break}<$>',
-    u'166b': u'<$>{break}<$>',
-    u'10c4': u'<$>{break}<$>',
-    u'9473': u'<$>{break}<$>',
-    u'977f': u'<$>{break}<$>',
-    u'977a': u'<$>{break}<$>',
-    u'1668': u'<$>{break}<$>',
-    u'1667': u'<$>{break}<$>',
-    u'1664': u'<$>{break}<$>',
-    u'1661': u'<$>{break}<$>',
-    u'10ce': u'<$>{break}<$>{italic}<$>',
-    u'94c8': u'<$>{break}<$>',
-    u'94c7': u'<$>{break}<$>',
-    u'94c4': u'<$>{break}<$>',
-    u'94c2': u'<$>{break}<$>',
-    u'94c1': u'<$>{break}<$>',
-    u'915e': u'<$>{break}<$>',
-    u'915d': u'<$>{break}<$>',
-    u'915b': u'<$>{break}<$>',
-    u'925d': u'<$>{break}<$>',
-    u'925e': u'<$>{break}<$>',
-    u'925b': u'<$>{break}<$>',
-    u'97e6': u'<$>{break}<$>',
-    u'97e5': u'<$>{break}<$>',
-    u'97e3': u'<$>{break}<$>',
-    u'97e0': u'<$>{break}<$>',
-    u'97e9': u'<$>{break}<$>',
-    u'9154': u'<$>{break}<$>',
-    u'9157': u'<$>{break}<$>',
-    u'9151': u'<$>{break}<$>',
-    u'9258': u'<$>{break}<$>',
-    u'9152': u'<$>{break}<$>',
-    u'9257': u'<$>{break}<$>',
-    u'9254': u'<$>{break}<$>',
-    u'9252': u'<$>{break}<$>',
-    u'9158': u'<$>{break}<$>',
-    u'9251': u'<$>{break}<$>',
-    u'94cd': u'<$>{break}<$>',
-    u'94ce': u'<$>{break}<$>{italic}<$>',
-    u'94cb': u'<$>{break}<$>',
-    u'97ef': u'<$>{break}<$>{italic}<$>',
-    u'1373': u'<$>{break}<$>',
-    u'97ec': u'<$>{break}<$>',
-    u'97ea': u'<$>{break}<$>',
-    u'15c7': u'<$>{break}<$>',
-    u'974f': u'<$>{break}<$>{italic}<$>',
-    u'10c1': u'<$>{break}<$>',
-    u'974a': u'<$>{break}<$>',
-    u'974c': u'<$>{break}<$>',
-    u'10c7': u'<$>{break}<$>',
-    u'976d': u'<$>{break}<$>',
-    u'15d6': u'<$>{break}<$>',
-    u'15d5': u'<$>{break}<$>',
-    u'15d3': u'<$>{break}<$>',
-    u'15d0': u'<$>{break}<$>',
-    u'15d9': u'<$>{break}<$>',
-    u'9745': u'<$>{break}<$>',
-    u'9746': u'<$>{break}<$>',
-    u'9740': u'<$>{break}<$>',
-    u'9743': u'<$>{break}<$>',
-    u'9749': u'<$>{break}<$>',
-    u'15df': u'<$>{break}<$>',
-    u'15dc': u'<$>{break}<$>',
-    u'15da': u'<$>{break}<$>',
-    u'15f8': u'<$>{break}<$>',
-    u'94fe': u'<$>{break}<$>',
-    u'94fd': u'<$>{break}<$>',
-    u'94fc': u'<$>{break}<$>',
-    u'94fb': u'<$>{break}<$>',
-    u'944f': u'<$>{break}<$>{italic}<$>',
-    u'944c': u'<$>{break}<$>',
-    u'944a': u'<$>{break}<$>',
-    u'92fc': u'<$>{break}<$>',
-    u'1051': u'<$>{break}<$>',
-    u'1052': u'<$>{break}<$>',
-    u'1054': u'<$>{break}<$>',
-    u'92fe': u'<$>{break}<$>',
-    u'92fd': u'<$>{break}<$>',
-    u'1058': u'<$>{break}<$>',
-    u'157a': u'<$>{break}<$>',
-    u'157f': u'<$>{break}<$>',
-    u'9279': u'<$>{break}<$>',
-    u'94f4': u'<$>{break}<$>',
-    u'94f7': u'<$>{break}<$>',
-    u'94f1': u'<$>{break}<$>',
-    u'9449': u'<$>{break}<$>',
-    u'92fb': u'<$>{break}<$>',
-    u'9446': u'<$>{break}<$>',
-    u'9445': u'<$>{break}<$>',
-    u'9443': u'<$>{break}<$>',
-    u'94f8': u'<$>{break}<$>',
-    u'9440': u'<$>{break}<$>',
-    u'1057': u'<$>{break}<$>',
-    u'9245': u'<$>{break}<$>',
-    u'92f2': u'<$>{break}<$>',
-    u'1579': u'<$>{break}<$>',
-    u'92f7': u'<$>{break}<$>',
-    u'105e': u'<$>{break}<$>',
-    u'92f4': u'<$>{break}<$>',
-    u'1573': u'<$>{break}<$>',
-    u'1570': u'<$>{break}<$>',
-    u'1576': u'<$>{break}<$>',
-    u'1575': u'<$>{break}<$>',
-    u'16c1': u'<$>{break}<$>',
-    u'16c2': u'<$>{break}<$>',
-    u'9168': u'<$>{break}<$>',
-    u'16c7': u'<$>{break}<$>',
-    u'9164': u'<$>{break}<$>',
-    u'9167': u'<$>{break}<$>',
-    u'9161': u'<$>{break}<$>',
-    u'9162': u'<$>{break}<$>',
-    u'947f': u'<$>{break}<$>',
-    u'91c2': u'<$>{break}<$>',
-    u'91c1': u'<$>{break}<$>',
-    u'91c7': u'<$>{break}<$>',
-    u'91c4': u'<$>{break}<$>',
-    u'13e3': u'<$>{break}<$>',
-    u'91c8': u'<$>{break}<$>',
-    u'91d0': u'<$>{break}<$>',
-    u'13e5': u'<$>{break}<$>',
-    u'13c8': u'<$>{break}<$>',
-    u'16cb': u'<$>{break}<$>',
-    u'16cd': u'<$>{break}<$>',
-    u'16ce': u'<$>{break}<$>{italic}<$>',
-    u'916d': u'<$>{break}<$>',
-    u'916e': u'<$>{break}<$>{italic}<$>',
-    u'916b': u'<$>{break}<$>',
-    u'91d5': u'<$>{break}<$>',
-    u'137a': u'<$>{break}<$>',
-    u'91cb': u'<$>{break}<$>',
-    u'91ce': u'<$>{break}<$>{italic}<$>',
-    u'91cd': u'<$>{break}<$>',
-    u'13ec': u'<$>{break}<$>',
-    u'13c1': u'<$>{break}<$>',
-    u'13ea': u'<$>{break}<$>',
-    u'13ef': u'<$>{break}<$>{italic}<$>',
-    u'94f2': u'<$>{break}<$>',
-    u'97fb': u'<$>{break}<$>',
-    u'97fc': u'<$>{break}<$>',
-    u'1658': u'<$>{break}<$>',
-    u'97fd': u'<$>{break}<$>',
-    u'97fe': u'<$>{break}<$>',
-    u'1652': u'<$>{break}<$>',
-    u'1651': u'<$>{break}<$>',
-    u'1657': u'<$>{break}<$>',
-    u'1654': u'<$>{break}<$>',
-    u'10cb': u'<$>{break}<$>',
-    u'97f2': u'<$>{break}<$>',
-    u'97f1': u'<$>{break}<$>',
-    u'97f7': u'<$>{break}<$>',
-    u'97f4': u'<$>{break}<$>',
-    u'165b': u'<$>{break}<$>',
-    u'97f8': u'<$>{break}<$>',
-    u'165d': u'<$>{break}<$>',
-    u'165e': u'<$>{break}<$>',
-    u'15cd': u'<$>{break}<$>',
-    u'10cd': u'<$>{break}<$>',
-    u'9767': u'<$>{break}<$>',
-    u'9249': u'<$>{break}<$>',
-    u'1349': u'<$>{break}<$>',
-    u'91d9': u'<$>{break}<$>',
-    u'1340': u'<$>{break}<$>',
-    u'91d3': u'<$>{break}<$>',
-    u'9243': u'<$>{break}<$>',
-    u'1343': u'<$>{break}<$>',
-    u'91d6': u'<$>{break}<$>',
-    u'1345': u'<$>{break}<$>',
-    u'1346': u'<$>{break}<$>',
-    u'9246': u'<$>{break}<$>',
-    u'94e9': u'<$>{break}<$>',
-    u'94e5': u'<$>{break}<$>',
-    u'94e6': u'<$>{break}<$>',
-    u'94e0': u'<$>{break}<$>',
-    u'94e3': u'<$>{break}<$>',
-    u'15ea': u'<$>{break}<$>',
-    u'15ec': u'<$>{break}<$>',
-    u'15ef': u'<$>{break}<$>{italic}<$>',
-    u'16fe': u'<$>{break}<$>',
-    u'16fd': u'<$>{break}<$>',
-    u'16fc': u'<$>{break}<$>',
-    u'16fb': u'<$>{break}<$>',
-    u'1367': u'<$>{break}<$>',
-    u'94ef': u'<$>{break}<$>{italic}<$>',
-    u'94ea': u'<$>{break}<$>',
-    u'94ec': u'<$>{break}<$>',
-    u'924a': u'<$>{break}<$>',
-    u'91dc': u'<$>{break}<$>',
-    u'924c': u'<$>{break}<$>',
-    u'91da': u'<$>{break}<$>',
-    u'91df': u'<$>{break}<$>',
-    u'134f': u'<$>{break}<$>{italic}<$>',
-    u'924f': u'<$>{break}<$>{italic}<$>',
-    u'16f8': u'<$>{break}<$>',
-    u'16f7': u'<$>{break}<$>',
-    u'16f4': u'<$>{break}<$>',
-    u'16f2': u'<$>{break}<$>',
-    u'16f1': u'<$>{break}<$>',
-    u'15e0': u'<$>{break}<$>',
-    u'15e3': u'<$>{break}<$>',
-    u'15e5': u'<$>{break}<$>',
-    u'15e6': u'<$>{break}<$>',
-    u'15e9': u'<$>{break}<$>',
-    u'9757': u'<$>{break}<$>',
-    u'9754': u'<$>{break}<$>',
-    u'9752': u'<$>{break}<$>',
-    u'9751': u'<$>{break}<$>',
-    u'9758': u'<$>{break}<$>',
-    u'92f1': u'<$>{break}<$>',
-    u'104c': u'<$>{break}<$>',
-    u'104a': u'<$>{break}<$>',
-    u'104f': u'<$>{break}<$>{italic}<$>',
-    u'105d': u'<$>{break}<$>',
-    u'92f8': u'<$>{break}<$>',
-    u'975e': u'<$>{break}<$>',
-    u'975d': u'<$>{break}<$>',
-    u'975b': u'<$>{break}<$>',
-    u'1043': u'<$>{break}<$>',
-    u'1040': u'<$>{break}<$>',
-    u'1046': u'<$>{break}<$>',
-    u'1045': u'<$>{break}<$>',
-    u'1049': u'<$>{break}<$>',
-    u'9479': u'<$>{break}<$>',
-    u'917f': u'<$>{break}<$>',
-    u'9470': u'<$>{break}<$>',
-    u'9476': u'<$>{break}<$>',
-    u'917a': u'<$>{break}<$>',
-    u'9475': u'<$>{break}<$>',
-    u'927a': u'<$>{break}<$>',
-    u'927f': u'<$>{break}<$>',
-    u'134a': u'<$>{break}<$>',
-    u'15fb': u'<$>{break}<$>',
-    u'15fc': u'<$>{break}<$>',
-    u'15fd': u'<$>{break}<$>',
-    u'15fe': u'<$>{break}<$>',
-    u'1546': u'<$>{break}<$>',
-    u'1545': u'<$>{break}<$>',
-    u'1543': u'<$>{break}<$>',
-    u'1540': u'<$>{break}<$>',
-    u'1549': u'<$>{break}<$>',
-    u'13fd': u'<$>{break}<$>',
-    u'13fe': u'<$>{break}<$>',
-    u'13fb': u'<$>{break}<$>',
-    u'13fc': u'<$>{break}<$>',
-    u'92e9': u'<$>{break}<$>',
-    u'92e6': u'<$>{break}<$>',
-    u'9458': u'<$>{break}<$>',
-    u'92e5': u'<$>{break}<$>',
-    u'92e3': u'<$>{break}<$>',
-    u'92e0': u'<$>{break}<$>',
-    u'9270': u'<$>{break}<$>',
-    u'9273': u'<$>{break}<$>',
-    u'9275': u'<$>{break}<$>',
-    u'9276': u'<$>{break}<$>',
-    u'15f1': u'<$>{break}<$>',
-    u'15f2': u'<$>{break}<$>',
-    u'15f4': u'<$>{break}<$>',
-    u'15f7': u'<$>{break}<$>',
-    u'9179': u'<$>{break}<$>',
-    u'9176': u'<$>{break}<$>',
-    u'9175': u'<$>{break}<$>',
-    u'947a': u'<$>{break}<$>',
-    u'9173': u'<$>{break}<$>',
-    u'9170': u'<$>{break}<$>',
-    u'13f7': u'<$>{break}<$>',
-    u'13f4': u'<$>{break}<$>',
-    u'13f2': u'<$>{break}<$>',
-    u'13f1': u'<$>{break}<$>',
-    u'92ef': u'<$>{break}<$>{italic}<$>',
-    u'92ec': u'<$>{break}<$>',
-    u'13f8': u'<$>{break}<$>',
-    u'92ea': u'<$>{break}<$>',
-    u'154f': u'<$>{break}<$>{italic}<$>',
-    u'154c': u'<$>{break}<$>',
-    u'154a': u'<$>{break}<$>',
-    u'16c4': u'<$>{break}<$>',
-    u'16c8': u'<$>{break}<$>',
-    u'97c8': u'<$>{break}<$>',
-    u'164f': u'<$>{break}<$>{italic}<$>',
-    u'164a': u'<$>{break}<$>',
-    u'164c': u'<$>{break}<$>',
-    u'1645': u'<$>{break}<$>',
-    u'1646': u'<$>{break}<$>',
-    u'1640': u'<$>{break}<$>',
-    u'1643': u'<$>{break}<$>',
-    u'1649': u'<$>{break}<$>',
-    u'94df': u'<$>{break}<$>',
-    u'94dc': u'<$>{break}<$>',
-    u'94da': u'<$>{break}<$>',
-    u'135b': u'<$>{break}<$>',
-    u'135e': u'<$>{break}<$>',
-    u'135d': u'<$>{break}<$>',
-    u'1370': u'<$>{break}<$>',
-    u'9240': u'<$>{break}<$>',
-    u'13e9': u'<$>{break}<$>',
-    u'1375': u'<$>{break}<$>',
-    u'1679': u'<$>{break}<$>',
-    u'1358': u'<$>{break}<$>',
-    u'1352': u'<$>{break}<$>',
-    u'1351': u'<$>{break}<$>',
-    u'1376': u'<$>{break}<$>',
-    u'1357': u'<$>{break}<$>',
-    u'1354': u'<$>{break}<$>',
-    u'1379': u'<$>{break}<$>',
-    u'94d9': u'<$>{break}<$>',
-    u'94d6': u'<$>{break}<$>',
-    u'94d5': u'<$>{break}<$>',
-    u'15462': u'<$>{break}<$>',
-    u'94d3': u'<$>{break}<$>',
-    u'94d0': u'<$>{break}<$>',
-    u'13e0': u'<$>{break}<$>',
-    u'13e6': u'<$>{break}<$>',
-    u'976b': u'<$>{break}<$>',
-    u'15c4': u'<$>{break}<$>',
-    u'15c2': u'<$>{break}<$>',
-    u'15c1': u'<$>{break}<$>',
-    u'976e': u'<$>{break}<$>{italic}<$>',
-    u'134c': u'<$>{break}<$>',
-    u'15c8': u'<$>{break}<$>',
-    u'92c8': u'<$>{break}<$>',
-    u'16e9': u'<$>{break}<$>',
-    u'16e3': u'<$>{break}<$>',
-    u'16e0': u'<$>{break}<$>',
-    u'16e6': u'<$>{break}<$>',
-    u'16e5': u'<$>{break}<$>',
-    u'91e5': u'<$>{break}<$>',
-    u'91e6': u'<$>{break}<$>',
-    u'91e0': u'<$>{break}<$>',
-    u'91e3': u'<$>{break}<$>',
-    u'13c4': u'<$>{break}<$>',
-    u'13c7': u'<$>{break}<$>',
-    u'91e9': u'<$>{break}<$>',
-    u'13c2': u'<$>{break}<$>',
-    u'9762': u'<$>{break}<$>',
-    u'15ce': u'<$>{break}<$>{italic}<$>',
-    u'9761': u'<$>{break}<$>',
-    u'15cb': u'<$>{break}<$>',
-    u'9764': u'<$>{break}<$>',
-    u'9768': u'<$>{break}<$>',
-    u'91ef': u'<$>{break}<$>{italic}<$>',
-    u'91ea': u'<$>{break}<$>',
-    u'91ec': u'<$>{break}<$>',
-    u'13ce': u'<$>{break}<$>{italic}<$>',
-    u'13cd': u'<$>{break}<$>',
-    u'97da': u'<$>{break}<$>',
-    u'13cb': u'<$>{break}<$>',
-    u'13462': u'<$>{break}<$>',
-    u'16ec': u'<$>{break}<$>',
-    u'16ea': u'<$>{break}<$>',
-    u'16ef': u'<$>{break}<$>{italic}<$>',
-    u'97c1': u'<$>{break}<$>',
-    u'97c2': u'<$>{break}<$>',
-    u'97c4': u'<$>{break}<$>',
-    u'97c7': u'<$>{break}<$>',
-    u'92cd': u'<$>{break}<$>',
-    u'92ce': u'<$>{break}<$>{italic}<$>',
-    u'92cb': u'<$>{break}<$>',
-    u'92da': u'<$>{break}<$>',
-    u'92dc': u'<$>{break}<$>',
-    u'92df': u'<$>{break}<$>',
-    u'97df': u'<$>{break}<$>',
-    u'155b': u'<$>{break}<$>',
-    u'155e': u'<$>{break}<$>',
-    u'155d': u'<$>{break}<$>',
-    u'97dc': u'<$>{break}<$>',
-    u'1675': u'<$>{break}<$>',
-    u'1676': u'<$>{break}<$>',
-    u'1670': u'<$>{break}<$>',
-    u'1673': u'<$>{break}<$>',
-    u'16462': u'<$>{break}<$>',
-    u'97cb': u'<$>{break}<$>',
-    u'97ce': u'<$>{break}<$>{italic}<$>',
-    u'97cd': u'<$>{break}<$>',
-    u'92c4': u'<$>{break}<$>',
-    u'92c7': u'<$>{break}<$>',
-    u'92c1': u'<$>{break}<$>',
-    u'92c2': u'<$>{break}<$>',
-    u'1551': u'<$>{break}<$>',
-    u'97d5': u'<$>{break}<$>',
-    u'97d6': u'<$>{break}<$>',
-    u'1552': u'<$>{break}<$>',
-    u'97d0': u'<$>{break}<$>',
-    u'1554': u'<$>{break}<$>',
-    u'1557': u'<$>{break}<$>',
-    u'97d3': u'<$>{break}<$>',
-    u'1558': u'<$>{break}<$>',
-    u'167f': u'<$>{break}<$>',
-    u'137f': u'<$>{break}<$>',
-    u'167a': u'<$>{break}<$>',
-    u'92d9': u'<$>{break}<$>',
-    u'92d0': u'<$>{break}<$>',
-    u'92d3': u'<$>{break}<$>',
-    u'92d5': u'<$>{break}<$>',
-    u'92d6': u'<$>{break}<$>',
-    u'10dc': u'<$>{break}<$>',
-    u'9262': u'<$>{break}<$>',
-    u'9261': u'<$>{break}<$>',
-    u'91f8': u'<$>{break}<$>',
-    u'10df': u'<$>{break}<$>',
-    u'9264': u'<$>{break}<$>',
-    u'91f4': u'<$>{break}<$>',
-    u'91f7': u'<$>{break}<$>',
-    u'91f1': u'<$>{break}<$>',
-    u'91f2': u'<$>{break}<$>',
-    u'97d9': u'<$>{break}<$>',
-    u'9149': u'<$>{break}<$>',
-    u'9143': u'<$>{break}<$>',
-    u'9140': u'<$>{break}<$>',
-    u'9146': u'<$>{break}<$>',
-    u'9145': u'<$>{break}<$>',
-    u'9464': u'<$>{break}<$>',
-    u'9467': u'<$>{break}<$>',
-    u'9461': u'<$>{break}<$>',
-    u'9462': u'<$>{break}<$>',
-    u'9468': u'<$>{break}<$>',
-    u'914c': u'<$>{break}<$>',
-    u'914a': u'<$>{break}<$>',
-    u'914f': u'<$>{break}<$>{italic}<$>',
-    u'10d3': u'<$>{break}<$>',
-    u'926b': u'<$>{break}<$>',
-    u'10d0': u'<$>{break}<$>',
-    u'10d6': u'<$>{break}<$>',
-    u'926e': u'<$>{break}<$>{italic}<$>',
-    u'926d': u'<$>{break}<$>',
-    u'91fd': u'<$>{break}<$>',
-    u'91fe': u'<$>{break}<$>',
-    u'10d9': u'<$>{break}<$>',
-    u'91fb': u'<$>{break}<$>',
-    u'91fc': u'<$>{break}<$>',
-    u'946e': u'<$>{break}<$>{italic}<$>',
-    u'946d': u'<$>{break}<$>',
-    u'946b': u'<$>{break}<$>',
-    u'10da': u'<$>{break}<$>',
-    u'10d5': u'<$>{break}<$>',
-    u'9267': u'<$>{break}<$>',
-    u'9268': u'<$>{break}<$>',
-    u'16df': u'<$>{break}<$>',
-    u'16da': u'<$>{break}<$>',
-    u'16dc': u'<$>{break}<$>',
-    u'9454': u'<$>{break}<$>',
-    u'9457': u'<$>{break}<$>',
-    u'9451': u'<$>{break}<$>',
-    u'9452': u'<$>{break}<$>',
-    u'136d': u'<$>{break}<$>',
-    u'136e': u'<$>{break}<$>{italic}<$>',
-    u'136b': u'<$>{break}<$>',
-    u'13d9': u'<$>{break}<$>',
-    u'13da': u'<$>{break}<$>',
-    u'13dc': u'<$>{break}<$>',
-    u'13df': u'<$>{break}<$>',
-    u'1568': u'<$>{break}<$>',
-    u'1561': u'<$>{break}<$>',
-    u'1564': u'<$>{break}<$>',
-    u'1567': u'<$>{break}<$>',
-    u'16d5': u'<$>{break}<$>',
-    u'16d6': u'<$>{break}<$>',
-    u'16d0': u'<$>{break}<$>',
-    u'16d3': u'<$>{break}<$>',
-    u'945d': u'<$>{break}<$>',
-    u'945e': u'<$>{break}<$>',
-    u'16d9': u'<$>{break}<$>',
-    u'945b': u'<$>{break}<$>',
-    u'156b': u'<$>{break}<$>',
-    u'156d': u'<$>{break}<$>',
-    u'156e': u'<$>{break}<$>{italic}<$>',
-    u'105b': u'<$>{break}<$>',
-    u'1364': u'<$>{break}<$>',
-    u'1368': u'<$>{break}<$>',
-    u'1361': u'<$>{break}<$>',
-    u'13d0': u'<$>{break}<$>',
-    u'13d3': u'<$>{break}<$>',
-    u'13d5': u'<$>{break}<$>',
-    u'13d6': u'<$>{break}<$>',
-    u'97a1': u'',
-    u'97a2': u'',
-    u'9723': u'',
-    u'94a1': u'',
-    u'94a4': u'',
-    u'94ad': u'',
-    u'1020': u'',
-    u'10a1': u'',
-    u'10a2': u'',
-    u'1023': u'',
-    u'10a4': u'',
-    u'1025': u'',
-    u'1026': u'',
-    u'10a7': u'',
-    u'10a8': u'',
-    u'1029': u'',
-    u'102a': u'',
-    u'10ab': u'',
-    u'102c': u'',
-    u'10ad': u'',
-    u'10ae': u'',
-    u'102f': u'',
-    u'97ad': u'',
-    u'97a4': u'',
-    u'9725': u'',
-    u'9726': u'',
-    u'97a7': u'',
-    u'97a8': u'',
-    u'9729': u'',
-    u'972a': u'',
-    u'9120': u'<$>{end-italic}<$>',
-    u'91a1': u'',
-    u'91a2': u'',
-    u'9123': u'',
-    u'91a4': u'',
-    u'9125': u'',
-    u'9126': u'',
-    u'91a7': u'',
-    u'91a8': u'',
-    u'9129': u'',
-    u'912a': u'',
-    u'91ab': u'',
-    u'912c': u'',
-    u'91ad': u'',
-    u'97ae': u'',
-    u'972f': u'',
-    u'91ae': u'<$>{italic}<$>',
-    u'912f': u'<$>{italic}<$>',
-    u'94a8': u'',
-    u'9423': u'',
-    u'94a2': u'',
+    '9420': '',
+    '9429': '',
+    '9425': '',
+    '9426': '',
+    '94a7': '',
+    '942a': '',
+    '94ab': '',
+    '942c': '',
+    '94ae': '',
+    '942f': '',
+    '9779': '<$>{break}<$>',
+    '9775': '<$>{break}<$>',
+    '9776': '<$>{break}<$>',
+    '9770': '<$>{break}<$>',
+    '9773': '<$>{break}<$>',
+    '10c8': '<$>{break}<$>',
+    '10c2': '<$>{break}<$>',
+    '166e': '<$>{break}<$>{italic}<$>',
+    '166d': '<$>{break}<$>',
+    '166b': '<$>{break}<$>',
+    '10c4': '<$>{break}<$>',
+    '9473': '<$>{break}<$>',
+    '977f': '<$>{break}<$>',
+    '977a': '<$>{break}<$>',
+    '1668': '<$>{break}<$>',
+    '1667': '<$>{break}<$>',
+    '1664': '<$>{break}<$>',
+    '1661': '<$>{break}<$>',
+    '10ce': '<$>{break}<$>{italic}<$>',
+    '94c8': '<$>{break}<$>',
+    '94c7': '<$>{break}<$>',
+    '94c4': '<$>{break}<$>',
+    '94c2': '<$>{break}<$>',
+    '94c1': '<$>{break}<$>',
+    '915e': '<$>{break}<$>',
+    '915d': '<$>{break}<$>',
+    '915b': '<$>{break}<$>',
+    '925d': '<$>{break}<$>',
+    '925e': '<$>{break}<$>',
+    '925b': '<$>{break}<$>',
+    '97e6': '<$>{break}<$>',
+    '97e5': '<$>{break}<$>',
+    '97e3': '<$>{break}<$>',
+    '97e0': '<$>{break}<$>',
+    '97e9': '<$>{break}<$>',
+    '9154': '<$>{break}<$>',
+    '9157': '<$>{break}<$>',
+    '9151': '<$>{break}<$>',
+    '9258': '<$>{break}<$>',
+    '9152': '<$>{break}<$>',
+    '9257': '<$>{break}<$>',
+    '9254': '<$>{break}<$>',
+    '9252': '<$>{break}<$>',
+    '9158': '<$>{break}<$>',
+    '9251': '<$>{break}<$>',
+    '94cd': '<$>{break}<$>',
+    '94ce': '<$>{break}<$>{italic}<$>',
+    '94cb': '<$>{break}<$>',
+    '97ef': '<$>{break}<$>{italic}<$>',
+    '1373': '<$>{break}<$>',
+    '97ec': '<$>{break}<$>',
+    '97ea': '<$>{break}<$>',
+    '15c7': '<$>{break}<$>',
+    '974f': '<$>{break}<$>{italic}<$>',
+    '10c1': '<$>{break}<$>',
+    '974a': '<$>{break}<$>',
+    '974c': '<$>{break}<$>',
+    '10c7': '<$>{break}<$>',
+    '976d': '<$>{break}<$>',
+    '15d6': '<$>{break}<$>',
+    '15d5': '<$>{break}<$>',
+    '15d3': '<$>{break}<$>',
+    '15d0': '<$>{break}<$>',
+    '15d9': '<$>{break}<$>',
+    '9745': '<$>{break}<$>',
+    '9746': '<$>{break}<$>',
+    '9740': '<$>{break}<$>',
+    '9743': '<$>{break}<$>',
+    '9749': '<$>{break}<$>',
+    '15df': '<$>{break}<$>',
+    '15dc': '<$>{break}<$>',
+    '15da': '<$>{break}<$>',
+    '15f8': '<$>{break}<$>',
+    '94fe': '<$>{break}<$>',
+    '94fd': '<$>{break}<$>',
+    '94fc': '<$>{break}<$>',
+    '94fb': '<$>{break}<$>',
+    '944f': '<$>{break}<$>{italic}<$>',
+    '944c': '<$>{break}<$>',
+    '944a': '<$>{break}<$>',
+    '92fc': '<$>{break}<$>',
+    '1051': '<$>{break}<$>',
+    '1052': '<$>{break}<$>',
+    '1054': '<$>{break}<$>',
+    '92fe': '<$>{break}<$>',
+    '92fd': '<$>{break}<$>',
+    '1058': '<$>{break}<$>',
+    '157a': '<$>{break}<$>',
+    '157f': '<$>{break}<$>',
+    '9279': '<$>{break}<$>',
+    '94f4': '<$>{break}<$>',
+    '94f7': '<$>{break}<$>',
+    '94f1': '<$>{break}<$>',
+    '9449': '<$>{break}<$>',
+    '92fb': '<$>{break}<$>',
+    '9446': '<$>{break}<$>',
+    '9445': '<$>{break}<$>',
+    '9443': '<$>{break}<$>',
+    '94f8': '<$>{break}<$>',
+    '9440': '<$>{break}<$>',
+    '1057': '<$>{break}<$>',
+    '9245': '<$>{break}<$>',
+    '92f2': '<$>{break}<$>',
+    '1579': '<$>{break}<$>',
+    '92f7': '<$>{break}<$>',
+    '105e': '<$>{break}<$>',
+    '92f4': '<$>{break}<$>',
+    '1573': '<$>{break}<$>',
+    '1570': '<$>{break}<$>',
+    '1576': '<$>{break}<$>',
+    '1575': '<$>{break}<$>',
+    '16c1': '<$>{break}<$>',
+    '16c2': '<$>{break}<$>',
+    '9168': '<$>{break}<$>',
+    '16c7': '<$>{break}<$>',
+    '9164': '<$>{break}<$>',
+    '9167': '<$>{break}<$>',
+    '9161': '<$>{break}<$>',
+    '9162': '<$>{break}<$>',
+    '947f': '<$>{break}<$>',
+    '91c2': '<$>{break}<$>',
+    '91c1': '<$>{break}<$>',
+    '91c7': '<$>{break}<$>',
+    '91c4': '<$>{break}<$>',
+    '13e3': '<$>{break}<$>',
+    '91c8': '<$>{break}<$>',
+    '91d0': '<$>{break}<$>',
+    '13e5': '<$>{break}<$>',
+    '13c8': '<$>{break}<$>',
+    '16cb': '<$>{break}<$>',
+    '16cd': '<$>{break}<$>',
+    '16ce': '<$>{break}<$>{italic}<$>',
+    '916d': '<$>{break}<$>',
+    '916e': '<$>{break}<$>{italic}<$>',
+    '916b': '<$>{break}<$>',
+    '91d5': '<$>{break}<$>',
+    '137a': '<$>{break}<$>',
+    '91cb': '<$>{break}<$>',
+    '91ce': '<$>{break}<$>{italic}<$>',
+    '91cd': '<$>{break}<$>',
+    '13ec': '<$>{break}<$>',
+    '13c1': '<$>{break}<$>',
+    '13ea': '<$>{break}<$>',
+    '13ef': '<$>{break}<$>{italic}<$>',
+    '94f2': '<$>{break}<$>',
+    '97fb': '<$>{break}<$>',
+    '97fc': '<$>{break}<$>',
+    '1658': '<$>{break}<$>',
+    '97fd': '<$>{break}<$>',
+    '97fe': '<$>{break}<$>',
+    '1652': '<$>{break}<$>',
+    '1651': '<$>{break}<$>',
+    '1657': '<$>{break}<$>',
+    '1654': '<$>{break}<$>',
+    '10cb': '<$>{break}<$>',
+    '97f2': '<$>{break}<$>',
+    '97f1': '<$>{break}<$>',
+    '97f7': '<$>{break}<$>',
+    '97f4': '<$>{break}<$>',
+    '165b': '<$>{break}<$>',
+    '97f8': '<$>{break}<$>',
+    '165d': '<$>{break}<$>',
+    '165e': '<$>{break}<$>',
+    '15cd': '<$>{break}<$>',
+    '10cd': '<$>{break}<$>',
+    '9767': '<$>{break}<$>',
+    '9249': '<$>{break}<$>',
+    '1349': '<$>{break}<$>',
+    '91d9': '<$>{break}<$>',
+    '1340': '<$>{break}<$>',
+    '91d3': '<$>{break}<$>',
+    '9243': '<$>{break}<$>',
+    '1343': '<$>{break}<$>',
+    '91d6': '<$>{break}<$>',
+    '1345': '<$>{break}<$>',
+    '1346': '<$>{break}<$>',
+    '9246': '<$>{break}<$>',
+    '94e9': '<$>{break}<$>',
+    '94e5': '<$>{break}<$>',
+    '94e6': '<$>{break}<$>',
+    '94e0': '<$>{break}<$>',
+    '94e3': '<$>{break}<$>',
+    '15ea': '<$>{break}<$>',
+    '15ec': '<$>{break}<$>',
+    '15ef': '<$>{break}<$>{italic}<$>',
+    '16fe': '<$>{break}<$>',
+    '16fd': '<$>{break}<$>',
+    '16fc': '<$>{break}<$>',
+    '16fb': '<$>{break}<$>',
+    '1367': '<$>{break}<$>',
+    '94ef': '<$>{break}<$>{italic}<$>',
+    '94ea': '<$>{break}<$>',
+    '94ec': '<$>{break}<$>',
+    '924a': '<$>{break}<$>',
+    '91dc': '<$>{break}<$>',
+    '924c': '<$>{break}<$>',
+    '91da': '<$>{break}<$>',
+    '91df': '<$>{break}<$>',
+    '134f': '<$>{break}<$>{italic}<$>',
+    '924f': '<$>{break}<$>{italic}<$>',
+    '16f8': '<$>{break}<$>',
+    '16f7': '<$>{break}<$>',
+    '16f4': '<$>{break}<$>',
+    '16f2': '<$>{break}<$>',
+    '16f1': '<$>{break}<$>',
+    '15e0': '<$>{break}<$>',
+    '15e3': '<$>{break}<$>',
+    '15e5': '<$>{break}<$>',
+    '15e6': '<$>{break}<$>',
+    '15e9': '<$>{break}<$>',
+    '9757': '<$>{break}<$>',
+    '9754': '<$>{break}<$>',
+    '9752': '<$>{break}<$>',
+    '9751': '<$>{break}<$>',
+    '9758': '<$>{break}<$>',
+    '92f1': '<$>{break}<$>',
+    '104c': '<$>{break}<$>',
+    '104a': '<$>{break}<$>',
+    '104f': '<$>{break}<$>{italic}<$>',
+    '105d': '<$>{break}<$>',
+    '92f8': '<$>{break}<$>',
+    '975e': '<$>{break}<$>',
+    '975d': '<$>{break}<$>',
+    '975b': '<$>{break}<$>',
+    '1043': '<$>{break}<$>',
+    '1040': '<$>{break}<$>',
+    '1046': '<$>{break}<$>',
+    '1045': '<$>{break}<$>',
+    '1049': '<$>{break}<$>',
+    '9479': '<$>{break}<$>',
+    '917f': '<$>{break}<$>',
+    '9470': '<$>{break}<$>',
+    '9476': '<$>{break}<$>',
+    '917a': '<$>{break}<$>',
+    '9475': '<$>{break}<$>',
+    '927a': '<$>{break}<$>',
+    '927f': '<$>{break}<$>',
+    '134a': '<$>{break}<$>',
+    '15fb': '<$>{break}<$>',
+    '15fc': '<$>{break}<$>',
+    '15fd': '<$>{break}<$>',
+    '15fe': '<$>{break}<$>',
+    '1546': '<$>{break}<$>',
+    '1545': '<$>{break}<$>',
+    '1543': '<$>{break}<$>',
+    '1540': '<$>{break}<$>',
+    '1549': '<$>{break}<$>',
+    '13fd': '<$>{break}<$>',
+    '13fe': '<$>{break}<$>',
+    '13fb': '<$>{break}<$>',
+    '13fc': '<$>{break}<$>',
+    '92e9': '<$>{break}<$>',
+    '92e6': '<$>{break}<$>',
+    '9458': '<$>{break}<$>',
+    '92e5': '<$>{break}<$>',
+    '92e3': '<$>{break}<$>',
+    '92e0': '<$>{break}<$>',
+    '9270': '<$>{break}<$>',
+    '9273': '<$>{break}<$>',
+    '9275': '<$>{break}<$>',
+    '9276': '<$>{break}<$>',
+    '15f1': '<$>{break}<$>',
+    '15f2': '<$>{break}<$>',
+    '15f4': '<$>{break}<$>',
+    '15f7': '<$>{break}<$>',
+    '9179': '<$>{break}<$>',
+    '9176': '<$>{break}<$>',
+    '9175': '<$>{break}<$>',
+    '947a': '<$>{break}<$>',
+    '9173': '<$>{break}<$>',
+    '9170': '<$>{break}<$>',
+    '13f7': '<$>{break}<$>',
+    '13f4': '<$>{break}<$>',
+    '13f2': '<$>{break}<$>',
+    '13f1': '<$>{break}<$>',
+    '92ef': '<$>{break}<$>{italic}<$>',
+    '92ec': '<$>{break}<$>',
+    '13f8': '<$>{break}<$>',
+    '92ea': '<$>{break}<$>',
+    '154f': '<$>{break}<$>{italic}<$>',
+    '154c': '<$>{break}<$>',
+    '154a': '<$>{break}<$>',
+    '16c4': '<$>{break}<$>',
+    '16c8': '<$>{break}<$>',
+    '97c8': '<$>{break}<$>',
+    '164f': '<$>{break}<$>{italic}<$>',
+    '164a': '<$>{break}<$>',
+    '164c': '<$>{break}<$>',
+    '1645': '<$>{break}<$>',
+    '1646': '<$>{break}<$>',
+    '1640': '<$>{break}<$>',
+    '1643': '<$>{break}<$>',
+    '1649': '<$>{break}<$>',
+    '94df': '<$>{break}<$>',
+    '94dc': '<$>{break}<$>',
+    '94da': '<$>{break}<$>',
+    '135b': '<$>{break}<$>',
+    '135e': '<$>{break}<$>',
+    '135d': '<$>{break}<$>',
+    '1370': '<$>{break}<$>',
+    '9240': '<$>{break}<$>',
+    '13e9': '<$>{break}<$>',
+    '1375': '<$>{break}<$>',
+    '1679': '<$>{break}<$>',
+    '1358': '<$>{break}<$>',
+    '1352': '<$>{break}<$>',
+    '1351': '<$>{break}<$>',
+    '1376': '<$>{break}<$>',
+    '1357': '<$>{break}<$>',
+    '1354': '<$>{break}<$>',
+    '1379': '<$>{break}<$>',
+    '94d9': '<$>{break}<$>',
+    '94d6': '<$>{break}<$>',
+    '94d5': '<$>{break}<$>',
+    '15462': '<$>{break}<$>',
+    '94d3': '<$>{break}<$>',
+    '94d0': '<$>{break}<$>',
+    '13e0': '<$>{break}<$>',
+    '13e6': '<$>{break}<$>',
+    '976b': '<$>{break}<$>',
+    '15c4': '<$>{break}<$>',
+    '15c2': '<$>{break}<$>',
+    '15c1': '<$>{break}<$>',
+    '976e': '<$>{break}<$>{italic}<$>',
+    '134c': '<$>{break}<$>',
+    '15c8': '<$>{break}<$>',
+    '92c8': '<$>{break}<$>',
+    '16e9': '<$>{break}<$>',
+    '16e3': '<$>{break}<$>',
+    '16e0': '<$>{break}<$>',
+    '16e6': '<$>{break}<$>',
+    '16e5': '<$>{break}<$>',
+    '91e5': '<$>{break}<$>',
+    '91e6': '<$>{break}<$>',
+    '91e0': '<$>{break}<$>',
+    '91e3': '<$>{break}<$>',
+    '13c4': '<$>{break}<$>',
+    '13c7': '<$>{break}<$>',
+    '91e9': '<$>{break}<$>',
+    '13c2': '<$>{break}<$>',
+    '9762': '<$>{break}<$>',
+    '15ce': '<$>{break}<$>{italic}<$>',
+    '9761': '<$>{break}<$>',
+    '15cb': '<$>{break}<$>',
+    '9764': '<$>{break}<$>',
+    '9768': '<$>{break}<$>',
+    '91ef': '<$>{break}<$>{italic}<$>',
+    '91ea': '<$>{break}<$>',
+    '91ec': '<$>{break}<$>',
+    '13ce': '<$>{break}<$>{italic}<$>',
+    '13cd': '<$>{break}<$>',
+    '97da': '<$>{break}<$>',
+    '13cb': '<$>{break}<$>',
+    '13462': '<$>{break}<$>',
+    '16ec': '<$>{break}<$>',
+    '16ea': '<$>{break}<$>',
+    '16ef': '<$>{break}<$>{italic}<$>',
+    '97c1': '<$>{break}<$>',
+    '97c2': '<$>{break}<$>',
+    '97c4': '<$>{break}<$>',
+    '97c7': '<$>{break}<$>',
+    '92cd': '<$>{break}<$>',
+    '92ce': '<$>{break}<$>{italic}<$>',
+    '92cb': '<$>{break}<$>',
+    '92da': '<$>{break}<$>',
+    '92dc': '<$>{break}<$>',
+    '92df': '<$>{break}<$>',
+    '97df': '<$>{break}<$>',
+    '155b': '<$>{break}<$>',
+    '155e': '<$>{break}<$>',
+    '155d': '<$>{break}<$>',
+    '97dc': '<$>{break}<$>',
+    '1675': '<$>{break}<$>',
+    '1676': '<$>{break}<$>',
+    '1670': '<$>{break}<$>',
+    '1673': '<$>{break}<$>',
+    '16462': '<$>{break}<$>',
+    '97cb': '<$>{break}<$>',
+    '97ce': '<$>{break}<$>{italic}<$>',
+    '97cd': '<$>{break}<$>',
+    '92c4': '<$>{break}<$>',
+    '92c7': '<$>{break}<$>',
+    '92c1': '<$>{break}<$>',
+    '92c2': '<$>{break}<$>',
+    '1551': '<$>{break}<$>',
+    '97d5': '<$>{break}<$>',
+    '97d6': '<$>{break}<$>',
+    '1552': '<$>{break}<$>',
+    '97d0': '<$>{break}<$>',
+    '1554': '<$>{break}<$>',
+    '1557': '<$>{break}<$>',
+    '97d3': '<$>{break}<$>',
+    '1558': '<$>{break}<$>',
+    '167f': '<$>{break}<$>',
+    '137f': '<$>{break}<$>',
+    '167a': '<$>{break}<$>',
+    '92d9': '<$>{break}<$>',
+    '92d0': '<$>{break}<$>',
+    '92d3': '<$>{break}<$>',
+    '92d5': '<$>{break}<$>',
+    '92d6': '<$>{break}<$>',
+    '10dc': '<$>{break}<$>',
+    '9262': '<$>{break}<$>',
+    '9261': '<$>{break}<$>',
+    '91f8': '<$>{break}<$>',
+    '10df': '<$>{break}<$>',
+    '9264': '<$>{break}<$>',
+    '91f4': '<$>{break}<$>',
+    '91f7': '<$>{break}<$>',
+    '91f1': '<$>{break}<$>',
+    '91f2': '<$>{break}<$>',
+    '97d9': '<$>{break}<$>',
+    '9149': '<$>{break}<$>',
+    '9143': '<$>{break}<$>',
+    '9140': '<$>{break}<$>',
+    '9146': '<$>{break}<$>',
+    '9145': '<$>{break}<$>',
+    '9464': '<$>{break}<$>',
+    '9467': '<$>{break}<$>',
+    '9461': '<$>{break}<$>',
+    '9462': '<$>{break}<$>',
+    '9468': '<$>{break}<$>',
+    '914c': '<$>{break}<$>',
+    '914a': '<$>{break}<$>',
+    '914f': '<$>{break}<$>{italic}<$>',
+    '10d3': '<$>{break}<$>',
+    '926b': '<$>{break}<$>',
+    '10d0': '<$>{break}<$>',
+    '10d6': '<$>{break}<$>',
+    '926e': '<$>{break}<$>{italic}<$>',
+    '926d': '<$>{break}<$>',
+    '91fd': '<$>{break}<$>',
+    '91fe': '<$>{break}<$>',
+    '10d9': '<$>{break}<$>',
+    '91fb': '<$>{break}<$>',
+    '91fc': '<$>{break}<$>',
+    '946e': '<$>{break}<$>{italic}<$>',
+    '946d': '<$>{break}<$>',
+    '946b': '<$>{break}<$>',
+    '10da': '<$>{break}<$>',
+    '10d5': '<$>{break}<$>',
+    '9267': '<$>{break}<$>',
+    '9268': '<$>{break}<$>',
+    '16df': '<$>{break}<$>',
+    '16da': '<$>{break}<$>',
+    '16dc': '<$>{break}<$>',
+    '9454': '<$>{break}<$>',
+    '9457': '<$>{break}<$>',
+    '9451': '<$>{break}<$>',
+    '9452': '<$>{break}<$>',
+    '136d': '<$>{break}<$>',
+    '136e': '<$>{break}<$>{italic}<$>',
+    '136b': '<$>{break}<$>',
+    '13d9': '<$>{break}<$>',
+    '13da': '<$>{break}<$>',
+    '13dc': '<$>{break}<$>',
+    '13df': '<$>{break}<$>',
+    '1568': '<$>{break}<$>',
+    '1561': '<$>{break}<$>',
+    '1564': '<$>{break}<$>',
+    '1567': '<$>{break}<$>',
+    '16d5': '<$>{break}<$>',
+    '16d6': '<$>{break}<$>',
+    '16d0': '<$>{break}<$>',
+    '16d3': '<$>{break}<$>',
+    '945d': '<$>{break}<$>',
+    '945e': '<$>{break}<$>',
+    '16d9': '<$>{break}<$>',
+    '945b': '<$>{break}<$>',
+    '156b': '<$>{break}<$>',
+    '156d': '<$>{break}<$>',
+    '156e': '<$>{break}<$>{italic}<$>',
+    '105b': '<$>{break}<$>',
+    '1364': '<$>{break}<$>',
+    '1368': '<$>{break}<$>',
+    '1361': '<$>{break}<$>',
+    '13d0': '<$>{break}<$>',
+    '13d3': '<$>{break}<$>',
+    '13d5': '<$>{break}<$>',
+    '13d6': '<$>{break}<$>',
+    '97a1': '',
+    '97a2': '',
+    '9723': '',
+    '94a1': '',
+    '94a4': '',
+    '94ad': '',
+    '1020': '',
+    '10a1': '',
+    '10a2': '',
+    '1023': '',
+    '10a4': '',
+    '1025': '',
+    '1026': '',
+    '10a7': '',
+    '10a8': '',
+    '1029': '',
+    '102a': '',
+    '10ab': '',
+    '102c': '',
+    '10ad': '',
+    '10ae': '',
+    '102f': '',
+    '97ad': '',
+    '97a4': '',
+    '9725': '',
+    '9726': '',
+    '97a7': '',
+    '97a8': '',
+    '9729': '',
+    '972a': '',
+    '9120': '<$>{end-italic}<$>',
+    '91a1': '',
+    '91a2': '',
+    '9123': '',
+    '91a4': '',
+    '9125': '',
+    '9126': '',
+    '91a7': '',
+    '91a8': '',
+    '9129': '',
+    '912a': '',
+    '91ab': '',
+    '912c': '',
+    '91ad': '',
+    '97ae': '',
+    '972f': '',
+    '91ae': '<$>{italic}<$>',
+    '912f': '<$>{italic}<$>',
+    '94a8': '',
+    '9423': '',
+    '94a2': '',
 }
 
 
 CHARACTERS = {
-    u'20': u' ',
-    u'a1': u'!',
-    u'a2': u'"',
-    u'23': u'#',
-    u'a4': u'$',
-    u'25': u'%',
-    u'26': u'&',
-    u'a7': u'\'',
-    u'a8': u'(',
-    u'29': u')',
-    u'2a': u'á',
-    u'ab': u'+',
-    u'2c': u',',
-    u'ad': u'-',
-    u'ae': u'.',
-    u'2f': u'/',
-    u'b0': u'0',
-    u'31': u'1',
-    u'32': u'2',
-    u'b3': u'3',
-    u'34': u'4',
-    u'b5': u'5',
-    u'b6': u'6',
-    u'37': u'7',
-    u'38': u'8',
-    u'b9': u'9',
-    u'ba': u':',
-    u'3b': u';',
-    u'bc': u'<',
-    u'3d': u'=',
-    u'3e': u'>',
-    u'bf': u'?',
-    u'40': u'@',
-    u'c1': u'A',
-    u'c2': u'B',
-    u'43': u'C',
-    u'c4': u'D',
-    u'45': u'E',
-    u'46': u'F',
-    u'c7': u'G',
-    u'c8': u'H',
-    u'49': u'I',
-    u'4a': u'J',
-    u'cb': u'K',
-    u'4c': u'L',
-    u'cd': u'M',
-    u'ce': u'N',
-    u'4f': u'O',
-    u'd0': u'P',
-    u'51': u'Q',
-    u'52': u'R',
-    u'd3': u'S',
-    u'54': u'T',
-    u'd5': u'U',
-    u'd6': u'V',
-    u'57': u'W',
-    u'58': u'X',
-    u'd9': u'Y',
-    u'da': u'Z',
-    u'5b': u'[',
-    u'dc': u'é',
-    u'5d': u']',
-    u'5e': u'í',
-    u'df': u'ó',
-    u'e0': u'ú',
-    u'61': u'a',
-    u'62': u'b',
-    u'e3': u'c',
-    u'64': u'd',
-    u'e5': u'e',
-    u'e6': u'f',
-    u'67': u'g',
-    u'68': u'h',
-    u'e9': u'i',
-    u'ea': u'j',
-    u'6b': u'k',
-    u'ec': u'l',
-    u'6d': u'm',
-    u'6e': u'n',
-    u'ef': u'o',
-    u'70': u'p',
-    u'f1': u'q',
-    u'f2': u'r',
-    u'73': u's',
-    u'f4': u't',
-    u'75': u'u',
-    u'76': u'v',
-    u'f7': u'w',
-    u'f8': u'x',
-    u'79': u'y',
-    u'7a': u'z',
-    u'fb': u'ç',
-    u'7c': u'÷',
-    u'fd': u'Ñ',
-    u'fe': u'ñ',
-    u'7f': u'',
-    u'80': u''
+    '20': ' ',
+    'a1': '!',
+    'a2': '"',
+    '23': '#',
+    'a4': '$',
+    '25': '%',
+    '26': '&',
+    'a7': '\'',
+    'a8': '(',
+    '29': ')',
+    '2a': 'á',
+    'ab': '+',
+    '2c': ',',
+    'ad': '-',
+    'ae': '.',
+    '2f': '/',
+    'b0': '0',
+    '31': '1',
+    '32': '2',
+    'b3': '3',
+    '34': '4',
+    'b5': '5',
+    'b6': '6',
+    '37': '7',
+    '38': '8',
+    'b9': '9',
+    'ba': ':',
+    '3b': ';',
+    'bc': '<',
+    '3d': '=',
+    '3e': '>',
+    'bf': '?',
+    '40': '@',
+    'c1': 'A',
+    'c2': 'B',
+    '43': 'C',
+    'c4': 'D',
+    '45': 'E',
+    '46': 'F',
+    'c7': 'G',
+    'c8': 'H',
+    '49': 'I',
+    '4a': 'J',
+    'cb': 'K',
+    '4c': 'L',
+    'cd': 'M',
+    'ce': 'N',
+    '4f': 'O',
+    'd0': 'P',
+    '51': 'Q',
+    '52': 'R',
+    'd3': 'S',
+    '54': 'T',
+    'd5': 'U',
+    'd6': 'V',
+    '57': 'W',
+    '58': 'X',
+    'd9': 'Y',
+    'da': 'Z',
+    '5b': '[',
+    'dc': 'é',
+    '5d': ']',
+    '5e': 'í',
+    'df': 'ó',
+    'e0': 'ú',
+    '61': 'a',
+    '62': 'b',
+    'e3': 'c',
+    '64': 'd',
+    'e5': 'e',
+    'e6': 'f',
+    '67': 'g',
+    '68': 'h',
+    'e9': 'i',
+    'ea': 'j',
+    '6b': 'k',
+    'ec': 'l',
+    '6d': 'm',
+    '6e': 'n',
+    'ef': 'o',
+    '70': 'p',
+    'f1': 'q',
+    'f2': 'r',
+    '73': 's',
+    'f4': 't',
+    '75': 'u',
+    '76': 'v',
+    'f7': 'w',
+    'f8': 'x',
+    '79': 'y',
+    '7a': 'z',
+    'fb': 'ç',
+    '7c': '÷',
+    'fd': 'Ñ',
+    'fe': 'ñ',
+    '7f': '',
+    '80': ''
 }
 
 
 SPECIAL_CHARS = {
-    u'91b0': u'®',
-    u'9131': u'°',
-    u'9132': u'½',
-    u'91b3': u'¿',
-    u'91b4': u'™',
-    u'91b5': u'¢',
-    u'91b6': u'£',
-    u'9137': u'♪',
-    u'9138': u'à',
-    u'91b9': u' ',
-    u'91ba': u'è',
-    u'913b': u'â',
-    u'91bc': u'ê',
-    u'913d': u'î',
-    u'913e': u'ô',
-    u'91bf': u'û'
+    '91b0': '®',
+    '9131': '°',
+    '9132': '½',
+    '91b3': '¿',
+    '91b4': '™',
+    '91b5': '¢',
+    '91b6': '£',
+    '9137': '♪',
+    '9138': 'à',
+    '91b9': ' ',
+    '91ba': 'è',
+    '913b': 'â',
+    '91bc': 'ê',
+    '913d': 'î',
+    '913e': 'ô',
+    '91bf': 'û'
 }
 
 
 EXTENDED_CHARS = {
-    u'9220': u'Á',
-    u'92a1': u'É',
-    u'92a2': u'Ó',
-    u'9223': u'Ú',
-    u'92a4': u'Ü',
-    u'9225': u'ü',
-    u'9226': u'‘',
-    u'92a7': u'¡',
-    u'92a8': u'*',
-    u'9229': u'’',
-    u'922a': u'—',
-    u'92ab': u'©',
-    u'922c': u'℠',
-    u'92ad': u'•',
-    u'92ae': u'“',
-    u'922f': u'”',
-    u'92b0': u'À',
-    u'9231': u'Â',
-    u'9232': u'Ç',
-    u'92b3': u'È',
-    u'9234': u'Ê',
-    u'92b5': u'Ë',
-    u'92b6': u'ë',
-    u'9237': u'Î',
-    u'9238': u'Ï',
-    u'92b9': u'ï',
-    u'92ba': u'Ô',
-    u'923b': u'Ù',
-    u'92bc': u'ù',
-    u'923d': u'Û',
-    u'923e': u'«',
-    u'92bf': u'»',
-    u'1320': u'Ã',
-    u'13a1': u'ã',
-    u'13a2': u'Í',
-    u'1323': u'Ì',
-    u'13a4': u'ì',
-    u'1325': u'Ò',
-    u'1326': u'ò',
-    u'13a7': u'Õ',
-    u'13a8': u'õ',
-    u'1329': u'{',
-    u'132a': u'}',
-    u'13ab': u'\\',
-    u'132c': u'^',
-    u'13ad': u'_',
-    u'13ae': u'¦',
-    u'132f': u'~',
-    u'13b0': u'Ä',
-    u'1331': u'ä',
-    u'1332': u'Ö',
-    u'13b3': u'ö',
-    u'1334': u'ß',
-    u'13b5': u'¥',
-    u'13b6': u'¤',
-    u'1337': u'|',
-    u'1338': u'Å',
-    u'13b9': u'å',
-    u'13ba': u'Ø',
-    u'133b': u'ø',
-    u'13bc': u'┌',
-    u'133d': u'┐',
-    u'133e': u'└',
-    u'13bf': u'┘',
+    '9220': 'Á',
+    '92a1': 'É',
+    '92a2': 'Ó',
+    '9223': 'Ú',
+    '92a4': 'Ü',
+    '9225': 'ü',
+    '9226': '‘',
+    '92a7': '¡',
+    '92a8': '*',
+    '9229': '’',
+    '922a': '—',
+    '92ab': '©',
+    '922c': '℠',
+    '92ad': '•',
+    '92ae': '“',
+    '922f': '”',
+    '92b0': 'À',
+    '9231': 'Â',
+    '9232': 'Ç',
+    '92b3': 'È',
+    '9234': 'Ê',
+    '92b5': 'Ë',
+    '92b6': 'ë',
+    '9237': 'Î',
+    '9238': 'Ï',
+    '92b9': 'ï',
+    '92ba': 'Ô',
+    '923b': 'Ù',
+    '92bc': 'ù',
+    '923d': 'Û',
+    '923e': '«',
+    '92bf': '»',
+    '1320': 'Ã',
+    '13a1': 'ã',
+    '13a2': 'Í',
+    '1323': 'Ì',
+    '13a4': 'ì',
+    '1325': 'Ò',
+    '1326': 'ò',
+    '13a7': 'Õ',
+    '13a8': 'õ',
+    '1329': '{',
+    '132a': '}',
+    '13ab': '\\',
+    '132c': '^',
+    '13ad': '_',
+    '13ae': '¦',
+    '132f': '~',
+    '13b0': 'Ä',
+    '1331': 'ä',
+    '1332': 'Ö',
+    '13b3': 'ö',
+    '1334': 'ß',
+    '13b5': '¥',
+    '13b6': '¤',
+    '1337': '|',
+    '1338': 'Å',
+    '13b9': 'å',
+    '13ba': 'Ø',
+    '133b': 'ø',
+    '13bc': '┌',
+    '133d': '┐',
+    '133e': '└',
+    '13bf': '┘',
 }
 
 
 # Cursor positioning codes
 PAC_HIGH_BYTE_BY_ROW = [
-    u'xx',
-    u'91',
-    u'91',
-    u'92',
-    u'92',
-    u'15',
-    u'15',
-    u'16',
-    u'16',
-    u'97',
-    u'97',
-    u'10',
-    u'13',
-    u'13',
-    u'94',
-    u'94'
+    'xx',
+    '91',
+    '91',
+    '92',
+    '92',
+    '15',
+    '15',
+    '16',
+    '16',
+    '97',
+    '97',
+    '10',
+    '13',
+    '13',
+    '94',
+    '94'
 ]
 PAC_LOW_BYTE_BY_ROW_RESTRICTED = [
-    u'xx',
-    u'd0',
-    u'70',
-    u'd0',
-    u'70',
-    u'd0',
-    u'70',
-    u'd0',
-    u'70',
-    u'd0',
-    u'70',
-    u'd0',
-    u'd0',
-    u'70',
-    u'd0',
-    u'70'
+    'xx',
+    'd0',
+    '70',
+    'd0',
+    '70',
+    'd0',
+    '70',
+    'd0',
+    '70',
+    'd0',
+    '70',
+    'd0',
+    'd0',
+    '70',
+    'd0',
+    '70'
 ]
 
 # High order bytes come first, then each key contains a list of low bytes.
@@ -781,158 +781,158 @@
 # This particular dictionary will get transformed to a more suitable form for
 # usage like PAC_BYTES_TO_POSITIONING_MAP[u'91'][u'd6'] = (1, 12)
 PAC_BYTES_TO_POSITIONING_MAP = {
-    u'91': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (1, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (2, 0),  # noqa
-        (u'52', u'd3'): (1, 4),
-        (u'54', u'd5'): (1, 8),
-        (u'd6', u'57'): (1, 12),
-        (u'58', u'd9'): (1, 16),
-        (u'da', u'5b'): (1, 20),
-        (u'dc', u'5d'): (1, 24),
-        (u'5e', u'df'): (1, 28),
-
-        (u'f2', u'73'): (2, 4),
-        (u'f4', u'75'): (2, 8),
-        (u'76', u'f7'): (2, 12),
-        (u'f8', u'79'): (2, 16),
-        (u'7a', u'fb'): (2, 20),
-        (u'7c', u'fd'): (2, 24),
-        (u'fe', u'7f'): (2, 28)
+    '91': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (1, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (2, 0),  # noqa
+        ('52', 'd3'): (1, 4),
+        ('54', 'd5'): (1, 8),
+        ('d6', '57'): (1, 12),
+        ('58', 'd9'): (1, 16),
+        ('da', '5b'): (1, 20),
+        ('dc', '5d'): (1, 24),
+        ('5e', 'df'): (1, 28),
+
+        ('f2', '73'): (2, 4),
+        ('f4', '75'): (2, 8),
+        ('76', 'f7'): (2, 12),
+        ('f8', '79'): (2, 16),
+        ('7a', 'fb'): (2, 20),
+        ('7c', 'fd'): (2, 24),
+        ('fe', '7f'): (2, 28)
     },
-    u'92': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (3, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (4, 0),  # noqa
-        (u'52', u'd3'): (3, 4),
-        (u'54', u'd5'): (3, 8),
-        (u'd6', u'57'): (3, 12),
-        (u'58', u'd9'): (3, 16),
-        (u'da', u'5b'): (3, 20),
-        (u'dc', u'5d'): (3, 24),
-        (u'5e', u'df'): (3, 28),
-
-        (u'f2', u'73'): (4, 4),
-        (u'f4', u'75'): (4, 8),
-        (u'76', u'f7'): (4, 12),
-        (u'f8', u'79'): (4, 16),
-        (u'7a', u'fb'): (4, 20),
-        (u'7c', u'fd'): (4, 24),
-        (u'fe', u'7f'): (4, 28)
+    '92': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (3, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (4, 0),  # noqa
+        ('52', 'd3'): (3, 4),
+        ('54', 'd5'): (3, 8),
+        ('d6', '57'): (3, 12),
+        ('58', 'd9'): (3, 16),
+        ('da', '5b'): (3, 20),
+        ('dc', '5d'): (3, 24),
+        ('5e', 'df'): (3, 28),
+
+        ('f2', '73'): (4, 4),
+        ('f4', '75'): (4, 8),
+        ('76', 'f7'): (4, 12),
+        ('f8', '79'): (4, 16),
+        ('7a', 'fb'): (4, 20),
+        ('7c', 'fd'): (4, 24),
+        ('fe', '7f'): (4, 28)
     },
-    u'15': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (5, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (6, 0),  # noqa
-        (u'52', u'd3'): (5, 4),
-        (u'54', u'd5'): (5, 8),
-        (u'd6', u'57'): (5, 12),
-        (u'58', u'd9'): (5, 16),
-        (u'da', u'5b'): (5, 20),
-        (u'dc', u'5d'): (5, 24),
-        (u'5e', u'df'): (5, 28),
-
-        (u'f2', u'73'): (6, 4),
-        (u'f4', u'75'): (6, 8),
-        (u'76', u'f7'): (6, 12),
-        (u'f8', u'79'): (6, 16),
-        (u'7a', u'fb'): (6, 20),
-        (u'7c', u'fd'): (6, 24),
-        (u'fe', u'7f'): (6, 28)
+    '15': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (5, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (6, 0),  # noqa
+        ('52', 'd3'): (5, 4),
+        ('54', 'd5'): (5, 8),
+        ('d6', '57'): (5, 12),
+        ('58', 'd9'): (5, 16),
+        ('da', '5b'): (5, 20),
+        ('dc', '5d'): (5, 24),
+        ('5e', 'df'): (5, 28),
+
+        ('f2', '73'): (6, 4),
+        ('f4', '75'): (6, 8),
+        ('76', 'f7'): (6, 12),
+        ('f8', '79'): (6, 16),
+        ('7a', 'fb'): (6, 20),
+        ('7c', 'fd'): (6, 24),
+        ('fe', '7f'): (6, 28)
     },
-    u'16': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (7, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (8, 0),  # noqa
-        (u'52', u'd3'): (7, 4),
-        (u'54', u'd5'): (7, 8),
-        (u'd6', u'57'): (7, 12),
-        (u'58', u'd9'): (7, 16),
-        (u'da', u'5b'): (7, 20),
-        (u'dc', u'5d'): (7, 24),
-        (u'5e', u'df'): (7, 28),
-
-        (u'f2', u'73'): (8, 4),
-        (u'f4', u'75'): (8, 8),
-        (u'76', u'f7'): (8, 12),
-        (u'f8', u'79'): (8, 16),
-        (u'7a', u'fb'): (8, 20),
-        (u'7c', u'fd'): (8, 24),
-        (u'fe', u'7f'): (8, 28)
+    '16': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (7, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (8, 0),  # noqa
+        ('52', 'd3'): (7, 4),
+        ('54', 'd5'): (7, 8),
+        ('d6', '57'): (7, 12),
+        ('58', 'd9'): (7, 16),
+        ('da', '5b'): (7, 20),
+        ('dc', '5d'): (7, 24),
+        ('5e', 'df'): (7, 28),
+
+        ('f2', '73'): (8, 4),
+        ('f4', '75'): (8, 8),
+        ('76', 'f7'): (8, 12),
+        ('f8', '79'): (8, 16),
+        ('7a', 'fb'): (8, 20),
+        ('7c', 'fd'): (8, 24),
+        ('fe', '7f'): (8, 28)
     },
-    u'97': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (9, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (10, 0),  # noqa
-        (u'52', u'd3'): (9, 4),
-        (u'54', u'd5'): (9, 8),
-        (u'd6', u'57'): (9, 12),
-        (u'58', u'd9'): (9, 16),
-        (u'da', u'5b'): (9, 20),
-        (u'dc', u'5d'): (9, 24),
-        (u'5e', u'df'): (9, 28),
-
-        (u'f2', u'73'): (10, 4),
-        (u'f4', u'75'): (10, 8),
-        (u'76', u'f7'): (10, 12),
-        (u'f8', u'79'): (10, 16),
-        (u'7a', u'fb'): (10, 20),
-        (u'7c', u'fd'): (10, 24),
-        (u'fe', u'7f'): (10, 28)
+    '97': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (9, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (10, 0),  # noqa
+        ('52', 'd3'): (9, 4),
+        ('54', 'd5'): (9, 8),
+        ('d6', '57'): (9, 12),
+        ('58', 'd9'): (9, 16),
+        ('da', '5b'): (9, 20),
+        ('dc', '5d'): (9, 24),
+        ('5e', 'df'): (9, 28),
+
+        ('f2', '73'): (10, 4),
+        ('f4', '75'): (10, 8),
+        ('76', 'f7'): (10, 12),
+        ('f8', '79'): (10, 16),
+        ('7a', 'fb'): (10, 20),
+        ('7c', 'fd'): (10, 24),
+        ('fe', '7f'): (10, 28)
     },
-    u'10': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (11, 0),  # noqa
-        (u'52', u'd3'): (11, 4),
-        (u'54', u'd5'): (11, 8),
-        (u'd6', u'57'): (11, 12),
-        (u'58', u'd9'): (11, 16),
-        (u'da', u'5b'): (11, 20),
-        (u'dc', u'5d'): (11, 24),
-        (u'5e', u'df'): (11, 28),
+    '10': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (11, 0),  # noqa
+        ('52', 'd3'): (11, 4),
+        ('54', 'd5'): (11, 8),
+        ('d6', '57'): (11, 12),
+        ('58', 'd9'): (11, 16),
+        ('da', '5b'): (11, 20),
+        ('dc', '5d'): (11, 24),
+        ('5e', 'df'): (11, 28),
     },
-    u'13': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (12, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (13, 0),  # noqa
-        (u'52', u'd3'): (12, 4),
-        (u'54', u'd5'): (12, 8),
-        (u'd6', u'57'): (12, 12),
-        (u'58', u'd9'): (12, 16),
-        (u'da', u'5b'): (12, 20),
-        (u'dc', u'5d'): (12, 24),
-        (u'5e', u'df'): (12, 28),
-
-        (u'f2', u'73'): (13, 4),
-        (u'f4', u'75'): (13, 8),
-        (u'76', u'f7'): (13, 12),
-        (u'f8', u'79'): (13, 16),
-        (u'7a', u'fb'): (13, 20),
-        (u'7c', u'fd'): (13, 24),
-        (u'fe', u'7f'): (13, 28)
+    '13': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (12, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (13, 0),  # noqa
+        ('52', 'd3'): (12, 4),
+        ('54', 'd5'): (12, 8),
+        ('d6', '57'): (12, 12),
+        ('58', 'd9'): (12, 16),
+        ('da', '5b'): (12, 20),
+        ('dc', '5d'): (12, 24),
+        ('5e', 'df'): (12, 28),
+
+        ('f2', '73'): (13, 4),
+        ('f4', '75'): (13, 8),
+        ('76', 'f7'): (13, 12),
+        ('f8', '79'): (13, 16),
+        ('7a', 'fb'): (13, 20),
+        ('7c', 'fd'): (13, 24),
+        ('fe', '7f'): (13, 28)
     },
-    u'94': {
-        (u'd0', u'51', u'c2', u'43', u'c4', u'45', u'46', u'c7', u'c8', u'49', u'4a', u'cb', u'4c', u'cd'): (14, 0),  # noqa
-        (u'70', u'f1', u'62', u'e3', u'64', u'e5', u'e6', u'67', u'68', u'e9', u'ea', u'6b', u'ec', u'6d'): (15, 0),  # noqa
-        (u'52', u'd3'): (14, 4),
-        (u'54', u'd5'): (14, 8),
-        (u'd6', u'57'): (14, 12),
-        (u'58', u'd9'): (14, 16),
-        (u'da', u'5b'): (14, 20),
-        (u'dc', u'5d'): (14, 24),
-        (u'5e', u'df'): (14, 28),
-
-        (u'f2', u'73'): (15, 4),
-        (u'f4', u'75'): (15, 8),
-        (u'76', u'f7'): (15, 12),
-        (u'f8', u'79'): (15, 16),
-        (u'7a', u'fb'): (15, 20),
-        (u'7c', u'fd'): (15, 24),
-        (u'fe', u'7f'): (15, 28)
+    '94': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (14, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (15, 0),  # noqa
+        ('52', 'd3'): (14, 4),
+        ('54', 'd5'): (14, 8),
+        ('d6', '57'): (14, 12),
+        ('58', 'd9'): (14, 16),
+        ('da', '5b'): (14, 20),
+        ('dc', '5d'): (14, 24),
+        ('5e', 'df'): (14, 28),
+
+        ('f2', '73'): (15, 4),
+        ('f4', '75'): (15, 8),
+        ('76', 'f7'): (15, 12),
+        ('f8', '79'): (15, 16),
+        ('7a', 'fb'): (15, 20),
+        ('7c', 'fd'): (15, 24),
+        ('fe', '7f'): (15, 28)
     }
 }
 
 
 def _create_position_to_bytes_map(bytes_to_pos):
     result = {}
-    for high_byte, low_byte_dict in bytes_to_pos.items():
+    for high_byte, low_byte_dict in list(bytes_to_pos.items()):
 
         # must contain mappings to column, to the tuple of possible values
-        for low_byte_list in low_byte_dict.keys():
+        for low_byte_list in list(low_byte_dict.keys()):
             column = bytes_to_pos[high_byte][low_byte_list][1]
 
             row = bytes_to_pos[high_byte][low_byte_list][0]
@@ -954,9 +954,9 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
     return {
         k_: {
             low_byte: byte_to_pos_map[k_][low_byte_list]
-            for low_byte_list in v_.keys() for low_byte in low_byte_list
+            for low_byte_list in v_ for low_byte in low_byte_list
         }
-        for k_, v_ in byte_to_pos_map.items()
+        for k_, v_ in byte_to_pos_map.items()
     }
 
 # Now use the dict with arguments like [u'91'][u'75'] directly.
@@ -981,4 +981,4 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
 MICROSECONDS_PER_CODEWORD = 1000.0 * 1000.0 / (30.0 * 1000.0 / 1001.0)
 
 
-HEADER = u'Scenarist_SCC V1.0'
+HEADER = 'Scenarist_SCC V1.0'
diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
index 5e3a5486..31dd6085 100644
--- a/pycaption/scc/specialized_collections.py
+++ b/pycaption/scc/specialized_collections.py
@@ -3,6 +3,7 @@
                         VerticalAlignmentEnum, HorizontalAlignmentEnum)
 
 from .constants import PAC_BYTES_TO_POSITIONING_MAP, COMMANDS
+import collections
 
 
 class PreCaption(object):
@@ -113,7 +114,7 @@ def set_active(self, key):
         :param key: any hashable object
         """
         if key not in self:
-            raise ValueError(u'No such key present')
+            raise ValueError('No such key present')
 
         # Notify observers of the change
         if key != self.active_key:
@@ -126,7 +127,7 @@ def get_active(self):
         """Returns the value corresponding to the active key
         """
         if self.active_key is self._guard:
-            raise KeyError(u'No active key set')
+            raise KeyError('No active key set')
 
         return self[self.active_key]
 
@@ -139,8 +140,8 @@ def add_change_observer(self, observer):
         :param observer: any callable that can be called with 2 positional
             arguments
         """
-        if not callable(observer):
-            raise TypeError(u'The observer should be callable')
+        if not callable(observer):
+            raise TypeError('The observer should be callable')
 
         self.observers.append(observer)
 
@@ -223,7 +224,7 @@ def create_and_store(self, node_buffer, start):
             elif instruction.sets_italics_on():
                 caption.nodes.append(
                     CaptionNode.create_style(
-                        True, {u'italics': True},
+                        True, {'italics': True},
                         layout_info=_get_layout_from_tuple(
                             instruction.position
                         ))
@@ -233,7 +234,7 @@ def create_and_store(self, node_buffer, start):
             elif instruction.sets_italics_off():
                 caption.nodes.append(
                     CaptionNode.create_style(
-                        False, {u'italics': True},
+                        False, {'italics': True},
                         layout_info=_get_layout_from_tuple(
                             instruction.position)
                     ))
@@ -334,10 +335,10 @@ def interpret_command(self, command):
         """
         self._update_positioning(command)
 
-        text = COMMANDS.get(command, u'')
+        text = COMMANDS.get(command, '')
 
-        if u'italic' in text:
-            if u'end' not in text:
+        if 'italic' in text:
+            if 'end' not in text:
                 self._collection.append(
                     _InstructionNode.create_italics_style(
                         self._position_tracer.get_current_position())
@@ -394,7 +395,7 @@ def from_list(cls, stash_list, position_tracker):
             # use space to separate the stashes, but don't add final space
             if idx < len(stash_list) - 1:
                 try:
-                    instance._collection[-1].add_chars(u' ')
+                    instance._collection[-1].add_chars(' ')
                 except AttributeError:
                     pass
 
@@ -455,9 +456,9 @@ def add_chars(self, *args):
         :return:
         """
         if self.text is None:
-            self.text = u''
+            self.text = ''
 
-        self.text += u''.join(args)
+        self.text += ''.join(args)
 
     def is_text_node(self):
         """
@@ -508,7 +509,7 @@ def requires_repositioning(self):
     def get_text(self):
         """A little legacy code.
         """
-        return u' '.join(self.text.split())
+        return ' '.join(self.text.split())
 
     @classmethod
     def create_break(cls, position):
@@ -533,7 +534,7 @@ def create_text(cls, position, *chars):
 
         :rtype: _InstructionNode
         """
-        return cls(u''.join(chars), position=position)
+        return cls(''.join(chars), position=position)
 
     @classmethod
     def create_italics_style(cls, position, turn_on=True):
@@ -563,17 +564,17 @@ def create_repositioning_command(cls, position=None):
 
     def __repr__(self):         # pragma: no cover
         if self._type == self.BREAK:
-            extra = u'BR'
+            extra = 'BR'
         elif self._type == self.TEXT:
-            extra = u'"{}"'.format(self.text)
+            extra = '"{}"'.format(self.text)
         elif self._type in (self.ITALICS_ON, self.ITALICS_OFF):
-            extra = u'italics {}'.format(
-                u'on' if self._type == self.ITALICS_ON else u'off'
+            extra = 'italics {}'.format(
+                'on' if self._type == self.ITALICS_ON else 'off'
             )
         else:
-            extra = u'change position'
+            extra = 'change position'
 
-        return u'<INode: {extra} >'.format(extra=extra)
+        return '<INode: {extra} >'.format(extra=extra)
 
 
 def _format_italics(collection):
diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py
index f3fb8653..89bc24c3 100644
--- a/pycaption/scc/state_machines.py
+++ b/pycaption/scc/state_machines.py
@@ -51,7 +51,7 @@ def get_current_position(self):
         """
         if not any(self._positions):
             raise CaptionReadSyntaxError(
-                u'No Preamble Address Code [PAC] was provided'
+                'No Preamble Address Code [PAC] was provided'
             )
         else:
             return self._positions[0]
diff --git a/pycaption/srt.py b/pycaption/srt.py
index 870217b0..9d9692d4 100644
--- a/pycaption/srt.py
+++ b/pycaption/srt.py
@@ -9,12 +9,12 @@
 class SRTReader(BaseReader):
     def detect(self, content):
         lines = content.splitlines()
-        if lines[0].isdigit() and u'-->' in lines[1]:
+        if lines[0].isdigit() and '-->' in lines[1]:
             return True
         else:
             return False
 
-    def read(self, content, lang=u'en-US'):
+    def read(self, content, lang='en-US'):
         if type(content) != six.text_type:
             raise InvalidInputError('The content is not a unicode string.')
 
@@ -28,15 +28,15 @@ def read(self, content, lang=u'en-US'):
 
             end_line = self._find_text_line(start_line, lines)
 
-            timing = lines[start_line + 1].split(u'-->')
-            start = self._srttomicro(timing[0].strip(u' \r\n'))
-            end = self._srttomicro(timing[1].strip(u' \r\n'))
+            timing = lines[start_line + 1].split('-->')
+            start = self._srttomicro(timing[0].strip(' \r\n'))
+            end = self._srttomicro(timing[1].strip(' \r\n'))
 
             nodes = []
 
             for line in lines[start_line + 2:end_line - 1]:
                 # skip extra blank lines
-                if not nodes or line != u'':
+                if not nodes or line != '':
                     nodes.append(CaptionNode.create_text(line))
                     nodes.append(CaptionNode.create_break())
 
@@ -51,15 +51,15 @@ def read(self, content, lang=u'en-US'):
         caption_set = CaptionSet({lang: captions})
 
         if caption_set.is_empty():
-            raise CaptionReadNoCaptions(u"empty caption file")
+            raise CaptionReadNoCaptions("empty caption file")
 
         return caption_set
 
     def _srttomicro(self, stamp):
-        timesplit = stamp.split(u':')
-        if u',' not in timesplit[2]:
-            timesplit[2] = timesplit[2] + u',000'
-        secsplit = timesplit[2].split(u',')
+        timesplit = stamp.split(':')
+        if ',' not in timesplit[2]:
+            timesplit[2] += ',000'
+        secsplit = timesplit[2].split(',')
         microseconds = (int(timesplit[0]) * 3600000000 +
                         int(timesplit[1]) * 60000000 +
                         int(secsplit[0]) * 1000000 +
@@ -72,7 +72,7 @@ def _find_text_line(self, start_line, lines):
 
         found = False
         while end_line < len(lines):
-            if lines[end_line].strip() == u"":
+            if lines[end_line].strip() == "":
                 found = True
             elif found is True:
                 end_line -= 1
@@ -93,40 +93,40 @@ def write(self, caption_set):
                 self._recreate_lang(caption_set.get_captions(lang))
             )
 
-        caption_content = u'MULTI-LANGUAGE SRT\n'.join(srt_captions)
+        caption_content = 'MULTI-LANGUAGE SRT\n'.join(srt_captions)
         return caption_content
 
     def _recreate_lang(self, captions):
-        srt = u''
+        srt = ''
         count = 1
 
         for caption in captions:
-            srt += u'%s\n' % count
+            srt += '%s\n' % count
 
-            start = caption.format_start(msec_separator=u',')
-            end = caption.format_end(msec_separator=u',')
-            timestamp = u'%s --> %s\n' % (start[:12], end[:12])
+            start = caption.format_start(msec_separator=',')
+            end = caption.format_end(msec_separator=',')
+            timestamp = '%s --> %s\n' % (start[:12], end[:12])
 
-            srt += timestamp.replace(u'.', u',')
+            srt += timestamp.replace('.', ',')
 
-            new_content = u''
+            new_content = ''
             for node in caption.nodes:
                 new_content = self._recreate_line(new_content, node)
 
             # Eliminate excessive line breaks
             new_content = new_content.strip()
-            while u'\n\n' in new_content:
-                new_content = new_content.replace(u'\n\n', u'\n')
+            while '\n\n' in new_content:
+                new_content = new_content.replace('\n\n', '\n')
 
-            srt += u"%s%s" % (new_content, u'\n\n')
+            srt += "%s%s" % (new_content, '\n\n')
             count += 1
 
         return srt[:-1]  # remove unwanted newline at end of file
 
     def _recreate_line(self, srt, line):
         if line.type_ == CaptionNode.TEXT:
-            return srt + u'%s ' % line.content
+            return srt + '%s ' % line.content
         elif line.type_ == CaptionNode.BREAK:
-            return srt + u'\n'
+            return srt + '\n'
         else:
             return srt
diff --git a/pycaption/transcript.py b/pycaption/transcript.py
index a3bb72b0..be9c07b3 100644
--- a/pycaption/transcript.py
+++ b/pycaption/transcript.py
@@ -3,28 +3,28 @@
 try:
     import nltk.data
 except ImportError:
-    raise ImportError(u'You must install nltk==2.0.4 and numpy==1.7.1 to be able to use this.')
+    raise ImportError('You must install nltk==2.0.4 and numpy==1.7.1 to be able to use this.')
 from pycaption.base import BaseWriter, CaptionNode
 
 
 class TranscriptWriter(BaseWriter):
     def __init__(self, *args, **kw):
-        self.nltk = nltk.data.load(u'file:%s/english.pickle' %
+        self.nltk = nltk.data.load('file:%s/english.pickle' %
                                    os.path.dirname(__file__))
 
     def write(self, captions):
         transcripts = []
 
         for lang in captions.get_languages():
-            lang_transcript = u'* %s Transcript *\n' % lang.upper()
+            lang_transcript = '* %s Transcript *\n' % lang.upper()
 
             for caption in captions.get_captions(lang):
                 lang_transcript = self._strip_text(caption.nodes, lang_transcript)
 
-            lang_transcript = u'\n'.join(self.nltk.tokenize(lang_transcript))
+            lang_transcript = '\n'.join(self.nltk.tokenize(lang_transcript))
             transcripts.append(lang_transcript)
 
-        return u'\n'.join(transcripts)
+        return '\n'.join(transcripts)
 
     def _strip_text(self, elements, lang_transcript):
         for el in elements:
diff --git a/pycaption/webvtt.py b/pycaption/webvtt.py
index 3cf7d6b8..11efa8a7 100644
--- a/pycaption/webvtt.py
+++ b/pycaption/webvtt.py
@@ -1,10 +1,9 @@
-import datetime
-import sys
 import re
+import six
+import sys
+import datetime
 from copy import deepcopy
 
-from builtins import str
-import six
 
 from .base import (
     BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode
@@ -22,24 +21,24 @@
 # The following pattern captures [start], [end] and [cue settings] if existent
 from pycaption.geometry import HorizontalAlignmentEnum
 
-TIMING_LINE_PATTERN = re.compile(u'^(\S+)\s+-->\s+(\S+)(?:\s+(.*?))?\s*$')
-TIMESTAMP_PATTERN = re.compile(u'^(\d+):(\d{2})(:\d{2})?\.(\d{3})')
-VOICE_SPAN_PATTERN = re.compile(u'<v(\\.\\w+)* ([^>]*)>')
+TIMING_LINE_PATTERN = re.compile('^(\S+)\s+-->\s+(\S+)(?:\s+(.*?))?\s*$')
+TIMESTAMP_PATTERN = re.compile('^(\d+):(\d{2})(:\d{2})?\.(\d{3})')
+VOICE_SPAN_PATTERN = re.compile('<v(\\.\\w+)* ([^>]*)>')
 OTHER_SPAN_PATTERN = (
     re.compile(
-        u'</?([cibuv]|ruby|rt|lang|(\d+):(\d{2})(:\d{2})?\.(\d{3})).*?>'
+        '</?([cibuv]|ruby|rt|lang|(\d+):(\d{2})(:\d{2})?\.(\d{3})).*?>'
     )
 )  # These WebVTT tags are stripped off the cues on conversion
 
 WEBVTT_VERSION_OF = {
-    HorizontalAlignmentEnum.LEFT: u'left',
-    HorizontalAlignmentEnum.CENTER: u'middle',
-    HorizontalAlignmentEnum.RIGHT: u'right',
-    HorizontalAlignmentEnum.START: u'start',
-    HorizontalAlignmentEnum.END: u'end'
+    HorizontalAlignmentEnum.LEFT: 'left',
+    HorizontalAlignmentEnum.CENTER: 'middle',
+    HorizontalAlignmentEnum.RIGHT: 'right',
+    HorizontalAlignmentEnum.START: 'start',
+    HorizontalAlignmentEnum.END: 'end'
 }
 
-DEFAULT_ALIGNMENT = u'middle'
+DEFAULT_ALIGNMENT = 'middle'
 
 
 def microseconds(h, m, s, f):
@@ -58,16 +57,16 @@ def __init__(self, ignore_timing_errors=True, *args, **kwargs):
         self.ignore_timing_errors = ignore_timing_errors
 
     def detect(self, content):
-        return u'WEBVTT' in content
+        return 'WEBVTT' in content
 
-    def read(self, content, lang=u'en-US'):
+    def read(self, content, lang='en-US'):
         if type(content) != six.text_type:
             raise InvalidInputError('The content is not a unicode string.')
 
         caption_set = CaptionSet({lang: self._parse(content.splitlines())})
 
         if caption_set.is_empty():
-            raise CaptionReadNoCaptions(u"empty caption file")
+            raise CaptionReadNoCaptions("empty caption file")
 
         return caption_set
 
@@ -81,7 +80,7 @@ def _parse(self, lines):
 
         for i, line in enumerate(lines):
 
-            if u'-->' in line:
+            if '-->' in line:
                 found_timing = True
                 timing_line = i
                 last_start_time = captions[-1].start if captions else 0
@@ -89,15 +88,14 @@ def _parse(self, lines):
                     start, end, layout_info = self._parse_timing_line(
                         line, last_start_time)
                 except CaptionReadError as e:
-                    new_message = u'%s (line %d)' % (e.args[0], timing_line)
+                    new_message = '%s (line %d)' % (e.args[0], timing_line)
                     six.reraise(type(e), type(e)(new_message), sys.exc_info()[2])
 
-
-            elif u'' == line:
+            elif '' == line:
                 if found_timing:
                     if not nodes:
                         raise CaptionReadSyntaxError(
-                            u'Cue without content. (line %d)' % timing_line)
+                            'Cue without content. (line %d)' % timing_line)
                     else:
                         found_timing = False
                         caption = Caption(
@@ -122,22 +120,22 @@ def _parse(self, lines):
         return captions
 
     def _remove_styles(self, line):
-        partial_result = VOICE_SPAN_PATTERN.sub(u'\\2: ', line)
-        return OTHER_SPAN_PATTERN.sub(u'', partial_result)
+        partial_result = VOICE_SPAN_PATTERN.sub('\\2: ', line)
+        return OTHER_SPAN_PATTERN.sub('', partial_result)
 
     def _validate_timings(self, start, end, last_start_time):
         if start is None:
             raise CaptionReadSyntaxError(
-                u'Invalid cue start timestamp.')
+                'Invalid cue start timestamp.')
         if end is None:
-            raise CaptionReadSyntaxError(u'Invalid cue end timestamp.')
+            raise CaptionReadSyntaxError('Invalid cue end timestamp.')
         if start > end:
             raise CaptionReadError(
-                u'End timestamp is not greater than start timestamp.')
+                'End timestamp is not greater than start timestamp.')
         if start < last_start_time:
             raise CaptionReadError(
-                u'Start timestamp is not greater than or equal'
-                u'to start timestamp of previous cue.')
+                'Start timestamp is not greater than or equal'
+                'to start timestamp of previous cue.')
 
     def _parse_timing_line(self, line, last_start_time):
         """
@@ -146,7 +144,7 @@ def _parse_timing_line(self, line, last_start_time):
         m = TIMING_LINE_PATTERN.search(line)
         if not m:
             raise CaptionReadSyntaxError(
-                u'Invalid timing format.')
+                'Invalid timing format.')
 
         start = self._parse_timestamp(m.group(1))
         end = self._parse_timestamp(m.group(2))
@@ -169,13 +167,13 @@ def _parse_timestamp(self, timestamp):
         m = TIMESTAMP_PATTERN.search(timestamp)
         if not m:
             raise CaptionReadSyntaxError(
-                u'Invalid timing format.')
+                'Invalid timing format.')
 
         m = m.groups()
 
         if m[2]:
             # Timestamp takes the form of [hours]:[minutes]:[seconds].[milliseconds]
-            return microseconds(m[0], m[1], m[2].replace(u":", u""), m[3])
+            return microseconds(m[0], m[1], m[2].replace(":", ""), m[3])
         else:
             # Timestamp takes the form of [minutes]:[seconds].[milliseconds]
             return microseconds(0, m[0], m[1], m[3])
@@ -187,23 +185,23 @@ def _decode(self, s):
         """
         s = s.strip()
         # Covert voice span
-        s = VOICE_SPAN_PATTERN.sub(u'\\2: ', s)
+        s = VOICE_SPAN_PATTERN.sub('\\2: ', s)
         # TODO: Add support for other WebVTT tags. For now just strip them
         # off the text.
-        s = OTHER_SPAN_PATTERN.sub(u'', s)
+        s = OTHER_SPAN_PATTERN.sub('', s)
         # Replace WebVTT special XML codes with plain unicode values
-        s = s.replace(u'&lt;', u'<')
-        s = s.replace(u'&gt;', u'>')
-        s = s.replace(u'&lrm;', u'\u200e')
-        s = s.replace(u'&rlm;', u'\u200f')
-        s = s.replace(u'&nbsp;', u'\u00a0')
+        s = s.replace('&lt;', '<')
+        s = s.replace('&gt;', '>')
+        s = s.replace('&lrm;', '\u200e')
+        s = s.replace('&rlm;', '\u200f')
+        s = s.replace('&nbsp;', '\u00a0')
         # Must do ampersand last
-        s = s.replace(u'&amp;', u'&')
+        s = s.replace('&amp;', '&')
         return s
 
 
 class WebVTTWriter(BaseWriter):
-    HEADER = u'WEBVTT\n\n'
+    HEADER = 'WEBVTT\n\n'
     global_layout = None
     video_width = None
     video_height = None
@@ -231,7 +229,7 @@ def write(self, caption_set):
 
         captions = caption_set.get_captions(lang)
 
-        return output + u'\n'.join(
+        return output + '\n'.join(
             [self._write_caption(caption_set, caption) for caption in captions])
 
     def _timestamp(self, ts):
@@ -240,28 +238,27 @@ def _timestamp(self, ts):
         hh, mm = divmod(mm, 60)
         s = "%02d:%02d.%03d" % (mm, ss, td.microseconds/1000)
         if hh:
-            s = "%d:%s" % (hh,s)
+            s = "%d:%s" % (hh, s)
         return s
 
-
     def _tags_for_style(self, style):
-        if style == u'italics':
-            return [u'<i>', u'</i>']
-        elif style == u'underline':
-            return [u'<u>', u'</u>']
-        elif style == u'bold':
-            return [u'<b>', u'</b>']
+        if style == 'italics':
+            return ['<i>', '</i>']
+        elif style == 'underline':
+            return ['<u>', '</u>']
+        elif style == 'bold':
+            return ['<b>', '</b>']
         else:
-            return [u'', u'']
+            return ['', '']
 
     def _calculate_resulting_style(self, style, caption_set):
         resulting_style = {}
 
         style_classes = []
-        if u'classes' in style:
-            style_classes = style[u'classes']
-        elif u'class' in style:
-            style_classes = [style[u'class']]
+        if 'classes' in style:
+            style_classes = style['classes']
+        elif 'class' in style:
+            style_classes = [style['class']]
 
         for style_class in style_classes:
             sub_style = caption_set.get_style(style_class).copy()
@@ -280,11 +277,11 @@ def _write_caption(self, caption_set, caption):
 
         start = self._timestamp(caption.start)
         end = self._timestamp(caption.end)
-        timespan = u"{} --> {}".format(start, end)
+        timespan = "{} --> {}".format(start, end)
 
-        output = u''
+        output = ''
 
-        cue_style_tags = [u'', u'']
+        cue_style_tags = ['', '']
 
         style = self._calculate_resulting_style(caption.style, caption_set)
         for key, value in sorted(style.items()):
@@ -292,14 +289,14 @@ def _write_caption(self, caption_set, caption):
                 tags = self._tags_for_style(key)
 #                    print "tags: " + str(tags) + "\n"
                 cue_style_tags[0] += tags[0]
-                cue_style_tags[1]  = tags[1] + cue_style_tags[1]
+                cue_style_tags[1] = tags[1] + cue_style_tags[1]
 
         for cue_text, layout in layout_groups:
             if not layout:
                 layout = caption.layout_info or self.global_layout
             cue_settings = self._cue_settings_from(layout)
-            output += timespan + cue_settings + u'\n'
-            output += cue_style_tags[0] + cue_text + cue_style_tags[1] + u'\n'
+            output += timespan + cue_settings + '\n'
+            output += cue_style_tags[0] + cue_text + cue_style_tags[1] + '\n'
 
         return output
 
@@ -310,12 +307,12 @@ def _cue_settings_from(self, layout):
         :rtype: unicode
         """
         if not layout:
-            return u''
+            return ''
 
         # If it's converting from WebVTT to WebVTT, keep positioning info
         # unchanged
         if layout.webvtt_positioning:
-            return u' {}'.format(layout.webvtt_positioning)
+            return ' {}'.format(layout.webvtt_positioning)
 
         left_offset = None
         top_offset = None
@@ -330,7 +327,7 @@ def _cue_settings_from(self, layout):
                 # There are absolute positioning values for this cue but the
                 # Writer is explicitly configured not to do any relativization.
                 # Ignore all positioning for this cue.
-                return u''
+                return ''
 
         # Ensure that all positioning values are measured using percentage.
         # This may raise an exception if layout.is_relative() == False
@@ -379,16 +376,16 @@ def _cue_settings_from(self, layout):
         except (AttributeError, KeyError):
             pass
 
-        cue_settings = u''
+        cue_settings = ''
 
-        if alignment and alignment != u'middle':
-            cue_settings += u" align:" + alignment
+        if alignment and alignment != 'middle':
+            cue_settings += " align:" + alignment
         if left_offset:
-            cue_settings += u" position:{},start".format(six.text_type(left_offset))
+            cue_settings += " position:{},start".format(six.text_type(left_offset))
         if top_offset:
-            cue_settings += u" line:" + six.text_type(top_offset)
+            cue_settings += " line:" + six.text_type(top_offset)
         if cue_width:
-            cue_settings += u" size:" + six.text_type(cue_width)
+            cue_settings += " size:" + six.text_type(cue_width)
 
         return cue_settings
 
@@ -408,22 +405,22 @@ def _layout_groups(self, nodes, caption_set):
         layout_groups = []
         # A properly encoded WebVTT string (plain unicode must be properly
         # escaped before being appended to this string)
-        s = u''
+        s = ''
         for i, node in enumerate(nodes):
             if node.type_ == CaptionNode.TEXT:
                 if s and current_layout and node.layout_info != current_layout:
                     # If the positioning changes from one text node to
                     # another, a new WebVTT cue has to be created.
                     layout_groups.append((s, current_layout))
-                    s = u''
+                    s = ''
                 # ATTENTION: This is where the plain unicode node content is
                 # finally encoded as WebVTT.
-                s += self._encode(node.content) or u'&nbsp;'
+                s += self._encode(node.content) or '&nbsp;'
                 current_layout = node.layout_info
             elif node.type_ == CaptionNode.STYLE:
                 resulting_style = self._calculate_resulting_style(node.content, caption_set)
 
-                styles = [u'italics', u'underline', u'bold']
+                styles = ['italics', 'underline', 'bold']
                 if not node.start:
                     styles.reverse()
 
@@ -439,10 +436,10 @@ def _layout_groups(self, nodes, caption_set):
                 # "Style node"
             elif node.type_ == CaptionNode.BREAK:
                 if i > 0 and nodes[i - 1].type_ != CaptionNode.TEXT:
-                    s += u'&nbsp;'
+                    s += '&nbsp;'
                 if i == 0:  # cue text starts with a break
-                    s += u'&nbsp;'
-                s += u'\n'
+                    s += '&nbsp;'
+                s += '\n'
 
         if s:
             layout_groups.append((s, current_layout))
@@ -455,12 +452,12 @@ def _encode(self, s):
             - http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-text-span
         :type s: unicode
         """
-        s = s.replace(u'&', u'&amp;')
-        s = s.replace(u'<', u'&lt;')
+        s = s.replace('&', '&amp;')
+        s = s.replace('<', '&lt;')
 
         # The substring "-->" is also not allowed according to this:
         #   - http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-block
-        s = s.replace(u'-->', u'--&gt;')
+        s = s.replace('-->', '--&gt;')
 
         # The following characters have escaping codes for some reason, but
         # they're not illegal, so for now I'll leave this commented out so that
diff --git a/tests/mixins.py b/tests/mixins.py
index 62b26f1e..72090d74 100644
--- a/tests/mixins.py
+++ b/tests/mixins.py
@@ -16,7 +16,7 @@ def assertWebVTTEquals(self, first, second):
         """
         first_items = self._extract_webvtt_captions(first)
         second_items = self._extract_webvtt_captions(second)
-        self.assertEquals(first_items, second_items)
+        self.assertEqual(first_items, second_items)
 
 
 class SRTTestingMixIn(object):
@@ -33,7 +33,7 @@ def assertSRTEquals(self, first, second):
         """
         first_items = self._extract_srt_captions(first)
         second_items = self._extract_srt_captions(second)
-        self.assertEquals(first_items, second_items)
+        self.assertEqual(first_items, second_items)
 
 
 class CaptionSetTestingMixIn(object):
@@ -50,14 +50,14 @@ def assertCaptionSetAlmostEquals(self, first, second,
 
         def get_text_for_caption(caption):
             text = caption.get_text()
-            text = re.sub(u'\s+', u' ', text)
+            text = re.sub('\s+', ' ', text)
 
             return text
 
         text_1 = [get_text_for_caption(caption) for caption in captions_1]
         text_2 = [get_text_for_caption(caption) for caption in captions_2]
 
-        self.assertEquals(text_1, text_2)
+        self.assertEqual(text_1, text_2)
 
         def close_enough(ts1, ts2):
             return abs(ts1 - ts2) < tolerance_microseconds
@@ -67,14 +67,14 @@ def close_enough(ts1, ts2):
             for caption_1, caption_2 in zip(captions_1, captions_2)
             if not close_enough(caption_1.start, caption_2.start)
         ]
-        self.assertEquals(start_differences, [])
+        self.assertEqual(start_differences, [])
 
         end_differences = [
             (caption_1.end, caption_2.end)
             for caption_1, caption_2 in zip(captions_1, captions_2)
             if not close_enough(caption_1.end, caption_2.end)
         ]
-        self.assertEquals(end_differences, [])
+        self.assertEqual(end_differences, [])
 
 
 class DFXPTestingMixIn(object):
@@ -83,19 +83,19 @@ class DFXPTestingMixIn(object):
     """
 
     def _remove_styling(self, soup):
-        for style in soup(u'styling'):
+        for style in soup('styling'):
             style.clear()
 
-        for paragraph in soup(u'p'):
-            if u'style' in paragraph.attrs:
-                del paragraph.attrs[u'style']
+        for paragraph in soup('p'):
+            if 'style' in paragraph.attrs:
+                del paragraph.attrs['style']
 
     def _remove_spans(self, soup):
-        for span in soup(u'span'):
+        for span in soup('span'):
             span.unwrap()
 
     def _trim_text(self, soup):
-        for paragraph in soup(u'p'):
+        for paragraph in soup('p'):
             paragraph.string = paragraph.text.strip()
 
     def assertDFXPEquals(self, first, second,
@@ -115,7 +115,7 @@ def assertDFXPEquals(self, first, second,
         self._trim_text(first_soup)
         self._trim_text(second_soup)
 
-        self.assertEquals(first_soup, second_soup)
+        self.assertEqual(first_soup, second_soup)
 
 
 class SAMITestingMixIn(object):
@@ -125,8 +125,8 @@ class SAMITestingMixIn(object):
 
     def _extract_sami_captions(self, soup):
         return tuple(
-            (caption.attrs[u'start'], caption.p.text.strip())
-            for caption in soup.select(u'sync'))
+            (caption.attrs['start'], caption.p.text.strip())
+            for caption in soup.select('sync'))
 
     def assertSAMIEquals(self, first, second):
         first_soup = BeautifulSoup(first, 'lxml')
@@ -134,4 +134,4 @@ def assertSAMIEquals(self, first, second):
 
         first_items = self._extract_sami_captions(first_soup)
         second_items = self._extract_sami_captions(second_soup)
-        self.assertEquals(first_items, second_items)
+        self.assertEqual(first_items, second_items)
diff --git a/tests/samples/dfxp.py b/tests/samples/dfxp.py
index 175402ae..10d3119a 100644
--- a/tests/samples/dfxp.py
+++ b/tests/samples/dfxp.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 
 SAMPLE_DFXP = """\
 <?xml version="1.0" encoding="utf-8"?>
@@ -176,7 +175,7 @@
  </body>
 </tt>"""
 
-SAMPLE_DFXP_WITH_POSITIONING = u"""<?xml version="1.0" encoding="utf-8"?>
+SAMPLE_DFXP_WITH_POSITIONING = """<?xml version="1.0" encoding="utf-8"?>
 <tt xml:lang="en-us"
     xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts='http://www.w3.org/ns/ttml#styling'
@@ -627,7 +626,7 @@
  </body>
 </tt>"""
 
-SAMPLE_DFXP_TO_RENDER_WITH_ONLY_DEFAULT_POSITIONING_INPUT = u"""\
+SAMPLE_DFXP_TO_RENDER_WITH_ONLY_DEFAULT_POSITIONING_INPUT = """\
 <?xml version="1.0" encoding="utf-8"?>
 <tt xml:lang="en" xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling">
  <head>
@@ -1028,7 +1027,7 @@
  </body>
 </tt>"""
 
-DFXP_WITH_CONCURRENT_CAPTIONS = u"""\
+DFXP_WITH_CONCURRENT_CAPTIONS = """\
 <tt xml:lang="en-us"
     xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts='http://www.w3.org/ns/ttml#styling'
@@ -1084,7 +1083,7 @@
 </tt>
 """
 
-DFXP_WITH_ESCAPED_APOSTROPHE = u"""\
+DFXP_WITH_ESCAPED_APOSTROPHE = """\
 <tt xml:lang="en-us"
     xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts='http://www.w3.org/ns/ttml#styling'
@@ -1103,7 +1102,7 @@
 </body>
 </tt>"""
 
-DFXP_WITH_ALTERNATIVE_TIMING_FORMATS = u"""\
+DFXP_WITH_ALTERNATIVE_TIMING_FORMATS = """\
 <tt xml:lang="en-us"
     xmlns="http://www.w3.org/ns/ttml"
     xmlns:tts='http://www.w3.org/ns/ttml#styling'
diff --git a/tests/samples/sami.py b/tests/samples/sami.py
index 154db679..63c7e834 100644
--- a/tests/samples/sami.py
+++ b/tests/samples/sami.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-SAMPLE_SAMI = u"""
+SAMPLE_SAMI = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 <!--
 P { margin-left:  1pt;
@@ -49,7 +49,7 @@
 </BODY></SAMI>
 """
 
-SAMPLE_SAMI_WITH_STYLE_TAGS = u"""
+SAMPLE_SAMI_WITH_STYLE_TAGS = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 <!--
 P { margin-left:  1pt;
@@ -74,7 +74,7 @@
 </BODY></SAMI>
 """
 
-SAMPLE_SAMI_WITH_CSS_INLINE_STYLE = u"""
+SAMPLE_SAMI_WITH_CSS_INLINE_STYLE = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 <!--
 P { margin-left:  1pt;
@@ -99,7 +99,7 @@
 </BODY></SAMI>
 """
 
-SAMPLE_SAMI_WITH_CSS_ID_STYLE = u"""
+SAMPLE_SAMI_WITH_CSS_ID_STYLE = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 <!--
 P { margin-left:  1pt;
@@ -139,7 +139,7 @@
 </BODY></SAMI>
 """
 
-SAMPLE_SAMI_EMPTY = u"""
+SAMPLE_SAMI_EMPTY = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE><STYLE TYPE="text/css">
 <!--
 P { margin-left:  1pt;
@@ -160,7 +160,7 @@
 """
 
 
-SAMPLE_SAMI_SYNTAX_ERROR = u"""
+SAMPLE_SAMI_SYNTAX_ERROR = """
 <SAMI>
 <Head>
 <title>ir2014_111</title>
@@ -199,7 +199,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_DOUBLE_BR = u"""
+SAMPLE_SAMI_DOUBLE_BR = """
 <SAMI><HEAD><TITLE>NOVA3213</TITLE>
 </HEAD><BODY>
 <SYNC start="14848"><P class="ENCC">
@@ -209,7 +209,7 @@
 </BODY></SAMI>
 """
 
-SAMPLE_SAMI_PARTIAL_MARGINS = u"""
+SAMPLE_SAMI_PARTIAL_MARGINS = """
 <SAMI>
 <HEAD>
    <STYLE TYPE="Text/css">
@@ -226,7 +226,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_PARTIAL_MARGINS_RELATIVIZED = u"""<sami>
+SAMPLE_SAMI_PARTIAL_MARGINS_RELATIVIZED = """<sami>
  <head>
   <style type="text/css">
    <!--
@@ -265,7 +265,7 @@
  </body>
 </sami>"""
 
-SAMPLE_SAMI_LANG_MARGIN = u"""
+SAMPLE_SAMI_LANG_MARGIN = """
 <SAMI>
 <HEAD>
    <STYLE TYPE="Text/css">
@@ -282,7 +282,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_SPAN = u"""
+SAMPLE_SAMI_WITH_SPAN = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -302,7 +302,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_BAD_SPAN_ALIGN = u"""
+SAMPLE_SAMI_WITH_BAD_SPAN_ALIGN = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -322,7 +322,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_BAD_DIV_ALIGN = u"""
+SAMPLE_SAMI_WITH_BAD_DIV_ALIGN = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -342,7 +342,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_P_ALIGN = u"""
+SAMPLE_SAMI_WITH_P_ALIGN = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -362,7 +362,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_P_AND_SPAN_ALIGN = u"""
+SAMPLE_SAMI_WITH_P_AND_SPAN_ALIGN = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -382,7 +382,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_MULTIPLE_SPAN_ALIGNS = u"""
+SAMPLE_SAMI_WITH_MULTIPLE_SPAN_ALIGNS = """
 <SAMI>
 <HEAD>
     <STYLE TYPE="Text/css">
@@ -404,7 +404,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_NO_LANG = u"""
+SAMPLE_SAMI_NO_LANG = """
 <SAMI>
 <Head><STYLE TYPE="text/css"></Style></Head>
 <BODY>
@@ -414,7 +414,7 @@
 </SAMI>
 """
 
-SAMPLE_SAMI_WITH_LANG = u"""
+SAMPLE_SAMI_WITH_LANG = """
 <sami>
 <head>
 <style type="text/css"><!--.en-US {lang: en-US;}--></style>
diff --git a/tests/samples/scc.py b/tests/samples/scc.py
index 666d286d..011bf8e8 100644
--- a/tests/samples/scc.py
+++ b/tests/samples/scc.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 
-SAMPLE_SCC_CREATED_DFXP_WITH_WRONGLY_CLOSING_SPANS = u"""\
+SAMPLE_SCC_CREATED_DFXP_WITH_WRONGLY_CLOSING_SPANS = """\
 Scenarist_SCC V1.0
 
 00:01:28;09 9420 942f 94ae 9420 9452 97a2 e3e3 e3e3 e3e3 9470 9723 e3a1 e3a1
@@ -21,13 +21,13 @@
 00:01:59;14 9420 942f 94ae 9420 94f4 6464 6464
 """
 
-SCC_THAT_GENERATES_WEBVTT_WITH_PROPER_NEWLINES = u"""\
+SCC_THAT_GENERATES_WEBVTT_WITH_PROPER_NEWLINES = """\
 Scenarist_SCC V1.0
 
 00:21:29;23    9420 9452 6161 94f4 97a2 6262 942c 942f
 """
 
-SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME = u"""\
+SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME = """\
 Scenarist_SCC V1.0
 
 00:01:31;18 9420 9454 6162 9758 97a1 91ae 6261 9170 97a1 e362
@@ -37,7 +37,7 @@
 00:01:40;25 942c
 """
 
-SAMPLE_SCC_POP_ON = u"""Scenarist_SCC V1.0
+SAMPLE_SCC_POP_ON = """Scenarist_SCC V1.0
 
 00:00:09:05 94ae 94ae 9420 9420 9470 9470 a820 e3ec efe3 6b20 f4e9 e36b e96e 6720 2980 942c 942c 942f 942f
 
@@ -66,7 +66,7 @@
 #   2 Roll-Up captions - same comment
 #   2 Paint-on captions - same comment
 #       - the TAB OVER commands are not interpreted (97A1, 97A2, 9723)
-SAMPLE_SCC_MULTIPLE_POSITIONING = u"""Scenarist_SCC V1.0
+SAMPLE_SCC_MULTIPLE_POSITIONING = """Scenarist_SCC V1.0
 
 00:00:00:16 94ae 94ae 9420 9420 1370 1370 6162 6162 91d6 91d6 e364 e364 927c 927c e5e6 e5e6 942c 942c 942f 942f
 
@@ -85,22 +85,22 @@
 """
 
 # UNUSED SAMPLE
-SAMPLE_SCC_WITH_ITALICS_BKUP = u"""\
+SAMPLE_SCC_WITH_ITALICS_BKUP = """\
 Scenarist_SCC V1.0
 
 00:00:00:01 9420 10d0 97a2 91ae 6162 6162 6162 6162 942c 8080 8080 942f
 """
 
-SAMPLE_SCC_WITH_ITALICS = u"""\
+SAMPLE_SCC_WITH_ITALICS = """\
 
 00:00:00:01 9420 10d0 97a2 91ae 6162 6162 6162 6162 942c 8080 8080 942f
 """
 
 
-SAMPLE_SCC_EMPTY = u"""Scenarist_SCC V1.0
+SAMPLE_SCC_EMPTY = """Scenarist_SCC V1.0
 """
 
-SAMPLE_SCC_ROLL_UP_RU2 = u"""\
+SAMPLE_SCC_ROLL_UP_RU2 = """\
 Scenarist_SCC V1.0
 00:00:00;22    9425 9425 94ad 94ad 9470 9470 3e3e 3e20 c849 ae
 
@@ -136,7 +136,7 @@
 """
 
 
-SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME = u"""\
+SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME = """\
 Scenarist_SCC V1.0
 
 00:23:28;01    9420 94ae 9154 5245 91f4 c1c2 942c
@@ -148,7 +148,7 @@
 00:54:29;21    942f
 """
 
-SAMPLE_NO_POSITIONING_AT_ALL_SCC = u"""\
+SAMPLE_NO_POSITIONING_AT_ALL_SCC = """\
 Scenarist_SCC V1.0
 
 00:23:28;01    9420 94ae 5245 c1c2 942c
@@ -161,7 +161,7 @@
 """
 
 # UNUSED SAMPLE
-SAMPLE_SCC_NOT_EXPLICITLY_SWITCHING_ITALICS_OFF = u"""\
+SAMPLE_SCC_NOT_EXPLICITLY_SWITCHING_ITALICS_OFF = """\
 Scenarist_SCC V1.0
 
 00:01:28;09    9420 942f 94ae 9420 9452 97a2 b031 6161 9470 9723 b031 6262
@@ -215,7 +215,7 @@
 00:53:03;15    9420 94f4 97a1 94f4 97a1 91ae 31b6 6464
 """
 
-SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION = u"""\
+SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION = """\
 Scenarist_SCC V1.0
 
 00:00:00;00    73e9 e329 942f
diff --git a/tests/samples/srt.py b/tests/samples/srt.py
index e96dfcff..f98b8e96 100644
--- a/tests/samples/srt.py
+++ b/tests/samples/srt.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-SAMPLE_SRT = u"""1
+SAMPLE_SRT = """1
 00:00:09,209 --> 00:00:12,312
 ( clock ticking )
 
@@ -35,7 +35,7 @@
 <LAUGHING & WHOOPS!>
 """
 
-SAMPLE_SRT_ASCII = u"""1
+SAMPLE_SRT_ASCII = """1
 00:00:09,209 --> 00:00:12,312
 ( clock ticking )
 
@@ -74,7 +74,7 @@
 some more text
 """
 
-SAMPLE_SRT_NUMERIC = u"""35
+SAMPLE_SRT_NUMERIC = """35
 00:00:32,290 --> 00:00:32,890
 TO  FIND  HIM.            IF
 
@@ -104,10 +104,10 @@
 """
 
 
-SAMPLE_SRT_EMPTY = u"""
+SAMPLE_SRT_EMPTY = """
 """
 
-SAMPLE_SRT_BLANK_LINES = u"""35
+SAMPLE_SRT_BLANK_LINES = """35
 00:00:32,290 --> 00:00:32,890
 
 
@@ -117,7 +117,7 @@
 
 """
 
-SAMPLE_SRT_TRAILING_BLANKS = u"""35
+SAMPLE_SRT_TRAILING_BLANKS = """35
 00:00:32,290 --> 00:00:32,890
 HELP  I  SAY
 
diff --git a/tests/samples/webvtt.py b/tests/samples/webvtt.py
index 634a8997..228897a5 100644
--- a/tests/samples/webvtt.py
+++ b/tests/samples/webvtt.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-SAMPLE_WEBVTT = u"""WEBVTT
+SAMPLE_WEBVTT = """WEBVTT
 
 00:09.209 --> 00:12.312
 ( clock ticking )
@@ -30,7 +30,7 @@
 <LAUGHING & WHOOPS!>
 """
 
-SAMPLE_WEBVTT_FROM_DFXP = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_DFXP = """WEBVTT
 
 00:09.209 --> 00:12.312
 ( clock ticking )
@@ -63,14 +63,14 @@
 
 SAMPLE_WEBVTT_FROM_SAMI = SAMPLE_WEBVTT_FROM_DFXP
 
-SAMPLE_WEBVTT_FROM_SAMI_WITH_STYLE = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_SAMI_WITH_STYLE = """WEBVTT
 
 00:09.209 --> 00:12.312
 I <b>do</b> <i>not</i> want to go <u>home</u>.
 I don't like it <i><u><b>there</b></u></i>.
 """
 
-SAMPLE_WEBVTT_FROM_SAMI_WITH_ID_STYLE = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_SAMI_WITH_ID_STYLE = """WEBVTT
 
 00:09.209 --> 00:12.312
 <i>This is in italics.</i>
@@ -85,13 +85,13 @@
 <b><i><u>This is everything together.</u></i></b>
 """
 
-SAMPLE_WEBVTT_FROM_DFXP_WITH_STYLE = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_DFXP_WITH_STYLE = """WEBVTT
 
 00:09.209 --> 00:12.312
 This is <i>italic</i>, <b>bold</b>, <u>underline</u>, <i><u><b>everything together in one tag</b></u></i>, and <u><b><i>nested</i></b></u>.
 """
 
-SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING = """WEBVTT
 
 00:01.000 --> 00:03.000 position:25%,start line:25% size:50%
 You might not remember us. We are a typical transparent region with centered text that has an outline.
@@ -107,7 +107,7 @@
 the last cue
 """
 
-SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING_AND_STYLE = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING_AND_STYLE = """WEBVTT
 
 00:01.000 --> 00:03.000 position:25%,start line:25% size:50%
 You might not remember us. We are a typical transparent region with centered text that has an outline.
@@ -123,7 +123,7 @@
 the last cue
 """
 
-SAMPLE_WEBVTT_FROM_SRT = u"""WEBVTT
+SAMPLE_WEBVTT_FROM_SRT = """WEBVTT
 
 00:09.209 --> 00:12.312
 ( clock ticking )
@@ -158,7 +158,7 @@
 # in order to conform to the specification.
 SAMPLE_WEBVTT_FROM_WEBVTT = SAMPLE_WEBVTT_FROM_SRT
 
-SAMPLE_WEBVTT_2 = u"""WEBVTT
+SAMPLE_WEBVTT_2 = """WEBVTT
 
 1
 00:00:00.000 --> 00:00:43.000
@@ -189,10 +189,10 @@
 HEY. WATCH THIS.
 """
 
-SAMPLE_WEBVTT_EMPTY = u"""WEBVTT
+SAMPLE_WEBVTT_EMPTY = """WEBVTT
 """
 
-SAMPLE_WEBVTT_DOUBLE_BR = u"""WEBVTT
+SAMPLE_WEBVTT_DOUBLE_BR = """WEBVTT
 
 00:14.848 --> 00:18.848
 MAN:
@@ -201,7 +201,7 @@
 of "E equals m c-squared",
 """
 
-SAMPLE_WEBVTT_OUTPUT_LONG_CUE = u"""WEBVTT
+SAMPLE_WEBVTT_OUTPUT_LONG_CUE = """WEBVTT
 
 00:01.000 --> 00:02.000
 NARRATOR:
@@ -213,7 +213,7 @@
 most complex machine in history.
 """
 
-WEBVTT_FROM_DFXP_WITH_CONFLICTING_ALIGN = u"""WEBVTT
+WEBVTT_FROM_DFXP_WITH_CONFLICTING_ALIGN = """WEBVTT
 
 00:04.537 --> 00:07.841
 IT'S WORD GIRL♫
@@ -223,7 +223,7 @@
 IT'S WORD GIRL♫
 """
 
-SAMPLE_WEBVTT_WITH_CUE_SETTINGS = u"""\
+SAMPLE_WEBVTT_WITH_CUE_SETTINGS = """\
 WEBVTT
 
 00:01.000 --> 00:06.000 align:middle position:37%,start line:74%
@@ -233,7 +233,7 @@
 They built the largest,
 """
 
-SAMPLE_WEBVTT_FROM_SCC_PROPERLY_WRITES_NEWLINES_OUTPUT = u"""\
+SAMPLE_WEBVTT_FROM_SCC_PROPERLY_WRITES_NEWLINES_OUTPUT = """\
 WEBVTT
 
 21:30.033 --> 21:34.033 align:left position:12.5%,start line:86.67% size:87.5%
@@ -241,7 +241,7 @@
 bb
 """
 
-SAMPLE_WEBVTT_LAST_CUE_ZERO_START = u"""WEBVTT
+SAMPLE_WEBVTT_LAST_CUE_ZERO_START = """WEBVTT
 
 00:00.000 --> 00:12.312
 ( clock ticking )"""
diff --git a/tests/test_dfxp.py b/tests/test_dfxp.py
index 2801b880..62967801 100644
--- a/tests/test_dfxp.py
+++ b/tests/test_dfxp.py
@@ -17,27 +17,26 @@ def test_detection(self):
 
     def test_caption_length(self):
         captions = DFXPReader().read(SAMPLE_DFXP)
-        self.assertEqual(7, len(captions.get_captions(u"en-US")))
+        self.assertEqual(7, len(captions.get_captions("en-US")))
 
     def test_proper_timestamps(self):
         captions = DFXPReader().read(SAMPLE_DFXP)
-        paragraph = captions.get_captions(u"en-US")[2]
+        paragraph = captions.get_captions("en-US")[2]
 
         self.assertEqual(17000000, paragraph.start)
         self.assertEqual(18752000, paragraph.end)
 
     def test_offset_time(self):
         reader = DFXPReader()
-        self.assertEquals(1, reader._translate_time(u"0.001ms"))
-        self.assertEquals(2000, reader._translate_time(u"2ms"))
-        self.assertEquals(1000000, reader._translate_time(u"1s"))
-        self.assertEquals(1234567, reader._translate_time(u"1.234567s"))
-        self.assertEquals(180000000, reader._translate_time(u"3m"))
-        self.assertEquals(14400000000, reader._translate_time(u"4h"))
+        self.assertEqual(1, reader._translate_time("0.001ms"))
+        self.assertEqual(2000, reader._translate_time("2ms"))
+        self.assertEqual(1000000, reader._translate_time("1s"))
+        self.assertEqual(1234567, reader._translate_time("1.234567s"))
+        self.assertEqual(180000000, reader._translate_time("3m"))
+        self.assertEqual(14400000000, reader._translate_time("4h"))
         # Tick values are not supported
         self.assertRaises(
-	        InvalidInputError, reader._translate_time, u"2.3t"
-        )
+            InvalidInputError, reader._translate_time, "2.3t")
 
     def test_empty_file(self):
         self.assertRaises(
@@ -46,12 +45,12 @@ def test_empty_file(self):
 
     def test_invalid_markup_is_properly_handled(self):
         captions = DFXPReader().read(SAMPLE_DFXP_SYNTAX_ERROR)
-        self.assertEquals(2, len(captions.get_captions(u"en-US")))
+        self.assertEqual(2, len(captions.get_captions("en-US")))
 
     def test_caption_error_for_invalid_positioning_values(self):
         invalid_value_dfxp = (
             SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE
-            .format(origin=u"px 5px")
+            .format(origin="px 5px")
         )
         self.assertRaises(
             CaptionReadSyntaxError, DFXPReader().read,
@@ -61,7 +60,7 @@ def test_caption_error_for_invalid_positioning_values(self):
     def test_caption_error_for_invalid_or_unsupported_positioning_units(self):
         invalid_dfxp = (
             SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE
-            .format(origin=u"6foo 7bar")
+            .format(origin="6foo 7bar")
         )
         self.assertRaises(
             CaptionReadSyntaxError, DFXPReader().read,
@@ -82,9 +81,9 @@ def test_individual_texts_of_captions_with_matching_timespec_are_kept(self):  #
             SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING
         )
 
-        expected_texts = [u'Some text here',
-                          u'Some text there',
-                          u'Caption texts are everywhere!']
+        expected_texts = ['Some text here',
+                          'Some text there',
+                          'Caption texts are everywhere!']
         actual_texts = [c_.nodes[0].content for c_ in
                         captionset.get_captions("en-US")]
 
@@ -119,7 +118,7 @@ def test_empty_paragraph(self):
             self.fail("Failing on empty paragraph")
 
 
-SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE = u"""\
+SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE = """\
 <?xml version="1.0" encoding="utf-8"?>
 <tt xml:lang="en" xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling">
  <head>
@@ -138,7 +137,7 @@ def test_empty_paragraph(self):
 
 # TODO - notice that there's no "bottom" region specified in the <layout>
 # region, but it's referenced by the <div>. Decide if this is ok enough
-SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING = u"""\
+SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING = """\
 <?xml version="1.0" encoding="utf-8"?>
 <tt xml:lang="en" xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling">
  <head>
diff --git a/tests/test_dfxp_conversion.py b/tests/test_dfxp_conversion.py
index ce80600e..f5935998 100644
--- a/tests/test_dfxp_conversion.py
+++ b/tests/test_dfxp_conversion.py
@@ -60,43 +60,43 @@ def test_default_styling_tag(self):
         result = DFXPWriter().write(caption_set)
 
         default_style = _recreate_style(DFXP_DEFAULT_STYLE, None)
-        default_style[u'xml:id'] = DFXP_DEFAULT_STYLE_ID
+        default_style['xml:id'] = DFXP_DEFAULT_STYLE_ID
 
-        soup = BeautifulSoup(result, u'lxml-xml')
-        style = soup.find(u'style', {u'xml:id': DFXP_DEFAULT_STYLE_ID})
+        soup = BeautifulSoup(result, 'lxml-xml')
+        style = soup.find('style', {'xml:id': DFXP_DEFAULT_STYLE_ID})
 
         self.assertTrue(style)
-        self.assertEquals(style.attrs, default_style)
+        self.assertEqual(style.attrs, default_style)
 
     def test_default_styling_p_tags(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP)
         result = DFXPWriter().write(caption_set)
 
-        soup = BeautifulSoup(result, u'lxml')
-        for p in soup.find_all(u'p'):
-            self.assertEquals(p.attrs.get(u'style'), 'p')
+        soup = BeautifulSoup(result, 'lxml')
+        for p in soup.find_all('p'):
+            self.assertEqual(p.attrs.get('style'), 'p')
 
     def test_default_region_tag(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP)
         result = DFXPWriter().write(caption_set)
 
-        soup = BeautifulSoup(result, u'lxml-xml')
-        region = soup.find(u'region', {u'xml:id': DFXP_DEFAULT_REGION_ID})
+        soup = BeautifulSoup(result, 'lxml-xml')
+        region = soup.find('region', {'xml:id': DFXP_DEFAULT_REGION_ID})
 
         default_region = _convert_layout_to_attributes(DFXP_DEFAULT_REGION)
-        default_region[u'xml:id'] = DFXP_DEFAULT_REGION_ID
+        default_region['xml:id'] = DFXP_DEFAULT_REGION_ID
 
         self.assertTrue(region)
-        self.assertEqual(region.attrs[u'xml:id'], DFXP_DEFAULT_REGION_ID)
+        self.assertEqual(region.attrs['xml:id'], DFXP_DEFAULT_REGION_ID)
         self.assertEqual(region.attrs, default_region)
 
     def test_default_region_p_tags(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP)
         result = DFXPWriter().write(caption_set)
 
-        soup = BeautifulSoup(result, u'lxml')
-        for p in soup.find_all(u'p'):
-            self.assertEqual(p.attrs.get(u'region'), DFXP_DEFAULT_REGION_ID)
+        soup = BeautifulSoup(result, 'lxml')
+        for p in soup.find_all('p'):
+            self.assertEqual(p.attrs.get('region'), DFXP_DEFAULT_REGION_ID)
 
     def test_correct_region_attributes_are_recreated(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP_MULTIPLE_REGIONS_INPUT)
@@ -160,12 +160,12 @@ def test_fit_to_screen(self):
 
     def test_proper_xml_entity_escaping(self):
         caption_set = DFXPReader().read(DFXP_WITH_ESCAPED_APOSTROPHE)
-        cue_text = caption_set.get_captions(u'en-US')[0].nodes[0].content
+        cue_text = caption_set.get_captions('en-US')[0].nodes[0].content
         self.assertEqual(
-            cue_text, u"<< \"Andy's Caf\xe9 & Restaurant\" this way")
+            cue_text, "<< \"Andy's Caf\xe9 & Restaurant\" this way")
         result = DFXPWriter().write(caption_set)
         self.assertIn(
-            u"&lt;&lt; \"Andy's Café &amp; Restaurant\" this way",
+            "&lt;&lt; \"Andy's Café &amp; Restaurant\" this way",
             result
         )
 
@@ -191,10 +191,10 @@ def test_dfxp_to_sami_with_margins(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP_FROM_SAMI_WITH_MARGINS)
         results = SAMIWriter(video_width=VIDEO_WIDTH,
                              video_height=VIDEO_HEIGHT).write(caption_set)
-        margins = [u"margin-right: 6.04%;",
-                   u"margin-bottom: 0%;",
-                   u"margin-top: 0%;",
-                   u"margin-left: 6.04%;"]
+        margins = ["margin-right: 6.04%;",
+                   "margin-bottom: 0%;",
+                   "margin-top: 0%;",
+                   "margin-left: 6.04%;"]
         for margin in margins:
             self.assertIn(margin, results)
 
@@ -238,7 +238,7 @@ def test_dfxp_to_webvtt_adds_explicit_size(self):
         caption_set = DFXPReader().read(SAMPLE_DFXP_LONG_CUE)
         results = WebVTTWriter().write(caption_set)
         self.assertTrue(isinstance(results, text_type))
-        self.assertEquals(
+        self.assertEqual(
             SAMPLE_WEBVTT_OUTPUT_LONG_CUE, results)
 
     def test_dfxp_to_webvtt_preserves_proper_alignment(self):
@@ -247,7 +247,7 @@ def test_dfxp_to_webvtt_preserves_proper_alignment(self):
         # WebVTTWriter.
         caption_set = DFXPReader().read(DFXP_STYLE_REGION_ALIGN_CONFLICT)
         results = WebVTTWriter().write(caption_set)
-        self.assertEquals(
+        self.assertEqual(
             WEBVTT_FROM_DFXP_WITH_CONFLICTING_ALIGN, results)
 
 
diff --git a/tests/test_sami.py b/tests/test_sami.py
index 7f34c625..5b0a0fbe 100644
--- a/tests/test_sami.py
+++ b/tests/test_sami.py
@@ -19,27 +19,27 @@ def test_detection(self):
     def test_caption_length(self):
         captions = SAMIReader().read(SAMPLE_SAMI)
 
-        self.assertEqual(7, len(captions.get_captions(u"en-US")))
+        self.assertEqual(7, len(captions.get_captions("en-US")))
 
     def test_proper_timestamps(self):
         captions = SAMIReader().read(SAMPLE_SAMI)
-        paragraph = captions.get_captions(u"en-US")[2]
+        paragraph = captions.get_captions("en-US")[2]
 
-        self.assertEquals(17000000, paragraph.start)
-        self.assertEquals(18752000, paragraph.end)
+        self.assertEqual(17000000, paragraph.start)
+        self.assertEqual(18752000, paragraph.end)
 
     def test_6digit_color_code_from_6digit_input(self):
         captions = SAMIReader().read(SAMPLE_SAMI)
-        p_style = captions.get_style(u"p")
+        p_style = captions.get_style("p")
 
-        self.assertEqual(u"#ffeedd", p_style[u'color'])
+        self.assertEqual("#ffeedd", p_style['color'])
 
     def test_6digit_color_code_from_3digit_input(self):
         captions = SAMIReader().read(
-            SAMPLE_SAMI.replace(u"#ffeedd", u"#fed"))
-        p_style = captions.get_style(u"p")
+            SAMPLE_SAMI.replace("#ffeedd", "#fed"))
+        p_style = captions.get_style("p")
 
-        self.assertEqual(u"#ffeedd", p_style[u'color'])
+        self.assertEqual("#ffeedd", p_style['color'])
 
     def test_empty_file(self):
         self.assertRaises(
@@ -48,7 +48,7 @@ def test_empty_file(self):
 
     def test_invalid_markup_is_properly_handled(self):
         captions = SAMIReader().read(SAMPLE_SAMI_SYNTAX_ERROR)
-        self.assertEqual(2, len(captions.get_captions(u"en-US")))
+        self.assertEqual(2, len(captions.get_captions("en-US")))
 
     def test_partial_margins(self):
         caption_set = SAMIReader().read(SAMPLE_SAMI_PARTIAL_MARGINS)
@@ -56,28 +56,28 @@ def test_partial_margins(self):
         # (i.e. "0%")
         self.assertEqual(
             caption_set.layout_info.padding.to_xml_attribute(),
-            u'0% 29pt 0% 29pt'
+            '0% 29pt 0% 29pt'
 
         )
 
     def test_sami_with_bad_span_align(self):
         caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_BAD_SPAN_ALIGN)
         caption = caption_set.get_captions('en-US')[0]
-        self.assertEquals(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
+        self.assertEqual(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
 
     def test_sami_with_bad_div_align(self):
         caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_BAD_DIV_ALIGN)
         caption = caption_set.get_captions('en-US')[0]
-        self.assertEquals(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
+        self.assertEqual(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
 
     def test_sami_with_p_align(self):
         caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_P_ALIGN)
         caption = caption_set.get_captions('en-US')[0]
-        self.assertEquals(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
+        self.assertEqual(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
 
     def test_sami_with_p_and_span_align(self):
         """ <span> align DOES NOT override <p> align if it is specified inline.
         """
         caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_P_AND_SPAN_ALIGN)
         caption = caption_set.get_captions('en-US')[0]
-        self.assertEquals(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
+        self.assertEqual(caption.layout_info.alignment.horizontal, HorizontalAlignmentEnum.RIGHT)
diff --git a/tests/test_sami_conversion.py b/tests/test_sami_conversion.py
index 00624801..dc787518 100644
--- a/tests/test_sami_conversion.py
+++ b/tests/test_sami_conversion.py
@@ -121,9 +121,9 @@ def test_sami_to_dfxp_xml_output(self):
         results = DFXPWriter(relativize=False,
                              fit_to_screen=False).write(captions)
         self.assertTrue(isinstance(results, six.text_type))
-        self.assertTrue(u'xmlns="http://www.w3.org/ns/ttml"' in results)
+        self.assertTrue('xmlns="http://www.w3.org/ns/ttml"' in results)
         self.assertTrue(
-            u'xmlns:tts="http://www.w3.org/ns/ttml#styling"' in results)
+            'xmlns:tts="http://www.w3.org/ns/ttml#styling"' in results)
 
 
 class SAMItoWebVTTTestCase(unittest.TestCase, WebVTTTestingMixIn):
@@ -164,4 +164,4 @@ def test_sami_to_sami_conversion(self):
         results = SAMIWriter().write(caption_set)
         self.assertTrue(isinstance(results, six.text_type))
         self.assertSAMIEquals(SAMPLE_SAMI_WITH_LANG, results)
-        self.assertTrue(u"lang: en-US;" in results)
+        self.assertTrue("lang: en-US;" in results)
diff --git a/tests/test_scc.py b/tests/test_scc.py
index 45f33177..7d474f87 100644
--- a/tests/test_scc.py
+++ b/tests/test_scc.py
@@ -27,11 +27,11 @@ def test_detection(self):
     def test_caption_length(self):
         captions = SCCReader().read(SAMPLE_SCC_POP_ON)
 
-        self.assertEqual(7, len(captions.get_captions(u"en-US")))
+        self.assertEqual(7, len(captions.get_captions("en-US")))
 
     def test_proper_timestamps(self):
         captions = SCCReader().read(SAMPLE_SCC_POP_ON)
-        paragraph = captions.get_captions(u"en-US")[2]
+        paragraph = captions.get_captions("en-US")[2]
 
         delta_start = abs(paragraph.start - 17000000)
         delta_end = abs(paragraph.end - 18752000)
@@ -64,7 +64,7 @@ def test_scc_positioning_is_read(self):
         ]
         actual_positioning = [
             caption_.layout_info.origin.serialized() for caption_ in
-            captions.get_captions(u'en-US')
+            captions.get_captions('en-US')
         ]
 
         self.assertEqual(expected_positioning, actual_positioning)
@@ -78,7 +78,7 @@ def test_correct_last_bad_timing(self):
                             (3208266666.666667, 3269700000.0)]
 
         actual_timings = [
-            (c_.start, c_.end) for c_ in caption_set.get_captions(u'en-US')
+            (c_.start, c_.end) for c_ in caption_set.get_captions('en-US')
         ]
         self.assertEqual(expected_timings, actual_timings)
 
@@ -94,26 +94,26 @@ def switches_italics(node):
             :rtype: bool
             """
             if not node.type_ == node.STYLE:
-                raise ValueError(u"This should be a style node.")
+                raise ValueError("This should be a style node.")
 
             return node.start
 
         caption_set = SCCReader().read(SAMPLE_SCC_WITH_ITALICS)
-        nodes = caption_set.get_captions(u'en-US')[0].nodes
+        nodes = caption_set.get_captions('en-US')[0].nodes
 
         # We assert that the text is specified in italics.
         # If Style nodes are replaced, the way these 3 assertions are made
         # will most likely change
         self.assertEqual(switches_italics(nodes[0]), True)
         self.assertEqual(switches_italics(nodes[2]), False)
-        self.assertEqual(nodes[1].content, u'abababab')
+        self.assertEqual(nodes[1].content, 'abababab')
 
     def test_default_positioning_when_no_positioning_is_specified(self):
         caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)
 
         actual_caption_layouts = [
             caption.layout_info.serialized()
-            for caption in caption_set.get_captions(u'en-US')
+            for caption in caption_set.get_captions('en-US')
         ]
 
         expected_caption_layouts = [
@@ -145,9 +145,9 @@ def test_timing_is_properly_set_on_split_captions(self):
         caption_set = SCCReader().read(
             SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME
         )
-        expected_timings = [(u'00:01:35.666', u'00:01:40.866'),
-                            (u'00:01:35.666', u'00:01:40.866'),
-                            (u'00:01:35.666', u'00:01:40.866')]
+        expected_timings = [('00:01:35.666', '00:01:40.866'),
+                            ('00:01:35.666', '00:01:40.866'),
+                            ('00:01:35.666', '00:01:40.866')]
 
         actual_timings = [(c_.format_start(), c_.format_end()) for c_ in
                           caption_set.get_captions('en-US')]
@@ -168,37 +168,37 @@ def test_freeze_rollup_captions_contents(self):
         # There were no tests for ROLL-UP captions, but the library processed
         # Roll-Up captions. Make sure nothing changes during the refactoring
         scc1 = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)
-        captions = scc1.get_captions(u'en-US')
+        captions = scc1.get_captions('en-US')
         actual_texts = [cap_.nodes[0].content for cap_ in captions]
-        expected_texts = [u'>>> HI',
-                          u"I'M KEVIN CUNNING AND AT",
+        expected_texts = ['>>> HI',
+                          "I'M KEVIN CUNNING AND AT",
                           # Notice the missing 'N' at the end. This is because
                           # the input is not OK (should only use 4 byte "words"
                           # (filling in with '80' where only 2 bytes are
                           # meaningful)
-                          u"INVESTOR'S BANK WE BELIEVE I",
-                          u'HELPING THE LOCAL NEIGHBORHOOD',
-                          u'AND IMPROVING THE LIVES OF ALL',
-                          u'WE SERVE',
+                          "INVESTOR'S BANK WE BELIEVE I",
+                          'HELPING THE LOCAL NEIGHBORHOOD',
+                          'AND IMPROVING THE LIVES OF ALL',
+                          'WE SERVE',
                           # special chars. Last one should be printer 2 times
                           # XXX this is a bug.
-                          u'®°½',
+                          '®°½',
                           # special/ extended chars delete last 0-4 chars.
                           # XXX - this is a bug.
-                          u'ABû',
-                          u'ÁÉÓ¡',
-                          u"WHERE YOU'RE STANDING NOW,",
-                          u"LOOKING OUT THERE, THAT'S AL",
-                          u'THE CROWD.',
-                          u'>> IT WAS GOOD TO BE IN TH',
-                          u"And restore Iowa's land, water",
-                          u'And wildlife.',
-                          u'>> Bike Iowa, your source for']
+                          'ABû',
+                          'ÁÉÓ¡',
+                          "WHERE YOU'RE STANDING NOW,",
+                          "LOOKING OUT THERE, THAT'S AL",
+                          'THE CROWD.',
+                          '>> IT WAS GOOD TO BE IN TH',
+                          "And restore Iowa's land, water",
+                          'And wildlife.',
+                          '>> Bike Iowa, your source for']
         self.assertEqual(expected_texts, actual_texts)
 
     def test_freeze_semicolon_spec_time(self):
         scc1 = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)
-        captions = scc1.get_captions(u'en-US')
+        captions = scc1.get_captions('en-US')
         expected_timings = [(766666.6666666667, 2800000.0),
                             (2800000.0, 4600000.0),
                             (4600000.0, 6166666.666666667),
@@ -233,7 +233,7 @@ def test_freeze_colon_spec_time(self):
                             (32165466.66666666, 36202833.33333332)]
 
         actual_timings = [
-            (c_.start, c_.end) for c_ in scc1.get_captions(u'en-US')]
+            (c_.start, c_.end) for c_ in scc1.get_captions('en-US')]
         self.assertEqual(expected_timings, actual_timings)
 
 
@@ -323,7 +323,7 @@ def __init__(self, start=0, end=0, nodes=(1, 2)):
         self.end = end
 
     def __repr__(self):
-        return u"{start}-->{end}".format(start=self.start, end=self.end)
+        return "{start}-->{end}".format(start=self.start, end=self.end)
 
 
 class TimingCorrectingCaptionListTestCase(unittest.TestCase):
diff --git a/tests/test_srt.py b/tests/test_srt.py
index 6e5e2827..37352ade 100644
--- a/tests/test_srt.py
+++ b/tests/test_srt.py
@@ -15,18 +15,18 @@ def test_detection(self):
     def test_caption_length(self):
         captions = SRTReader().read(SAMPLE_SRT)
 
-        self.assertEqual(7, len(captions.get_captions(u"en-US")))
+        self.assertEqual(7, len(captions.get_captions("en-US")))
 
     def test_proper_timestamps(self):
         captions = SRTReader().read(SAMPLE_SRT)
-        paragraph = captions.get_captions(u"en-US")[2]
+        paragraph = captions.get_captions("en-US")[2]
 
         self.assertEqual(17000000, paragraph.start)
         self.assertEqual(18752000, paragraph.end)
 
     def test_numeric_captions(self):
         captions = SRTReader().read(SAMPLE_SRT_NUMERIC)
-        self.assertEqual(7, len(captions.get_captions(u"en-US")))
+        self.assertEqual(7, len(captions.get_captions("en-US")))
 
     def test_empty_file(self):
         self.assertRaises(
@@ -35,8 +35,8 @@ def test_empty_file(self):
 
     def test_extra_empty_line(self):
         captions = SRTReader().read(SAMPLE_SRT_BLANK_LINES)
-        self.assertEqual(2, len(captions.get_captions(u"en-US")))
+        self.assertEqual(2, len(captions.get_captions("en-US")))
 
     def test_extra_trailing_empty_line(self):
         captions = SRTReader().read(SAMPLE_SRT_TRAILING_BLANKS)
-        self.assertEqual(2, len(captions.get_captions(u"en-US")))
+        self.assertEqual(2, len(captions.get_captions("en-US")))
diff --git a/tests/test_webvtt.py b/tests/test_webvtt.py
index 1a56938d..eef7df6b 100644
--- a/tests/test_webvtt.py
+++ b/tests/test_webvtt.py
@@ -27,30 +27,30 @@ def test_negative_answer_for_detection(self):
 
     def test_caption_length(self):
         captions = self.reader.read(SAMPLE_WEBVTT_2)
-        self.assertEqual(len(captions.get_captions(u'en-US')), 7)
+        self.assertEqual(len(captions.get_captions('en-US')), 7)
 
     def test_read_supports_multiple_languages(self):
-        captions = self.reader.read(SAMPLE_WEBVTT, lang=u'es')
-        self.assertIsNotNone(captions.get_captions(u'es'))
+        captions = self.reader.read(SAMPLE_WEBVTT, lang='es')
+        self.assertIsNotNone(captions.get_captions('es'))
 
     def test_proper_timestamps(self):
         captions = self.reader.read(SAMPLE_WEBVTT)
-        cue = captions.get_captions(u'en-US')[2]
+        cue = captions.get_captions('en-US')[2]
         self.assertEqual(cue.start, 17000000)
         self.assertEqual(cue.end, 18752000)
 
     def test_webvtt_cue_components_removed_from_text(self):
         result = self.reader._remove_styles(
-            u"<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
-            u"its shortcomings</i>, but it is<u> the largest</u> collective "
-            u"knowledge construction endevour</c> <ruby>base text <rt>"
-            u"annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
+            "<c vIntro><b>Wikipedia</b> is a great adventure. <i>It may have "
+            "its shortcomings</i>, but it is<u> the largest</u> collective "
+            "knowledge construction endevour</c> <ruby>base text <rt>"
+            "annotation</rt></ruby> <v Audry><b>Yes</b>, indeed!"
         )
         expected = (
-            u"Wikipedia is a great adventure. It may have "
-            u"its shortcomings, but it is the largest collective "
-            u"knowledge construction endevour base text annotation"
-            u" Audry: Yes, indeed!"
+            "Wikipedia is a great adventure. It may have "
+            "its shortcomings, but it is the largest collective "
+            "knowledge construction endevour base text annotation"
+            " Audry: Yes, indeed!"
         )
         self.assertEqual(result, expected)
 
@@ -63,26 +63,26 @@ def test_not_ignoring_timing_errors(self):
         self.assertRaises(
             CaptionReadError,
             WebVTTReader(ignore_timing_errors=False).read,
-            (u"\n"
-             u"00:00:20.000 --> 00:00:10.000\n"
-             u"foo bar baz")
+            ("\n"
+             "00:00:20.000 --> 00:00:10.000\n"
+             "foo bar baz")
         )
 
         self.assertRaises(
             CaptionReadError,
             WebVTTReader(ignore_timing_errors=False).read,
-            (u"00:00:20.000 --> 00:00:10.000\n"
-             u"Start time is greater than end time.\n")
+            ("00:00:20.000 --> 00:00:10.000\n"
+             "Start time is greater than end time.\n")
         )
 
         self.assertRaises(
             CaptionReadError,
             WebVTTReader(ignore_timing_errors=False).read,
-            (u"00:00:20.000 --> 00:00:30.000\n"
-             u"Start times should be consecutive.\n"
-             u"\n"
-             u"00:00:10.000 --> 00:00:20.000\n"
-             u"This cue starts before the previous one.\n")
+            ("00:00:20.000 --> 00:00:30.000\n"
+             "Start times should be consecutive.\n"
+             "\n"
+             "00:00:10.000 --> 00:00:20.000\n"
+             "This cue starts before the previous one.\n")
         )
 
     def test_ignoring_timing_errors(self):
@@ -90,72 +90,72 @@ def test_ignoring_timing_errors(self):
         self.assertRaises(
             CaptionReadSyntaxError,
             WebVTTReader().read,
-            (u"\nNOTE invalid cue stamp\n"
-             u"00:00:20.000 --> \n"
-             u"foo bar baz\n")
+            ("\nNOTE invalid cue stamp\n"
+             "00:00:20.000 --> \n"
+             "foo bar baz\n")
         )
 
         # And this too
         self.assertRaises(
             CaptionReadSyntaxError,
             WebVTTReader().read,
-            (u"\n00:00:20,000 --> 00:00:22,000\n"
-             u"Note the comma instead of point.\n")
+            ("\n00:00:20,000 --> 00:00:22,000\n"
+             "Note the comma instead of point.\n")
         )
 
         try:
             WebVTTReader().read(
-                (u"\n"
-                 u"00:00:20.000 --> 00:00:10.000\n"
-                 u"Start time is greater than end time.\n")
+                ("\n"
+                 "00:00:20.000 --> 00:00:10.000\n"
+                 "Start time is greater than end time.\n")
             )
         except CaptionReadError:
-            self.fail(u"Shouldn't raise CaptionReadError")
+            self.fail("Shouldn't raise CaptionReadError")
 
         try:
             WebVTTReader().read(
-                (u"\n"
-                 u"00:00:20.000 --> 00:00:30.000\n"
-                 u"Start times should be consecutive.\n"
-                 u"\n"
-                 u"00:00:10.000 --> 00:00:20.000\n"
-                 u"This cue starts before the previous one.\n")
+                ("\n"
+                 "00:00:20.000 --> 00:00:30.000\n"
+                 "Start times should be consecutive.\n"
+                 "\n"
+                 "00:00:10.000 --> 00:00:20.000\n"
+                 "This cue starts before the previous one.\n")
 
             )
         except CaptionReadError:
-            self.fail(u"Shouldn't raise CaptionReadError")
+            self.fail("Shouldn't raise CaptionReadError")
 
     def test_invalid_files(self):
         self.assertRaises(
             CaptionReadSyntaxError,
             WebVTTReader().read,
-            (u"\nNOTE Cues without text are invalid.\n"
-                u"00:00:20.000 --> 00:00:30.000\n"
-                u"\n"
-                u"00:00:40.000 --> 00:00:50.000\n"
-                u"foo bar baz\n")
+            ("\nNOTE Cues without text are invalid.\n"
+                "00:00:20.000 --> 00:00:30.000\n"
+                "\n"
+                "00:00:40.000 --> 00:00:50.000\n"
+                "foo bar baz\n")
         )
 
         self.assertRaises(
             CaptionReadError,
             WebVTTReader(ignore_timing_errors=False).read,
-            (u"00:00:20.000 --> 00:00:10.000\n"
-                u"Start time is greater than end time.")
+            ("00:00:20.000 --> 00:00:10.000\n"
+                "Start time is greater than end time.")
         )
 
         self.assertRaises(
             CaptionReadError,
             WebVTTReader(ignore_timing_errors=False).read,
-            (u"00:00:20.000 --> 00:00:30.000\n"
-                u"Start times should be consecutive.\n"
-                u"\n"
-                u"00:00:10.000 --> 00:00:20.000\n"
-                u"This cue starts before the previous one.\n")
+            ("00:00:20.000 --> 00:00:30.000\n"
+                "Start times should be consecutive.\n"
+                "\n"
+                "00:00:10.000 --> 00:00:20.000\n"
+                "This cue starts before the previous one.\n")
         )
 
     def test_zero_start(self):
         captions = self.reader.read(SAMPLE_WEBVTT_LAST_CUE_ZERO_START)
-        cue = captions.get_captions(u'en-US')[0]
+        cue = captions.get_captions('en-US')[0]
         self.assertEqual(cue.start, 0)
 
 
diff --git a/tox.ini b/tox.ini
index 6623fd82..3a47fbcb 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py27, py34, py35
+envlist = py34, py35
 [testenv]
 deps=
     beautifulsoup4==4.4