# -*- coding: utf-8 -*-
from xml.etree import cElementTree as etree
import os
import re
import sys
import time
# Module-level version string (presumably the release of the package this
# file belongs to -- TODO confirm against the packaging metadata).
VERSION = '1.39'
class Eaf:
"""Read and write Elan's Eaf files.
.. note:: All times are in milliseconds and can't have decimals.
:var dict adocument: Annotation document TAG entries.
:var list licenses: Licenses included in the file, each of the form:
``(name, url)``.
:var dict header: XML header.
:var list media_descriptors: Linked files, where every file is of the
form: ``{attrib}``.
:var list properties: Properties, where every property is of the form:
``(key, value)``.
:var list linked_file_descriptors: Secondary linked files, where every
linked file is of the form: ``{attrib}``.
:var dict timeslots: Timeslot data of the form: ``{id -> time(ms)}``.
:var dict tiers: Tiers, where every tier is of the form:
``{tier_name -> (aligned_annotations, reference_annotations,
attributes, ordinal)}``,
aligned_annotations of the form: ``[{id -> (begin_ts, end_ts, value,
svg_ref)}]``,
reference annotations of the form: ``[{id -> (reference, value,
previous, svg_ref)}]``.
:var dict linguistic_types: Linguistic types, of the form:
``{id -> attrib}``.
:var dict locales: Locales, of the form:
``{lancode -> (countrycode, variant)}``.
:var dict languages: Languages, of the form:
``{langid -> (langdef, langlabel)}``.
:var dict constraints: Constraints, every constraint is of the form:
``{stereotype -> description}``.
:var dict controlled_vocabularies: Controlled vocabulary, where every
controlled vocabulary is of the form: ``{id -> (descriptions, entries,
ext_ref)}``,
descriptions of the form: ``[(lang_ref, description)]``,
entries of the form: ``{id -> (values, ext_ref)}``,
values of the form: ``[(lang_ref, description, text)]``.
:var dict external_refs: External references of the form:
``{id -> (type, value)}``.
:var dict lexicon_refs: Lexicon references, of the form:
``{id -> {attribs}}``.
:var dict annotations: Dictionary of annotations of the form:
``{id -> tier}``, this is only used internally.
"""
# Set of external reference type names (presumably the values accepted by
# add_external_ref -- its body is not visible here, TODO confirm).
ETYPES = {'iso12620', 'ecv', 'cve_id', 'lexen_id', 'resource_url'}
# Default ``stereotype -> description`` mapping; a copy of it becomes
# ``self.constraints`` when a new, empty Eaf object is constructed.
CONSTRAINTS = {
'Time_Subdivision': "Time subdivision of parent annotation's time inte"
'rval, no time gaps allowed within this interval',
'Symbolic_Subdivision': 'Symbolic subdivision of a parent annotation. '
'Annotations refering to the same parent are ordered',
'Symbolic_Association': '1-1 association with a parent annotation',
'Included_In': 'Time alignable annotations within the parent annotatio'
"n's time interval, gaps are allowed"}
# Lookup from a short key (presumably a file extension) to a MIME type.
# NOTE(review): 'mpeg' maps to 'video/mpg' while 'mpg' maps to 'video/mpeg';
# the two look swapped/inconsistent -- confirm whether this is intentional.
MIMES = {'wav': 'audio/x-wav', 'mpg': 'video/mpeg', 'mpeg': 'video/mpg',
'xml': 'text/xml'}
def __init__(self, file_path=None, author='pympi'):
    """Construct either a new Eaf file or read one from a file/stream.

    When ``file_path`` is ``None`` the object is initialised as an empty
    document holding the default linguistic type (``default-lt``), the
    default constraints, a ``lastUsedAnnotation`` property set to ``0``
    and a tier called ``default``. Otherwise all data is loaded by
    ``parse_eaf``.

    :param str file_path: Path to read from, - for stdin. If ``None`` an
                          empty Eaf file will be created.
    :param str author: Author of the file.
    """
    # Local UTC offset in seconds (positive east of UTC). time.altzone and
    # time.timezone are expressed west of UTC, hence the negation.
    dst_active = time.localtime(time.time()).tm_isdst and time.daylight
    ctz = -time.altzone if dst_active else -time.timezone
    # Render the offset as "+HH:MM". The sign is handled separately and
    # the magnitude split with divmod so that negative and non-whole-hour
    # offsets (e.g. -03:30, +05:45) come out right; the minutes field is
    # the remaining seconds divided by 60, not the raw remainder.
    offset_hours, offset_rest = divmod(abs(ctz), 3600)
    utc_offset = '{}{:02d}:{:02d}'.format(
        '-' if ctz < 0 else '+', offset_hours, offset_rest // 60)

    self.maxts = None
    self.maxaid = None
    self.adocument = {
        'AUTHOR': author,
        'DATE': time.strftime('%Y-%m-%dT%H:%M:%S') + utc_offset,
        'VERSION': '2.8',
        'FORMAT': '2.8',
        'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:noNamespaceSchemaLocation':
            'http://www.mpi.nl/tools/elan/EAFv2.8.xsd'}
    # Dictionary based containers.
    self.annotations = {}
    self.constraints = {}
    self.controlled_vocabularies = {}
    self.external_refs = {}
    self.header = {}
    self.languages = {}
    self.lexicon_refs = {}
    self.linguistic_types = {}
    self.locales = {}
    self.tiers = {}
    self.timeslots = {}
    # List based containers.
    self.licenses = []
    self.linked_file_descriptors = []
    self.media_descriptors = []
    self.properties = []

    if file_path is None:
        # Fresh document: the linguistic type is registered before the
        # default tier is added.
        self.add_linguistic_type('default-lt')
        self.constraints = self.CONSTRAINTS.copy()
        self.properties.append(('lastUsedAnnotation', 0))
        self.add_tier('default')
    else:
        parse_eaf(file_path, self)
def add_annotation(self, id_tier, start, end, value='', svg_ref=None):
"""Add an annotation.
:param str id_tier: Name of the tier.
:param int start: Start time of the annotation.
:param int end: End time of the annotation.
:param str value: Value of the annotation.
:param str svg_ref: Svg reference.
:raises KeyError: If the tier is non existent.
:raises ValueError: If one of the values is negative or start is bigger
then end or if the tiers already contains ref
annotations.
"""
if self.tiers[id_tier][1]:
raise ValueError('Tier already contains ref annotations...')
if start == end:
raise ValueError('Annotation length is zero...')
if start > end:
raise ValueError('Annotation length is negative...')
if start < 0:
raise ValueError('Start is negative...')
start_ts = self.generate_ts_id(start)
end_ts = self.generate_ts_id(end)
aid = self.generate_annotation_id()
self.annotations[aid] = id_tier
self.tiers[id_tier][0][aid] = (start_ts, end_ts, value, svg_ref)
def add_controlled_vocabulary(self, cv_id, ext_ref=None):
"""Add a controlled vocabulary. This will initialize the controlled
vocabulary without entries.
:param str cv_id: Name of the controlled vocabulary.
:param str ext_ref: External reference.
"""
self.controlled_vocabularies[cv_id] = ([], {}, ext_ref)
def add_cv_entry(self, cv_id, cve_id, values, ext_ref=None):
"""Add an entry to a controlled vocabulary.
:param str cv_id: Name of the controlled vocabulary to add an entry.
:param str cve_id: Name of the entry.
:param list values: List of values of the form:
``(value, lang_ref, description)`` where description can be
``None``.
:param str ext_ref: External reference.
:throws KeyError: If there is no controlled vocabulary with that id.
:throws ValueError: If a language in one of the entries doesn't exist.
"""
for value, lang_ref, description in values:
if lang_ref not in self.languages:
raise ValueError('Language not present: {}'.format(lang_ref))
self.controlled_vocabularies[cv_id][1][cve_id] = (values, ext_ref)
def add_cv_description(self, cv_id, lang_ref, description=None):
"""Add a description to a controlled vocabulary.
:param str cv_id: Name of the controlled vocabulary to add the
description.
:param str lang_ref: Language reference.
:param str description: Description, this can be none.
:throws KeyError: If there is no controlled vocabulary with that id.
:throws ValueError: If the language provided doesn't exist.
"""
if lang_ref not in self.languages:
raise ValueError('Language not present: {}'.format(lang_ref))
self.controlled_vocabularies[cv_id][0].append((lang_ref, description))
def add_external_ref(self, eid, etype, value):
"""Add an external reference.
:param str eid: Name of the external reference.
:param str etype: Type of the external