Introduction


xtf2.rnc

Preamble

#-lines

Protocols

Comments

&-lines

@-lines

Objects

Surfaces

Columns

Status

Milestones

Implied tags

$-lines

Seal

State

Rulings

Examples

Images

Text Lines

Advanced

Line Numbers

Cells & Fields

Streams

Zones

Composites

@composite

Structure

Locator

Variants


Resources


Links

Top

Tutorial

GDL Manual

Advanced

Composites

Lexical

Linkage

Protocols

XTF2: XML Transliteration Format Version 2

(http://emegir.info/xtf/2)

Steve Tinney
Version of 2008-07-16

Introduction

XTF2 is an XML format for describing the transliteration of cuneiform texts; it also encompasses facilities for other kinds of editions commonly used in cuneiform studies.

xtf2.rnc

# Unprefixed element names in this grammar belong to the XTF2 namespace.
default namespace = "http://emegir.info/xtf/2"
# External grammars merged into this one.
include "gdl.rnc"
include "xtr.rnc"
# Permitted document elements. `translation` is not defined in this file
# (presumably supplied by xtr.rnc -- verify).
start = xtf | translation | transliteration | composite | atf
# Outer container: optional outer protocols, then any mix of the four text kinds.
xtf = element xtf { proto.outer? , (atf | transliteration | composite | translation)* }
# Text-only element with a required xml:id.
atf = element atf { attribute xml:id   { xsd:ID } , text }

# Outer protocols: a scoped container wrapping a single basket protocol.
proto.outer =
  element protocols {
    attribute scope { text },
    proto.basket
  }

# Start protocols: a scoped container wrapping any number of the listed
# protocol kinds, in any order.
proto.start =
  element protocols {
    attribute scope { text },
    ( proto.atf
    | proto.bib
    | proto.etcsl
    | proto.key
    | proto.lemmatizer
    | proto.link
    | proto.project
    | proto.syntax
    | proto.version
    )*
  }

# After protocols: only the note protocol.
proto.after = proto.note

# Inter protocols: the kinds allowed between lines of a text.
proto.inter =
    proto.bib
  | proto.etcsl
  | proto.lem
  | proto.link
  | proto.note
  | proto.var

# Individual protocols: each is a `protocol` element whose fixed `type`
# value names the protocol and whose content is plain text.
proto.atf        = element protocol { attribute type { "atf" }, text }
proto.basket     = element protocol { attribute type { "basket" }, text }
proto.bib        = element protocol { attribute type { "bib" }, text }
proto.etcsl      = element protocol { attribute type { "etcsl" }, text }
proto.key        = element protocol { attribute type { "key" }, text }
proto.lem        = element protocol { attribute type { "lem" }, text }
proto.lemmatizer = element protocol { attribute type { "lemmatizer" }, text }
proto.link       = element protocol { attribute type { "link" }, text }
proto.note       = element protocol { attribute type { "note" }, text }
proto.project    = element protocol { attribute type { "project" }, text }
proto.syntax     = element protocol { attribute type { "syntax" }, text }
proto.var        = element protocol { attribute type { "var" }, text }
proto.version    = element protocol { attribute type { "version" }, text }

# Free-text annotations: a note element or a cmt element, both text-only.
comments = note | cmt
note = element note { text }
cmt  = element cmt { text }

# A transliteration: identifying attributes, optional start protocols, then
# any mix of objects, non-object lines, comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute xml:lang { xsd:NMTOKEN },
    attribute hand { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    ( object | nonobject | comments | sealing )*
  }

# Shared attribute patterns.
n.attr    = attribute n { text }
# n restricted to exactly one lowercase ASCII letter.
n.attr.lc = attribute n { xsd:string { pattern = "[a-z]" } }
project   = attribute project { xsd:NMTOKEN }
haslinks  = attribute haslinks { xsd:boolean }
# The pattern is named maxcells, but the attribute it defines is `cols`.
maxcells  = attribute cols { xsd:nonNegativeInteger }

# An object is either implicit or addressable (xml:id + label). Its type is a
# known object kind, or the generic "object" together with a name in n.
object =
  element object {
    ( implicit
    | ( attribute xml:id { xsd:ID }, attribute label { text } )
    ),
    ( attribute type { known.object }
    | ( attribute type { user.object }, n.attr )
    ),
    status.flags,
    ( m.fragment | surface | sealing | comments | nonx )*
  }

known.object = xsd:string { pattern = "tablet|envelope|prism|bulla" }
user.object  = xsd:string { pattern = "object" }

# Content allowed directly in a transliteration outside any object.
nonobject = nonx

# A surface is either implicit or addressable via xml:id + label.
surface =
  element surface { 
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (proto.inter | column | nonx | m | comments)* ,
    # The type value decides the form of n: absent (known), one lowercase
    # letter (face), an optional lowercase letter (edge), or free text
    # (user/seal).
    (  attribute type { known.surface }
     |(attribute type { face.surface } , n.attr.lc)
     |(attribute type { edge.surface } , n.attr.lc?)
     |(attribute type { user.surface | seal.surface } , n.attr)
     ),
    primes?,
    status.flags
  }

# NOTE(review): "surface" is listed here and is also the user.surface value,
# so type="surface" validates both with and without n -- confirm intended.
known.surface =
  xsd:string {
    pattern="surface|obverse|reverse|left|right|top|bottom"
  }
face.surface = xsd:string { pattern="face" }
edge.surface = xsd:string { pattern="edge" }
user.surface = xsd:string { pattern="surface" }
seal.surface = xsd:string { pattern="seal" }

# A sealing: required xml:id, label and n; optional scid; then any mix of
# milestones, columns, nonx lines and comments.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label { text },
    attribute n { xsd:NMTOKEN },
    attribute scid { xsd:NMTOKEN }?,
    ( milestone | column | nonx | comments )*
  }

# A column is either implicit or addressable (xml:id + label); n is required,
# o and primes are optional.
column =
  element column {
    ( implicit
    | ( attribute xml:id { xsd:ID }, attribute label { text } )
    ),
    attribute n { text },
    attribute o { text }?,
    primes?,
    ( milestone | lg | l | nonl | nonx | comments | proto.inter )*,
    status.flags
  }

# primes attribute: one or more U+2032 (PRIME) characters. \x{2032} is a
# RELAX NG compact-syntax escape, resolved before the pattern is read.
primes = 
  attribute primes { xsd:string { pattern="\x{2032}+" } }

# A milestone is either a general marker or a discourse marker.
milestone = m | m.discourse

# General milestone: type is division or locator; subtype is optional.
m =
  element m {
    attribute type { "division" | "locator" },
    attribute subtype { xsd:NMTOKEN }?,
    text
  }

# Discourse milestone: subtype is required and drawn from a closed list.
m.discourse =
  element m {
    attribute type { "discourse" },
    attribute subtype {
      "body" | "date" | "linecount" | "witnesses" | "summary"
    },
    text
  }

# Locator milestone whose optional subtype, when present, is "fragment".
m.fragment =
  element m {
    attribute type { "locator" },
    attribute subtype { "fragment" }?,
    text
  }

# Flag attribute; the only permitted value is "1".
implicit = attribute implicit { "1" }

# Non-text elements: each is an attribute list followed by text content.
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }

# nonx attributes: required xml:id, an optional label+silent pair (both or
# neither), then exactly one of four shapes.
nonx-attlist =
  attribute xml:id { xsd:ID },
  (attribute label { text },
   attribute silent { "1" })?,
  # Shape 1 (strict="1"): ref+scope, or extent+scope+state, all required.
  ((attribute strict { "1" },
   ((attribute ref    { text },
     attribute scope  { text })
   |(attribute extent { text },
     attribute scope  { text },
     attribute state  { text })))
  |
   # Shape 2 (strict="0"): every descriptive attribute is optional.
   (attribute strict { "0" },
    attribute extent { text }?,
    attribute ref    { text }?,
    attribute scope  { text }?,
    attribute state  { text }?)
  |
   # Shape 3 (strict="0"): empty marker with ref="none".
   (attribute strict { "0" },
    attribute ref    { "none" },
    attribute type   { "empty" })
  |
   # Shape 4 (strict="0"): image; ref is P/Q/X + digits, "@", digits, then
   # optional lowercase letters; alt is required.
   (attribute type   { "image" },
    attribute strict { "0" },
    attribute ref    { xsd:string {
                          pattern="[PQX][0-9]+@[0-9]+[a-z]*" 
		       }},
    attribute alt    { text })
  )

# Attribute set shared by the three attlists below.
non-x-attr-set =
  attribute type {
    "newline" | "broken" | "maybe-broken" | "traces"
    | "maybe-traces" | "blank" | "ruling" | "image"
    | "seal" | "comment" | "bullet" | "other"
  },
  attribute unit { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref { text }?,
  attribute xml:id { xsd:ID }?
# NOTE(review): noncolumn-attlist is not referenced in the visible part of
# this file -- confirm whether it is still needed.
noncolumn-attlist &= non-x-attr-set
nonl-attlist &= non-x-attr-set
nong-attlist &= non-x-attr-set

# A text line: xml:id and n are required. Content is one or more cells, one
# or more fields, or any mix of alignment groups and inline content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute o { text }?,
    attribute l { text }?,
    attribute label { text }?,
    attribute silent { "1" }?,
    ( cell+
    | f+
    | ( ag | l.inner )*
    )
  }

# Inline line content: any sequence of the four inline patterns.
l.inner = ( surro | normword | words | glo )*

# A cell is element c: optional span attribute, then either one or more
# fields or inline content.
cell =
  element c {
    span?,
    ( f+ | l.inner )
  }
span = attribute span { xsd:nonNegativeInteger }

# A field: type is the only required attribute.
f =
  element f {
    f-attlist,
    ( ag | l.inner )*
  }
f-attlist &=
  attribute type { xsd:NMTOKEN },
  attribute xml:id { xsd:ID }?,
  attribute n { text }?,
  attribute xml:lang { xsd:NMTOKEN }?

# Line group: a main line, an optional gus line, then one of
#   nts alone, lgs alone, nts followed by lgs, or any run of e/comments;
# finally inter protocols and variants.
lg =
  element lg {
    attribute xml:id { xsd:ID }?,
    attribute n { text }?,
    l,
    gus?,
    ( (nts, lgs?)
    | lgs
    | (e | comments)*
    ),
    proto.inter*,
    var*
  }

# The extra streams are l elements told apart by their type attribute.
nts = element l { attribute type { "nts" }, (ag | l.inner)* }
lgs = element l { attribute type { "lgs" }, grapheme* }
gus = element l { attribute type { "gus" }, l.inner* }

# Variant: element v with a required varnum.
var =
  element v {
    attribute varnum { xsd:NMTOKEN },
    l.inner
  }

# alignment groups
ag =
  element ag {
    attribute ref { xsd:string { pattern = "[a-z]+" } },
    attribute form { text }?,
    l.inner*
  }

# Surrogate element wrapping inline line content.
surro = element surro { l.inner }
# `|=` adds an optional surro as a further alternative to `words` and `word`,
# whose base definitions are not in this file (presumably gdl.rnc -- verify).
words |= surro?
word |= surro?

# A composite text: identifying attributes, signature definitions, optional
# start protocols, the composite content, then (referto, optional comments)
# pairs.
composite =
  element composite {
    composite-attlist,
    attribute hand { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    sigdef*,
    proto.start?,
    composite-content,
    ( referto, comments? )*
  }
composite-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?

# Content shared by composite and div: any mix of the listed patterns.
composite-content =
  ( milestone
  | \include
  | \div
  | variants
  | lg
  | l
  | comments
  | nonl
  | nonx
  | proto.inter
  )*

# include and referto carry the same attribute set and no content.
\include = element include { increfAttr }
referto  = element referto { increfAttr }

# ref and n are required; from is optional, and to is allowed only with from.
increfAttr =
  attribute ref { text },
  attribute n { text },
  ( attribute from { text }, attribute to { text }? )?

# A division: its attributes, then the same content model as a composite.
\div = element div { div-attlist, composite-content }

# type is the only required div attribute.
div-attlist &=
  attribute type { xsd:NMTOKEN },
  attribute subtype { text }?,
  attribute xml:id { xsd:ID }?,
  attribute n { text }?,
  attribute lang { text }?,
  attribute place { text }?

# A variants element holds zero or more variant elements.
variants = element variants { variant* }

# A variant has no attributes; it may nest further variants.
variant =
  element variant {
    ( \div | variants | lg | l | comments | nonl | nonx | proto.inter )*
  }

# NOTE(review): neither score nor synopticon is reachable from `start` in the
# visible grammar -- confirm whether they are used elsewhere.
# Score: attributes, signature definitions, then any mix of the listed content.
score =
  element score {
    score-attlist, sigdef*, (milestone | \div | lg | comments | nonl)*
  }
score-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?
# Synopticon: attributes, signature definitions, then eg groups, comments or nonl.
synopticon =
  element synopticon { synopticon-attlist, sigdef*, (eg | comments | nonl)* }
synopticon-attlist &=
  attribute xml:id { xsd:ID },
  attribute n { text },
  attribute xml:lang { xsd:NMTOKEN }?
# Signature definition: an empty element with three required attributes.
sigdef = element sigdef { sigdef-attlist, empty }
sigdef-attlist &=
  attribute xml:id { xsd:ID },
  attribute targ-id { xsd:NMTOKEN },
  attribute targ-n { text }
# Group of e elements with an optional xml:id.
eg = element eg { eg-attlist, e* }
eg-attlist &= attribute xml:id { xsd:ID }?
# An e element, used inside eg and lg. Content is inline line content, one or
# more cells, or one or more fields.
# The cell alternative must name the `cell` pattern (which defines element c),
# as the `l` content model does; a bare `c` names no pattern in this grammar.
e =
  element e {
    e-attlist,
    (l.inner
     | cell+
     | f+)
  }
# All e attributes are optional; sigref must be an IDREF.
e-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute sigref { xsd:IDREF }?,
  attribute n { text }?,
  attribute l { text }?,
  attribute p { text }?,
  attribute hlid { text }?,
  attribute plid { text }?

Preamble

This document is a work in progress; the schema is correct and defines the XML output format produced by atf2xtf. Developer documentation is not yet included here, but the tutorial is essentially complete.

Most elements in an XTF file are in either the XTF or GDL namespaces, the latter being defined in the included GDL specification. The n namespace is used for normalized text as described below.

The macro structure of any XTF file produced by the ATF processor is always an outer container, the xtf element, which contains optional outer protocols followed by zero or more transliterations and/or composite texts.

We allow transliteration and composite as start elements to simplify the ATF processor's internal validation of texts.

# Unprefixed element names in this grammar belong to the XTF2 namespace.
default namespace = "http://emegir.info/xtf/2"
# External grammars merged into this one.
include "gdl.rnc"
include "xtr.rnc"
# Permitted document elements. `translation` is not defined in this file
# (presumably supplied by xtr.rnc -- verify).
start = xtf | translation | transliteration | composite | atf
# Outer container: optional outer protocols, then any mix of the four text kinds.
xtf = element xtf { proto.outer? , (atf | transliteration | composite | translation)* }
# Text-only element with a required xml:id.
atf = element atf { attribute xml:id   { xsd:ID } , text }

#-lines

The other quite common type of line in an ATF file begins with the hash sign (#). There are two kinds of #-line: protocols and comments.

Protocols

Protocols are statements which are interpreted or stored by the ATF processor but are not part of the text edition proper. Protocols are all named and may trigger special processing within the ATF processor.

With the exception of #note:, protocols must occur on a single line; multiple protocols do not need blank lines between them except for multiple #note: protocols which behave like comments.

Protocols are divided into four classes:

outer
protocols which may only occur at the very beginning of the document; only #basket: may occur in this location.
start
protocols which may occur at the start of a text; only #atf:, #bib:, #link:, #note: and #version: may occur in this location.
after
protocols which may occur only after all other protocols have been given in a particular section; only #note: may occur in this location. Other protocols are not required before #note:, but if they are present they must precede it.
inter
protocols which may occur between lines of a text; only #bib:, #lem:, #note: and #var: may occur in this location.
#bib: MSL 14, 343

1. a
#lem: a[water]
#note: This can only occur after any protocols other than #note:.

Protocols which may be given explicitly by users in an ATF file are: atf; basket; bib; lem; lemmatizer; link; note; syntax; var; version.

Note that the #link: protocol handles only a subset of intertext linkage; link protocols in XTF may also originate from the || << >> operator set. See the link protocol documentation for further details. The #note: protocol does not generate a protocol node; it generates a note element.

# Outer protocols: a scoped container wrapping a single basket protocol.
proto.outer =
  element protocols {
    attribute scope { text },
    proto.basket
  }

# Start protocols: a scoped container wrapping any number of the listed
# protocol kinds, in any order.
proto.start =
  element protocols {
    attribute scope { text },
    ( proto.atf
    | proto.bib
    | proto.etcsl
    | proto.key
    | proto.lemmatizer
    | proto.link
    | proto.project
    | proto.syntax
    | proto.version
    )*
  }

# After protocols: only the note protocol.
proto.after = proto.note

# Inter protocols: the kinds allowed between lines of a text.
proto.inter =
    proto.bib
  | proto.etcsl
  | proto.lem
  | proto.link
  | proto.note
  | proto.var

# Individual protocols: each is a `protocol` element whose fixed `type`
# value names the protocol and whose content is plain text.
proto.atf        = element protocol { attribute type { "atf" }, text }
proto.basket     = element protocol { attribute type { "basket" }, text }
proto.bib        = element protocol { attribute type { "bib" }, text }
proto.etcsl      = element protocol { attribute type { "etcsl" }, text }
proto.key        = element protocol { attribute type { "key" }, text }
proto.lem        = element protocol { attribute type { "lem" }, text }
proto.lemmatizer = element protocol { attribute type { "lemmatizer" }, text }
proto.link       = element protocol { attribute type { "link" }, text }
proto.note       = element protocol { attribute type { "note" }, text }
proto.project    = element protocol { attribute type { "project" }, text }
proto.syntax     = element protocol { attribute type { "syntax" }, text }
proto.var        = element protocol { attribute type { "var" }, text }
proto.version    = element protocol { attribute type { "version" }, text }

# Free-text annotations: a note element or a cmt element, both text-only.
comments = note | cmt
note = element note { text }
cmt  = element cmt { text }

&-lines

&-lines are used to introduce a new text and consist of two parts: the ID and the name.

For transliterations of exemplars, the ID is a 'P' followed by six digits, e.g., P123456. This ID is assigned by CDLI and is the reference ID of the object in the main CDLI catalog; to get IDs for objects not in the CDLI catalog send an e-mail to cdli@cdli.ucla.edu.

The name of the text should be identical with the 'Designation' field in the CDLI main catalog; the ATF processor detects mismatches and reports the correct name. This mechanism is designed to provide a check that the P-number in the ID actually references the text the transliterator intends.

Transliterations are not the only data type which can be entered in ATF; the documentation on composite texts is kept in a separate document.

# A transliteration: identifying attributes, optional start protocols, then
# any mix of objects, non-object lines, comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute xml:lang { xsd:NMTOKEN },
    attribute hand { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    ( object | nonobject | comments | sealing )*
  }

# Shared attribute patterns.
n.attr    = attribute n { text }
# n restricted to exactly one lowercase ASCII letter.
n.attr.lc = attribute n { xsd:string { pattern = "[a-z]" } }
project   = attribute project { xsd:NMTOKEN }
haslinks  = attribute haslinks { xsd:boolean }
# The pattern is named maxcells, but the attribute it defines is `cols`.
maxcells  = attribute cols { xsd:nonNegativeInteger }

@-lines

@-lines are used for structural tags. Several kinds of structure may be indicated using this mechanism: physical structure, e.g., objects, surfaces; manuscript structure, i.e., columns; and document structure, e.g., divisions and colophons. For clarity, we describe here only the structural features which are permitted in object transliterations, i.e., texts with an ID beginning with P. Documentation of structural conventions for composite texts is given in the composites manual.

Objects

The kind of object on which the inscription being transliterated is written is designated using one of the following tags:

@tablet
The default, and therefore optional; object is a tablet.
@envelope
Tablets and envelopes with the same P number can be transliterated separately using this tag.
@prism
Object is a prism.
@bulla
Object is a bulla.
@fragment
Object is a fragment, with a fragment name (e.g., a letter) following the tag; may be used more than once to transliterate multiple fragments of an object, e.g.:
&P212121 = Some Fragmentary Object
@fragment a
1. a
@fragment b
1. a
@object
The generic object tag which must be followed by the type of the object, e.g. @object Stone wig.

Seals

A transliteration of the text inscribed on a physical seal object should be handled using the @object tag:

&P333444 = Some Seal
@object seal
1. da-da
2. dumu du-du
# An object is either implicit or addressable (xml:id + label). Its type is a
# known object kind, or the generic "object" together with a name in n.
object =
  element object {
    ( implicit
    | ( attribute xml:id { xsd:ID }, attribute label { text } )
    ),
    ( attribute type { known.object }
    | ( attribute type { user.object }, n.attr )
    ),
    status.flags,
    ( m.fragment | surface | sealing | comments | nonx )*
  }

known.object = xsd:string { pattern = "tablet|envelope|prism|bulla" }
user.object  = xsd:string { pattern = "object" }

# Content allowed directly in a transliteration outside any object.
nonobject = nonx

Surfaces

Surfaces are principally the physical surfaces:

@obverse, @reverse
Obverse and reverse.
@left, @right, @top, @bottom
Specifiable edges: left, right, top and bottom (as seen when looking at the obverse of the tablet).
@face
Conventional designation for surfaces of a prism; must be followed by single lowercase letter indicating the face, e.g.:
&P123321 = Some Prism
@prism
@face a
1. a
@face b
1. e
@surface
Generic surface tag which must be followed by name of surface, e.g.: @surface shoulder; @surface side a.
@edge
Generic edge tag; may be followed by single lowercase letter to name the edge similarly to @face.

Sealings

A transliteration of a sealing should be handled using the @seal tag included like a surface after the transliteration of the object on which the sealing occurs:

&P343434 = Some Sealed Tablet
1. a
$ seal 1

@seal 1
1. du-du

The use of $ seal anticipates the discussion of $-lines below; this mechanism can be used to indicate which sealings occur where on an object.

# A surface is either implicit or addressable via xml:id + label.
surface =
  element surface { 
    (implicit 
     | (attribute xml:id   { xsd:ID },
        attribute label    { text })),
    (proto.inter | column | nonx | m | comments)* ,
    # The type value decides the form of n: absent (known), one lowercase
    # letter (face), an optional lowercase letter (edge), or free text
    # (user/seal).
    (  attribute type { known.surface }
     |(attribute type { face.surface } , n.attr.lc)
     |(attribute type { edge.surface } , n.attr.lc?)
     |(attribute type { user.surface | seal.surface } , n.attr)
     ),
    primes?,
    status.flags
  }

# NOTE(review): "surface" is listed here and is also the user.surface value,
# so type="surface" validates both with and without n -- confirm intended.
known.surface =
  xsd:string {
    pattern="surface|obverse|reverse|left|right|top|bottom"
  }
face.surface = xsd:string { pattern="face" }
edge.surface = xsd:string { pattern="edge" }
user.surface = xsd:string { pattern="surface" }
seal.surface = xsd:string { pattern="seal" }

The scid attribute is intended for use in cross-referencing sealing instance transliterations to composite transliterations of sealings stored in an external database.

# A sealing: required xml:id, label and n; optional scid; then any mix of
# milestones, columns, nonx lines and comments.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label { text },
    attribute n { xsd:NMTOKEN },
    attribute scid { xsd:NMTOKEN }?,
    ( milestone | column | nonx | comments )*
  }

Columns

Columns are indicated with the @column tag, which may be omitted for single-column texts. Column numbers must be given in arabic numerals:

&P545454 = Some Columnar Text
@column 1
1. a
@column 2
1. e
# A column is either implicit or addressable (xml:id + label); n is required,
# o and primes are optional.
column =
  element column {
    ( implicit
    | ( attribute xml:id { xsd:ID }, attribute label { text } )
    ),
    attribute n { text },
    attribute o { text }?,
    primes?,
    ( milestone | lg | l | nonl | nonx | comments | proto.inter )*,
    status.flags
  }

Status

The status of some of the features indicated with @-lines can be indicated in a manner similar to that of graphemes; the notation is intended to be natural and to follow Assyriological conventions:

@obverse?

Meaning: status of obverse/reverse uncertain

@reverse!*

Meaning: collated; reverse correct despite designation in publication

Primes can be used where this makes sense:

@face a'

@column 3'
# primes attribute: one or more U+2032 (PRIME) characters. \x{2032} is a
# RELAX NG compact-syntax escape, resolved before the pattern is read.
primes = 
  attribute primes { xsd:string { pattern="\x{2032}+" } }

Milestones

For technical reasons it is impossible to interweave physical structure (of the kind described above for transliterated objects) and document structure (e.g., paragraph divisions). This limitation is resolved by recourse to milestones.

Divisions

Documentary divisions in a transliterated object are given using the @m tag, with the milestone type given after an equals sign and the division type following; an optional division name or number may follow the division type:

@m=division paragraph 1

@m=division colophon

Discourse

Simple support for discourse elements in administrative texts is provided using shorthands which are also implemented as milestones. These shorthands are @date, @summary, @witnesses:

&P787878 = Some Administrative Text
1. 1(disz) udu
2. da-da
3. szu ba-ti
@date
4. u4 1-kam
@left
@summary
1. 1(disz) udu
# A milestone is either a general marker or a discourse marker.
milestone = m | m.discourse

# General milestone: type is division or locator; subtype is optional.
m =
  element m {
    attribute type { "division" | "locator" },
    attribute subtype { xsd:NMTOKEN }?,
    text
  }

# Discourse milestone: subtype is required and drawn from a closed list.
m.discourse =
  element m {
    attribute type { "discourse" },
    attribute subtype {
      "body" | "date" | "linecount" | "witnesses" | "summary"
    },
    text
  }

# Locator milestone whose optional subtype, when present, is "fragment".
m.fragment =
  element m {
    attribute type { "locator" },
    attribute subtype { "fragment" }?,
    text
  }

Implied tags

The ATF processor supplies structural elements where they are implied by the transliteration and this is indicated in the XTF tree by use of the implicit attribute. For example, given:

&P121212 = Some Sparse Data
1. a

The following (schematic) element structure is generated:

<transliteration>
  <object>
    <surface>
      <column>

All of these elements have implicit="1".

N.B.: Implicit elements are not addressable by label or xml:id attributes; explicit object, surface and column indicators must be given if addressability is a requirement.

# Flag attribute; the only permitted value is "1".
implicit = attribute implicit { "1" }

$-lines

$-lines are used to indicate information about the state of the text or object, or to describe features on the object which are not part of the transliteration proper. They come in two flavours: strict and loose.

Strict $-lines are subject to the restrictions in the table below; strict $-lines can be interpreted in their entirety by the ATF processor and the interpreted information can then be used by other programs. Strict $-lines are the best practice.

Loose $-lines are indicated by putting parentheses around the contents of the $-line. This is a facility provided to enable annotation of features which are not covered by the strict $-line specification. If the ATF processor detects that a loose $-line actually meets the criteria defined for strict $-lines it gives an advisory notice that the parentheses should be removed.

$-lines and comments are two quite different facilities, but experience has shown that transliterators can confuse the two. Comments are for information which does not belong in the transliteration and description of the text; comments are not displayed when the text is formatted for display or print. $-lines are for information which is integral to an understanding of the textual data; $-lines are included when the text is displayed or printed.

Seal

A particular use of $-lines is to indicate that a seal is used on an object; the form is:

$ seal <N>

Where N is a number indicating which seal is used; if a transliteration of the seal is also given using the @seal heading, the number following $ seal should correspond to the number following @seal. See the example above.

State

Most $-lines are used to give information about the state of the object being transliterated. The conventions for this can be summarized as follows:

Summary of Strict $-line Conventions for States
Qualification | Extent [1] | Scope | State

Qualification: at least; at most; about
Extent [1]: n; several; some; NUMBER; RANGE; rest of; start of; beginning of; middle of; end of
Scope: OBJECT [2]; SURFACE [3]; column; columns; line; lines; case; cases; surface
State: blank; broken; effaced; illegible; missing; traces

[1] The extent N may be a number such as 1 or 5; a RANGE gives two numbers separated by a hyphen, e.g., 3-5.
[2] OBJECT is any object specifier as described above, e.g., tablet, object etc.
[3] SURFACE is any surface specifier as described above, e.g., obverse, left etc.

Rulings

$-lines are also used to indicate noteworthy rulings on the tablet; ordinary case- or line-ruling should not be indicated with a $-line, but where a scribe has used a ruling to give additional information about the document structure this should be noted as:

(single | double | triple)   ruling

Examples

Strict $-lines look like this:

$ 3 lines blank
$ rest of obverse missing

A loose $-line looks like this:

$ (head of statue broken)

A ruling $-line looks like this:

$ double ruling

Images

Inline images can be specified using the form:

$ (image N = <text>)

Where N is an image number consisting of digits followed by optional lowercase letters from a to z, and <text> is free text, giving a label for the image (which is copied through to the XHTML 'alt' attribute on the <img> tag).

$ (image 1 = numbered diagram of triangle)

At present, the implementation only works for XHTML which is produced within a project. The ATF processor constructs a file name consisting of the text ID and the image's N value, joined by an at sign (e.g., P123456@1). The XHTML producer then emits an <img> tag with the src attribute set to /<PROJECT>/<FILENAME>.png.

Thus, in the present implementation, there must exist an appropriately named file in the PNG graphics format residing in the project's images directory. The implementation is expected to support a more sophisticated locator mechanism in the future.

# Non-text elements: each is an attribute list followed by text content.
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }

# nonx attributes: required xml:id, an optional label+silent pair (both or
# neither), then exactly one of four shapes.
nonx-attlist =
  attribute xml:id { xsd:ID },
  (attribute label { text },
   attribute silent { "1" })?,
  # Shape 1 (strict="1"): ref+scope, or extent+scope+state, all required.
  ((attribute strict { "1" },
   ((attribute ref    { text },
     attribute scope  { text })
   |(attribute extent { text },
     attribute scope  { text },
     attribute state  { text })))
  |
   # Shape 2 (strict="0"): every descriptive attribute is optional.
   (attribute strict { "0" },
    attribute extent { text }?,
    attribute ref    { text }?,
    attribute scope  { text }?,
    attribute state  { text }?)
  |
   # Shape 3 (strict="0"): empty marker with ref="none".
   (attribute strict { "0" },
    attribute ref    { "none" },
    attribute type   { "empty" })
  |
   # Shape 4 (strict="0"): image; ref is P/Q/X + digits, "@", digits, then
   # optional lowercase letters; alt is required.
   (attribute type   { "image" },
    attribute strict { "0" },
    attribute ref    { xsd:string {
                          pattern="[PQX][0-9]+@[0-9]+[a-z]*" 
		       }},
    attribute alt    { text })
  )

# Attribute set shared by the three attlists below.
non-x-attr-set =
  attribute type {
    "newline" | "broken" | "maybe-broken" | "traces"
    | "maybe-traces" | "blank" | "ruling" | "image"
    | "seal" | "comment" | "bullet" | "other"
  },
  attribute unit { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref { text }?,
  attribute xml:id { xsd:ID }?
# NOTE(review): noncolumn-attlist is not referenced in the visible part of
# this file -- confirm whether it is still needed.
noncolumn-attlist &= non-x-attr-set
nonl-attlist &= non-x-attr-set
nong-attlist &= non-x-attr-set

Text Lines

Lines of transliterated text begin with a sequence of non-space characters followed by a period and a space (these are typically numbers, but that is not a requirement):

1.   a
a+1. e
2'.  i
# A text line: xml:id and n are required. Content is one or more cells, one
# or more fields, or any mix of alignment groups and inline content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute o { text }?,
    attribute l { text }?,
    attribute label { text }?,
    attribute silent { "1" }?,
    ( cell+
    | f+
    | ( ag | l.inner )*
    )
  }

# Inline line content: any sequence of the four inline patterns.
l.inner = ( surro | normword | words | glo )*

Advanced

Line Numbers

By default the ATF processor renumbers lines, storing the original line number and generating a new one according to consistently defined rules. This procedure was adopted because of the lack of consistency in numbering administrative texts.

It is possible to suppress this behaviour and, indeed, it is necessary to suppress this behaviour if intertext linking is in use. The relevant protocol to achieve this is:

#atf: use mylines

Cells & Fields

Two mechanisms provide structural subdivisions of lines: cells and fields.

Cells are alignment units (like table cells); they can be of use to organize the data in a way that mimics the layout on the object. Fields are logical subdivisions in a line which are not necessarily laid out in a special way on the object. Cells can contain fields but fields cannot contain cells; fields are lower in the structural hierarchy than cells.

Fields can have a type specified so that higher order processors working with the XTF data can work intelligently with them.

# A cell is element c: optional span attribute, then either one or more
# fields or inline content.
cell =
  element c {
    span?,
    ( f+ | l.inner )
  }
span = attribute span { xsd:nonNegativeInteger }

# A field: type is the only required attribute.
f =
  element f {
    f-attlist,
    ( ag | l.inner )*
  }
f-attlist &=
  attribute type { xsd:NMTOKEN },
  attribute xml:id { xsd:ID }?,
  attribute n { text }?,
  attribute xml:lang { xsd:NMTOKEN }?

Streams

Streams are XTF's mechanism for entering data several times in several different ways; no automatic alignment is done between streams, but an alignment-group mechanism is provided for those occasions where alignment is a requirement. There are four kinds of stream in XTF:

MTS: Main Transliteration Stream
This is the default line-type and is the only one that is normally used. Lemmatization information is aligned with the MTS unless there is an NTS.
NTS: Normalized Transliteration Stream
This is a transliteration stream in which adjustments have been made to normalize the text; a normal-orthography version of an emesal text could be created using this mechanism, for example. Lemmatization information is aligned with the NTS if present. If NTS and LGS are both given, NTS must come before LGS.
LGS: Linearized Grapheme Stream
This is the sequence of graphemes exactly in order and linearized to the extent possible; this is mainly used in transliterations of ED texts where the presumed reading sequence and the actual grapheme sequence often diverge. No alignment is ever done with the LGS.
GUS: Gloss Underneath Stream
Implemented for compatibility with the SAA corpus, this stream allows glosses which appear on the tablet underneath the main text line to be given in their own line.
# Line group: a main line, an optional gus line, then one of
#   nts alone, lgs alone, nts followed by lgs, or any run of e/comments;
# finally inter protocols and variants.
lg =
  element lg {
    attribute xml:id { xsd:ID }?,
    attribute n { text }?,
    l,
    gus?,
    ( (nts, lgs?)
    | lgs
    | (e | comments)*
    ),
    proto.inter*,
    var*
  }

# The extra streams are l elements told apart by their type attribute.
nts = element l { attribute type { "nts" }, (ag | l.inner)* }
lgs = element l { attribute type { "lgs" }, grapheme* }
gus = element l { attribute type { "gus" }, l.inner* }

# Variant: element v with a required varnum.
var =
  element v {
    attribute varnum { xsd:NMTOKEN },
    l.inner
  }

# alignment groups
ag =
  element ag {
    attribute ref { xsd:string { pattern = "[a-z]+" } },
    attribute form { text }?,
    l.inner*
  }

Zones

Zones are an experimental feature; at the schema level they are defined in the GDL, but it is convenient to discuss them here because they are another mechanism for grouping graphemes. The concept is that part of an inscription, e.g., a case, may exhibit ordering which may not be linear but is nevertheless based on some spatial relationship between signs. Transliterators can assign graphemes to zones and label the graphemes by zone.

See the GDL documentation under Presence for surrogates.

# Surrogate element wrapping inline line content.
surro = element surro { l.inner }
# `|=` adds an optional surro as a further alternative to `words` and `word`,
# whose base definitions are not in this file (presumably gdl.rnc -- verify).
words |= surro?
word |= surro?

Composites

@composite

Composite texts by convention have an ID beginning with Q and are declared by an @-line which immediately follows the &-line for the text:

&Q000002 = Archaic Lu A
@composite

To obtain an ID for a composite text e-mail stinney@sas.upenn.edu.

Structure

Most of the @-lines which are permitted in transliterations are not permitted in composites; this is because composites are organized around documentary structure rather than the structure of a physical object. The one exception is that milestones are allowed in composites.

Documentary divisions are indicated in ATF by use of the @div tag, which is followed by the kind of division (e.g., part, colophon) and an optional name for the division. The @div tag requires a closing @end tag, which must take as its single argument the kind of division given on its corresponding opening @div. @div's of different kinds may not be interwoven.

The @div tag maps to the DIV element in XTF. The first NMTOKEN which follows the @div is the name of the division and is stored in the @TYPE attribute. The remainder of the line is stored in the @N attribute.

 
@div part 1
...
@end part

@div colophon
...
@end colophon

In the liturgical corpus (including ETCSL editions of texts which could reasonably be considered liturgical), kirugu and other rubrics are used as logical structures, and they contain subdivisions giving the actual rubric; this is supported with the following syntax:

@div kirugu 1
1.  tur3-ra-na ...

@div rubric kirugu
10. ki-ru-gu2 1(disz)-a-kam
@end rubric

@end kirugu

@div giszgigal 1
11. u2-a a-u3-a u2-a-u2-a

@div rubric giszgigal
12. gisz-gi4-gal2-bi-im
@end rubric

@end giszgigal

Locator

A physical location may be given in a composite by using the locator milestone; the content after locator is a label. This is intended for use when the documentary structure of composites is being used to edit a text which is preserved only in one exemplar (the ePSD royal inscriptions corpus edits all royal inscriptions as composites):

1. a
@m=locator o 1

Variants

Variants are implemented to support the ETCSL corpus but may be used in any composite.

# <composite>: the attributes from composite-attlist, any number of sigdef
# items, an optional hand attribute, the optional project / implicit /
# haslinks / maxcells / proto.start patterns, the composite content, and
# finally any number of referto items, each optionally followed by comments.
composite = element composite {
  composite-attlist,
  sigdef*,
  attribute hand { text }?,
  project?,
  implicit?,
  haslinks?,
  maxcells?,
  proto.start?,
  composite-content,
  (referto, comments?)*
}
# Attributes of <composite>: xml:id and n are required, xml:lang is optional.
# Declared with &=, so it interleave-combines with any other definition of
# composite-attlist.
composite-attlist &=
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute xml:lang { xsd:NMTOKEN }?

# Content model used by both <composite> and <div>: any number of the
# following patterns, in any order. \include and \div are backslash-escaped
# because include and div are RELAX NG compact-syntax keywords.
composite-content =
  ( milestone
  | \include
  | \div
  | variants
  | lg
  | l
  | comments
  | nonl
  | nonx
  | proto.inter
  )*

# <include> and <referto> have no child content; both carry exactly the
# attribute set defined by increfAttr.
\include =
  element include { increfAttr }

referto =
  element referto { increfAttr }

# Shared attributes: ref and n are required. from is optional, and to is
# permitted only when from is also present.
increfAttr =
  (
    attribute ref { text },
    attribute n { text },
    (
      attribute from { text },
      attribute to { text }?
    )?
  )

# <div>: the div-attlist attributes followed by composite-content. Because
# composite-content itself allows \div, divisions may nest.
\div = element div {
  div-attlist,
  composite-content
}
# Attributes of <div>: only type (an NMTOKEN) is required; xml:id, n, lang,
# place and subtype are all optional. Declared with &=, so it
# interleave-combines with any other definition of div-attlist.
div-attlist &=
    attribute xml:id { xsd:ID }?,
    attribute n { text }?,
    attribute type { xsd:NMTOKEN },
    attribute lang { text }?,
    attribute place { text }?,
    attribute subtype { text }?

# <variants>: a container holding zero or more <variant> children.
variants =
  element variants { variant* }

# <variant>: any number of the listed patterns, in any order. Since variants
# is among them, variant sets may nest.
variant = element variant {
  ( \div
  | variants
  | lg
  | l
  | comments
  | nonl
  | proto.inter
  | nonx
  )*
}
# <score>: the score-attlist attributes, any number of sigdef items, then any
# mix of milestone, div, lg, comments and nonl.
score = element score {
  score-attlist,
  sigdef*,
  (milestone | \div | lg | comments | nonl)*
}
# Attributes of <score>: xml:id and n are required, xml:lang is optional.
# Declared with &= (interleave-combine).
score-attlist &=
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute xml:lang { xsd:NMTOKEN }?
# <synopticon>: the synopticon-attlist attributes, any number of sigdef
# items, then any mix of eg, comments and nonl.
synopticon = element synopticon {
  synopticon-attlist,
  sigdef*,
  (eg | comments | nonl)*
}
# Attributes of <synopticon>: xml:id and n are required, xml:lang is
# optional. Declared with &= (interleave-combine).
synopticon-attlist &=
    attribute xml:id { xsd:ID },
    attribute n { text },
    attribute xml:lang { xsd:NMTOKEN }?
# <sigdef>: an empty element that carries only the sigdef-attlist attributes.
sigdef =
  element sigdef {
    sigdef-attlist,
    empty
  }
# Attributes of <sigdef>, all required: xml:id (an ID), targ-id (an NMTOKEN)
# and targ-n (free text). Declared with &= (interleave-combine).
sigdef-attlist &=
    attribute xml:id { xsd:ID },
    attribute targ-id { xsd:NMTOKEN },
    attribute targ-n { text }
# <eg>: the eg-attlist attributes followed by zero or more <e> children.
eg =
  element eg {
    eg-attlist,
    e*
  }
# Attributes of <eg>: a single optional xml:id. Declared with &=
# (interleave-combine).
eg-attlist &=
    attribute xml:id { xsd:ID }?
# <e>: the e-attlist attributes followed by exactly one of three content
# forms: a single l.inner, one or more c, or one or more f.
e = element e {
  e-attlist,
  ( l.inner
  | c+
  | f+
  )
}
# Attributes of <e>, every one optional. sigref is typed as an IDREF, so it
# must match an ID declared somewhere in the same document. Declared with &=
# (interleave-combine).
e-attlist &=
    attribute xml:id { xsd:ID }?,
    attribute sigref { xsd:IDREF }?,
    attribute n { text }?,
    attribute l { text }?,
    attribute p { text }?,
    attribute hlid { text }?,
    attribute plid { text }?

Resources

P123456.xtf
.
RTF.pm
Perl module CDL::XTF2::RTF.
akk.xtf
.
atf2xtf2.plx
.
atfsplit2.plx [listing]
Split up ATF files into their constituent PQ-files.
charset.rnc
Charset Relax NG Compact Syntax grammar.
charset.rng
Charset Relax NG grammar.
escape-quotes.xsl
XSL transform from escape to quotes.
example.rnc
Example Relax NG Compact Syntax grammar.
finishcnc.plx
.
gdl.rnc
Gdl Relax NG Compact Syntax grammar.
gdl.rng
Gdl Relax NG grammar.
grapheme.rnc
Grapheme Relax NG Compact Syntax grammar.
grapheme.rng
Grapheme Relax NG grammar.
graphmeta.rnc
Graphmeta Relax NG Compact Syntax grammar.
graphmeta.rng
Graphmeta Relax NG grammar.
hash.xtf
.
html-text.xsl
XSL transform from html to text.
label-info.xml
.
lemcheck.plx
.
metadata.rnc
Metadata Relax NG Compact Syntax grammar.
scoreblock.xsl
Mysterious XSL transformation.
scoregen.plx
.
test-xtf2.xtf
.
testforms.plx
.
words.rnc
Words Relax NG Compact Syntax grammar.
words.rng
Words Relax NG grammar.
xtf-HTML.xsl
XSL transform from xtf to HTML.
xtf-XCL.xsl
XSL transform from xtf to XCL.
xtf2.rnc
XTF2 Relax NG Compact Syntax grammar.
xtf2.rng
XTF2 Relax NG grammar.
xtf2.xdf
XDF source for this documentation.
xtf2rtf.plx
.
xtf2txh.plx [listing]
Wrapper for /usr/local/share/cdl/lib/scripts/xtf-HTML.xsl.
xtflemma.plx [listing]
Lemmatization tool for XTF2 files.
xtfmanager.plx [listing]
Manage the generation of XTF and derivatives.
xtfscore.plx
.
xtr.rnc
Xtr Relax NG Compact Syntax grammar.
xtr.rng
Xtr Relax NG grammar.

Links

Top

Tutorial

GDL Manual

Advanced

Composites

Lexical

Linkage

Protocols


Questions about this document may be directed to Steve Tinney (stinney at sas dot upenn dot edu).