XTF2 is an XML format for describing the transliteration of cuneiform texts; it also encompasses facilities for other kinds of editions commonly used in cuneiform studies.
default namespace = "http://emegir.info/xtf/2"
include "gdl.rnc"
include "xtr.rnc"
# Entry points: the <xtf> wrapper, or any single text-level element on its
# own. `translation` is not defined in this file (presumably supplied by one
# of the includes).
start =
    xtf
  | atf
  | composite
  | translation
  | transliteration

# Outer container: optional outer protocols followed by any mix of texts.
xtf =
  element xtf {
    proto.outer?,
    (atf | composite | translation | transliteration)*
  }

# Text content identified by a required xml:id.
atf =
  element atf {
    attribute xml:id { xsd:ID },
    text
  }
# Outer protocols: the <protocols> wrapper allowed directly under <xtf>.
# It carries a scope attribute and exactly one "basket" protocol.
proto.outer =
  element protocols {
    attribute scope { text },
    proto.basket
  }

# Start-of-text protocols: any number of the listed protocol types, in any
# order, wrapped in a <protocols> element with a scope attribute.
proto.start =
  element protocols {
    attribute scope { text },
    ( proto.atf
    | proto.bib
    | proto.etcsl
    | proto.key
    | proto.lemmatizer
    | proto.link
    | proto.project
    | proto.syntax
    | proto.version
    )*
  }

# Alias for the note protocol; no other definition visible in this file
# references proto.after.
proto.after = proto.note

# Protocols that may be interleaved with content (referenced by surface,
# column, lg, composite-content and variant).
proto.inter =
    proto.bib
  | proto.etcsl
  | proto.lem
  | proto.link
  | proto.note
  | proto.var
# Individual protocols. Every one is a <protocol> element with text content;
# the patterns differ only in the fixed value of the type attribute.
proto.atf        = element protocol { attribute type { "atf" },        text }
proto.basket     = element protocol { attribute type { "basket" },     text }
proto.bib        = element protocol { attribute type { "bib" },        text }
proto.etcsl      = element protocol { attribute type { "etcsl" },      text }
proto.key        = element protocol { attribute type { "key" },        text }
proto.lem        = element protocol { attribute type { "lem" },        text }
proto.lemmatizer = element protocol { attribute type { "lemmatizer" }, text }
proto.link       = element protocol { attribute type { "link" },       text }
proto.note       = element protocol { attribute type { "note" },       text }
proto.project    = element protocol { attribute type { "project" },    text }
proto.syntax     = element protocol { attribute type { "syntax" },     text }
proto.var        = element protocol { attribute type { "var" },        text }
proto.version    = element protocol { attribute type { "version" },    text }
# Free-text asides: either a <cmt> or a <note> element.
comments = note | cmt

cmt  = element cmt  { text }
note = element note { text }
# A transliterated text. xml:id, n and xml:lang are required; the remaining
# attributes are optional. Content is an optional start-protocol block
# followed by any mix of objects, non-object lines, comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id   { xsd:ID },
    attribute n        { text },
    attribute hand     { text }?,
    attribute xml:lang { xsd:NMTOKEN },
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    (object | nonobject | comments | sealing)*
  }

# Shared attribute patterns.
n.attr    = attribute n { text }
# n restricted to exactly one lowercase ASCII letter.
n.attr.lc = attribute n { xsd:string { pattern = "[a-z]" } }
haslinks  = attribute haslinks { xsd:boolean }
# Note: the pattern is named maxcells but the attribute it defines is `cols`.
maxcells  = attribute cols { xsd:nonNegativeInteger }
project   = attribute project { xsd:NMTOKEN }
# A physical object. It is either implicit, or addressable through both
# xml:id and label. The type is one of the known object kinds, or the
# generic "object" type, which additionally requires an n attribute.
# status.flags is not defined in this file (presumably from an include).
object =
  element object {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    ( attribute type { known.object }
    | ( attribute type { user.object }, n.attr )
    ),
    status.flags,
    (m.fragment | surface | sealing | comments | nonx)*
  }

known.object = xsd:string { pattern = "tablet|envelope|prism|bulla" }
user.object  = xsd:string { pattern = "object" }

# Content allowed directly in a transliteration outside any object.
nonobject = nonx
# A surface of an object. It is either implicit, or addressable through both
# xml:id and label. Type rules:
#   - a known surface name needs no n attribute;
#   - "face" requires n as a single lowercase letter;
#   - "edge" allows n as a single lowercase letter;
#   - "surface" (user) and "seal" require a free-text n.
# The value "surface" is accepted by both known.surface and user.surface.
# Attribute patterns are listed first here; attribute position inside a
# RELAX NG group has no effect on validation.
surface =
  element surface {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    ( attribute type { known.surface }
    | ( attribute type { face.surface }, n.attr.lc )
    | ( attribute type { edge.surface }, n.attr.lc? )
    | ( attribute type { user.surface | seal.surface }, n.attr )
    ),
    primes?,
    status.flags,
    (proto.inter | column | nonx | m | comments)*
  }

known.surface =
  xsd:string { pattern = "surface|obverse|reverse|left|right|top|bottom" }
face.surface = xsd:string { pattern = "face" }
edge.surface = xsd:string { pattern = "edge" }
user.surface = xsd:string { pattern = "surface" }
seal.surface = xsd:string { pattern = "seal" }
# A sealing: xml:id, label and n are required, scid is optional.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label  { text },
    attribute n      { xsd:NMTOKEN },
    attribute scid   { xsd:NMTOKEN }?,
    (column | nonx | milestone | comments)*
  }
# A column. It is either implicit, or addressable through both xml:id and
# label; n is always required. Attribute patterns are listed first here;
# attribute position inside a RELAX NG group has no effect on validation.
column =
  element column {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    attribute n { text },
    attribute o { text }?,
    primes?,
    status.flags,
    (milestone | lg | l | nonl | nonx | comments | proto.inter)*
  }

# One or more U+2032 characters.
primes =
  attribute primes {
    xsd:string { pattern = "\x{2032}+" }
  }
# Milestones are <m> elements whose type attribute selects the flavour.
milestone = m | m.discourse

# Division or locator milestone with an optional NMTOKEN subtype.
m =
  element m {
    attribute type    { "division" | "locator" },
    attribute subtype { xsd:NMTOKEN }?,
    text
  }

# Discourse milestone; the subtype is required and comes from a closed list.
m.discourse =
  element m {
    attribute type { "discourse" },
    attribute subtype {
      "body" | "date" | "linecount" | "witnesses" | "summary"
    },
    text
  }

# Locator milestone whose subtype, when present, must be "fragment".
m.fragment =
  element m {
    attribute type    { "locator" },
    attribute subtype { "fragment" }?,
    text
  }

# Marks a structural element as implicit; the only permitted value is "1".
implicit = attribute implicit { "1" }
# Non-transliteration elements. Each is text content plus its own attribute
# list. nonl-attlist and nong-attlist are not defined in this block.
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }
# Attributes of <nonx>: a required xml:id, an optional label+silent pair
# (both or neither), then exactly one of four alternative attribute groups.
nonx-attlist =
attribute xml:id { xsd:ID },
(attribute label { text },
attribute silent { "1" })?,
# Alternative 1: strict="1" with either ref+scope or extent+scope+state.
((attribute strict { "1" },
((attribute ref { text },
attribute scope { text })
|(attribute extent { text },
attribute scope { text },
attribute state { text })))
|
# Alternative 2: strict="0"; every other attribute is optional.
(attribute strict { "0" },
attribute extent { text }?,
attribute ref { text }?,
attribute scope { text }?,
attribute state { text }?)
|
# Alternative 3: strict="0" with ref="none" and type="empty".
(attribute strict { "0" },
attribute ref { "none" },
attribute type { "empty" })
|
# Alternative 4: an image; ref must match the pattern below (one of P, Q or
# X, digits, "@", digits, then optional lowercase letters) and alt is
# required.
(attribute type { "image" },
attribute strict { "0" },
attribute ref { xsd:string {
pattern="[PQX][0-9]+@[0-9]+[a-z]*"
}},
attribute alt { text })
)
# Common attribute set: a required type from a closed list, plus optional
# unit, extent, ref and xml:id.
non-x-attr-set =
  attribute type {
      "newline"
    | "broken"
    | "maybe-broken"
    | "traces"
    | "maybe-traces"
    | "blank"
    | "ruling"
    | "image"
    | "seal"
    | "comment"
    | "bullet"
    | "other"
  },
  attribute unit   { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref    { text }?,
  attribute xml:id { xsd:ID }?

# Interleave-combine the shared set into each attribute list.
# NOTE(review): noncolumn-attlist is not referenced by any definition
# visible in this file; confirm whether it is still needed.
noncolumn-attlist &= non-x-attr-set
nonl-attlist      &= non-x-attr-set
nong-attlist      &= non-x-attr-set
# A line of text. xml:id and n are required. Content is one of: one or more
# cells, one or more fields, or any mix of alignment groups and inline
# content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n      { text },
    attribute o      { text }?,
    attribute l      { text }?,
    attribute label  { text }?,
    attribute silent { "1" }?,
    ( cell+
    | f+
    | (ag | l.inner)*
    )
  }

# Inline line content. normword, words and glo are not defined in this file
# (presumably supplied by an include).
l.inner = (surro | normword | words | glo)*
# A cell (<c>): optional span, then either one or more fields or inline
# content.
cell =
  element c {
    span?,
    (f+ | l.inner)
  }

span = attribute span { xsd:nonNegativeInteger }

# A field (<f>): its attributes, then any mix of alignment groups and inline
# content.
f =
  element f {
    f-attlist,
    (ag | l.inner)*
  }

# Only type is required on a field.
f-attlist &=
  attribute xml:id   { xsd:ID }?,
  attribute n        { text }?,
  attribute type     { xsd:NMTOKEN },
  attribute xml:lang { xsd:NMTOKEN }?
# A line group: a main line, an optional "gus" line, then one of
#   - an "nts" line, optionally followed by an "lgs" line,
#   - an "lgs" line alone,
#   - any number of <e> elements and comments (possibly none),
# followed by interleaved protocols and then variants.
# The shared prefix `l, gus?` is written once; the accepted sequences are
# unchanged.
lg =
  element lg {
    attribute xml:id { xsd:ID }?,
    attribute n      { text }?,
    l,
    gus?,
    ( (nts, lgs?)
    | lgs
    | (e | comments)*
    ),
    proto.inter*,
    var*
  }

# The secondary lines are all <l> elements distinguished by a fixed type.
# grapheme is not defined in this file (presumably supplied by an include).
nts = element l { attribute type { "nts" }, (ag | l.inner)* }
lgs = element l { attribute type { "lgs" }, grapheme* }
gus = element l { attribute type { "gus" }, l.inner* }

# A variant (<v>) identified by a required varnum.
var =
  element v {
    attribute varnum { xsd:NMTOKEN },
    l.inner
  }
# Alignment groups: ref is one or more lowercase ASCII letters.
ag =
  element ag {
    attribute ref  { xsd:string { pattern = "[a-z]+" } },
    attribute form { text }?,
    l.inner*
  }

# Surrogate wrapper around inline content.
surro = element surro { l.inner }

# Choice-combine an optional surrogate into `words` and `word`; their base
# definitions are not in this file (presumably supplied by an include).
words |= surro?
word  |= surro?
# A composite text. Child elements must appear in this order: sigdefs, the
# optional start-protocol block, the composite content, then any number of
# referto elements each optionally followed by one comment.
# Attribute patterns are listed first here; attribute position inside a
# RELAX NG group has no effect on validation.
composite =
  element composite {
    composite-attlist,
    attribute hand { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    sigdef*,
    proto.start?,
    composite-content,
    (referto, comments?)*
  }

# xml:id and n are required; xml:lang is optional.
composite-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?
# Content shared by <composite> and <div>. The leading backslash on
# \include and \div lets RNC keywords be used as pattern names.
composite-content =
  ( milestone
  | \include
  | \div
  | variants
  | lg
  | l
  | comments
  | nonl
  | nonx
  | proto.inter
  )*

\include = element include { increfAttr }
referto  = element referto { increfAttr }

# Reference attributes: ref and n are required; from is optional, and to may
# only be given together with from.
increfAttr =
  attribute ref { text },
  attribute n   { text },
  ( attribute from { text },
    attribute to   { text }?
  )?

# A division nests the same content model as the composite itself.
\div =
  element div {
    div-attlist,
    composite-content
  }

# Only type is required on a division.
div-attlist &=
  attribute xml:id  { xsd:ID }?,
  attribute n       { text }?,
  attribute type    { xsd:NMTOKEN },
  attribute lang    { text }?,
  attribute place   { text }?,
  attribute subtype { text }?

variants = element variants { variant* }

# A variant allows the composite content minus milestone and \include.
variant =
  element variant {
    ( \div
    | variants
    | lg
    | l
    | comments
    | nonl
    | proto.inter
    | nonx
    )*
  }
# NOTE(review): neither score nor synopticon is referenced by start or by
# any other definition visible in this file.
score =
  element score {
    score-attlist,
    sigdef*,
    (milestone | \div | lg | comments | nonl)*
  }

score-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?

synopticon =
  element synopticon {
    synopticon-attlist,
    sigdef*,
    (eg | comments | nonl)*
  }

synopticon-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?

# An empty element; all three attributes are required.
sigdef =
  element sigdef {
    sigdef-attlist,
    empty
  }

sigdef-attlist &=
  attribute xml:id  { xsd:ID },
  attribute targ-id { xsd:NMTOKEN },
  attribute targ-n  { text }
# A group of <e> elements with an optional xml:id.
eg =
  element eg {
    eg-attlist,
    e*
  }

eg-attlist &= attribute xml:id { xsd:ID }?

# An <e> element: inline content, or one or more `c`, or one or more fields.
# NOTE(review): `c` is not defined in this file; the pattern for element c
# here is named `cell`. Confirm that `c` resolves through an include and is
# the intended reference.
e =
  element e {
    e-attlist,
    ( l.inner
    | c+
    | f+
    )
  }

# Every attribute on <e> is optional.
e-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute sigref { xsd:IDREF }?,
  attribute n      { text }?,
  attribute l      { text }?,
  attribute p      { text }?,
  attribute hlid   { text }?,
  attribute plid   { text }?
This document is a work in progress; the schema is correct and defines the XML output format produced by atf2xtf. Developer documentation is not yet included here, but the tutorial is essentially complete.
Most elements in an XTF file are in either the XTF or GDL
namespaces, the latter being defined in the included GDL
specification. The n namespace is used for normalized
text as described below.
The macro structure of any XTF file produced by the ATF processor
is always an outer container, the xtf element, followed
by optional outer protocols and then zero or more transliterations
and/or composite texts.
We allow transliteration and composite as start elements to simplify the ATF processor's internal validation of texts.
default namespace = "http://emegir.info/xtf/2"
include "gdl.rnc"
include "xtr.rnc"
# Entry points: the <xtf> wrapper, or any single text-level element on its
# own. `translation` is not defined in this file (presumably supplied by one
# of the includes).
start =
    xtf
  | atf
  | composite
  | translation
  | transliteration

# Outer container: optional outer protocols followed by any mix of texts.
xtf =
  element xtf {
    proto.outer?,
    (atf | composite | translation | transliteration)*
  }

# Text content identified by a required xml:id.
atf =
  element atf {
    attribute xml:id { xsd:ID },
    text
  }
The other quite common type of line in an ATF file begins with the
hash sign (#). There are two kinds of #-line: protocols
and comments.
Protocols are statements which are interpreted or stored by the ATF processor but are not part of the text edition proper. Protocols are all named and may trigger special processing within the ATF processor.
With the exception of #note:, protocols must occur on
a single line; multiple protocols do not need blank lines between them
except for multiple #note: protocols which behave like
comments.
Protocols are divided into four classes:
- outer: #basket: may occur in this location.
- start: #atf:, #bib:, #link:, #note: and #version: may occur in this location.
- after: #note: may occur in this location. Other protocols are not required before #note:, but if they are present they must precede it.
- inter: #bib:, #lem:, #note: and #var: may occur in this location.

#bib: MSL 14, 343
1. a
#lem: a[water]
#note: This can only occur after any protocols other than #note:.
Protocols which may be given explicitly by users in an ATF file are: atf; basket; bib; lem; lemmatizer; link; note; syntax; var; version.
Note that the #link: protocol handles
only a subset of intertext linkage; link protocols in XTF may also
originate from the || << >> operator set. See
the link protocol documentation for further details. The
#note: protocol does not generate a protocol node; it
generates a note element.
# Outer protocols: the <protocols> wrapper allowed directly under <xtf>.
# It carries a scope attribute and exactly one "basket" protocol.
proto.outer =
  element protocols {
    attribute scope { text },
    proto.basket
  }

# Start-of-text protocols: any number of the listed protocol types, in any
# order, wrapped in a <protocols> element with a scope attribute.
proto.start =
  element protocols {
    attribute scope { text },
    ( proto.atf
    | proto.bib
    | proto.etcsl
    | proto.key
    | proto.lemmatizer
    | proto.link
    | proto.project
    | proto.syntax
    | proto.version
    )*
  }

# Alias for the note protocol; no other definition visible in this file
# references proto.after.
proto.after = proto.note

# Protocols that may be interleaved with content (referenced by surface,
# column, lg, composite-content and variant).
proto.inter =
    proto.bib
  | proto.etcsl
  | proto.lem
  | proto.link
  | proto.note
  | proto.var
# Individual protocols. Every one is a <protocol> element with text content;
# the patterns differ only in the fixed value of the type attribute.
proto.atf        = element protocol { attribute type { "atf" },        text }
proto.basket     = element protocol { attribute type { "basket" },     text }
proto.bib        = element protocol { attribute type { "bib" },        text }
proto.etcsl      = element protocol { attribute type { "etcsl" },      text }
proto.key        = element protocol { attribute type { "key" },        text }
proto.lem        = element protocol { attribute type { "lem" },        text }
proto.lemmatizer = element protocol { attribute type { "lemmatizer" }, text }
proto.link       = element protocol { attribute type { "link" },       text }
proto.note       = element protocol { attribute type { "note" },       text }
proto.project    = element protocol { attribute type { "project" },    text }
proto.syntax     = element protocol { attribute type { "syntax" },     text }
proto.var        = element protocol { attribute type { "var" },        text }
proto.version    = element protocol { attribute type { "version" },    text }
Comments are asides which are not part of the text edition or the annotation; they are useful for keeping odd bits of information in the file without it getting in the way of the text edition or annotation.
Comments look like protocols in that they begin with a hash-sign, but they may not begin with the sequence hash-name-colon. Comments may be included within text transliterations but not before the first text in a file. Comments must always follow any protocols which occur adjacent to them.
A sequence of lines beginning with hash-signs is a multi-line comment. To separate multiple comments to the same line use a blank line in the ATF file.
1. a
#a simple comment
2. a
#a longer comment which somewhat artificially extends
#over multiple lines
3. a
#one comment to line 3.

#another comment to line 3.
4. a
#Comments look a bit like protocols but there is no chance of
#confusion: the ATF processor's scanning rules take care of that.
5. a
#lem: a[water]
#note: If you want a comment to appear in the displayed text-edition
#use the '#note:' protocol instead.

#and note that any comment must follow any other protocol, including
#'#note:'.
# Free-text asides: either a <cmt> or a <note> element.
comments = note | cmt

cmt  = element cmt  { text }
note = element note { text }
&-lines are used to introduce a new text and consist of two parts: the ID and the name.
For transliterations of exemplars, the ID is a 'P' followed by six digits, e.g., P123456. This ID is assigned by CDLI and is the reference ID of the object in the main CDLI catalog; to get IDs for objects not in the CDLI catalog send an e-mail to cdli@cdli.ucla.edu.
The name of the text should be identical with the 'Designation' field in the CDLI main catalog; the ATF processor detects mismatches and reports the correct name. This mechanism is designed to provide a check that the P-number in the ID actually references the text the transliterator intends.
Transliterations are not the only data type which can be entered in ATF; the documentation on composite texts is kept in a separate document.
# A transliterated text. xml:id, n and xml:lang are required; the remaining
# attributes are optional. Content is an optional start-protocol block
# followed by any mix of objects, non-object lines, comments and sealings.
transliteration =
  element transliteration {
    attribute xml:id   { xsd:ID },
    attribute n        { text },
    attribute hand     { text }?,
    attribute xml:lang { xsd:NMTOKEN },
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    proto.start?,
    (object | nonobject | comments | sealing)*
  }

# Shared attribute patterns.
n.attr    = attribute n { text }
# n restricted to exactly one lowercase ASCII letter.
n.attr.lc = attribute n { xsd:string { pattern = "[a-z]" } }
haslinks  = attribute haslinks { xsd:boolean }
# Note: the pattern is named maxcells but the attribute it defines is `cols`.
maxcells  = attribute cols { xsd:nonNegativeInteger }
project   = attribute project { xsd:NMTOKEN }
@-lines are used for structural tags. Several kinds of structure
may be indicated using this mechanism: physical structure, e.g., objects,
surfaces; manuscript structure, i.e., columns; and document structure,
e.g., divisions and colophons. For clarity, we describe here only the
structural features which are permitted in object transliterations,
i.e., texts with an ID beginning with P. Documentation
of structural conventions for composite texts is given in the composites manual.
The kind of object on which the inscription being transliterated is written is designated using one of the following tags:
@tablet
@envelope
@prism
@bulla
@fragment, e.g.:
&P212121 = Some Fragmentary Object
@fragment a
1. a
@fragment b
1. a
@object, e.g., @object Stone wig.
A transliteration of the text inscribed on a physical seal object
should be handled using the @object tag:
&P333444 = Some Seal @object seal 1. da-da 2. dumu du-du
# A physical object. It is either implicit, or addressable through both
# xml:id and label. The type is one of the known object kinds, or the
# generic "object" type, which additionally requires an n attribute.
# status.flags is not defined in this file (presumably from an include).
object =
  element object {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    ( attribute type { known.object }
    | ( attribute type { user.object }, n.attr )
    ),
    status.flags,
    (m.fragment | surface | sealing | comments | nonx)*
  }

known.object = xsd:string { pattern = "tablet|envelope|prism|bulla" }
user.object  = xsd:string { pattern = "object" }

# Content allowed directly in a transliteration outside any object.
nonobject = nonx
Surfaces are principally the physical surfaces:
@obverse,
@reverse,
@left,
@right,
@top,
@bottom.
@face is followed by a single lowercase letter, e.g.:
&P123321 = Some Prism
@prism
@face a
1. a
@face b
1. e
@surface is followed by a label, e.g., @surface shoulder; @surface side a.
@edge may be followed by a single lowercase letter, like @face.
A transliteration of a sealing should be handled using the
@seal tag included like a surface after the
transliteration of the object on which the sealing occurs:
&P343434 = Some Sealed Tablet 1. a $ seal 1 @seal 1 1. du-du
The use of $ seal anticipates the discussion of
$-lines below; this mechanism can be used to indicate which sealings
occur where on an object.
# A surface of an object. It is either implicit, or addressable through both
# xml:id and label. Type rules:
#   - a known surface name needs no n attribute;
#   - "face" requires n as a single lowercase letter;
#   - "edge" allows n as a single lowercase letter;
#   - "surface" (user) and "seal" require a free-text n.
# The value "surface" is accepted by both known.surface and user.surface.
# Attribute patterns are listed first here; attribute position inside a
# RELAX NG group has no effect on validation.
surface =
  element surface {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    ( attribute type { known.surface }
    | ( attribute type { face.surface }, n.attr.lc )
    | ( attribute type { edge.surface }, n.attr.lc? )
    | ( attribute type { user.surface | seal.surface }, n.attr )
    ),
    primes?,
    status.flags,
    (proto.inter | column | nonx | m | comments)*
  }

known.surface =
  xsd:string { pattern = "surface|obverse|reverse|left|right|top|bottom" }
face.surface = xsd:string { pattern = "face" }
edge.surface = xsd:string { pattern = "edge" }
user.surface = xsd:string { pattern = "surface" }
seal.surface = xsd:string { pattern = "seal" }
The scid attribute is intended for use
in cross-referencing sealing instance transliterations to composite
transliterations of sealings stored in an external database.
# A sealing: xml:id, label and n are required, scid is optional.
sealing =
  element sealing {
    attribute xml:id { xsd:ID },
    attribute label  { text },
    attribute n      { xsd:NMTOKEN },
    attribute scid   { xsd:NMTOKEN }?,
    (column | nonx | milestone | comments)*
  }
Columns are indicated with the @column tag, which may
be omitted for single-column texts. Column numbers must be given in
arabic numerals:
&P545454 = Some Columnar Text @column 1 1. a @column 2 1. e
# A column. It is either implicit, or addressable through both xml:id and
# label; n is always required. Attribute patterns are listed first here;
# attribute position inside a RELAX NG group has no effect on validation.
column =
  element column {
    ( implicit
    | ( attribute xml:id { xsd:ID },
        attribute label  { text } )
    ),
    attribute n { text },
    attribute o { text }?,
    primes?,
    status.flags,
    (milestone | lg | l | nonl | nonx | comments | proto.inter)*
  }
The status of some of the features indicated with @-lines can be indicated in a manner similar to that of graphemes; the notation is intended to be natural and to follow Assyriological conventions:
@obverse?
Meaning: status of obverse/reverse uncertain
@reverse!*
Meaning: collated; reverse correct despite designation in publication
Primes can be used where this makes sense:
@face a' @column 3'
# One or more U+2032 characters.
primes =
  attribute primes {
    xsd:string { pattern = "\x{2032}+" }
  }
For technical reasons it is impossible to interweave physical structure (of the kind described above for transliterated objects) and document structure (e.g., paragraph divisions). This limitation is resolved by recourse to milestones.
Documentary divisions in a transliterated object are given using
the @m tag, with the milestone type given after an equals
sign and the division type following; an optional division name or
number may follow the division type:
@m=division paragraph 1 @m=division colophon
Simple support for discourse elements in administrative texts is
provided using shorthands which are also implemented as
milestones. These shorthands are @date,
@summary,
@witnesses:
&P787878 = Some Administrative Text 1. 1(disz) udu 2. da-da 3. szu ba-ti @date 4. u4 1-kam @left @summary 1. 1(disz) udu
# Milestones are <m> elements whose type attribute selects the flavour.
milestone = m | m.discourse

# Division or locator milestone with an optional NMTOKEN subtype.
m =
  element m {
    attribute type    { "division" | "locator" },
    attribute subtype { xsd:NMTOKEN }?,
    text
  }

# Discourse milestone; the subtype is required and comes from a closed list.
m.discourse =
  element m {
    attribute type { "discourse" },
    attribute subtype {
      "body" | "date" | "linecount" | "witnesses" | "summary"
    },
    text
  }

# Locator milestone whose subtype, when present, must be "fragment".
m.fragment =
  element m {
    attribute type    { "locator" },
    attribute subtype { "fragment" }?,
    text
  }
The ATF processor supplies structural elements where they are
implied by the transliteration and this is indicated in the XTF tree
by use of the implicit attribute. For example, given:
&P121212 = Some Sparse Data 1. a
The following (schematic) element structure is generated:
<transliteration>
<object>
<surface>
<column>
All of these elements have implicit="1".
N.B.: Implicit elements are not addressable by label or xml:id attributes; explicit object, surface and column indicators must be given if addressability is a requirement.
# Marks a structural element as implicit; the only permitted value is "1".
implicit =
  attribute implicit { "1" }
$-lines are used to indicate information about the state of the text or object, or to describe features on the object which are not part of the transliteration proper. They come in two flavours: strict and loose.
Strict $-lines are subject to the restrictions in the table below; strict $-lines can be interpreted in their entirety by the ATF processor and the interpreted information can then be used by other programs. Strict $-lines are the best practice.
Loose $-lines are indicated by putting parentheses around the contents of the $-line. This is a facility provided to enable annotation of features which are not covered by the strict $-line specification. If the ATF processor detects that a loose $-line actually meets the criteria defined for strict $-lines it gives an advisory notice that the parentheses should be removed.
$-lines and comments are two quite different facilities, but experience has shown that transliterators can confuse the two. Comments are for information which does not belong in the transliteration and description of the text; comments are not displayed when the text is formatted for display or print. $-lines are for information which is integral to an understanding of the textual data; $-lines are included when the text is displayed or printed.
A particular use of $-lines is to indicate that a seal is used on an object; the form is:
$ seal <N>
Where N is a number indicating which seal is used;
if a transliteration of the seal is also given using the
@seal heading, the number following $ seal
should correspond to the number following @seal. See the
example above.
Most $-lines are used to give information about the state of the object being transliterated. The conventions for this can be summarized as follows:
| Qualification | Extent¹ | Scope | State |
|---|---|---|---|
| at least; at most; about | n; several; some; NUMBER; RANGE; rest of; start of; beginning of; middle of; end of | OBJECT²; SURFACE³; column; columns; line; lines; case; cases; surface | blank; broken; effaced; illegible; missing; traces |

¹ The extent N may be a number such as 1 or 5; a RANGE gives two numbers separated by a hyphen, e.g., 3-5.

² OBJECT is any object specifier as described above, e.g., tablet, object etc.

³ SURFACE is any surface specifier as described above, e.g., obverse, left etc.
$-lines are also used to indicate noteworthy rulings on the tablet; ordinary case- or line-ruling should not be indicated with a $-line, but where a scribe has used a ruling to give additional information about the document structure this should be noted as:
(single | double | triple) ruling
Strict $-lines look like this:
$ 3 lines blank $ rest of obverse missing
A loose $-line looks like this:
$ (head of statue broken)
A ruling $-line looks like this:
$ double ruling
Inline images can be specified using the form:
$ (image N = <text>)
Where N is an image number consisting of digits followed by optional lowercase letters from a to z, and <text> is free text, giving a label for the image (which is copied through to the XHTML 'alt' attribute on the <img> tag).
$ (image 1 = numbered diagram of triangle)
At present, the implementation only works for XHTML which is
produced within a project. The ATF processor constructs a file name
consisting of the text ID and the image's N value, joined by an at
sign (e.g., P123456@1). The XHTML producer then emits an
<img> tag with the src attribute set to
/<PROJECT>/<FILENAME>.png.
Thus, in the present implementation, there must exist an
appropriately named file in the PNG graphics format residing in the
project's images directory. The implementation is
expected to support a more sophisticated locator mechanism in the
future.
# Non-transliteration elements. Each is text content plus its own attribute
# list. nonl-attlist and nong-attlist are not defined in this block.
nonx = element nonx { nonx-attlist, text }
nonl = element nonl { nonl-attlist, text }
nong = element nong { nong-attlist, text }
# Attributes of <nonx>: a required xml:id, an optional label+silent pair
# (both or neither), then exactly one of four alternative attribute groups.
nonx-attlist =
attribute xml:id { xsd:ID },
(attribute label { text },
attribute silent { "1" })?,
# Alternative 1: strict="1" with either ref+scope or extent+scope+state.
((attribute strict { "1" },
((attribute ref { text },
attribute scope { text })
|(attribute extent { text },
attribute scope { text },
attribute state { text })))
|
# Alternative 2: strict="0"; every other attribute is optional.
(attribute strict { "0" },
attribute extent { text }?,
attribute ref { text }?,
attribute scope { text }?,
attribute state { text }?)
|
# Alternative 3: strict="0" with ref="none" and type="empty".
(attribute strict { "0" },
attribute ref { "none" },
attribute type { "empty" })
|
# Alternative 4: an image; ref must match the pattern below (one of P, Q or
# X, digits, "@", digits, then optional lowercase letters) and alt is
# required.
(attribute type { "image" },
attribute strict { "0" },
attribute ref { xsd:string {
pattern="[PQX][0-9]+@[0-9]+[a-z]*"
}},
attribute alt { text })
)
# Common attribute set: a required type from a closed list, plus optional
# unit, extent, ref and xml:id.
non-x-attr-set =
  attribute type {
      "newline"
    | "broken"
    | "maybe-broken"
    | "traces"
    | "maybe-traces"
    | "blank"
    | "ruling"
    | "image"
    | "seal"
    | "comment"
    | "bullet"
    | "other"
  },
  attribute unit   { "self" | "quantity" | "ref" }?,
  attribute extent { text }?,
  attribute ref    { text }?,
  attribute xml:id { xsd:ID }?

# Interleave-combine the shared set into each attribute list.
# NOTE(review): noncolumn-attlist is not referenced by any definition
# visible in this file; confirm whether it is still needed.
noncolumn-attlist &= non-x-attr-set
nonl-attlist      &= non-x-attr-set
nong-attlist      &= non-x-attr-set
Lines of transliterated text begin with a sequence of non-space characters followed by a period and a space (these are typically numbers, but that is not a requirement):
1. a a+1. e 2'. i
# A line of text. xml:id and n are required. Content is one of: one or more
# cells, one or more fields, or any mix of alignment groups and inline
# content.
l =
  element l {
    attribute xml:id { xsd:ID },
    attribute n      { text },
    attribute o      { text }?,
    attribute l      { text }?,
    attribute label  { text }?,
    attribute silent { "1" }?,
    ( cell+
    | f+
    | (ag | l.inner)*
    )
  }

# Inline line content. normword, words and glo are not defined in this file
# (presumably supplied by an include).
l.inner = (surro | normword | words | glo)*
By default the ATF processor renumbers lines, storing the original line number and generating a new one according to consistently defined rules. This procedure was adopted because of the lack of consistency in numbering administrative texts.
It is possible to suppress this behaviour and, indeed, it is necessary to suppress this behaviour if intertext linking is in use. The relevant protocol to achieve this is:
#atf: use mylines
Two mechanisms provide structural subdivisions of lines: cells and fields.
Cells are alignment units (like table cells); they can be of use to organize the data in a way that mimics the layout on the object. Fields are logical subdivisions in a line which are not necessarily laid out in a special way on the object. Cells can contain fields but fields cannot contain cells; fields are lower in the structural hierarchy than cells.
Fields can have a type specified so that higher order processors working with the XTF data can work intelligently with them.
In ATF, cells are separated by ampersand characters
(&); fields are separated by commas. Both separators
must be preceded by one or more spaces.
Field types are indicated with an exclamation mark followed by one or more lowercase letters; see the lexical documentation for examples of how this works.
&P123123=UET 3,2 1. a & e &P123123=UET 3,2 1. a , e &P123123=UET 3,2 1. e4 ,!sv A
# A cell (<c>): optional span, then either one or more fields or inline
# content.
cell =
  element c {
    span?,
    (f+ | l.inner)
  }

span = attribute span { xsd:nonNegativeInteger }

# A field (<f>): its attributes, then any mix of alignment groups and inline
# content.
f =
  element f {
    f-attlist,
    (ag | l.inner)*
  }

# Only type is required on a field.
f-attlist &=
  attribute xml:id   { xsd:ID }?,
  attribute n        { text }?,
  attribute type     { xsd:NMTOKEN },
  attribute xml:lang { xsd:NMTOKEN }?
Streams are XTF's mechanism for entering data several times in several different ways; no automatic alignment is done between streams, but an alignment-group mechanism is provided for those occasions where alignment is a requirement. There are three kinds of stream in XTF:
In ATF, the MTS is the unmarked case (the one with the line
number). The NTS is introduced by the sequence equals-period-space at
the start of the line (=. ). The LGS is introduced by
the sequence equals-colon-space at the start of the line (=:
). A simple, if contrived example of all the streams is:
&P246246=Streams
1. a
={ e
=. e4
=: A
#lem: a[water]
# A line group: a main line, an optional "gus" line, then one of
#   - an "nts" line, optionally followed by an "lgs" line,
#   - an "lgs" line alone,
#   - any number of <e> elements and comments (possibly none),
# followed by interleaved protocols and then variants.
# The shared prefix `l, gus?` is written once; the accepted sequences are
# unchanged.
lg =
  element lg {
    attribute xml:id { xsd:ID }?,
    attribute n      { text }?,
    l,
    gus?,
    ( (nts, lgs?)
    | lgs
    | (e | comments)*
    ),
    proto.inter*,
    var*
  }

# The secondary lines are all <l> elements distinguished by a fixed type.
# grapheme is not defined in this file (presumably supplied by an include).
nts = element l { attribute type { "nts" }, (ag | l.inner)* }
lgs = element l { attribute type { "lgs" }, grapheme* }
gus = element l { attribute type { "gus" }, l.inner* }

# A variant (<v>) identified by a required varnum.
var =
  element v {
    attribute varnum { xsd:NMTOKEN },
    l.inner
  }
Alignment between MTS and NTS can be effected through the alignment-groups mechanism in which groups of words can be defined and labelled such that the groups in one stream correspond to the groups in the other stream.
If groups are used at all in a stream then every word in the stream must belong to a group.
In ATF, alignment groups must be enabled using a protocol; the groups are then indicated using matched parentheses with one or more lowercase letters following the closing parenthesis:
&P122221=Align #atf: use alignment-groups 1. %u (UD)a (GAL UM ME)b (BA LAGAB)c =. (kur)a (umeda)b (ba-jen)c #lem: kur[mountain]; umeda[nurse]; jen[go]
# Alignment groups: ref is one or more lowercase ASCII letters.
ag =
  element ag {
    attribute ref  { xsd:string { pattern = "[a-z]+" } },
    attribute form { text }?,
    l.inner*
  }
Zones are an experimental feature; at the schema level they are defined in the GDL, but it is convenient to discuss them here because they are another mechanism for grouping graphemes. The concept is that part of an inscription, e.g., a case, may exhibit ordering which is not linear but is nevertheless based on some spatial relationship between signs. Transliterators can assign graphemes to zones and label the graphemes by zone.
In ATF, zones are indicated using a dollar sign followed by digits
(e.g., $1). In the Ebla version of the text in the
alignment example, the words are stacked vertically as in the image
here. This could be transliterated as follows:
&P122221=Align #atf: use alignment-groups 1. %u (UD$1)a (GAL$2 UM$3 ME$3)b (BA$4 LAGAB$4)c =. (kur)a (umeda)b (ba-jen)c #lem: kur[mountain]; umeda[nurse]; jen[go]
See the GDL documentation under Presence for surrogates.
# Surrogate wrapper around inline content.
surro =
  element surro { l.inner }

# Choice-combine an optional surrogate into `words` and `word`; their base
# definitions are not in this file (presumably supplied by an include).
words |= surro?
word  |= surro?
Composite texts by convention have an ID beginning with Q and are declared by an @-line which immediately follows the &-line for the text:
&Q000002 = Archaic Lu A @composite
To obtain an ID for a composite text e-mail
stinney@sas.upenn.edu.
Most of the @-lines which are permitted in transliterations are not permitted in composites; this is because composites are organized around documentary structure rather than the structure of a physical object. The one exception is that milestones are allowed in composites.
Documentary divisions are indicated in ATF by use of the
@div tag which is followed by the name of the division
and an optional name for the division. The @div tag
requires a closing @end tag, which must take as its
single argument the name of its corresponding opening
@div. @div's of different kinds may not be
interwoven.
The @div tag maps to the DIV element
in XTF. The first NMTOKEN which follows the @div is the name of the
division and is stored in the @TYPE attribute. The remainder of the
line is stored in the @N attribute.
@div part 1 ... @end part @div colophon ... @end colophon
In the liturgical corpus (including ETCSL editions of texts which could reasonably be considered liturgical), kirugu and other rubrics are used as logical structures, and they contain subdivisions giving the actual rubric; this is supported with the following syntax:
@div kirugu 1 1. tur3-ra-na ... @div rubric kirugu 10. ki-ru-gu2 1(disz)-a-kam @end rubric @end kirugu @div giszgigal 1 11. u2-a a-u3-a u2-a-u2-a @div rubric giszgigal 12. gisz-gi4-gal2-bi-im @end rubric @end giszgigal
A physical location may be given in a composite by using the locator milestone; the content after locator is a label. This is intended for use when the documentary structure of composites is being used to edit a text which is preserved only in one exemplar (the ePSD royal inscriptions corpus edits all royal inscriptions as composites):
1. a @m=locator o 1
Variants are implemented to support the ETCSL corpus but may be used in any composite.
# A composite text. Child elements must appear in this order: sigdefs, the
# optional start-protocol block, the composite content, then any number of
# referto elements each optionally followed by one comment.
# Attribute patterns are listed first here; attribute position inside a
# RELAX NG group has no effect on validation.
composite =
  element composite {
    composite-attlist,
    attribute hand { text }?,
    project?,
    implicit?,
    haslinks?,
    maxcells?,
    sigdef*,
    proto.start?,
    composite-content,
    (referto, comments?)*
  }

# xml:id and n are required; xml:lang is optional.
composite-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?
# Content shared by <composite> and <div>. The leading backslash on
# \include and \div lets RNC keywords be used as pattern names.
composite-content =
  ( milestone
  | \include
  | \div
  | variants
  | lg
  | l
  | comments
  | nonl
  | nonx
  | proto.inter
  )*

\include = element include { increfAttr }
referto  = element referto { increfAttr }

# Reference attributes: ref and n are required; from is optional, and to may
# only be given together with from.
increfAttr =
  attribute ref { text },
  attribute n   { text },
  ( attribute from { text },
    attribute to   { text }?
  )?

# A division nests the same content model as the composite itself.
\div =
  element div {
    div-attlist,
    composite-content
  }

# Only type is required on a division.
div-attlist &=
  attribute xml:id  { xsd:ID }?,
  attribute n       { text }?,
  attribute type    { xsd:NMTOKEN },
  attribute lang    { text }?,
  attribute place   { text }?,
  attribute subtype { text }?

variants = element variants { variant* }

# A variant allows the composite content minus milestone and \include.
variant =
  element variant {
    ( \div
    | variants
    | lg
    | l
    | comments
    | nonl
    | proto.inter
    | nonx
    )*
  }
# NOTE(review): neither score nor synopticon is referenced by start or by
# any other definition visible in this file.
score =
  element score {
    score-attlist,
    sigdef*,
    (milestone | \div | lg | comments | nonl)*
  }

score-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?

synopticon =
  element synopticon {
    synopticon-attlist,
    sigdef*,
    (eg | comments | nonl)*
  }

synopticon-attlist &=
  attribute xml:id   { xsd:ID },
  attribute n        { text },
  attribute xml:lang { xsd:NMTOKEN }?

# An empty element; all three attributes are required.
sigdef =
  element sigdef {
    sigdef-attlist,
    empty
  }

sigdef-attlist &=
  attribute xml:id  { xsd:ID },
  attribute targ-id { xsd:NMTOKEN },
  attribute targ-n  { text }
# A group of <e> elements with an optional xml:id.
eg =
  element eg {
    eg-attlist,
    e*
  }

eg-attlist &= attribute xml:id { xsd:ID }?

# An <e> element: inline content, or one or more `c`, or one or more fields.
# NOTE(review): `c` is not defined in this file; the pattern for element c
# here is named `cell`. Confirm that `c` resolves through an include and is
# the intended reference.
e =
  element e {
    e-attlist,
    ( l.inner
    | c+
    | f+
    )
  }

# Every attribute on <e> is optional.
e-attlist &=
  attribute xml:id { xsd:ID }?,
  attribute sigref { xsd:IDREF }?,
  attribute n      { text }?,
  attribute l      { text }?,
  attribute p      { text }?,
  attribute hlid   { text }?,
  attribute plid   { text }?
Questions about this document may be directed to Steve Tinney (stinney at sas dot upenn dot edu).