% -------------------------------------------------------------------------
%
%   The Specification of the Z-Machine
%
%   This file is formatted in the TeX typesetting language,
%   but after the initial pages of macro definitions is relatively
%   straightforward to read as plain text.
%
%   After the \end directive, the file also contains five short Inform
%   programs useful for testing compliance with the specification.
%
%   -- GAN, 951115
%
% -------------------------------------------------------------------------
% ---------------------------------------------------------------------------
% ---------------------------------------------------------------------------

%  \iffiles: when true, auxiliary files are written -- contents entries by
%  \newsection and index entries by \makexref.  Off by default.
\newif\iffiles\filesfalse
%  Manual macros
%
%  Page layout
%
%  \ifshutup: when false, the index data file is opened for output.
\newif\ifshutup\shutupfalse
%  \iflexicon, \ifanswers: select the alternative running heads in \headline.
\newif\iflexicon\lexiconfalse
\newif\ifanswers\answersfalse
%  Magnify to magstep 1; the offsets move the page body right and down.
\magnification=\magstep 1
\hoffset=0.15 true in
\voffset=2\baselineskip
%
%  General hacks
%
%  \PAR: a plain synonym for \par.
\def\PAR{\par}
%
%  Font loading
%
%  \medfont and \bigfont: cmr10 enlarged to magsteps 2 and 3.
\font\medfont=cmr10 scaled \magstep2
\font\bigfont=cmr10 scaled \magstep3
%\def\sectfont{\bf}
%  \sectfont: the face used by \section and \sectionx for headings.
\font\sectfont=cmbx12
%  \small: switch to the seven-point roman.
\def\small{\sevenrm}
%  \rhrm and \rhit: eight-point roman and slanted, used in the running head.
\font\rhrm=cmr8
\font\rhit=cmsl8
%
%  Titles
%
\newcount\subsectno    %  Subsection number
\def\rhead{{\rhrm\topmark}}           %  The running head will go here
%
%  \newsection{number}{title}: bookkeeping done at the start of each section.
%  Resets \subsectno and, when \iffiles is set, queues a
%  \sli{number}{title}{page} line for the contents stream \conts.  The \write
%  is not \immediate, so \the\pageno is expanded when the page is shipped out.
\def\newsection#1#2{%     To begin a section...
%\global\titletrue%        Declare this as a title page
%\xdef\rhead{{\rhrm #1}\quad #2}%       Initialise running head and ssn
\subsectno=0%
\iffiles
\write\conts{\string\sli\string{#1\string}\string{#2\string}\string{\the\pageno\string}}%
\fi
}
%
%  \section{number}{title}: clear the mark, leave an inch of space, set the
%  heading in \sectfont, call \newsection, then begin an unindented paragraph
%  carrying the mark "number\quad title" (which \rhead shows via \topmark).
\def\section#1#2{\mark{}\vskip 1 true in\goodbreak
\noindent{\sectfont #1\quad #2}\bigskip\newsection{#1}{#2}\noindent\mark{#1\quad #2}}
%  \sectionx{title}: the same for a section with no number.
\def\sectionx#1{\mark{}\vskip 1 true in\goodbreak
\noindent{\sectfont #1}\bigskip\newsection{}{#1}\noindent\mark{\quad #1}}
%
%  \newpage: clear the mark, fill the page and eject it.
\def\newpage{\mark{}\vfill\eject}
%
%  Headers and footers
%
%  \iftitle marks a title page: its headline is blank and the flag is then
%  cleared globally.
\newif\iftitle
%  Other pages: in lexicon mode the first and last marks of the page in bold,
%  at the left and right; in answers mode "Answer to exercise N" or
%  "Answers to exercises M-N" according to whether the first and last marks
%  are numerically equal; otherwise \rhead, flush right.
\headline={\iftitle\hfil\global\titlefalse%
           \else{\iflexicon{\bf\firstmark}\hfil{\bf\botmark}\else%
                 \ifanswers{\hfil\ifnum\firstmark=\botmark%
                            {\rhit Answer to exercise \rhrm\firstmark}%
                            \else{\rhit Answers to exercises \rhrm\firstmark-\botmark}%
                           \fi}\else%
                 \hfil{\rhit \rhead}\fi\fi}%
           \fi}
%  Foot: the page number in bold, flush right; negative page numbers are
%  printed as roman numerals.
\footline={\ifnum\pageno<0\hfil{\tenbf\romannumeral -\pageno}%
\else\hfil{\tenbf \number\pageno}\fi}
%\footline={\ifnum\pageno=1\hfil\else\hfil{\tenbf \number\pageno}\fi}
%
%  (Old date-stamping version:)
% \footline={\hfil{\rm \number\pageno}\hfil{\rm \number\day/\number\month}}
%
% If this works I'll be impressed
%

%  Nine-point fonts, plus the eight-point bold used by the REFERENCES and
%  EXERCISE labels.
\font\ninerm=cmr9
\font\ninei=cmmi9
\font\ninesy=cmsy9
\font\ninebf=cmbx9
\font\eightbf=cmbx8
\font\ninett=cmtt9
\font\nineit=cmti9
\font\ninesl=cmsl9
%  \ninepoint: switch the text fonts of the math families and the commands
%  \rm, \it, \sl, \tt to nine-point, with an 11pt baseline and matching strut.
%  NOTE(review): \bf is not redefined here (nor in \tenpoint), so \bf
%  presumably still selects the ten-point bold inside nine-point text --
%  confirm whether that is intended.
\def\ninepoint{\def\rm{\fam0\ninerm}%
  \textfont0=\ninerm
  \textfont1=\ninei
  \textfont2=\ninesy
  \textfont3=\tenex
  \textfont\itfam=\nineit \def\it{\fam\itfam\nineit}%
  \textfont\slfam=\ninesl \def\sl{\fam\slfam\ninesl}%
  \textfont\ttfam=\ninett \def\tt{\fam\ttfam\ninett}%
  \textfont\bffam=\ninebf
  \normalbaselineskip=11pt
  \setbox\strutbox=\hbox{\vrule height8pt depth3pt width0pt}%
  \normalbaselines\rm}

%  \tenpoint: the inverse of \ninepoint -- restore the ten-point fonts, a
%  12pt baseline and the corresponding strut.
\def\tenpoint{\def\rm{\fam0\tenrm}%
  \textfont0=\tenrm
  \textfont1=\teni
  \textfont2=\tensy
  \textfont3=\tenex
  \textfont\itfam=\tenit \def\it{\fam\itfam\tenit}%
  \textfont\slfam=\tensl \def\sl{\fam\slfam\tensl}%
  \textfont\ttfam=\tentt \def\tt{\fam\ttfam\tentt}%
  \textfont\bffam=\tenbf
  \normalbaselineskip=12pt
  \setbox\strutbox=\hbox{\vrule height8.5pt depth3.5pt width0pt}%
  \normalbaselines\rm}

%  Paragraph indentation for the whole document.
\parindent=30pt
%  \inpar: start with a \qquad and hang the lines after the first by 40pt.
\def\inpar{\hangindent40pt\hangafter1\qquad}
%  \onpar: end the current paragraph and indent every line of the next by 40pt.
\def\onpar{\par\hangindent40pt\hangafter0}

%  \ttglue: the stretchable interword space used by inline verbatim text.
\newskip\ttglue
\ttglue=.5em plus.25em minus.15em

%  \orsign: two vertical bars, set in math mode.
\def\orsign{$\mid\mid$}

%  \begindisplay ... \enddisplay: a displayed alignment.  The remainder of the
%  \begindisplay line is taken as #1 of \startdisplay (the end of that line is
%  the delimiter) and is placed just before the \halign.  Columns are set
%  flush left, the first indented and later ones preceded by a \qquad.
%  \startdisplay is defined inside an \obeylines group so that the line end in
%  its parameter text is the active character; do not reflow these lines.
\outer\def\begindisplay{\obeylines\startdisplay}
{\obeylines\gdef\startdisplay#1
  {\catcode`\^^M=5$$#1\halign\bgroup\indent##\hfil&&\qquad##\hfil\cr}}
\outer\def\enddisplay{\crcr\egroup$$}

%  Category code 12 ("other"), used below to neutralise special characters.
\chardef\other=12

%  \ttverbatim: open a group in which the usual special characters are
%  ordinary ones, spaces and line ends are obeyed, and the font is \tt.
%  The group is closed by whichever macro called \ttverbatim.
\def\ttverbatim{\begingroup \catcode`\\=\other \catcode`\{=\other
  \catcode`\}=\other \catcode`\$=\other \catcode`\&=\other 
  \catcode`\#=\other \catcode`\%=\other \catcode`\~=\other 
  \catcode`\_=\other \catcode`\^=\other
  \obeyspaces \obeylines \tt}
%  The active space (as set up by \obeyspaces) typesets a control space.
{\obeyspaces\gdef {\ }}

%  \begintt ... \endtt: displayed verbatim text, inside which the vertical bar
%  is the escape character.  \beginstt is the same but set in \ninett.
\outer\def\beginstt{$$\let\par=\endgraf \ttverbatim\ninett \parskip=0pt
  \catcode`\|=0 \rightskip=-5pc \ttfinish}
\outer\def\begintt{$$\let\par=\endgraf \ttverbatim \parskip=0pt
  \catcode`\|=0 \rightskip=-5pc \ttfinish}
%  \ttfinish takes the rest of the opening line as #1 and everything up to
%  \endtt as #2, puts #2 in a \vbox, then closes the group and the display.
%  It is defined with the bar as escape and backslash as an ordinary
%  character so that "\endtt" can be matched literally; do not reflow.
{\catcode`\|=0 |catcode`|\=\other
  |obeylines
  |gdef|ttfinish#1^^M#2\endtt{#1|vbox{#2}|endgroup$$}}

%  From here on the vertical bar is active: text between two bars is set
%  verbatim inline, with \ttglue between words, line ends treated as spaces
%  and the closing bar ending the group.
\catcode`\|=\active
{\obeylines\gdef|{\ttverbatim\spaceskip=\ttglue\let^^M=\ \let|=\endgroup}}

%  \beginlines ... \endlines: a nine-point passage with no paragraph
%  indentation in which each input line is set as its own line, beginning
%  with a strut.  \endlines closes the group and resumes without indentation.
\def\beginlines{\par\begingroup\nobreak\medskip\parindent=0pt
  \nobreak\ninepoint \obeylines \everypar{\strut}}
\def\endlines{\endgroup\medbreak\noindent}

%  \<text>: typeset text between angle brackets inside an unbreakable box.
\def\<#1>{\leavevmode\hbox{$\langle$#1\/$\rangle$}}

%  Warning-triangle paragraphs.  \danger, \ddanger and \dddanger each begin a
%  nine-point paragraph whose first line carries one, two or three \dbend
%  triangles in the paragraph indentation; the paragraph's own \par closes
%  the group again.  \refd@nger is the variant without a triangle.
%  NOTE(review): no \catcode change for @ is visible before this point, so
%  \d@nger, \dd@nger, \ddd@nger and \refd@nger are presumably read as \d, \dd,
%  \ddd and \refd delimited by the characters "@nger" (which would also
%  replace plain TeX's \d accent) -- confirm before using \d elsewhere.
\def\dbend{{$\triangle$}}
\def\d@nger{\medbreak\begingroup\clubpenalty=10000
  \def\par{\endgraf\endgroup\medbreak} \noindent\hang\hangafter=-1   % -2
  \hbox to0pt{\hskip-\hangindent\dbend\hfill}\ninepoint}
\def\refd@nger{\par\nobreak\noindent\begingroup\clubpenalty=10000
  \def\par{\endgraf\endgroup\medbreak}\ninepoint}
\outer\def\danger{\d@nger}
\def\dd@nger{\medskip\begingroup\clubpenalty=10000
  \def\par{\endgraf\endgroup\medbreak} \noindent\hang\hangafter=-1   % -2
  \hbox to0pt{\hskip-\hangindent\dbend\kern 1pt\dbend\hfill}\ninepoint}
\outer\def\ddanger{\dd@nger}
\def\ddd@nger{\medskip\begingroup\clubpenalty=10000
  \def\par{\endgraf\endgroup\medbreak} \noindent\hang\hangafter=-1   % -2
  \hbox to0pt{\hskip-\hangindent\dbend\kern 1pt\dbend\kern 1pt\dbend\hfill}\ninepoint}
%  Fixed: \dddanger used to expand to \dd@nger, so it printed two triangles
%  and the three-triangle definition above was never used.
\outer\def\dddanger{\ddd@nger}
%  NOTE(review): \endsubgroup is not defined in this part of the file --
%  presumably it is defined elsewhere; verify before using \enddanger.
\def\enddanger{\endgraf\endsubgroup}

%  \cstok{text}: text in \tt inside a ruled box, with a thin space on either
%  side of the box and of the text.  The \vphantom{\tt/} gives the box at
%  least the height and depth of a typewriter slash.
\def\cstok#1{%
  \leavevmode\thinspace
  \hbox{%
    \vrule
    \vtop{%
      \vbox{\hrule\kern1pt
        \hbox{\vphantom{\tt/}\thinspace{\tt#1}\thinspace}}%
      \kern1pt\hrule}%
    \vrule}%
  \thinspace}

%  \rstok{text}: the same ruled box with the text in \rm; the \vphantom{\rm!}
%  gives it at least the height of a roman exclamation mark.
\def\rstok#1{%
  \leavevmode\thinspace
  \hbox{%
    \vrule
    \vtop{%
      \vbox{\hrule\kern1pt
        \hbox{\vphantom{\rm!}\thinspace{\rm#1}\thinspace}}%
      \kern1pt\hrule}%
    \vrule}%
  \thinspace}

%  \exno: running exercise number, advanced globally by \exercise and
%  \dexercise.
\newcount\exno
\exno=0

%  Opens a nine-point group that the next \par closes (no triangle is set).
\def\xd@nger{%
  \begingroup\def\par{\endgraf\endgroup\medbreak}\ninepoint}

%  \warning: a bullet in the left margin, the label WARNING on its own line,
%  then an unindented paragraph.
\outer\def\warning{\medbreak
  \noindent\llap{$\bullet$\rm\kern.15em}%
  {\ninebf WARNING}\par\nobreak\noindent}
%  \refs: a bullet and the label REFERENCES, followed by a nine-point
%  paragraph; \nextref separates the references within it.
\outer\def\refs{\medbreak
  \noindent\llap{$\bullet$\rm\kern.15em}%
  {\eightbf REFERENCES}\refd@nger}
\outer\def\nextref{\quad$\bullet$\quad}%
%  \exercise: advance \exno and print "EXERCISE n" after a margin bullet,
%  followed by a nine-point paragraph.
\outer\def\exercise{\medbreak \global\advance\exno by 1
  \noindent\llap{$\bullet$\rm\kern.15em}%
  {\eightbf EXERCISE \bf\the\exno}\refd@nger}
%\par\noindent
%  \dexercise{symbols}: as \exercise, with #1 printed before the label.
\def\dexercise#1{\global\advance\exno by 1
  \medbreak\noindent\llap{$\bullet$\rm\kern.15em}%
  #1{\eightbf ~EXERCISE \bf\the\exno}\refd@nger}
%   \hfil\break}
%  Exercises flagged with one or two triangles.
\outer\def\dangerexercise{\xd@nger \dexercise{\dbend}}
\outer\def\ddangerexercise{\xd@nger \dexercise{\dbend\dbend}}


%  Output streams: \ans (not opened in this part of the file), \conts for
%  the contents lines written by \newsection, and \inx for index data.
\newwrite\ans%
\newwrite\conts%
%  The contents file "conts" is opened only when \iffiles is set.
\iffiles
\immediate\openout\conts=conts
\fi

%  Without \iffiles, \answer{...} discards its argument and \ifshutup is set,
%  which in turn prevents the index file from being opened below.
\iffiles\else\outer\def\answer#1{\par\medbreak}\shutuptrue\fi

\newwrite\inx
%  The index data file "inxdata" is opened unless \ifshutup is set.
\ifshutup\else
\immediate\openout\inx=inxdata
\fi
%  \marginstyle: seven-point roman with an invisible strut 6pt high and 2pt deep.
\def\marginstyle{\sevenrm %
  \vrule height6pt depth2pt width0pt } %

%  Index cross-references.  The circumflex is made active below: in math mode
%  it still acts as a superscript, elsewhere it starts a cross-reference.
%  A doubled circumflex makes the reference silent (\silenttrue), so the
%  entry is recorded without the text being typeset.
\newif\ifsilent
\def\specialhat{\ifmmode\def\next{^}\else\let\next=\beginxref\fi\next}
\def\beginxref{\futurelet\next\beginxrefswitch}
\def\beginxrefswitch{\ifx\next\specialhat\let\next=\silentxref
  \else\silentfalse\let\next=\xref\fi \next}
\catcode`\^=\active \let ^=\specialhat
\def\silentxref^{\silenttrue\xref}

%  \ifproofmode: when true, \makexref builds the index \write; it is switched
%  on here.
\newif\ifproofmode
\proofmodetrue %

%  \xref looks at the next token to choose the kind of entry: a vertical bar
%  selects \vxref (verbatim text), \< selects \anglexref, and anything else
%  selects \normalxref.  Each sets \xreftype (0 normal, 1 or 2 verbatim,
%  3 angle-bracketed), \text (the entry) and \next (how to typeset it), then
%  calls \makexref.
\def\xref{\futurelet\next\xrefswitch}
\def\xrefswitch{\begingroup\ifx\next|\aftergroup\vxref
  \else\ifx\next\<\aftergroup\anglexref
    \else\aftergroup\normalxref \fi\fi \endgroup}
%  Verbatim entries: backslash is made active while the argument is read and
%  is restored to an escape character afterwards.  Both branches typeset the
%  text the same way; only \xreftype differs.
\def\vxref|{\catcode`\\=\active \futurelet\next\vxrefswitch}
\def\vxrefswitch#1|{\catcode`\\=0
  \ifx\next\empty\def\xreftype{2}%
    \def\next{{\tt\text}}%
  \else\def\xreftype{1}\def\next{{\tt\text}}\fi %
  \edef\text{#1}\makexref}
%  The active backslash expands to nothing.
{\catcode`\|=0 \catcode`\\=\active |gdef\{}}
\def\anglexref\<#1>{\def\xreftype{3}\def\text{#1}%
  \def\next{\<\text>}\makexref} %
\def\normalxref#1{\def\xreftype{0}\def\text{#1}\let\next=\text\makexref}

%  \makexref: in proof mode, define \writeit to write "text !type page." to
%  the \inx stream and execute it when \iffiles is set; then typeset \next
%  unless the reference is silent.
\def\makexref{\ifproofmode%
  \xdef\writeit{\write\inx{\text\space!\xreftype\space
    \noexpand\number\pageno.}}\iffiles\writeit\fi
  \else\ifhmode\kern0pt \fi\fi
 \ifsilent\ignorespaces\else\next\fi}

%  Multi-column page assembly for the index.
%  \fullhsize is the width of the whole page; \fullline makes a box that wide.
\newdimen\fullhsize
\def\fullline{\hbox to\fullhsize}
%  \lr records which column is being filled (L, M or R); \leftcolumn holds
%  the finished left column.
\let\lr=L \newbox\leftcolumn

%  \doubleformat and \tripleformat ship out a page made of the headline, the
%  saved column(s) followed by the current one, and the footline, then
%  advance the page number.
\def\doubleformat{\shipout\vbox{\makeheadline
    \fullline{\box\leftcolumn\hfil\columnbox}
    \makefootline}
  \advancepageno}
\def\tripleformat{\shipout\vbox{\makeheadline
    \fullline{\box\leftcolumn\hfil\box\midcolumn\hfil\columnbox}
    \makefootline}
  \advancepageno}
%  \columnbox: the current page body as a left-aligned line.
\def\columnbox{\leftline{\pagebody}}

%  Fixed: \leftcolumn was allocated a second time here, wasting a box
%  register; only the middle column still needs allocating.
\newbox\midcolumn
%  \beginindex ... \endindex: the index, set in three columns 2.1in wide on
%  a page 6.5in wide.  The headline and footline are rebuilt at full width
%  and the output routine cycles \lr through L, M and R: the first two
%  columns are saved in \leftcolumn and \midcolumn, and the third triggers
%  \tripleformat.  Inside the group every line end is a \par, after which
%  \inxentry looks at the next token to decide what kind of entry follows.
\def\beginindex{
\fullhsize=6.5true in \hsize=2.1true in
 \global\def\makeheadline{\vbox to 0pt{\vskip-22.5pt
      \fullline{\vbox to8.5pt{}\the\headline}\vss}\nointerlineskip}
 \global\def\makefootline{\baselineskip=24pt \fullline{\the\footline}}
 \output={\if L\lr
   \global\setbox\leftcolumn=\columnbox \global\let\lr=M
 \else\if M\lr
   \global\setbox\midcolumn=\columnbox \global\let\lr=R
 \else\tripleformat \global\let\lr=L\fi\fi
 \ifnum\outputpenalty>-20000 \else\dosupereject\fi}
\begingroup
  \parindent=1em \maxdepth=\maxdimen
  \def\par{\endgraf \futurelet\next\inxentry}
  \obeylines \everypar={\hangindent 2\parindent}
  \exhyphenpenalty=10000 \raggedright}
%  \inxentry: a line starting with \sub is a subentry, \endindex ends the
%  index (after a \vfill), and anything else is a main entry.
\def\inxentry{\ifx\next\sub \let\next=\subentry
  \else\ifx\next\endindex \let\next=\vfill
  \else\let\next=\mainentry \fi\fi\next}
%  \endindex: clear the mark, close the group and eject; the two identical
%  tests then each eject once more while \lr is not back at L, which covers
%  up to two partly filled columns.
\def\endindex{\mark{}\break\endgroup
\supereject
\if L\lr \else\null\vfill\eject\fi
\if L\lr \else\null\vfill\eject\fi
}
%  Main entries and subentries: the text up to the first comma is remembered
%  in \maintoks or \subtoks and placed in marks around the entry.
\let\sub=\indent \newtoks\maintoks \newtoks\subtoks
\def\mainentry#1,{\mark{}\noindent
  \maintoks={#1}\mark{\the\maintoks}#1,}
\def\subentry\sub#1,{\mark{\the\maintoks}\indent
  \subtoks={#1}\mark{\the\maintoks\sub\the\subtoks}#1,}

%  \subsection{title}: a bold run-in heading followed by a \qquad.
\def\subsection#1{\medbreak\par\noindent{\bf #1}\qquad}

%  For contents
%
%  \cl{number}{title}: a bold heading line with no page number.
%  \li{number}{title}{page}: bold number, title, dotted leaders to the page.
%  \sli{number}{title}{page}: the indented form, which is the line that
%  \newsection writes to the contents file.
%  \fcl{title}{page}: bold title with leaders, aligned as if it had a number.
%  The leadered lines are all set in a box 5in wide.

\def\cl#1#2{\bigskip\par\noindent{\bf #1}\quad {\bf #2}}
\def\li#1#2#3{\smallskip\par\noindent\hbox to 5 in{{\bf #1}\quad #2\dotfill #3}}
\def\sli#1#2#3{\par\noindent\hbox to 5 in{\qquad\item{#1}\quad #2\dotfill #3}}
\def\fcl#1#2{\bigskip\par\noindent\hbox to 5 in{\phantom{\bf 1}\quad {\bf #1}\dotfill #2}}

% Epigrams

%  Openers: each starts a group in nine-point type with narrowed margins.
%  The poem forms obey line ends and narrow three, two or one times; the
%  quote forms narrow twice (or once for \widequote) and begin an unindented
%  paragraph after a \medskip.
\def\poem{\begingroup\narrower\narrower\narrower\obeylines\ninepoint}
\def\widepoem{\begingroup\narrower\narrower\obeylines\ninepoint}
\def\verywidepoem{\begingroup\narrower\obeylines\ninepoint}
\def\quote{\medskip\begingroup\narrower\narrower\noindent\ninepoint}
\def\widequote{\medskip\begingroup\narrower\noindent\ninepoint}
%  Closers: each prints an attribution line, switches back to ten-point,
%  closes the group and adds a \bigskip.
%  \poemby{author}{title} and \widepoemby{author}{title} set the title in
%  italic; \quoteby{author} gives the author only.
\def\poemby#1#2{\par\smallskip\qquad\qquad\qquad\qquad\qquad -- #1, {\it #2}
\tenpoint\endgroup\bigskip}
\def\widepoemby#1#2{\par\smallskip\qquad\qquad\qquad -- #1, {\it #2}
\tenpoint\endgroup\bigskip}
\def\quoteby#1{\par\smallskip\qquad\qquad\qquad\qquad\qquad
 -- #1\tenpoint\endgroup\bigskip}
%  \tlwidequoteby{a}{b}: two separate attribution lines, each with a dash.
\def\tlwidequoteby#1#2{\par\smallskip\qquad
 -- #1\par\smallskip\qquad\qquad\qquad
 -- #2\tenpoint\endgroup\bigskip}
%  \tvquoteby{a}{b}: an attribution with a continuation line under it.
\def\tvquoteby#1#2{\par\smallskip\qquad
 -- #1\par\qquad\qquad\quad
 \phantom{--} #2\tenpoint\endgroup\bigskip}
%  \endquote: close an epigram without any attribution.
\def\endquote{\par\tenpoint\endgroup\medskip}

%
%  End of macros

%  \subtitle{text}: a bold unnumbered heading on its own line.
\def\subtitle#1{\bigbreak\noindent{\bf #1}\medskip}
\newdimen\stepin
\newdimen\tstepin
\stepin=60pt
%  \block{label}: a paragraph indented by \stepin with the label, in \tt,
%  set flush left in the indentation.  \continue is \block with no label.
\def\block#1{\par%\rlap{{\tt #1}}
\hangindent\stepin\hangafter0\noindent\strut\llap{\hbox to\stepin{{\tt #1}\hfill}}%
\noindent}
\def\continue{\block{}}
%  \sectno: number of the current section, set by \specs and printed by \sp.
\newcount\sectno
%  \block reads \stepin when it is used, so this later value is the one
%  in force for the text that follows.
\stepin=40pt
%  \orm and \frm: paragraph starts with no hanging indentation, and with a
%  20pt indentation and hang, respectively.
\def\orm{\smallskip\par\noindent\hangindent=0pt}
\def\frm{\par\parindent=20pt\hangindent=20pt}
%  \sp{n}: a numbered paragraph labelled "<section>.n" in bold, hung by 20pt.
%  (This replaces plain TeX's \sp superscript synonym.)  \nsp is the same
%  with a \medskip before it.
\def\sp#1{\smallskip\noindent {\bf{\the\sectno.#1}}\quad\hangindent=20pt}
\def\nsp#1{\medskip\sp{#1}}
%  \specs{number}{title}: return to ten-point, start the section and record
%  its number in \sectno.
\def\specs#1#2{\tenpoint\section{#1}{#2}\sectno=#1}
%  \remarks: switch to nine-point for a passage headed "Remarks."  No group
%  is opened here, so the size stays in force until something resets it.
\def\remarks{\medskip\ninepoint\noindent {\sl Remarks.}\qquad}
%  Numbering of the main text starts at page 2.
\pageno=2

% ---------------------------------------------------------------------------
% ---------------------------------------------------------------------------
% ---------------------------------------------------------------------------

\section{}{Preface}

The Z-machine was created on a coffee table in Pittsburgh in 1979.  It
is an imaginary computer whose programs are adventure games, and is
well-adapted to its task, implementing complex games remarkably compactly. 
They were still perhaps 100K long and the Z-machine seems to have made the
first usage of virtual memory on a microcomputer.  Further ahead of its time
was the ability to efficiently save and restore the entire execution state
(something we would do well to rediscover as parallel processing takes
over).

The design's cardinal principle is that any game is 100\% portable to
different computers: that is, any legal program exactly determines its
behaviour.  This portability is largely made possible by a willingness to
constrain maximum as well as minimum levels of performance (for instance,
dynamic memory allocation is impossible) and by a very primitive
operating-system interface (so file-naming issues hardly arise).  The
strategy is the opposite extreme to that of the C language, which sacrifices
predictable behaviour for performance: for instance, a programmer never
knows how many bits will make up an |int| or whether |char| will be signed.

But this is not a historical or theoretical paper, because the Z-machine is
widely used in practice to play Infocom and Inform-produced games.  It is a
standards document which aims to exactly describe the correct behaviour, and
is a variorum description in that it describes every different Version of
the machine.  (However, the Version 6 standard will remain provisional until
we have more experience with it.)

\subtitle{Why do we need a ``standards'' document?}

Since the end of the 1980s, interpreters have been in the public domain
which almost properly implement the Z-machine.  Good portable source code
for these has twice been published.  Each interpreter was then ported to
many different machines, where its behaviour was subtly altered, usually
because the porter noticed a missing feature and added it, or had to guess
something.  The ports have grown elaborate and corrections are now difficult
to propagate.  The casual user who downloads an interpreter cannot be sure
how accurate it will be: a ``new'' interpreter (with a beautiful new user
interface) may be built on a partly-repaired core which is five years old. 
One reason for a standard, then, is to increase the pressure to return to a
good common release.  Players will know what to ask for (can you get
interpreter 1.1 for the Mac?) and porters will be aware if the core has
changed.

More fundamentally, the problem has changed.  Until 1993 there were only
about 130 story files known, variant forms of 35 games and a few oddments,
all produced by the same compiler's code generator.  An interpreter could
safely be incomplete.  For instance, the |not| opcode was unnecessary
since it never occurred in any game.  Today there is a quite large base of
Inform users and many more games will be in circulation: and designers of
these new games want to know what they can depend on.  There is also
pressure for future extension of the format, so a game itself will need to
know what kind of interpreter is running it.

\subtitle{So what is ``standard''?}

To call itself ``Standard'', an interpreter should (as far as anyone knows)
obey this document exactly for every Version of the Z-machine it claims to
interpret.  (There's no problem with a standard interpreter which interprets
Version 5 only, for instance.)  Each edition of this document will be given
a Revision number (from 1.0 upwards), somewhat like the JFIF identification
number used by the JPEG standard.  A standard interpreter should communicate
the revision number it obeys in three ways:

\item{(a)} To someone downloading it from an FTP site or bulletin board:
by including it in its filename.
\item{(b)} To the player: for instance by means of an ``information'' option
on a menu, or in an initialisation sequence.
\item{(c)} To the game: by writing it into bytes in the header which were
always left zero before this standard was devised (see \S 11).  A game
compiled with Inform library 5/12 or later prints the revision number in its
banner (if this isn't 0.0).

\noindent Few arbitrary choices have been made in writing this document.  On
the few points where Infocom's own shipped interpreters disagree it has
usually been possible to decide which was ``correct''.  Elsewhere, minimum
levels of performance have been invented where necessary.  (For example, a
minimum call-stack size is needed for programmers to be sure of what level
of recursion is safe.  The call-stack size currently used by |Zip| has
been taken as the standard.)

Existing interpreters are close to the standard already.  Most ``difficult''
features (colours, fonts, sound effects, pictures, etc.) are optional, so
that a port only needs to set some header bit to indicate that it can't
oblige.  The big exception is timed input (in which an interrupt routine is
run every few tenths of a second while the player is deciding what to type). 
Some ports can't manage this for operating-system reasons, others can but
don't because it's too much trouble.  In Infocom's specification the feature
is mandatory, but many ports of |Zip| ignore it.  In this document it is
optional and a new header bit has been allocated: see \S 11.

The very few paragraphs which actually extend the Infocom format, such as
the one describing this header bit, are marked |***|.

\subtitle{Terminology}

It is assumed that the reader is familiar with terms like `object', `tree',
`attribute', `property', `local and global variable'.  (See Chapter I of the
{\sl Inform Designer's Manual} for explanation of these.)

So far, eight Versions of the Z-machine exist, and the first byte of any
``story file'' (that is: any game program in the Infocom format) gives the
Version number it must be interpreted under.

The opcode names used in this document are those used by Inform 5.4 and
later.  The names are extended from those chosen by Mark Howell for his
disassembler |Txd| and were agreed on between him and the author as
a standard set.  We hope this will provide interpreter writers and
others with a common lexicon, and it would be helpful if future interpreter
sources use these names internally.

Hexadecimal numbers are written with an initial dollar, as in |$ff|, while
binary numbers are written with a double-dollar as in |$$11011|, according
to Inform conventions.  The bits in a byte are numbered 0 to 7, 0 being
the least significant and the top bit, 7, the most.

\subtitle{Where are all the grammar tables?}

The Z-machine has some lexical acuity but it doesn't contain a full parser:
it's like a computer without an operating system.
A game program has to contain its own parser
and the tables this uses are not part of the formal Z-machine specification.
(The Infocom games have similar parsing table formats since all the
Versions 1 to 5 games used a parser which slowly evolved from the
`Zork I' parser.)  Inform's parsing table
format is documented in the {\sl Inform Technical Manual}.  For the usual
format of Infocom's parsing tables, see the C source code to Mark Howell's
utility ``Infodump''.

\subtitle{Acknowledgements}

\quote
     There is an obvious resemblance between an unreadable script
     and a secret code; similar methods can be employed to break
     both.  But the differences must not be overlooked.  The code is
     deliberately designed to baffle the investigator; the script
     is only puzzling by accident.
\poemby{John Chadwick}{The Decipherment of Linear B}

The Z-machine was originally devised by Joel Berez and Marc Blank in 1979.
Marc Blank made most of the Version 4 extensions, and Version 5 was created
by Dave Lebling (with contributions from others including Brian Moriarty,
Duncan Blanchard and Linde Dynneson).  Version 6 was largely the work of Tim
Anderson and Dave Lebling.

In the reverse direction, decipherment is mostly due to the InfoTaskForce
(David Beazley, George Janczuk, Peter Lisle, Russell Hoare and Chris Tham),
Matthias Pfaller, Mike Threepoint, Mark Howell and Paul David Doherty. 
(Only a few of the pieces in the jigsaw were placed by myself.)

I gratefully acknowledge the help of Paul David Doherty and Mark Howell, who
each read drafts of this paper and sent back detailed corrections; also, of
Stefan Jokisch and Marnix Klooster who have put a great deal of work into
the fine detail of the specification; and of all those who commented on
the circulated draft, whose comments were mainly presentational but no less
important for that.  Mistakes and misunderstandings remain my own.

\medskip
\hbox to\hsize{\hfill\it Graham Nelson}
\hbox to\hsize{\hfill\it St Anne's College, Oxford}
\hbox to\hsize{\hfill\it 15 November 1995}
\medskip

\newpage

% ----------------------------------------------------------------------------

\specs{1}{The memory map}
\sp{1} The memory map of the Z-machine is an array of bytes with
``byte addresses'' running from 0 upwards.  This is divided into
three regions: ``dynamic'', ``static'' and ``high''.  Dynamic
memory begins from byte address |$00000| and runs up to the byte
before the byte address stored in the word at |$0e| in the header.
(Dynamic memory must contain at least 64 bytes.)  Static memory
follows immediately on.  Its extent is not defined in the header
(or anywhere else), though it must end by the last byte of the
story file or by byte address |$0ffff| (whichever is lower).
High memory begins at the ``high memory mark'' (the byte address
stored in the word at |$04| in the header) and continues to the
end of the story file.  The bottom of high memory may overlap with
the top of static memory (but not with dynamic memory).

\sp{1.1} Dynamic memory can be read or written to (either directly,
using |loadb|, |loadw|, |storeb| and |storew|, or indirectly with
opcodes such as |insert_obj| and |remove_obj|).

\sp{1.1.1} By tradition, the first 64 bytes are known as the
``header''.  The contents of this are given later but note that games
are not permitted to alter many bits inside it.

\sp{1.1.2} It is legal for games to alter any of the tables stored
in dynamic memory above the header, provided they leave the tables
in legal states.

\sp{1.2} Static memory can be read using the opcodes |loadb| and
|loadw|.  It is illegal for a game to attempt to write to static
memory.

\sp{1.3} Except for its (possible) overlap with static memory,
high memory cannot be directly accessed at all by a
game program.  It contains routines, which can be called,
and strings, which can be printed using |print_paddr|.

\sp{1.4} The maximum permitted length of a story file depends
on the Version, as follows (in Kbytes):
$$ \matrix{\hbox{V1--3} & \hbox{V4--5} & \hbox{V6} & \hbox{V7} & \hbox{V8}\cr
                  128 &   256       &   576    &   320    &   512\cr} $$

\nsp{2} There are three kinds of address in the Z-machine, all of
which can be stored in a 2-byte number: byte addresses, word
addresses and packed addresses.

\sp{2.1} A byte address specifies a byte in memory in the range 0
up to the last byte of static memory.

\sp{2.2} A word address specifies an even address in the bottom
128K of memory (by giving the address divided by 2).  (Word addresses
are used only in the abbreviations table.)

\sp{2.3} |***| A packed address specifies where a routine or string begins
in high memory.  Given a packed address $P$, the formula to obtain the
corresponding byte address $B$ is:
$$ B = \cases { 2P & Versions 1, 2 and 3\cr
                4P & Versions 4 and 5\cr
                4(P+R_O) & Versions 6 and 7, for routine calls\cr
                4(P+S_O) & Versions 6 and 7, for {\tt print\_paddr}\cr
                8P & Version 8\cr
} $$
$R_O$ and $S_O$ are the routine and strings offsets (specified in the
header as words at |$28| and |$2a|, respectively).

% ----------------------------------------------------------------------------
\topinsert
\centerline{\sl An example memory map of a small game}
\medskip
$$ \vbox{\offinterlineskip
\hrule
\halign{\vrule#&\strut\quad{\it #}\hfil\quad&\hfil # \quad&%
\vrule#&\strut\quad\hfil#\hfil\quad&\vrule#\cr
height2pt&\omit&\omit&&\omit&\cr
&& Start && Contains &\cr
height2pt&\omit&\omit&&\omit&\cr
\noalign{\hrule}
height2pt&\omit&\omit&&\omit&\cr
& Dynamic& |00000| && header &\cr
&        & |00040| && abbreviation strings &\cr
&        & |00042| && abbreviation table &\cr
&        & |00102| && property defaults &\cr
&        & |00140| && objects &\cr
&        & |002f0| && object descriptions &\cr
&        &         && and properties &\cr
&        & |006e3| && global variables &\cr
&        & |008c3| && arrays &\cr
& Static & |00b48| && grammar table &\cr
&        & |010a7| && actions table &\cr
&        & |01153| && preactions table &\cr
&        & |01201| && adjectives table &\cr
&        & |0124d| && dictionary &\cr
& High   & |01a0a| && Z-code &\cr
&        & |05d56| && static strings &\cr
&        & |06ae6| && end of file &\cr
height2pt&\omit&\omit&&\omit&\cr
}\hrule}$$
\endinsert

\remarks
Inform never compiles any overlap between static and high memory
(it places all data tables in dynamic memory).  However, many
Infocom games group tables of static data just above the high
memory mark, before routines begin; some, such as `Nord 'n'
Bert...', interleave static data between routines, so that static
memory actually overlaps code; and a few, such as `Seastalker'
release 15, even contain routines placed below the high memory
mark.  (The original idea behind the high memory mark was that
everything below it should be stored in the interpreter's RAM, while
what was above could reasonably be kept in ``virtual memory'', i.e.,
loaded off disc as needed.)

Note that the total of dynamic plus static memory must not exceed 64K.
(In fact, 64K minus 2 bytes.)  This is the most serious limitation
on the Z-machine (though it has not yet been reached by anyone).

Throughout the specification, Versions 7 and 8 are identical
to Version 5 except as stated at 1.1.4 and 1.2.3 above.

\newpage

% ----------------------------------------------------------------------------

\specs{2}{Numbers and arithmetic}

\sp{1} In the Z-machine, numbers are usually stored in 2 bytes
(in the form most-significant-byte first, then least-significant)
and hold any value in the range |$0000| to |$ffff| (0 to 65535
decimal).

\nsp{2} These values are sometimes regarded as signed, in the range
$-32768$ to $32767$.  In effect $-n$ is stored as $65536-n$
and so the top bit is the sign bit.

\sp{2.1} The operations of numerical comparison, multiplication,
addition, subtraction and printing of numbers are signed; bitwise
operations, division and remainder-after-division are unsigned.
(In particular, since comparison is signed, it is unsafe to compare
two addresses using simply |jl| and |jg|.)

\nsp{3} Arithmetic errors:

\sp{3.1} It is illegal to divide by 0 (or to ask for remainder after
division by 0) and an interpreter should halt with an error message
if this occurs.

\sp{3.2} Formally it has never been specified what the result of an
out-of-range calculation should be.  The author suggests that the
result should be reduced modulo |$10000|.

\nsp{4} The Z-machine needs a random number generator which at any time
has one of two states, ``random'' and ``predictable''.  When the game
starts or restarts the state becomes ``random''.  Ideally the generator
should not produce identical sequences after each restart.

\sp{4.1} When ``random'', it must be capable of generating a uniformly
random integer in the range $1\leq x\leq n$, for any value
$1\leq n\leq 32767$.  Any method can be used for this (for instance,
using the host computer's clock time in milliseconds).  The uniformity
of randomness should be optimised for low values of $n$ (say, up to
100 or so) and it is especially important to avoid regular patterns
appearing in remainders after division (most crudely, being alternately
odd and even).

\sp{4.2} The generator is switched into ``predictable'' state with a
seed value.  On any two occasions when the same seed is sown, identical
sequences of values must result (for an indefinite period) until
the generator is switched back into ``random'' mode.  The generator
should cope well with very low seed values, such as 10, and should not
depend on the seed containing many non-zero bits.

\sp{4.3} The interpreter is permitted to switch between these states
on request of the player.  (This is useful for testing purposes.)

\remarks
It is dangerous to rely on the ANSI C random number routines, as some
implementations of these are very poor.  This has made some games (in
particular, `Balances') unwinnable on some Unix ports of |Zip|.

The author suggests the following algorithm:
\item{1.} In ``random'' mode, the generator uses the host computer's
clock to obtain a random sequence of bits.
\item{2.} In ``predictable'' mode, the generator should store the
seed value $S$.  If $S<1000$ it should then internally generate
$$ 1, 2, 3, \ldots, S, 1, 2, 3, \ldots, S, 1, \ldots $$
so that |random n| produces the next entry in this sequence modulo $n$.
If $S\geq 1000$ then $S$ is used as a seed in a standard seeded
random-number generator.

\par\noindent (The rising sequence is useful for testing, since it will
produce all possible values in sequence.  On the other hand, a seeded
but fairly random generator is useful for testing entire scripts.)

% ----------------------------------------------------------------------------

\specs{3}{How text is encoded and printed}

\quote
    This technique is similar to the five-bit Baudot code, which
    was used by early Teletypes before ASCII was invented.
\endquote
\quote
    \quad -- Marc S. Blank and S. W. Galley, {\sl How to Fit a Large Program Into
a Small Machine}
\endquote

\sp{1} A string of encoded text is stored as a sequence of 2-byte
words.  Each of these is divided into three 5-bit `Z-characters', plus
1 bit left over, arranged as
\orm\beginstt
   --first byte-------   --second byte---
   7    6 5 4 3 2  1 0   7 6 5  4 3 2 1 0
   bit  --first--  --second---  --third--
\endtt\frm
The bit is set only on the last 2-byte word of the text, and so marks the
end.

\nsp{2} There are three `alphabets', A0 (lower case), A1 (upper case) and
A2 (punctuation) and during printing one of these is current at any given
time.  Initially A0 is current.  The meaning of a Z-character may depend on
which alphabet is current.

\sp{2.1} In Versions 1 and 2, the current alphabet can be any of the
three.  The Z-characters 2 and 3 are called `shift' characters and change
the alphabet for the next character only.  The new alphabet depends on
what the current one is:
\orm\beginstt
             from A0  from A1  from A2
  Z-char 2      A1       A2       A0
  Z-char 3      A2       A0       A1
\endtt\frm
Z-characters 4 and 5 permanently change alphabet, according to the
same table, and are called `shift lock' characters.

\sp{2.2} In Versions 3 and later, the current alphabet is always A0
unless changed for 1 character only: Z-characters 4 and 5 are shift
characters.  Thus 4 means ``the next character is in A1'' and 5 means
``the next is in A2''.  There are no shift lock characters.

\sp{2.3} An indefinite sequence of shift or shift lock characters is
legal (but prints nothing).

\nsp{3} In Versions 3 and later, Z-characters 1, 2 and 3 represent
abbreviations, sometimes also called `synonyms' (for traditional reasons):
the next Z-character indicates which abbreviation string to print.  If $z$
is the first Z-character (1, 2 or 3) and $x$ the subsequent one, then the
interpreter must look up entry $32(z-1)+x$ in the abbreviations table and
print the string at that word address.  In Version 2, Z-character 1 has this
effect (but 2 and 3 do not, so there are only 32 abbreviations).

\sp{3.1} Abbreviation string-printing follows all the rules of this section
except that an abbreviation string must not itself use abbreviations and
must not end with an incomplete multi-Z-character construction (see \S 3.6.1
below).

\nsp{4} Z-character 6 from A2 means that the two subsequent Z-characters
specify a ten-bit character code: the next Z-character gives the top 5
bits and the one after the bottom 5.  As detailed below, this is
printed using an extended form of the ASCII standard (for seven-bit
character codes).

\sp{4.1} Some Inform users require unusual accented characters (in one
case, Chinese characters).  Inform is able to produce `ASCII' values
which use the full 10 bits (using the |@@| string escape).  Game
designers may want to be able to modify the interpreter to print
something suitable when a value of 256 or above is found, and interpreter
writers are asked to make this easy.

\sp{4.1.1} The author wishes to reserve the `ASCII' values
768 to 1023 for future specification.  (One idea would be that
such a code causes a routine in Z-code to be called.  This would allow
much greater flexibility in variable printing.)

\sp{4.2} ASCII ``control codes'' in the range 0 to 31 are illegal
(i.e. should not be printed in any story file) except as follows:

\sp{4.2.1} Character 0 (ASCII ``null'') is legal but prints nothing.

\sp{4.2.2} Character 13 (``carriage return'') prints a newline.

\sp{4.2.3} In Version 6, character 9 (``tab'') at the start of a screen line
should print a paragraph indentation suitable for the font being used: if it
is printed in the middle of a screen line, it should be a space.  Character
11 (``cursor up'') should be printed as a suitable gap between two
sentences (in the same way that typographers normally place larger spaces
after the full stops ending sentences than after words or commas).

\sp{4.3} Character codes between 32 (``space'') and 126 (``tilde'') are
legal and are printed from the standard ASCII character set:
\orm\beginstt
      0123456789abcdef0123456789abcdef
      --------------------------------
 $20   !"#$%&'()*+,-./0123456789:;<=>?
 $40  @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
 $60  `abcdefghijklmnopqrstuvwxyz{!}~ 
      --------------------------------
\endtt\frm
In particular code |$23| (35 decimal) is a hash mark, not a pound sign.
(Code |$7c| (124 decimal) is a vertical stroke which is shown as |!|
here for typesetting reasons.)  Character 127 (``delete'') is illegal.

\sp{4.4} The `ASCII' values between 128 and 154 are at present
undefined: undefined character values should be printed as question
marks.  The range 155 to 251 is reserved for European accented
characters, but 220 to 251 are undefined.  Accented characters should
either be printed from a suitable font (they are all taken from the
`ISO Latin 1' standard set) or transliterated into plain text as in
the following table:
\orm\beginstt
155:  a-umlaut      ae            191:  a-circumflex      a
156:  o-umlaut      oe            192:  e-circumflex      e
157:  u-umlaut      ue            193:  i-circumflex      i
158:  A-umlaut      Ae            194:  o-circumflex      o
159:  O-umlaut      Oe            195:  u-circumflex      u
160:  U-umlaut      Ue            196:  A-circumflex      A
161:  sz-ligature   ss            197:  E-circumflex      E
162:  quotation     << or "       198:  I-circumflex      I
163:    marks       >> or "       199:  O-circumflex      O
164:  e-umlaut      e             200:  U-circumflex      U
165:  i-umlaut      i             201:  a-ring            a
166:  y-umlaut      y             202:  A-ring            A
167:  E-umlaut      E             203:  o-slash           o
168:  I-umlaut      I             204:  O-slash           O
169:  a-acute       a             205:  a-tilde           a
170:  e-acute       e             206:  n-tilde           n
171:  i-acute       i             207:  o-tilde           o
172:  o-acute       o             208:  A-tilde           A
173:  u-acute       u             209:  N-tilde           N
174:  y-acute       y             210:  O-tilde           O
175:  A-acute       A             211:  ae-ligature       ae
176:  E-acute       E             212:  AE-ligature       AE
177:  I-acute       I             213:  c-cedilla         c
178:  O-acute       O             214:  C-cedilla         C
179:  U-acute       U             215:  Icelandic thorn   th
180:  Y-acute       Y             216:  Icelandic eth     th
181:  a-grave       a             217:  Icelandic Thorn   Th
182:  e-grave       e             218:  Icelandic Eth     Th
183:  i-grave       i             219:  pound symbol      L
184:  o-grave       o
185:  u-grave       u
186:  A-grave       A
187:  E-grave       E
188:  I-grave       I
189:  O-grave       O
190:  U-grave       U
\endtt\frm
|***| The values from 164 onward are defined for the first time
in this standard.  (Note that all these values are the same as the
keyboard input character codes for the same letters.)

\sp{4.5} The `ASCII' values 252 to 255 are illegal, not undefined.

\nsp{5} The remaining Z-characters translate directly into printed
characters:

\sp{5.1} The Z-character 0 is printed as a space.

\sp{5.2} In Version 1, Z-character 1 is printed as a new-line.

\sp{5.3} In Versions 2 and later, Z-characters in the range 6 to 31
depend on the current alphabet.  Except for character 6 in A2, they
are printed as:
\orm\beginstt
   Z-char 6789abcdef0123456789abcdef
current   --------------------------
  A0      abcdefghijklmnopqrstuvwxyz
  A1      ABCDEFGHIJKLMNOPQRSTUVWXYZ
  A2       ^0123456789.,!?_#'"/\-:()
          --------------------------
\endtt\frm
(Character 7 in A2, written here as a circumflex |^|, is a new-line.)

\sp{5.4} Version 1 has the same table except that A2 (needing no
new-line) accommodates the |<| character as well.  It has the form:
\orm\beginstt
          6789abcdef0123456789abcdef
          --------------------------
  A2       0123456789.,!?_#'"/\<-:()
          --------------------------
\endtt\frm

\sp{5.5} In Versions 5 and later, a game may replace the above
table by providing its own ``character set table''.  It does this
by giving the byte address of such a table in the word at |$34| in
the header.  (If this byte address is 0, then the default table
above is used.)

\sp{5.5.1} The character set table consists of 78 bytes arranged
as 3 blocks of 26 ASCII values, translating Z-characters 6 to 31
for alphabets A0, A1 and A2.  Z-characters 6 and 7 of A2, however,
are still translated as escape and newline codes (as above).

\nsp{6} Since the end-bit only comes up once every three Z-characters,
a string may have to be `padded out' with null values.  This is
conventionally achieved with a sequence of 5's, though a sequence of
(for example) 4's would work equally well.

\sp{6.1} It is legal for the string to end while a multi-Z-character
construction is incomplete: for instance, after only the top half of
an ASCII value has been given.  The partial construction is simply
ignored.  (This can happen in printing dictionary words which have
been guillotined to the dictionary resolution of 6 or 9 Z-characters.)

\nsp{7} When encrypting text for a dictionary word: A1 may not be
used; nor may abbreviations; the pad character, if needed, must be 5; and
the total string length must be 6 Z-characters (in Versions 1 to 3) or 9
(Versions 4 and later).  For example, ``i'' is encrypted as
\begindisplay
14, 5, 5, 5, 5, 5, 5, 5, 5\quad |$38a5| |$14a5| |$94a5|\cr
\enddisplay

\remarks 
In practice the text compression factor is not really very good: for
instance, 155000 characters of text squashes into 99000 bytes.  (Text
usually accounts for about 75\% of a story file.)  Encoding does
at least encrypt the text so that casual browsers can't read it.
Well-chosen abbreviations will reduce total story file size by 10\% or so.

The German translation of `Zork I' uses a character set table for accented
letters and is illegible on interpreters (like |ITF|) which do not implement
this feature.  (`Shogun' also needs the character set table.)

It is helpful for an interpreter to filter out any ASCII control
characters other than those explicitly legalised above, as this makes
run-time crashes of the ``printing random text'' kind much less
severe for the terminal.

The continental European quotation marks |<<| and |>>| should have
spacing which looks sensible either in French style |<<|Merci!|>>|
or in German style |>>|Danke!|<<|.

Further accented characters may be allocated codes later.  Other graphical
or unusual characters are best handled by creating a new font (see \S 16
for an example font).

% ----------------------------------------------------------------------------

\specs{4}{How instructions are encoded}

\widepoem
We do but teach bloody instructions
Which, being taught, return to plague th' inventor
\poemby{Shakespeare}{Macbeth}

\sp{1} A single Z-machine instruction consists of the following
sections (and in the order shown):
\orm\beginstt
  Opcode               1 or 2 bytes
  (Types of operands)  1 or 2 bytes: 4 or 8 2-bit fields
  Operands             Between 0 and 8 of these: each 1 or 2 bytes
  (Store variable)     1 byte
  (Branch offset)      1 or 2 bytes
  (Text to print)      An encoded string (of unlimited length)
\endtt\frm
Bracketed sections are not present in all opcodes.  (A few opcodes
take both ``store'' and ``branch''.)

\nsp{2} There are four `types' of operand.  These are often specified
by a number stored in 2 binary digits:
\orm\beginstt
  $$00    Large constant (0 to 65535)    2 bytes
  $$01    Small constant (0 to 255)      1 byte
  $$10    Variable                       1 byte
  $$11    Omitted altogether             0 bytes
\endtt\frm

\sp{2.1} Large constants, like all 2-byte words of data in the Z-machine,
are stored with most significant byte first (e.g. |$2478| is stored as
|$24| followed by |$78|).  A `large constant' may in fact be a small
number.

\sp{2.2} Variable number |$00| refers to the top of the stack,
|$01| to |$0f| mean the local variables of the current routine
and |$10| to |$ff| mean the global variables.  It is illegal to
refer to local variables which do not exist for the current routine
(there may even be none).

\sp{2.3} The type `Variable' really means ``variable by value''.  Some
instructions take as an operand a ``variable by reference'': for instance,
|inc| has one operand, the reference number of a variable to increment.
This operand usually has type `Small constant' (and Inform automatically
assembles a line like |@inc turns| by writing the operand |turns| as
a small constant with value the reference number of the variable |turns|).

\nsp{3} Each instruction has a form (long, short, extended or variable)
and an operand count (0OP, 1OP, 2OP or VAR).
If the top two bits of the opcode are |$$11| the form is variable;
if |$$10|, the form is short.  If the opcode is 190 (|$BE| in hexadecimal)
and the version is 5 or later, the form is ``extended''.  Otherwise, the
form is ``long''.

\sp{3.1} In short form, bits 4 and 5 of the opcode byte give an operand
type as above.  If this is |$$11| then the operand count is
0OP; otherwise, 1OP.  In either case the opcode number is
given in the bottom 4 bits.

\sp{3.2} In long form the operand count is always 2OP.  The opcode number
is given in the bottom 5 bits.

\sp{3.3} In variable form, if bit 5 is 0 then the count is 2OP;
if it is 1, then the count is VAR.  The opcode number is given
in the bottom 5 bits.

\sp{3.4} In extended form, the operand count is VAR.  The opcode number
is given in a second opcode byte.

\nsp{4} Next, the types of the operands are specified.

\sp{4.1} In short form, bits 4 and 5 of the opcode give the type.

\sp{4.2} In long form, bit 6 of the opcode gives the type of
the first operand, bit 5 of the second.  A value of 0 means a small
constant and 1 means a variable.  (If a 2OP instruction needs a
large constant as operand, then it should be assembled in variable
rather than long form.)

\sp{4.3} In variable or extended forms, a byte of 4 operand types is
given next.  This contains 4 2-bit fields: bits 6 and 7 are the first
field, bits 0 and 1 the fourth.  The values are operand types as above.
Once one type has been given as `omitted', all subsequent ones must
be.  Example: |$$00101111| means large constant followed by variable
(and no third or fourth operand).

\sp{4.3.1} In the special case of the ``double variable'' VAR opcodes
|call_vs2| and |call_vn2| (opcode numbers 12 and 26), a second byte
of types is given, containing the types for the next four operands.

\nsp{5} The operands are given next.  Operand counts of 0OP, 1OP or 2OP
require 0, 1 or 2 operands to be given, respectively.  If the count is VAR,
there must be as many operands as there were types other than `omitted'.

\sp{5.1} Note that only |call_vs2| and |call_vn2| can have more than 4
operands, and no instruction can have more than 8.

\nsp{6} ``Store'' instructions return a value: e.g., |mul| multiplies
its two operands together.  Such instructions must be followed by a
single byte giving the variable number of where to put the result.

\nsp{7} Instructions which test a condition are called ``branch''
instructions.  The branch information is stored in one or two bytes,
indicating what to do with the result of the test.  If bit 7 of
the first byte is 0, a branch occurs when the condition was false;
if 1, then branch is on true.  If bit 6 is set, then the branch
occupies 1 byte only, and the ``offset'' is in the range 0 to 63,
given in the bottom 6 bits.  If bit 6 is clear, then the offset is
a signed 14-bit number given in bits 0 to 5 of the first byte
followed by all 8 of the second.

\sp{7.1} An offset of 0 means ``return false from the current
routine'', and 1 means ``return true from the current routine''.

\sp{7.2} Otherwise, a branch moves execution to the instruction at
address
\orm\beginstt
  Address after branch data + Offset - 2.
\endtt\frm

\nsp{8} Two opcodes, |print| and |print_ret|, are followed by a text
string.  This is stored according to the usual rules: in particular
execution continues after the last 2-byte word of text (the one with
top bit set).

\remarks
Some opcodes have type VAR only because the available codes for
the other types had run out; |print_char|, for instance.  Others, especially
|call|, need the flexibility to have between 1 and 4 operands.

The Inform assembler can assemble branches in either form, but the compiler
always writes 2-byte branch data and never uses offset values of 0 or 1.
(The computation involved in achieving these optimisations outweighs the
slight gain.)

The disassembler |Txd| numbers locals from 0 to 14 and globals from
0 to 239 in its output (corresponding to variable numbers 1 to 15, and
16 to 255, respectively).

The branch formula is sensible because in the natural implementation,
the program counter is at the address after the branch data when the branch
takes place: thus it can be regarded as
\beginstt
  PC = PC + Offset - 2.
\endtt
If the rule were simply ``add the offset'' then, since the offset couldn't
be 0 or 1 (because of the return-false and return-true values), we would
never be able to skip past a 1-byte instruction (say, a 0OP like |quit|),
or specify the branch ``don't branch at all'' (sometimes useful to ignore
the result of the test altogether).  Subtracting 2 means that the only
effects we can't achieve are
\beginstt
  PC = PC - 1     and     PC = PC - 2
\endtt
and we would never want these anyway, since they would put the program
counter somewhere back inside the same instruction, with horrid
consequences.

\bigskip\noindent{\sl On disassembly}
\medskip
\noindent Briefly, the first byte of an instruction can be decoded
using the following table:
\beginstt
  $00 -- $1f  long      2OP     small constant, small constant
  $20 -- $3f  long      2OP     small constant, variable
  $40 -- $5f  long      2OP     variable, small constant
  $60 -- $7f  long      2OP     variable, variable
  $80 -- $8f  short     1OP     large constant
  $90 -- $9f  short     1OP     small constant
  $a0 -- $af  short     1OP     variable
  $b0 -- $bf  short     0OP
  except $be  extended opcode given in next byte
  $c0 -- $df  variable  2OP     (operand types in next byte)
  $e0 -- $ff  variable  VAR     (operand types in next byte(s))
\endtt
Here is an example disassembly:
\beginlines
|  @inc_chk c 0 label;    05 02 00 d4|
|      long form; count 2OP; opcode number 5; operands:|
|          02     small constant (referring to variable c)|
|          00     small constant 0|
|      branch if true: 1-byte offset, 20 (since label is|
|      18 bytes forward from here).|
|  @print "Hello.^";      b2 11 aa 46 34 16 45 9c a5 |
|      short form; count 0OP.|
|      literal string, Z-chars: 4 13 10  17 17 20  5 18 5  7 5 5.|
|  @mul 1000 c sp;        d6 2f 03 e8 02 00|
|      variable form; count 2OP; opcode number 22; operands:|
|          03 e8  large constant (1000 decimal)|
|          02     variable c|
|      store result to stack pointer (var number 00).|
|  @call_1n Message;      8f 01 56|
|      short form; count 1OP; opcode number 15; operand:|
|          01 56  large constant (packed address of routine)|
|  .label;|
\endlines

% ----------------------------------------------------------------------------

\specs{5}{How routines are encoded}

\sp{1} A routine is required to begin at an address in memory which
can be represented by a packed address (for instance, in Version 5
it must occur at a byte address which is divisible by 4).

\nsp{2} A routine begins with one byte indicating the number of local
variables it has (between 0 and 15 inclusive).

\sp{2.1} In Versions 1 to 4, that number of 2-byte words follows,
giving initial values for these local variables.  In Versions 5 and
later, the initial values are all zero.

\nsp{3} Execution of instructions begins from the byte after this
header information.  There is no formal `end-marker' for a routine
(it is simply assumed that execution eventually results in a return
taking place).

\nsp{4} In Version 6, there is a ``main'' routine (whose packed address
is stored in the word at |$06| in the header) called when the game
starts up.  It is illegal to return from this routine.

\nsp{5} In all other Versions, the word at |$06| contains the
byte address of the first instruction to execute.  The Z-machine
starts in an environment with no local variables from which, again,
a return is illegal.

\remarks
Note that it is permissible for a routine to be in dynamic memory.
Marnix Klooster suggests this might be used for compiling code at
run time!

In Versions 3 and 4, Inform always stores 0 as the initial values
for local variables.

Inform's ``main'' routine is required not to have local variables
and has to be the first defined routine.  This ensures it is in the
bottom 64K of memory, as it must be (in Versions other than 6).

% ----------------------------------------------------------------------------

\specs{6}{The game state: storage and routine calls}

\sp{1} The ``state of play'' is defined as the following: the contents
of dynamic memory; the contents of the stack; the value of the program
counter (PC), and the ``routine call state'' (that is, the chain of routines
which have called each other in sequence, and the values of their local
variables).  Note that the routine call state, the stack and the PC
must be stored outside the Z-machine memory map, in the interpreter's
private memory.

\sp{1.1} The entire state of play must be stored when the game is saved.

\sp{1.1.1} The format of a saved game file is not specified.

\sp{1.1.2} An internal saved game for ``undo'' purposes (if there is one) is
not part of the state of play.  This is important: if a saved game file also
contained the internal saved game at the time of saving, it would be
impossible to undo the act of restoration.  It also prevents internal
saved games from growing larger and larger as they include their
predecessors.

\sp{1.2} On a ``restore'' or ``undo'' (which restores a game saved into
internal memory), the entire state of play is written back except for one
bit: bit 0 of `Flags 2' in the header, the flag revealing whether the game
is being transcribed to printer.

\sp{1.2.1} Before a ``restore'', an interpreter should check that the
file to be used has been saved from the same game currently being played.
(See remark below.)

\sp{1.2.2} After a ``restore'' or ``undo'', an interpreter should reset
the header values marked |Rst| in the header table of \S 11.  (It should
not be assumed that the game was saved by the same interpreter.)

\sp{1.3} A ``restart'' is similar: the entire state is restored from the
original story file; but the transcription bit is preserved; and the
interpreter should reset the |Rst| parts of the header.

\sp{1.4} In Versions 5 and later, an interpreter unable to save the game
state into internal memory (for ``undo'' purposes) must clear bit 4 of
`Flags 2' in the header.

\nsp{2} Global variables (variable numbers |$10| to |$ff|) are stored
in a table in the Z-machine's dynamic memory, at a byte address given in
word 6 of the header.  The table consists of 240 2-byte words and the
initial values of the global variables are the values initially contained in
the table.  (It is legal for a program to alter the table's contents
directly in play, though not for it to change the table's address.)

\nsp{3} Writing to the stack pointer (variable number |$00|) pushes a
value onto the stack; reading from it pulls a value off.  Stack entries
are 2-byte words as usual.

\sp{3.1} The stack is considered as empty at the start of each routine:
it is illegal to pull values from it unless values have first been pushed
on.

\sp{3.2} The stack is left empty at the end of each routine: when a
return occurs, any values pushed during the routine are thrown away.

\sp{3.3} Stack size has not previously been specified.  The author
proposes the present capacity of |Zip| as a future minimum standard:
let the `usage' of a routine call be 4 plus the number of local
variables it has.  During a game the total of the usages for each
routine in the recursive chain of routines being called, plus the
game's own stack usage, must never reach 1024.

\nsp{4} Routine calls occur in the following circumstances: when one
of the |call...| opcodes is executed; in Versions 4 and later, when
timed keyboard input is being monitored; in Versions 5 and later,
when a sound effect finishes; in Version 6, when the game begins
(to call the ``main'' routine); in Version 6, when a ``newline
interrupt'' occurs.

\sp{4.1} A routine call may have any number of arguments, from 0 to
3 (in Versions 1 to 4) or 0 to 7 (Versions 5 and later).  All
routines return a value (though sometimes this value is thrown away
afterward: for example by opcodes in the form |call_vn*|).

\sp{4.2} Routine calls preserve local variables and the stack
(except when the return value is stored in a local variable or onto
the top of the stack).

\sp{4.3} A routine call to packed address 0 is legal: it does nothing
and returns false (0).  Otherwise it is illegal to call a packed
address where no routine is present.

\sp{4.4} When a routine is called, its local variables are created
with initial values taken from the routine header (Versions 1 to
4) or with initial value 0 (Versions 5 and later).  Next, the
arguments are written into the local variables (argument 1 into
local 1 and so on).

\sp{4.4.1} It is legal for there to be more arguments than local
variables (any spare arguments are thrown away) or for there to
be fewer.

\sp{4.5} The return value of a routine can be any Z-machine number.
Returning `false' means returning 0; returning `true' means
returning 1.

\nsp{5} A ``stack frame'' is an index to the routine call state
(that is, the call-stack of return addresses from routines currently
running, and values of local variables within them).  This index
is a Z-machine number.  The interpreter must be able to produce the
current value and to set a value further down the call-stack than
the current one, effectively throwing away its recent history
(see |catch| and |throw|).

\nsp{6} In Version 6, the Z-machine understands a third kind of stack: a
``user stack'', which is a table of words in dynamic memory.  The first word
in this table always holds the number of spare slots on the stack (so the
initial value is the capacity of the stack).  The Z-machine makes no
check on stack under-flow (i.e., pulling more values than were pushed)
which would over-run the length of the table if the program allowed it
to happen.

\remarks
Most interpreters store the whole of dynamic memory to disc as part of their
saved game files, which can make them as much as 45K or so long.  A player
making a serious attack on a game may end up wasting a whole megabyte, more
than convenient without a hard disc.  Bryan Scattergood's Psion interpreter
ingeniously avoids this by only saving bytes of dynamic memory which are
different from the initial state of the game.

It is unspecified how an interpreter should decide whether a saved game file
belongs to the game currently being played.  It is normal to insist that the
release numbers, serial codes and checksums all match.  The |Pinfocom|
interpreter deliberately checks only the release number, so that saved games
can be exchanged between different editions of `Seastalker' (presumably
compiled to handle the sonarscope differently).

The stack is stored in the interpreter's own memory, not anywhere in the
Z-machine.  The game program has no direct access to the stack memory or
stack pointer; on some implementations the game's main stack is also used to
store the routine call state (i.e. the game stack and the call-stack are the
same) but this need not be true.

The stack size specification guarantees in particular that if the game
itself never uses more than 32 stack entries at once then it can have
a recursive depth of at least 90 routine calls.  The author believes
that old Infocom games will all run with a stack size of 512 words.

Note that the ``state of play'' does not include numerous input/output
settings (the current window, cursor position, splitness or otherwise,
which streams are selected, etc.): neither does it include the state
of the random-number generator.  (Games with elaborate status lines
must redraw them after a restore has taken place.)

|Zip| provides ``undo'', but the |ITF| interpreter currently does not (and
|save_undo| returns 0, unfortunately).  This is probably its greatest
failing.  Some Infocom-written interpreters will only provide ``undo'' to a
game which has bit 4 of `Flags 2' set: but Inform 5.5 doesn't set this bit,
so modern interpreters should be more generous.

% ----------------------------------------------------------------------------

\specs{7}{Output streams and file handling}

\sp{1} At any given time text is being output through a selection of
``output streams'' (possibly none, possibly several at once).

\sp{1.1} Two output streams are common to all Versions:
number 1 (the screen) and 2 (the game transcript, usually printed
to a printer or a file).

\sp{1.2} Versions 3 and later supply these and two other output streams,
numbered 3 (Z-machine memory) and 4 (a script file of the player's whole
commands and of individual keypresses as read by |read_char|).

\sp{1.2.1} Output stream 3 writes to a table in dynamic memory.  When the
stream is selected, the table may have any contents (even the initial `size'
word will be ignored by the interpreter).  While the stream is selected, the
table's contents are unspecified (and a game cannot safely read or write
to it).  When the stream is deselected, the initial word of the table holds
the number of characters printed and subsequent bytes hold those characters.
Similarly, in Version 6, the total width of printing (in units) will then be
stored in the word at |$30| in the header.  (It is the programmer's
responsibility to make the table large enough: the interpreter performs no
overflow checking.)

\sp{1.2.2} Output stream 3 is unusual in that, while it is selected, no
text is sent to any other output streams which are selected.  (However,
they remain selected.)

\sp{1.2.2.1} Newlines are written to output stream 3 as ASCII 13.  Any
character 10 codes printed should be converted to 13.

\sp{1.2.3} Output stream 4 is unusual in that, when it is selected,
the only text printed to it is that of the player's commands and
keypresses (as read by |read_char|).  (Each command is written, in
one go, when it has been finished: a command which has been timed-out,
or has been terminated by a code in the terminating character codes
table, is not written.  Mistypes and uses of `delete' are not written.)

\nsp{2} On output streams 1 and 2 (only), text printing may be ``buffered''
in that new-lines are automatically printed to ensure that no word
(of length less than the width of the screen) spreads across two lines.
(This process is sometimes called ``word-wrapping''.)

\sp{2.1} In Versions 1 to 3, buffering is always on.  In Versions 4
and later it is on by default (at the start of a game) and a game can
switch it on or off using the |buffer_mode| opcode.

\sp{2.2} In Version 6, each of the eight windows has its own ``buffering
flag''.  In other Versions, the |buffer_mode| opcode applies
only to the lower window.  Output should never be buffered on
the upper window.

\nsp{3} In Versions 1 and 2, output stream 1 is always selected and
stream 2 can be selected or deselected by the game, by setting or clearing
bit 0 of `Flags 2'.

\nsp{4} In Versions 3 and later, all four output streams can be selected
or deselected using the |output_stream| opcode.  In addition, stream 2
can be selected or deselected by setting or clearing bit 0 of `Flags 2'.

\nsp{5} Character codes in the range 256 to 767 can only be printed on
the screen.  The author encourages interpreter-writers to make it easy
for game designers to modify the interpreter to print suitable substitutes
on the other streams.  (For instance, if 500 represents a Chinese dragon
character, this routine might print ``dragon'' on the other streams.)
Failing this, good practice would be to print a question mark on the
other streams.

\nsp{6} In Versions 5 and later, the Z-machine has the ability to load
and save files (using optional operands with the |save| and |restore|
opcodes).

\sp{6.1} |***| Filenames have the following format (approximately the MS-DOS
8.3 rule): one to eight alphanumeric characters, a full stop and zero to
three alphanumeric characters (the ``file extension'').

\sp{6.1.1} The interpreter must convert all filenames to upper case before
use.  If no full stop is given, ``.AUX'' should be appended.

\sp{6.1.2} Games should avoid the extensions ``.INF'', ``.H'', ``.Z''
followed by a number or ``.SAV'': otherwise they may be in danger of erasing
their own object code, source code or saved game files.

\sp{6.2} |***| Saved files are not associated with any particular session
of a game.  They are not part of the ``state of play''.

\sp{6.3} |***| A game may depend on having up to 32 auxiliary files (with
different names).

\sp{6.4} File-handling errors such as ``disc corrupt'' and ``disc full''
should be reported directly to the player by the interpreter.  The error
``file not found'' should only cause a failure return code from |restore|.

\remarks
The {\sl Inform Designer's Manual} advises games always to switch
buffering off when printing to the upper window.  This is wise since
the |ITF| interpreter does not behave correctly on this point.

An ambiguous point about output stream 4 is whether it should contain
the answers to interpreter questions like ``what file name should your
saved game have?'': it can actually be quite useful to be able to include
such answers in test script files.  (When running a long script, I often
save the game at several places during it, in order to save time in
re-running passages.)

Ideally, an interpreter should be able to write time delays (for timed input)
into stream 4 (i.e., to a script file).  In practice this is formidably
hard to implement.

A typical auxiliary file might be one containing the player's preferred
choices.  This would be created when he first changed any of the default
settings, and loaded (if present) whenever the game started up.

% ----------------------------------------------------------------------------

\specs{8}{The screen model}

\sp{1} Text may be printed in any font of the interpreter's choice,
variable- or fixed-pitch: except that when bit 1 of `Flags 2' in
the header is set, or when the text style has been set to Fixed Pitch,
then a fixed-pitch font must be used.

\sp{1.1} In Versions 5 and later, the height and width of the current
font (in units (see below)) should be written to bytes |$26| and
|$27| of the header, respectively.  The width of a font is defined
as the width of its `0' character.

\sp{1.2} An interpreter should ideally provide 4 fonts, with ID numbers
as follows:
\orm\beginstt
   1: the normal font
   2: a picture font
   3: a character graphics font
   4: a Courier-style font with fixed pitch
\endtt\frm
(In addition, font ID 0 means ``the previous font''.)
Ideally all text styles should be available for each font (for instance,
Courier bold should be obtainable) except that font 3 need only be
available in Roman and Reverse Video.  Each font should provide characters
for character codes 32 to 126 (plus character codes for any accented
characters with codes greater than 127 which are being implemented as single
accented letters on-screen).

\sp{1.3} A game must not use fonts other than 1 unless allowed to by the
interpreter: see the |set_font| opcode for how to give or refuse permission. 
(`Beyond Zork' produces different character graphics according to whether or
not font 3 is available: see \S 16 for the full story.)  This permission
may, at the interpreter's whim, depend on which window is active.

\sp{1.3.1} It is legal for a game to change font at any time,
including halfway through the printing of a word.

\sp{1.4} The specification of the ``picture font'' is unknown
(conjecturally, it was intended to provide pictures before Version
6 was properly developed).  Interpreters need not implement it.

\sp{1.5} The specification of the character graphics font is given
in \S 16.

\sp{1.5.1} In Version 5 (only), an interpreter which cannot provide
the character graphics font should clear bit 3 of `Flags 2' in the
header.

\nsp{2} In Versions 1 to 3, a status line should be printed by the
interpreter, as follows.  In Version 3, it must set bit 4 of
`Flags 1' in the header if it is unable to produce a status line.

\sp{2.1} In Versions 1 and 2, all games are ``score games''.  In
Version 3, if bit 1 of `Flags 1' is set then the game is a ``score game'';
otherwise, a ``time game''.

\sp{2.2} The short name of the object whose number is in the first global
variable should be printed on the left hand side of the line.

\sp{2.2.1} Whenever the status line is being printed the first global
must contain a valid object number.  (It would be useful if interpreters
could protect themselves in case the game accidentally violates this
requirement.)

\sp{2.2.2} If the object's short name exceeds the available room on
the status line, the author suggests that an interpreter should break
it at the last space and append an ellipsis ``...''.  There is no
guaranteed maximum length for location names but an interpreter should
expect names of length up to at least 49 characters.

\sp{2.3} If there is room, the right hand side of the status line should
display:

\sp{2.3.1} For ``score games'': the score and number of turns, held in
the values of the second and third global variables respectively.  The
score may be assumed to be in the range $-99$ to $999$ inclusive, and
the turn number in the range $0$ to $9999$.

\sp{2.3.2} For ``time games'': the time, in the form |hours:minutes| (held
in the second and third globals).  The time may be given on a 24-hour clock
or the number of hours may be reduced modulo 12 (but if so, ``AM'' or ``PM''
should be appended).  Either way the player should be able to see the
difference between 4am and 4pm, for example.  The hours global may be
assumed to be in the range 0 to 23 and the minutes global in the range
0 to 59.

\sp{2.4} The status line is updated in exactly two circumstances: when
a |show_status| opcode is executed, and just before the keyboard is
read by |read|.  (It is not displayed when the game begins.)

\nsp{3} Under Versions 5 and later, text printing has a current
foreground and background colour.  (In Version 6, each window has its
own pair.)

\sp{3.1} The following codes are used to refer to colours:
\orm\beginstt
    -1 =  the colour of the pixel under the mouse arrow (if any)
    0  =  the current setting of this colour
    1  =  the default setting of this colour
    2  =  black   3 = red       4 = green    5 = yellow
    6  =  blue    7 = magenta   8 = cyan     9 = white
\endtt\frm
(These are loosely based on the IBM PC colour-scheme.)

\sp{3.2} If the interpreter cannot produce colours, it should clear
bit 0 of `Flags 1' in the header.

\sp{3.3} If the interpreter can produce colours, it should set bit
0 of `Flags 1' in the header, and write its default background
and foreground colours into bytes |$2c| and |$2d| of the header.

\sp{3.4} If a game wishes to use colours, it should have bit 6 in
`Flags 2' set in its story file.  (However, an interpreter should not
rule out the use of colours just because this has not been done.)

\nsp{4} The screen should ideally be at least 60 characters wide by 14 lines
deep.  (Old Apple II interpreters had a 40 character width and some modern
laptop ones have a 9 line height, but implementors should seek to avoid
these extremes if possible.)  The interpreter may change the exact
dimensions whenever it likes but must write the current height (in lines)
and width (in characters) into bytes |$20| and |$21| in the header.

\sp{4.1} The interpreter should use the screen height for calculating
when to pause and print ``[MORE]''.  A screen height of 255 lines means
``infinite height'', in which case the interpreter should never stop
printing for a ``[MORE]'' prompt.  (In case, say, the screen is actually
a teletype printer, or has very good ``scrollback''.)

\sp{4.2} Screen dimensions are measured in notional ``units''.  In
Versions 1 to 4, one unit is simply the height or width of one character.
In Versions 5 and later, the interpreter is free to implement units as
anything from character sizes down to individual pixels.

\sp{4.3} In Versions 5 and later, the screen's width and height in units
should be written to the words at |$22| and |$24|.

\nsp{5} The screen model for Versions 1 and 2 is as follows:

\sp{5.1} The screen can only be printed to (like a teletype) and there
is no control of the cursor.

\sp{5.2} At the start of a game, the screen should be cleared and the text
cursor placed at the bottom left (so that text scrolls upwards as the game
gets under way).

\nsp{6} The screen model for Version 3 is as follows:

\sp{6.1} The screen is divided into a lower and an upper window and at any
given time one of these is selected.  (Initially it is the lower
window.)  The game uses the |set_window| opcode to select one of the
two.  Each window has its own cursor position at which text is
printed.  Operations in the upper window do not move the cursor of the
lower.  Whenever the upper window is selected, its cursor position is
reset to the top left.  Selecting, or re-sizing, the upper window does
not change the screen's appearance.

\sp{6.1.1} The upper window has variable height (of $n$ lines) and the
same width as the screen.  This should be displayed on the $n$ lines of
the screen below the top one (which continues to hold the status line).
Initially the upper window has
height 0.  When the lower window is selected, the game can split off
an upper window of any chosen size by using the |split_window| opcode.

\sp{6.1.1.1} Printing onto the upper window overlays whatever text is
already there.  Printing is normally buffered (unless the game has turned
this off), just as in the lower window.

\sp{6.1.2} An interpreter need not provide the upper window at all.  If
it is going to do so, it should set bit 5 of `Flags 1' in the header to
signal this to the game.  It is only legal for a game to use
|set_window| or |split_window| if this bit has been set.

\sp{6.1.3} Following a ``restore'' of the game, the interpreter should
automatically collapse the upper window to size 0.

\sp{6.2} When text reaches the bottom right of the lower window, it
should be scrolled upwards.  The upper window should never be scrolled:
it is legal for a character to be printed on the bottom right position
of the upper window (but the position of the cursor after this operation
is undefined: the author suggests that it stay put).

\sp{6.3} At the start of a game, the screen should be cleared and the text
cursor placed at the bottom left (so that text scrolls upwards as the game
gets under way).

\nsp{7} The screen model for Versions 4 and later, except Version 6,
is as follows:

\sp{7.1} Text can be printed in five different styles (modelled on the
VT100 design of terminal).  These are: Roman (the default), Bold, Italic,
Reverse Video (usually printed with foreground and background colours
reversed) and Fixed Pitch.  The specification does not require the
interpreter to be able to display more than one of these at once (e.g. to
combine italic and bold), and most interpreters can't.  If the interpreter
is going to allow certain combinations, then note that changing back to
Roman should turn off all the text styles currently active.

\sp{7.1.1} An interpreter need not provide Bold or Italic (even for font 1)
and is free to interpret them broadly.  (For example, rendering bold-face by
changing the colour, or rendering italic with underlining.)

\sp{7.1.2} It is legal to change text style at any point, including in
the middle of a word being printed.

\sp{7.2} There are two ``windows'', called ``upper'' and ``lower'': at
any given time one of these two is selected.  (Initially it is the lower
window.)  The game uses the |set_window| opcode to select one of the
two.  Each window has its own cursor position at which text is
printed.  Operations in the upper window do not move the cursor of the
lower.  Whenever the upper window is selected, its cursor position is
reset to the top left.

\sp{7.2.1} The upper window has variable height (of $n$ lines) and the
same width as the screen.  (It is usual for interpreters to print the
upper window on the top $n$ lines of the screen, overlaying any text
which is already there, having been printed in the lower window some
time ago.)  Initially the upper window has height 0.  When the lower
window is selected, the game can split off an upper window of any
chosen size by using the |split_window| opcode.

\sp{7.2.1.1} It is unclear exactly what |split_window| should do if
the upper window is currently selected.  The author suggests that
it should work as usual, leaving the cursor where it is if the
cursor is still inside the new upper window, and otherwise moving
the cursor back to the top left.  (This is analogous to the Version 6
practice.)

\sp{7.2.2} In Version 4, the lower window's cursor is always on
the bottom screen line.  In Version 5 it can be at any line which is
not underneath the upper window.  If a split takes place which would
cause the upper window to swallow the lower window's cursor position,
the interpreter should move the lower window's cursor down to the
line just below the upper window's new size.

\sp{7.2.3} When the upper window is selected, its cursor position
can be moved with |set_cursor|.  This position is given in characters
in the form (row, column), with $(1,1)$ at the top left.  The opcode
has no effect when the lower window is selected.  It is illegal
to move the cursor outside the current size of the upper window.

\sp{7.2.4} An interpreter should use a fixed-pitch font when printing on
the upper window.

\sp{7.2.5} In Version 4, text buffering should work in the upper window
exactly as it does in the lower one (i.e., it must be turned off by
the game if it is not required).  In Versions 5 and later, text buffering
is never active in the upper window (even if a game begins printing there
without having turned it off).

\sp{7.3} Clearing regions of the screen:

\sp{7.3.1} When text reaches the bottom right of the lower window, it
should be scrolled upwards.  (When the text style is Reverse Video
the new blank line should {\bf not} have reversed colours.)  The upper
window should never be scrolled: it is legal for a character to be
printed on the bottom right position of the upper window (but the
position of the cursor after this operation is undefined: the author
suggests that it stay put).

\sp{7.3.2} Using the opcode |erase_window|, the specified window
can be cleared to background colour.  (Even if the text style is Reverse
Video the new blank space should not have reversed colours.)

\sp{7.3.2.1} In Versions 5 and later, the cursor for the window being erased
should be moved to the top left.  In Version 4, the lower window's cursor
moves to its bottom left, while the upper window's cursor moves to top left.

\sp{7.3.3} Erasing window $-1$ clears the whole screen, collapses the
upper window to height 0 and moves the cursor of the lower window
to bottom left (in Version 4) or top left (in Versions 5 and
later).  The same operation should happen at the start of a game.

\sp{7.3.4} Using |erase_line| in the upper window should erase
the current line from the cursor position to the right-hand edge,
clearing it to background colour.  (Even if the text style is
Reverse Video the new blank space should not have reversed colours.)

\nsp{8} The screen model for Version 6 is as follows:

\sp{8.1} The display is an array of pixels.  Coordinates are usually
given (in units) in the form $(y,x)$, with $(1,1)$ in the top left.

\sp{8.2} If the interpreter thinks the status line should be redrawn
(e.g. because a menu window has been clicked over it), it may set bit
2 of `Flags 2'.  The game is expected to notice, take action and clear
the bit.  (However, a more efficient interpreter would cache
the status line and handle redraws itself.)

\sp{8.3} There are eight ``windows'', numbered 0 to 7.  The code $-3$
is used as a window number to mean ``the currently selected window''.
This selection can be changed with the |set_window| opcode.
Windows are invisible and usually lie on top of each other.  When
something is printed in a window, it appears on the screen, but
subsequent movements of the window do not move what was printed and
there is no sense in which characters `belong' to any particular
window once printed.  Each window has a position (in units), a size
(in units), a cursor position within it (in units, relative to its
own origin), a number of flags called ``attributes'' and a number
of variables called ``properties''.

\sp{8.3.1} There are four attributes, numbered as follows:
\orm\beginstt
    1: character wrapping
    2: scrolling
    3: text copied to output stream 2 (the transcript, if selected)
    4: buffered printing
\endtt\frm
Each can be turned on or off, using the |window_style| opcode.

\sp{8.3.1.1} Character wrapping, if set, takes place when printing
a character would push beyond the right-hand edge of the window:
the character is then printed on the left of the next line.
If it is clear, then text is printed along the line but clipped
to the window size.

\sp{8.3.2} There are 16 properties, numbered as follows:
\orm\beginstt
    0  y coordinate    6   left margin size            12  font number
    1  x coordinate    7   right margin size           13  font size
    2  y size          8   newline interrupt routine   14  attributes
    3  x size          9   interrupt countdown         15  line count
    4  y cursor        10  text style
    5  x cursor        11  colour data
\endtt\frm
Each property is a standard Z-machine number and is readable
with |get_wind_prop| and writeable with |put_wind_prop|.  However,
a game should only use |put_wind_prop| to set the newline
interrupt routine and interrupt countdown: everything else is
either set by the interpreter (such as the line count) or
set using specialised opcodes (such as |set_font|).

\sp{8.3.2.1} If a window has character wrapping, then text is
clipped to stay inside the left and right margins.  After a
new-line, the cursor moves to the left margin on the next line.
Margins can be set with |set_margins| but this should only be
done just after a newline or just after the window has been
selected.  (These values are margin sizes in pixels, and are by
default 0.)

\sp{8.3.2.2} If the interrupt countdown is set to a non-zero value
(which by default it is not), then the line count is decremented on each
new-line, and when it hits zero the routine whose packed address is stored
in the ``newline interrupt routine'' property is called before text printing
resumes.  (This routine may, for example, meddle with margins to roll text
around a crinkly-shaped picture.)  The interrupt routine should not attempt
to print anything.

\sp{8.3.2.3} The text style is set just as in Version 4, using
|set_text_style| (which sets that for the current window).  The
property holds the operand of that instruction (e.g. 4 for italic).

\sp{8.3.2.4} The foreground colour is stored in the upper byte of the
colour data property, the background colour in the lower byte.

\sp{8.3.2.5} The font height (in pixels) is stored in the upper byte of the
font size property, the font width (in pixels) in the lower byte.

\sp{8.3.2.6} The interpreter may use the line count to see when it
should print ``[MORE]''.

\sp{8.3.3} All eight windows begin at $(1,1)$.  Window 0 occupies the
whole screen and is initially selected.
Window 1 is as wide as the screen but has zero height.  Windows 2 to 7
have zero width and height.  All eight windows begin with buffered
printing on, and the other attributes off.

\sp{8.3.4} A window can be moved with |move_window| and resized with
|window_size|.  If the window size is reduced so that its cursor lies
outside it, the cursor should be reset to the left margin on the top
line.

\sp{8.3.5} Each window remembers its own cursor position (relative
to its own coordinates, so that the position $(1,1)$ is at its top
left).  This position can
be changed using |set_cursor| (and it is legal to move the cursor
for an unselected window).  It is illegal to move the cursor outside
the current window.

\sp{8.3.6} Each window can be scrolled vertically (up or down) any
number of pixels, using the |scroll_window| opcode.

\sp{8.4} To some extent windows 0 and 1 mimic the behaviour of the
lower and upper windows in the Version 4 screen model:

\sp{8.4.1} The
|split_window| opcode tiles windows 0 and 1 so that window 1 has
the given height and is placed at the top left, while window 0 is
moved to be just below it and has its height shortened by the
height of window 1.  (If this makes a negative amount, the height
becomes 0.)  Finally, window 0 is selected.

\sp{8.4.2} An ``unsplit'' (that is, a |split_window 0|) takes place
when the entire screen is cleared with |erase_window -1|, if a
``split'' has previously occurred (meaning that windows 0 and 1
have been set up as above).

\sp{8.5} Screen clearing operations:

\sp{8.5.1} Erasing a picture is like drawing it (see below), except
that the space where it would appear is painted over with background
colour instead.

\sp{8.5.2} The current line can be erased using |erase_line|, either
all the way to the right margin or by any positive number of pixels in
that direction.  The space is painted over with background colour
(even if the current text style is Reverse Video).

\sp{8.5.3} Each window can be erased using |erase_window|, erasing to
background colour (even if the current text style is Reverse Video).

\sp{8.5.3.1} Erasing window number $-1$ erases the entire screen
and unsplits windows 0 and 1 (see above).

\sp{8.5.3.2} Erasing window $-2$ erases the entire screen (without
changing any window attributes or cursor positions).

\sp{8.6} Pictures may accompany the game.  They are not stored in the
story file (or the Z-machine) itself, and the interpreter is simply
expected to know where to find them.  Infocom supplied files of
pictures in different formats on different machines.  The exact format of
such files is not specified here.

\sp{8.6.1} Pictures are numbered from 1 upwards (not necessarily
contiguously).  They can be ``drawn'' or ``erased'' (using |draw_picture|
and |erase_picture|).  Before attempting to do so, a game may ask the
interpreter about the picture (using |picture_data|): this allows the
interpreter to signal that the picture in question is unavailable,
or to specify its height and width.

\sp{8.6.2} The game may, if it wishes, use the |picture_table| opcode
to give the interpreter advance warning that a group of pictures will
soon be needed (for instance, a collection of icons making up a control
panel).  The interpreter may want to load these pictures off disc and
into a memory cache.

\remarks
See \S 16 for comment on how `Beyond Zork' uses fonts.

Some interpreters print the status line when they begin running a Version
3 game, but this is incorrect.  (It means that a small game printing text
and then quitting cannot be run unless it includes an object.)  The author's
preferred status line formats are:
\beginstt
Hall of Mists                                 80/733
Lincoln Memorial                              12:03 PM
\endtt
Thus the score/turns block always fits in $3+1+4=8$ characters and the
time in $2+1+2+1+2=8$ characters.  (Games needing more exotic time lines,
for example, should not be written in Version 3.)

The only existing Version 3 game to use an upper window is `Seastalker'
(for its sonarscope display).

Some ports of |ITF| apply buffering (i.e. word-wrapping) and scrolling to
the upper window, with unfortunate consequences.  This is why
the standard Inform status line is one character short of the width
of the screen.

The original Infocom files seldom use |erase_window|, except with window
$-1$ (for instance `Trinity' only uses it in this form).  |ITF| does not
implement it in any other case.

The Version 5 re-releases of older games make use of consecutive
|set_text_style| instructions to attempt to combine boldface and reverse video
(in the hints system).

None of Infocom's Version 4 or 5 files use |erase_line| at all, and |ITF|
implements it badly (with unpredictable behaviour in Reverse Video text
style).  (It's interesting to note that the Version-5 edition of `Zork I'
-- one of the earliest Version 5 files -- blanks out lines by looking up
the screen width and printing that many spaces.)

Note that a minor bug in |Zip| writes bytes |$22| to |$25| in the
header as four values, giving the screen dimensions in the form left,
right, top, bottom: provided units are characters (i.e. provided the
font width and height are both 1) then since ``left'' and ``top''
are both 0, this bug has no effect.

Some details of the known IBM graphics files are given in Paul David
Doherty's ``Infocom Fact Sheet''.  See also Mark Howell's program
``pix2gif'', which extracts pictures to GIF files.  (This is one of
his ``Infocom toolkit'' programs.)

% ----------------------------------------------------------------------------

\specs{9}{Sound effects}

\sp{1} Some games, from Version 3 onward, have sound effects attached.
These are not stored in the story file (or the Z-machine) itself,
and the interpreter is simply expected to know where to find them.
Infocom implemented sound effects differently on different machines.

\sp{1.1} In Version 6, the interpreter should set bit 5 of `Flags 1'
if it can provide sound effects.

\sp{1.2} In Version 5 and later, a game should have bit 7 of `Flags 2'
set in its story file if it wants to use sound effects.  The interpreter
should then clear this bit if it cannot oblige.

\nsp{2} Sound effects are numbered upwards from 1.  Number 1 is a
high-pitched beep, number 2 a low-pitched one and effects from 3 upward
are supplied by the interpreter somehow for the particular game in
question.

\nsp{3} A sound effect can be played at any volume level from 1 to 8 (8
being loudest of these).  The volume level $-1$ should be implemented as
``loudest possible''.

\nsp{4} Sound effects take place in the background, while normal operation
of the Z-machine is going on.  Control is via the |sound_effect| opcode,
allowing the game to prepare, start, stop or finish with an effect.

\sp{4.1} The game may (but need not) ``prepare'' a sound effect before
use.  This would indicate to the interpreter that the game intends
to use the effect soon: an interpreter might act on this information by
loading the sampled sound off disc and into a memory cache.

\sp{4.2} A sound effect can then be ``stopped'' or ``started''.  Only one
sound effect is playing at any given time, and starting a new sound effect
automatically stops any current one.

\sp{4.3} In Versions 5 and later, a sound effect may repeat any specified
number of times, or repeat forever (until stopped).

\sp{4.4} Eventually, though, if it has not been stopped, it may end by itself.
A routine (specified at start time) can then be called.  The intention is that
this routine may implement effects such as fading in and out, by replaying
the sound effect at a different volume.  (A game should not place any important
code in such a routine.)

\sp{4.5} The game should explicitly ``finish with'' any sound effect which is
not likely to occur again for a while: the interpreter can then throw it out
of memory.

\remarks
The safest way an Inform program can try to produce a bleep is by
executing |@sound_effect 1|.  Some ports of |Zip| believe that the
first operand of this is the number of bleeps to make (so that
|@sound_effect 2| bleeps twice), but this is incorrect.

Only two Infocom games provided sound effects: `The Lurking Horror'
and `Sherlock'.  Their story files only contain the following usages of
|sound_effect|:
\beginstt
  sound_effect 1
  sound_effect 2
  sound_effect number 2 volume                   (in TLH)
  sound_effect number 2 volume/repeats function  (in Sherlock)
  sound_effect 0 3
  sound_effect 0 4
\endtt

The format of Infocom's shipped sound effects files has been documented by
Stefan Jokisch and his notes are available from |ftp.gmd.de|.

% ----------------------------------------------------------------------------

\specs{10}{Input streams and devices}

\sp{1} In Versions 1 and 2, the player's commands can only be
drawn from the keyboard.

\nsp{2} In Versions 3 and later, the player's keypresses are drawn
from the current ``input stream''.  There are two input streams,
numbered 0 (the keyboard) and 1 (a file containing commands).
Other inputs (mouse clicks or menu selections), if available,
are also implemented as keypresses (see below).

\sp{2.1} The format of a file containing commands must be the same as
that written in output stream 4.

\sp{2.2} The game can change the current input stream itself, using
the opcode |input_stream|.  It has no way of finding out which input
stream is currently in use.  An interpreter is free to change the
input stream whenever it likes (e.g. at the player's request) or,
indeed, to run the entire game under input stream 1 (for testing
purposes).

\sp{2.3} When input stream 1 is first selected, the interpreter may
use any method of choosing a file name for the file of commands.  (Good
practice is to use the same conventions as when choosing a filename
for output to stream 4.)

\sp{2.4} When the current stream is stream 1, the interpreter
should not hold up long passages of text (by printing ``[MORE]''
and waiting for a keypress, for instance).

\nsp{3} Mouse support is optional but can be provided in Versions 5 and
later.

\sp{3.1} In a game which wishes to use the mouse, bit 5 of `Flags 2'
in the header should be set in the story file, and word |$36| of the
header should be the byte address of the mouse data table in dynamic
memory.

\sp{3.1.1} If the interpreter cannot offer mouse support, then it
should clear bit 5 of `Flags 2' to signal this to the game.

\sp{3.2} The mouse data table has the format:
\orm\beginstt
   Word 0:  Length of the table (in words)
   Word 1:  Mouse x coordinate
   Word 2:  Mouse y coordinate
\endtt\frm
(The table length is usually 2.)  These coordinates should be
updated regularly by the interpreter.

\sp{3.3} The mouse is presumed to have between 0 and 16 buttons.
The state of these buttons can be read by the |read_mouse| opcode
in Version 6.  Otherwise, mouse clicks are treated as keyboard
input codes (see below).

\sp{3.4} In Version 6, the mouse can either be free or constrained
to one of the 8 windows: if constrained, clicks outside the `mouse window'
must be ignored, and the interpreter is at liberty to confine the
mouse's movement to the boundary of its window.

\nsp{4} Menu support can optionally be provided in Version 6.

\sp{4.1} In a game which wishes to use menus, bit 8 of `Flags 2'
in the header should be set in the story file.

\sp{4.1.1} If the interpreter cannot offer menu support, then it
should clear bit 8 of `Flags 2' to signal this to the game.

\sp{4.2} Menus are numbered from 0 upwards.  0, 1 and 2 are reserved for
the interpreter to manage (this system has only been implemented on
the Macintosh, wherein 0 is the Apple menu, 1 the File menu and 2 the
Edit menu).  Menus numbered 3 and upwards can be created or removed with
the |make_menu| opcode.

\sp{4.3} Menu selection is reported to the game as a keypress
(see below).  Details of what selection has been made are read with
|read_mouse|.

\nsp{5} Whole commands are read from the input stream using the |read|
opcode.  (Note that this has two different internal names in Inform,
|sread| for Versions 1 to 4 and |aread| subsequently.)

\sp{5.1} In Versions 1 to 3, the interpreter must redisplay the status
line before it begins accepting input.

\sp{5.2} Commands are normally terminated by a new-line (a carriage
return or a line feed as appropriate for the machine's keyboard or
file format).

\sp{5.2.1} In Versions 5 and later, the game may provide a
``terminating characters table'' by giving its byte address in
the word at |$2e| in the header.  This table is a zero-terminated
list of input character codes which cause |aread| to finish the
command (in addition to new-line).  Only function key codes are
permitted: these are defined as those between 129 and 154
inclusive, together with 252, 253 and 254.  The special value
255 means ``any function key code is terminating''.

\sp{5.3} |***| In Versions 4 and later, an interpreter should ideally
be able to time input and to call a (game) routine at periodic
intervals: see the |aread| opcode.  If it is able to do this, it
should set bit 7 of `Flags 1' in the header.

\nsp{6} In Versions 4 and later, individual characters can be read
from the current input stream, using |read_char|.  Again, the interpreter
should ideally be able to time input and to call a (game) routine at
periodic intervals.  If it is able to do this, it should set bit 7 of `Flags
1' in the header.

\nsp{7} For input purposes the character set is as follows:
\orm\beginstt
 0-9       ----
 10        New-line   (ends input of a command)
 11-12     ----
 13        New-line   (ends input of a command)
 14-26     ----
 27        Escape
 28-31     ----
 32-126    Standard ASCII           (see 3.4.3)
 127-128   ----
 129-154   Function key codes       (see below)
 155-251   Accented letter codes    (see below)
 252-254   Function key codes       (see below)
 255-      ----
\endtt\frm
The codes marked |----| should never be read.  (Of course an
interpreter may well want to use other ASCII codes for its own
line-editor when the player is typing a command: 127 for ``delete'',
for instance.  The table means only that these codes should not
be passed to the game.)  Note that an interpreter can return
either 10 or 13 as ``new-line''.  (The recommended choice is
10.)

\sp{7.1} The ``escape'' code is optional: an interpreter need not
provide an escape key.  (The Inform library clears and quits menus
if Escape is pressed.)

\sp{7.2} The first block of function key codes is as follows:
\orm\beginstt
129: cursor up  130: cursor down  131: cursor left  132: cursor right
133: f1         134: f2           ....              144: f12
145: keypad 0   146: keypad 1     ....              154: keypad 9
\endtt\frm

\sp{7.3} The input codes 155 to 251 refer to European accented
letters: see the table in \S 3.4.4.

\sp{7.4} In Version 6, mouse clicks and menu selections are reported
as the function key codes:
\orm\beginstt
252: menu click   253: mouse double-click   254: mouse single-click
\endtt\frm
In Versions 5 and later (except 6), menus are unavailable, and it is
legal for an interpreter to translate both forms of mouse-click as
code 254.  This is the recommended practice but a game should not
depend on it.

\sp{7.5} All the codes not marked as |----| should be passed to
|read_char|.  Function key codes and the code for ``escape'' should
not be entered by |read| into the input buffer (they have no
specified appearance on screen), but accented letter codes should.

\remarks
Menus in `Beyond Zork' define cursor up and cursor down as terminating
characters, and make use of |read| in the upper window.

Ideally, an interpreter should be able to read time delays (for timed input)
from stream 1 (i.e., from a script file).  In practice this is formidably
hard to implement.

The `Beyond Zork' story file is capable of receiving both mouse-click
codes (253 and 254), listing both in its terminating characters table
and treating them equally.

% ----------------------------------------------------------------------------

\specs{11}{The format of the header}

\sp{1} The header table summarises those locations in the Z-machine's
header which an interpreter must deal with.  (For much fuller details,
see Appendix A.)  ``Hex'' means the address, in hexadecimal;
``V'' the earliest Version to which the rule is applicable; ``Dyn'' means
that the byte or bit may legally be changed by the game during play;
``Int'' means that the interpreter may change it; ``Rst'' means that the
interpreter must set it correctly after loading the game, after a restore
or after a restart.

\pageinsert
\smallskip\hrule\smallskip
\beginlines
| Hex  V  Dyn Int Rst  Contents|
\endlines\smallskip\hrule\smallskip\beginlines
|  0   1               Version number (1 to 6)|
|  1   3               Flags 1:|
|      .3                Bit 1    Status line type: 0=score/turns, 1=hours:mins|
|      .3      *   *         4    Status line not available?|
|      .3      *   *         5    Screen-splitting available?|
|      .3      *   *         6    Is a variable-pitch font the default?|
|      4               Flags 1:|
|      .5      *   *     Bit 0    Colours available?|
|      .6      *   *         1    Picture displaying available?|
|      .4      *   *         2    Boldface available?|
|      .4      *   *         3    Italic available?|
|      .4      *   *         4    Fixed-space font available?|
|      .6      *   *         5    Sound effects available?|
|      .4      *   *         7    Timed keyboard input available?|
|  4   1               Base of high memory (byte address)|
|  6   1               Initial value of program counter (byte address)|
|      6               Packed address of initial "main" routine|
|  8   1               Location of dictionary (byte address)|
|  A   1               Location of object table (byte address)|
|  C   1               Location of global variables table (byte address)|
|  E   1               Base of static memory (byte address)|
| 10   1               Flags 2:|
|      .1  *   *   *     Bit 0    Set when transcripting is on|
|      .3  *                 1    Game sets to force printing in fixed-pitch font|
|      .6  *   *             2    Int sets to request status line redraw:|
|                                   game clears when it complies with this.|
|      .5      *   *         3    If set, game wants to use pictures|
|      .5      *   *         4    If set, game wants to use the UNDO opcodes|
|      .5      *   *         5    If set, game wants to use a mouse|
|      .5                    6    If set, game wants to use colours|
|      .5      *   *         7    If set, game wants to use sound effects|
|      .6      *   *         8    If set, game wants to use menus|
|                                   (For bits 3,4,5,7 and 8, Int clears again|
|                                   if it cannot provide the requested effect.)|
| 18   2               Location of abbreviations table (byte address)|
| 1A   3+              Length of file (see note)|
| 1C   3+              Checksum of file|
| 1E   4       *   *   Interpreter number|
| 1F   4       *   *   Interpreter version (single ASCII character)|
\endlines\smallskip\hrule\smallskip
\noindent Some early Version 3 files do not contain length and checksum
data, hence the notation |3+|.
\vfill\endinsert

\topinsert
\smallskip\hrule\beginlines
| Hex  V  Dyn Int Rst  Contents|
\endlines\smallskip\hrule\smallskip\beginlines
| 20   4       *   *   Screen height (lines): 255 means "infinite"|
| 21   4       *   *   Screen width (characters)|
| 22   5       *   *   Screen width in units|
| 24   5       *   *   Screen height in units|
| 26   5       *   *   Font height in units|
| 27   5       *   *   Font width in units (defined as width of a '0')|
| 28   6               Routines offset (divided by 8)|
| 2A   6               Static strings offset (divided by 8)|
| 2C   5       *   *   Default background colour|
| 2D   5       *   *   Default foreground colour|
| 2E   5               Address of terminating characters table (bytes)|
| 30   6       *       Total width in pixels of text sent to output stream 3|
| 32   1       *   *   Standard revision number|
| 34   5               Character set table address (bytes), or 0 for default|
| 36   5               Mouse data table address (bytes)|
\endlines\smallskip\hrule\bigskip
\endinsert

\sp{1.1} It is illegal for a game to alter those
fields not marked as ``Dyn''.  An interpreter is therefore free to
store values of such fields in its own variables.

\sp{1.2} The state of the transcription bit (bit 0 of Flags 2) is only
changed by the game (see \S 7.3, \S 7.4), but the interpreter ensures that
its value survives a restart or restore.

\sp{1.3} Infocom used the interpreter numbers:
\orm\beginstt
   1   DECSystem-20     5   Atari ST           9   Apple IIc
   2   Apple IIe        6   IBM PC            10   Apple IIgs
   3   Macintosh        7   Commodore 128     11   Tandy Color
   4   Amiga            8   Commodore 64
\endtt\frm
(The DECSystem-20 was Infocom's own in-house mainframe.)  An interpreter
should choose the interpreter number most suitable for the machine it
will run on.  (The main consideration is that the behaviour of `Beyond Zork'
actually depends on the interpreter number.)

\sp{1.4} |***| The use of bit 7 in `Flags 1' to signal whether timed
input is available is new in this document: see the preface.

\sp{1.5} |***| If an interpreter obeys Revision |n.m| of this document {\sl
perfectly}, as far as anyone knows, then byte |$32| should be written with
|n| and byte |$33| with |m|.  If it is an earlier (non-standard)
interpreter, it should leave these bytes as 0.

\sp{1.6} The file length stored at |$1a| is actually divided by a constant,
depending on the Version, to make it fit into a header word.  This constant
is 2 for Versions 1 to 3, 4 for Versions 4 to 5 or 8 for Versions 6 and
later.

\remarks
See the ``Infocom Fact Sheet'' for numbers and letters of the known
interpreters shipped by Infocom.  Interpreter versions are conventionally
the upper case letters in sequence (A, B, C, ...).  At present most ports
of |Zip| use interpreter number 6, and most of |ITF| use number 2.

The unusual behaviour of `Beyond Zork' concerns its character graphics:
see the remarks to \S 16.

% ----------------------------------------------------------------------------

\specs{12}{The object table}

\sp{1} The object table is held in dynamic memory and its byte address
is stored in the word at |$0a| in the header.  (Recall that objects
have flags attached called attributes, numbered from 0 upward, and
variables attached called properties, numbered from 1 upward.  An
object need not provide every property.)

\nsp{2} The table begins with a block known as the property defaults table.
This contains 31 words in Versions 1 to 3 and 63 in Versions 4 and later.
When the game attempts to read the value of property $n$ for an object
which does not provide property $n$, the $n$-th entry in this table
is the resulting value.

\nsp{3} Next is the object tree.  Objects are numbered consecutively from
1 upward, with object number 0 being used to mean ``nothing'' (though
there is formally no such object).  The table consists of a list of
entries, one for each object.

\sp{3.1} In Versions 1 to 3, there are at most 255 objects, each having
a 9-byte entry as follows:
\orm\beginstt
   <the 32 attribute flags>   <parent>  <sibling>  <child>  <properties>
   ---32 bits in 4 bytes---   ---3 bytes------------------  ---2 bytes--
\endtt\frm
|parent|, |sibling| and |child| must all hold valid object numbers.
The |properties| pointer is the byte address of the list of
properties attached to the object.  Attributes 0 to 31 are flags
(at any given time, they are either on (1) or off (0)) and are stored topmost
bit first: e.g., attribute 0 is stored in bit 7 of the first byte,
attribute 31 is stored in bit 0 of the fourth.

\sp{3.2} In Version 4 and later, there are at most 65535 objects, each
having a 14-byte entry as follows:
\orm\beginstt
   <the 48 attribute flags>   <parent>  <sibling>  <child>  <properties>
   ---48 bits in 6 bytes---   ---3 words, i.e. 6 bytes----  ---2 bytes--
\endtt\frm

\nsp{4} Each object has its own property table.  Each of these can be
anywhere in dynamic memory (indeed, a game can legally change an object's
properties table address in play, provided the new address points to
another valid properties table).

\sp{4.1} In Versions 1 to 3, a property table has header:
\orm\beginstt
  <text-length>   <text of short name of object>
  -----byte----   --some even number of bytes---
\endtt\frm
where the |text-length| is the number of 2-byte words making up the text,
which is stored in the usual format.  (This means that an object's short
name is limited to 765 Z-characters.)  After the header, the properties
are listed in descending numerical order.  (This order is essential and
is not a matter of convention.)  Each property is stored as a block
\orm\beginstt
  <size byte>   <the actual property data>
                ---between 1 and 8 bytes--
\endtt\frm
where the |size byte| is arranged as 32 times the number of data bytes
minus one, plus the property number.  A property list is terminated by
a size byte of 0.  (It is otherwise illegal for a size byte to be a
multiple of 32.)

\sp{4.2} In Versions 4 and later, a property block instead has the form
\orm\beginstt
  <size and number>     <the actual property data>
  --1 or 2 bytes---     --between 1 and 64 bytes--
\endtt\frm
The property number occupies the bottom 6 bits of the first size byte.

\sp{4.2.1} If the top bit of the size byte is set, then there is a
second size byte.  The bottom six bits of this second byte contain the
property data length (counting in bytes), minus 1, and its top two bits
must be |$$10|.

\sp{4.2.2} Otherwise, if bit 6 of the size byte is set then the
length is 2, and if it is clear then the length is 1.

\sp{5} It is the game's responsibility to keep the object tree
well-founded: the interpreter is not required to check.
``Well-founded'' means the following:
\item{(a)} An object with a sibling also has a parent.
\item{(b)} An object is the parent of exactly those objects
in the sibling list of its child.
\item{(c)} Each object can be given a level $n$, such that
parentless objects have level $0$ and all children of a level
$n$ object have level $n+1$.

\remarks
The largest valid object number is not directly stored anywhere
in the Z-machine.  Utility programs like ``Infodump'' deduce this
number by assuming that, initially, the object entries end where
the first property table begins.

The reason why the second property size byte needs to have top bits set
to |$$10| is that the size field must be parsable either
forwards or backwards -- the Z-machine needs to be able to reconstruct the
length of a property given only the address of the first byte of its data.
(There are very many (e.g. 2000) property entries in a story file, so
optimising size into one byte most of the time is worthwhile.)

In fact only the top bit of the second byte needs to be set, so it
would be extremely easy to modify an interpreter to allow up to 128
bytes of property data.  Infocom seem not to have noticed, or not to
have needed this.

Inform can only construct well-founded object trees as the initial game
state, but it is easy to compile sequences of code like ``move red box to
blue box'' followed by ``move blue box to red box'' which leave the object
tree in an ill-founded state.  (The Inform library protects the standard
object-movement verbs against this.)

% ----------------------------------------------------------------------------

\specs{13}{The dictionary and lexical analysis}

\sp{1} The dictionary table is held in static memory and its byte address
is stored in the word at |$08| in the header.

\nsp{2} The table begins with a short header:
\orm\beginstt
  n    <list of keyboard input codes>  entry-length  number-of-entries
 byte  ------n bytes-----------------      byte         2-byte word
\endtt\frm
The keyboard input codes are ``word-separators'': typically (and under
Inform mandatorily) these are the ASCII codes for full stop, comma and
double-quote.  Note that a space character (32) should never be a
word-separator.  The ``entry length'' is the length of each word's
entry in the dictionary table.  (It must be at least 4 in Versions
1 to 3, and at least 6 in later Versions.)

\sp{2.1} Note that control codes such as the ASCII for ``tab'' are
never given in the word-separators table: they aren't legal keyboard
input codes (an interpreter might sensibly convert a tab to a space).

\nsp{3} In Versions 1 to 3, each word has an entry in the form
\orm\beginstt
  <encoded text of word>      <bytes of data>
  ------- 4 bytes ------   (entry length-4) bytes
\endtt\frm
The interpreter ignores the bytes of data (presumably the game's parser will
use them).  The encoded text contains 6 Z-characters (it is always padded
out with Z-character 5's to make up 4 bytes: see ``How strings are
encoded'').  The text may include spaces or other word-separators
(though, if so, the interpreter will never match any text to the
dictionary word in question: surprisingly, this can be useful and is
a trick used in Inform 5/12).

\nsp{4} In Versions 4 and later, the encoded text has 6 bytes and
always contains 9 Z-characters.

\nsp{5} The word entries follow immediately after the dictionary header
and must be given in numerical order of the encoded text (when the encoded
text is regarded as a 32 or 48-bit binary number with most-significant
byte first).  The dictionary must not contain two entries with the same
encoded text.

\nsp{6} Lexical analysis takes place in two circumstances: on request
of a |tokenise| opcode (in which case it can use any dictionary table
it likes, in the format above) and during acceptance of a game command
(in which case the standard dictionary is used).

\sp{6.1} First, the text is broken up into words.  Spaces divide up
words and are otherwise ignored.  Word separators also divide words,
but each one of them is considered a word in its own right.  Thus,
the erratically-spaced text ``fred,go  fishing'' is divided into four
words:
\orm\beginstt
fred / , / go / fishing
\endtt\frm

\sp{6.2} Each word is then encoded as a Z-machine string in
dictionary form, and searched for in the dictionary.

\sp{6.3} A ``parse table'' is then written, recording the number of
words, the length and position of each word and the dictionary
address of each word which is recognised.  For the format, see the
|sread| opcode.

\remarks
Usually (under Inform, mandatorily) there are three bytes of data
in the word entries, so that dictionary entry lengths are 7 and 9
in the early and late Z-machine, respectively.

It is essential that dictionary entries are in numerical order of the
bytes of encoded text so that interpreters can search the dictionary
efficiently (e.g. by a binary-chop algorithm).  Because the letters in
A0 are in alphabetical order, because the bits are ordered in the right
way and because the pad character 5 is less than the values for the
letters, the numerical ordering corresponds to normal English alphabetical
order for ordinary words.  (For instance ``an'' comes before
``anaconda''.)

The Infocom games do contain words whose initial
character is not a letter (words such as ``\#record'').

% ----------------------------------------------------------------------------

\specs{14}{Complete table of opcodes}

\sp{1} This table contains all 117 opcodes and, taken with the dictionary
in \S 15, describes exactly what each should do.  In addition, it lists which
opcodes are actually used in the known Infocom story files, and documents the
Inform assembly language syntax.

\subtitle{Reading the opcode tables}

The two columns ``St'' and ``Br'' (store and branch) mark whether an
instruction stores a result in a variable, and whether it must provide a
label to jump to, respectively.

The ``Opcode'' is written |TYPE:Decimal| where the |TYPE| is the operand
count (2OP, 1OP, 0OP or VAR) or else EXT for two-byte opcodes (where the
first byte is (decimal) 190).  The decimal number is the lowest possible
decimal opcode value (by convention, 256 is added for extended opcodes). 
The hex number is the opcode number within each |TYPE|.

The ``V'' column gives the Version information.  If nothing is specified, the
opcode is as stated from Version 1 onwards.  Otherwise, it exists only from
the version quoted onwards.  Before this time, its use is illegal.  Some
opcodes change their meanings as the Version increases, and these have more
than one line of specification.  Others become illegal again, and these are
marked |[illegal]|.

In a few cases, the Version is given as ``3/4'' or some such.  The first
number is the Version number whose specification the opcode belongs to, and
the second is the earliest Version in which the opcode is known actually to
be used in an Infocom-produced story file.  A dash means that it seems
never to have been used (in any Version).

The table explicitly marks opcodes which do not exist in any version of the
Z-machine as |------|: in addition, none of the extended set of codes from
|$1d| to |$ff| were ever used.

\subtitle{Inform assembly language}

An Inform line beginning with an |@| is sent directly to the assembler. In
the syntax below, |<variable>| and |<result>| must be variables (or |sp|,
the stack pointer); |<label>| a label (not a routine name). 
|<literal-string>| must be literal text in quotation marks ``thus''.
|routine| should be the name of a routine (this assembles to its packed
address).  Otherwise any Inform constant term (such as |'/'| or |'beetle'|)
can be given as an operand.

In a branch instruction, the logical effect can be negated using a tilde
|~| before the label name, so for instance
\beginstt
    @je a b ~Different;  ! Jump to Different if a not equal to b
\endtt
The programmer must specify whether a branch is in the ``near'' or ``far''
form, the default being ``near''.  A question mark |?| before the label (and
tilde, if present) forces it to be ``far''.

Note that the operands marked as |<variable>| are assembled with ``small
constant'' type, not ``variable'' type (see \S 4.2.3).  This affects the
opcodes
\beginstt
    inc,  dec,  inc_chk,  dec_chk,  store,  pull,  load.
\endtt
For example, Inform assembles |@inc score;| to something looking like
``increment 16'', because 16 is the variable number of |score|.
(Such behaviour can be seen, for instance, at |$5051| in Zork II,
48.840904.  Some Infocom games use ``indirect addressing'' by
|load [sp] sp| (load the value of the variable held on the stack,
and put it on the stack).  However, this syntax is not understood
by Inform.)

\vfill\eject
\hrule\smallskip
\centerline{\bf Two-operand (long) opcodes \rm 2OP}\smallskip\hrule
\beginlines
|   St  Br  Opcode Hex  V  Inform name and syntax|
\endlines\smallskip\hrule\smallskip\beginlines
|           ------   0  ------|
|       *   2OP:1    1     je              a b <label>|
|       *   2OP:2    2     jl              a b <label>|
|       *   2OP:3    3     jg              a b <label>|
|       *   2OP:4    4     dec_chk         <variable> value <label>|
|       *   2OP:5    5     inc_chk         <variable> value <label>|
|       *   2OP:6    6     jin             obj1 obj2 <label>|
|       *   2OP:7    7     test            bitmap flags <label>|
|   *       2OP:8    8     or              a b <result>|
|   *       2OP:9    9     and             a b <result>|
|       *   2OP:10   A     test_attr       object attribute <label>|
|           2OP:11   B     set_attr        object attribute|
|           2OP:12   C     clear_attr      object attribute|
|           2OP:13   D     store           <variable> value|
|           2OP:14   E     insert_obj      object destination|
|   *       2OP:15   F     loadw           array word-index <result>|
|   *       2OP:16  10     loadb           array byte-index <result>|
|   *       2OP:17  11     get_prop        object property <result>|
|   *       2OP:18  12     get_prop_addr   object property <result>|
|   *       2OP:19  13     get_next_prop   object property <result>|
|   *       2OP:20  14     add             a b <result>|
|   *       2OP:21  15     sub             a b <result>|
|   *       2OP:22  16     mul             a b <result>|
|   *       2OP:23  17     div             a b <result>|
|   *       2OP:24  18     mod             a b <result>|
|   *       2OP:25  19  4  call_2s         routine arg1 <result>|
|           2OP:26  1A  5  call_2n         routine arg1|
|           2OP:27  1B  5  set_colour      foreground background|
|           2OP:28  1C 5/- throw           value stack-frame|
|           ------  1D  ------|
|           ------  1E  ------|
|           ------  1F  ------|
\endlines\smallskip\hrule\smallskip
\centerline{32 to 127: other forms of 2OP with different types.}
\vfill\eject
\smallskip\hrule\smallskip
\centerline{\bf One-operand opcodes \rm 1OP}\smallskip\hrule
\beginlines
|   St  Br  Opcode Hex  V  Inform name and syntax|
\endlines\smallskip\hrule\smallskip\beginlines
|       *   1OP:128  0     jz              a <label>|
|   *   *   1OP:129  1     get_sibling     object <result> <label>|
|   *   *   1OP:130  2     get_child       object <result> <label>|
|   *       1OP:131  3     get_parent      object <result>|
|   *       1OP:132  4     get_prop_len    property-address <result>|
|           1OP:133  5     inc             <variable>|
|           1OP:134  6     dec             <variable>|
|           1OP:135  7     print_addr      byte-address-of-string|
|   *       1OP:136  8  4  call_1s         routine <result>|
|           1OP:137  9     remove_obj      object|
|           1OP:138  A     print_obj       object|
|           1OP:139  B     ret             value|
|           1OP:140  C     jump            <label>|
|           1OP:141  D     print_paddr     packed-address-of-string|
|   *       1OP:142  E     load            <variable> <result>|
|   *       1OP:143  F 1/4 not             value <result>|
|                       5  call_1n         routine|
\endlines\smallskip\hrule\smallskip
\centerline{144 to 175: other forms of 1OP with different types.}
\medskip\hrule\smallskip
\centerline{\bf Zero-operand opcodes \rm 0OP}\smallskip\hrule
\beginlines
|   St  Br  Opcode Hex  V  Inform name and syntax|
\endlines\smallskip\hrule\smallskip\beginlines
|           0OP:176  0     rtrue|
|           0OP:177  1     rfalse|
|           0OP:178  2     print           <literal-string>|
|           0OP:179  3     print_ret       <literal-string>|
|           0OP:180  4 1/- nop|
|       *   0OP:181  5  1  save            <label>|
|                       5  [illegal]|
|       *   0OP:182  6  1  restore         <label>|
|                       5  [illegal]|
|           0OP:183  7     restart|
|           0OP:184  8     ret_popped|
|           0OP:185  9  1  pop|
|   *                  5/- catch           <result>|
|           0OP:186  A     quit|
|           0OP:187  B     new_line|
|           0OP:188  C  3  show_status|
|                       4  [illegal]|
|       *   0OP:189  D  3  verify          <label>|
|           0OP:190  E  5  [first byte of extended opcode]|
|       *   0OP:191  F 5/- piracy          <label>|
\endlines\smallskip\hrule\smallskip
\centerline{192 to 223: VAR forms of 2OP:0 to 2OP:31.}
\vfill\eject
\smallskip\hrule\smallskip
\centerline{\bf Variable-operand opcodes \rm VAR}\smallskip\hrule
\beginlines
|   St  Br  Opcode Hex  V  Inform name and syntax|
\endlines\smallskip\hrule\smallskip\beginlines
|   *       VAR:224  0  1  call            routine ...up to 3 args... <result>|
|                          icall           packed-address-of-routine <result>|
|                       4  call_vs         routine ...up to 3 args... <result>|
|           VAR:225  1     storew          array word-index value|
|           VAR:226  2     storeb          array byte-index value|
|           VAR:227  3     put_prop        object property value|
|           VAR:228  4  1  sread           text parse|
|                       4  sread           text parse time routine|
|   *                   5  aread           text parse time routine <result>|
|           VAR:229  5     print_char      output-character-code|
|           VAR:230  6     print_num       value|
|   *       VAR:231  7     random          range <result>|
|           VAR:232  8     push            value|
|           VAR:233  9  1  pull            <variable>|
|   *                  6/- pull            stack <result>|
|           VAR:234  A  3  split_window    lines|
|           VAR:235  B  3  set_window      window|
|   *       VAR:236  C  4  call_vs2        routine ...up to 7 args... <result>|
|           VAR:237  D  4  erase_window    window|
|           VAR:238  E 4/- erase_line      value|
|                       6  erase_line      pixels|
|           VAR:239  F  4  set_cursor      line column|
|                       6  set_cursor      line column window|
|           VAR:240 10 4/- get_cursor      table|
|           VAR:241 11  4  set_text_style  style|
|           VAR:242 12  4  buffer_mode     flag|
|           VAR:243 13  3  output_stream   number|
|                       5  output_stream   number table|
|                       6  output_stream   number table width|
|           VAR:244 14  3  input_stream    number|
|           VAR:245 15 5/3 sound_effect    number effect volume routine|
|   *       VAR:246 16  4  read_char       1 time routine <result>|
|   *    *  VAR:247 17  4  scan_table      x table len|
|   *       VAR:248 18 5/- not             value <result>|
|           VAR:249 19  5  call_vn         routine ...up to 3 args...|
|           VAR:250 1A  5  call_vn2        routine ...up to 7 args...|
|           VAR:251 1B  5  tokenise        text parse dictionary flag|
|           VAR:252 1C  5  encode_text     ascii-text length from coded-text|
|           VAR:253 1D  5  copy_table      first second size|
|           VAR:254 1E  5  print_table     ascii-text width height skip|
|        *  VAR:255 1F  5  check_arg_count argument-number|
\endlines\smallskip\hrule\smallskip
\vfill\eject
\smallskip\hrule\smallskip
\centerline{\bf Extended opcodes \rm EXT}\smallskip\hrule
\beginlines
|   St  Br  Opcode Hex  V  Inform name and syntax|
\endlines\smallskip\hrule\smallskip\beginlines
|   *       EXT:256  0  5  save            table bytes name <result>|
|   *       EXT:257  1  5  restore         table bytes name <result>|
|   *       EXT:258  2  5  log_shift       number places <result>|
|   *       EXT:259  3 5/- art_shift       number places <result>|
|   *       EXT:260  4  5  set_font        font window <result>|
|           EXT:261  5  6  draw_picture    picture-number y x|
|        *  EXT:262  6  6  picture_data    picture-number table <label>|
|           EXT:263  7  6  erase_picture   picture-number y x|
|           EXT:264  8  6  set_margins     left right window|
|   *       EXT:265  9  5  save_undo       <result>|
|   *       EXT:266  A  5  restore_undo    <result>|
|           -------  B  ------|
|           -------  C  ------|
|           -------  D  ------|
|           -------  E  ------|
|           -------  F  ------|
|           EXT:272 10  6  move_window     window y x|
|           EXT:273 11  6  window_size     window y x|
|           EXT:274 12  6  window_style    window flags operation|
|   *       EXT:275 13  6  get_wind_prop   window property-number <result>|
|           EXT:276 14  6  scroll_window   window pixels|
|           EXT:277 15  6  pop_stack       items stack|
|           EXT:278 16  6  read_mouse      table|
|           EXT:279 17  6  mouse_window    window|
|        *  EXT:280 18  6  push_stack      value stack <label>|
|           EXT:281 19  6  put_wind_prop   window property-number value|
|           EXT:282 1A  6  print_form      formatted-table|
|        *  EXT:283 1B  6  make_menu       number table <label>|
|           EXT:284 1C  6  picture_table   table|
\endlines\smallskip\hrule\bigskip

\nsp{2} Formally, it is illegal for a game to contain an opcode
not specified for its version.  An interpreter should normally halt with
a suitable message.

\sp{2.1} However, extended opcodes in the range EXT:285 to EXT:511
should be simply ignored (perhaps with a warning message somewhere
off-screen).

\sp{2.2} EXT:285 to EXT:383 are reserved for future common extensions of
the Z-machine.

\sp{2.3} Game-writers who wish to create their own ``new'' opcodes, for one
specific game only, are asked to use opcode numbers in the range EXT:384 to
EXT:511.  It is easy to modify Inform to name and assemble such opcodes. 
(Of course the game will then have to be circulated with a suitably modified
interpreter to run it.)

\sp{2.4} Interpreter-writers should make this easy by providing a
routine which is called if EXT:384 to EXT:511 are found, so that
the minimum possible modification to the interpreter is needed.

\remarks
The opcodes 5, 6, 7, 8 in the extended set were very likely in Infocom's
own V5 specification (now lost): they seem to have been partially implemented
in existing Infocom interpreters, but do not occur in any existing V5
story file.  They are here left unspecified.

The notation ``5/3'' for |sound_effect| is because this plainly
version-5 feature was used also in one solitary Version-3 game,
`The Lurking Horror' (the sound version of which was the last V3 release,
in September 1987).

The 2OP opcode 0 was possibly intended for setting break-points
in debugging.  It was not |nop|.  (The Infix debugger uses the actual
|nop| instruction as a break-point instead.)

|read_mouse| and |make_menu| are believed to have been used only in
`Journey' (based on a check of 11 V6 story files).  |picture_table| is
used once by `Shogun' and several times by `Zork Zero'.


\specs{15}{Dictionary of opcodes}

\quote
       The highest ideal of a translation... is achieved when the
       reader flings it impatiently into the fire, and begins
       patiently to learn the language for himself.
\quoteby{Philip Vellacott}

\sp{1} The dictionary below is alphabetical and includes entries on every
opcode listed in the table above, as well as brief notes on some Inform
internal synonyms which might otherwise be confused with opcodes.

\nsp{2} The Z-machine has the same concept of ``table'' (as an internal
data structure) as Inform.  Specifically, a table is an array of words
(in dynamic or static memory) of which the initial entry is the number
of subsequent words in the table.  For example, a table with three
entries occupies 8 bytes, arranged as the words 3, $x$, $y$, $z$.

\nsp{3} In all cases below where one operand is supposed to be an object
number, the behaviour is undefined if it isn't a legal object number
(and this includes 0).  Ideally an interpreter should halt with a suitable
error message.  This is especially true of |print_obj| (which is not
required to run very quickly, so that an interpreter can safely ``waste''
time checking this common error condition).  Similar remarks apply to
attribute numbers exceeding 31 or 47 (attributes being numbered from 0);
and to window numbers, window attribute numbers and window property
numbers in Version 6.
\bigskip
\stepin=75pt
\ninepoint
\def\de{\medskip\noindent}

\block{add}|2OP:20  14     add             a b <result>|
    \continue
    Signed 16-bit addition.

\block{and}|2OP:9    9     and             a b <result>|
    \continue
    Bitwise AND.

\block{"aparse"}Obsolete name for |tokenise|.

\block{aread}This is the Inform name for the keyboard-reading opcode
    under Version 5 and later.  (Inform calls the same opcode |sread|
    under Versions 3 and 4.)  See |read| for the specification.

\block{art\_shift}|EXT:259  3 5/- art_shift       number places <result>|
    \continue
    Does an arithmetic shift of |number| by the given number
    of places, shifting left (i.e.\ increasing) if |places| is positive, right
    if negative.  In a right shift, the sign bit is preserved as well as
    being shifted on down.  (The alternative behaviour is |log_shift|.)

\block{"beep"}Inform currently uses this name for |sound_effect| in Versions
    before 5 (since public interpreters provide only minimal facilities),
    but the name is being withdrawn.  See |sound_effect|.

\block{buffer\_mode}|VAR:242 12  4  buffer_mode     flag|
    \continue
    If set to 1, text output on the lower window in strea