#!/usr/bin/env python
"""
transmatte.py -- an IF excerpting tool
Created by Andrew Plotkin <erkyrath@eblong.com>
Last updated: March 4, 2010
Web site: <http://eblong.com/zarf/transmatte/>
This script is in the public domain. Do anything.

Usage: python transmatte.py [ options ] trans1.txt trans2.txt ... > page.html

Given a set of IF transcript files, this generates an HTML page that
lets the reader flip between them. The transcripts are merged, as much
as possible. That is, common sequences of commands are displayed as
static data; differences are displayed with links that let the reader
jump between the transcript variations.

If this makes no sense to you, go to the web site (see above) and try the
examples.

Options:
  -p, --prompt: Define the string that identifies command inputs. 
      (Default is ">")
  -a, --annotation: Define the string that identifies annotation lines.
      (Default is "!")

A certain amount of cleanup is done on the transcripts. Trailing whitespace 
is stripped from each line. Blank lines before and after a command line are 
ignored (but blank lines in the middle of a game response are kept).

Lines beginning with "!" are special; they will not appear in the output.
Instead, they are annotations which help customize the output. Annotation
lines have the form "!FUNCTION: VALUE".

(An annotation line can appear right before a command prompt line or after
it. If you put one at the end of a game response, it'll be glomped up
by the following command line.)

  !title: Defines the page title. This must be placed on or before the
    first command.
  
  !subtitle: Defines the page subtitle. This must be placed on or before
    the first command.
  
  !note: This line defines a game note for the current command. Game notes
    appear as-is in the right column of the generated page.
  
  !option: This line defines the label for a command. (In the choice links
    in the left column.) When two of your transcripts differ, use this
    annotation on the commands that are different.
  
  !otheroption: Sometimes you have one transcript that differs only in the
    absence of a command. So there's no command there to put an "!option"
    line on. Instead, put an "!otheroption" line in the *other* transcript,
    on the appropriate command.
  
  !priority: This defines the (numeric) priority for a command. In the
    choice links, the high-priority command appears first, and is the
    default choice. (That is, it will be visible when the page is first
    loaded.)
"""

import sys
import re
import optparse

popt = optparse.OptionParser()

popt.add_option('-p', '--prompt',
    action='store', dest='prompt')
popt.add_option('-a', '--annotation',
    action='store', dest='annotation')

popt.set_defaults(
    prompt = '>',
    annotation = '!',
)

(opts, args) = popt.parse_args()

if not args:
    sys.stderr.write('usage: transmatte.py file1 file2 file3...\n')
    sys.exit(1)

class Spindle:
    """Spindle: A data structure which is like a tree, except that instead
    of branching at the bottom, it branches in the middle.

    That is, a spindle consists of a chain of nodes, except that at one
    point (optionally), the chain is interrupted by a fork into one or
    more child spindles. You go down the chain, hit the fork, branch
    out into one of the children, and then merge back into the chain
    afterwards. 

    In regexp syntax, this would look like "^head(S1|S2|S3)tail$". Where
    S1, S2, S3 (etc) are themselves spindles. Note that either the head
    or the tail (or both) could be empty.

    If there's no fork point, we call it a "straight spindle", and it's
    simply a list of nodes (possibly the empty list).

    Constructors:

    Spindle(seq) -- Create a straight spindle. The argument can be any
    sequence, such as a list or tuple. A string argument will be broken
    up into a sequence of characters.

    Spindle(seqhead, children, seqtail) -- Create a spindle with the given
    sequence, followed by a fork, followed by the given tail sequence.
    The children argument should be a tuple/list of Spindles.

    (Note that while you can create a Spindle containing any type of nodes,
    the process() function defined later on assumes that the contents are
    Node objects.)
    """

    index_counter = 0
    
    def __init__(self, seq, children=None, seqtail=None):
        Spindle.index_counter += 1
        self.index = Spindle.index_counter
        
        if (children is None):
            self.straight = True
            self.seq = tuple(seq)
            self.seqhead = self.seq
            self.seqtail = self.seq
            self.length = len(self.seq)
            self.children = None
        else:
            self.straight = False
            self.seqhead = tuple(seq)
            self.seqtail = tuple(seqtail)
            self.children = tuple(children)
            self.length = len(self.seqhead) + len(self.seqtail)

    def __repr__(self):
        if (self.straight):
            return ('<Spindle #%d %s>' % (self.index, self.seq))
        else:
            return ('<Spindle #%d %s (%d children) %s>' % (self.index, self.seqhead, len(self.children), self.seqtail))

    def dump(self, depth=0):
        """dump() -> None

        Print a tree reprentation of the spindle and its children. 
        For debugging.
        """

        if (self.straight):
            val = ''.join(self.seq)
            if (not val):
                val = '()'
            print '  '*depth + val
        else:
            val = ''.join(self.seqhead)
            if (not val):
                val = '()'
            print '  '*depth + '\\' + val
            for spin in self.children:
                spin.dump(depth+1)
            val = ''.join(self.seqtail)
            if (not val):
                val = '()'
            print '  '*depth + '/' + val

    def firstnode(self):
        """firstnode() -> tuple of nodes
        
        Return the first node in the spindle. If the fork occurs right
        at the beginning, returns the first node of *all* of the children.
        If it can't find any nodes, this returns the empty tuple.
        """

        if (self.straight):
            # This will be empty is seq is empty.
            return self.seq[0:1]
        else:
            if (self.seqhead):
                return self.seqhead[0:1]
            res = []
            for spin in self.children:
                res.extend(spin.firstnode())
            if res:
                return tuple(res)
            if (self.seqtail):
                return self.seqtail[0:1]
            return ()

    def trim(self, headlen, taillen):
        """trim(headlen, taillen) -> (tuple, tuple)

        Chop headlen nodes off the head of the spindle, and taillen nodes
        off the tail. Return the two chopped-off sequences, as tuples.
        (Either or both arguments can be zero, in which case the 
        corresponding return value will be empty.)

        This assumes that the spindle *has* at least headlen nodes before 
        the fork, and taillen nodes after it. (Or, for a straight spindle,
        at least headlen+taillen nodes overall.) If it doesn't, this will
        do something which is probably wrong.
        """

        headseq = self.seqhead[ 0 : headlen ]
        tailseq = self.seqtail[ len(self.seqtail)-taillen : ]

        if (self.straight):
            self.seq = self.seq[ headlen : len(self.seq)-taillen ]
            self.length = len(self.seq)
        else:
            self.seqhead = self.seqhead[ headlen : ]
            self.seqtail = self.seqtail[ 0 : len(self.seqtail)-taillen ]
            self.length = len(self.seqhead) + len(self.seqtail)

        return (headseq, tailseq)

    def getnote(self, key):
        """getnode(key) -> value

        Look at the first node in this spindle. If it contains the
        given key, return its value. If the spindle start with a fork,
        look up the given key in the first nodes of *all* the children.
        If nothing can be found, return None
        """
        ls = self.firstnode()
        for node in ls:
            val = node.getnote(key)
            if (val):
                return val
        return None
    
def merge(spin1, spin2, headlen, taillen):
    """merge(spin1, spin2, headlen, taillen) -> Spindle

    Given two spindles, which have headlen matching nodes at the beginning
    and taillen matching nodes at the end, merge them. This returns a new
    spindle, which begins with the common headlen nodes, ends with the
    common taillen nodes, and contains the appropriate fork in between.

    This takes for granted that the spindles actually do have those nodes
    in common. (Annotations will be merged between common nodes.)

    This mutates (or mutilates) spin1 and spin2. Do not try to use them
    after passing them to this function.

    This attempts to be clever about minimizing forks. For example, if you 
    merge "abc(123|456)def" with "abc(789|0)def", you'll get a four-way
    spindle: "abc(123|456|789|0)def".
    """

    (headseq1, tailseq1) = spin1.trim(headlen, taillen)
    (headseq2, tailseq2) = spin2.trim(headlen, taillen)
    headseq = [ nod1.update(nod2) for (nod1, nod2) in zip(headseq1, headseq2) ]
    tailseq = [ nod1.update(nod2) for (nod1, nod2) in zip(tailseq1, tailseq2) ]

    # spin1 and spin2 have now been trimmed of their common portions.
    # See if we can merge the remainder of one into the other.

    if (not (spin1.straight or spin2.straight)
        and spin1.length == 0 and spin2.length == 0):
        ls = spin1.children + spin2.children
        spin = Spindle(headseq, ls, tailseq)
        return spin

    if (not spin1.straight and spin1.length == 0):
        ls = spin1.children + (spin2,)
        spin = Spindle(headseq, ls, tailseq)
        return spin
        
    if (not spin2.straight and spin2.length == 0):
        ls = spin2.children + (spin1,)
        spin = Spindle(headseq, ls, tailseq)
        return spin
        
    # No efficient way to do it; create a new spindle that forks between
    # (what's left of) spin1 and spin2.

    spin = Spindle(headseq, [spin1, spin2], tailseq)
    return spin
            
def compare(spin1, spin2):
    """compare(spin1, spin2) -> (int, int)

    Compare the two spindles, and count how many nodes they have in common
    at the beginning, and at the end. Return these two values.

    The comparison doesn't extend into children of the spindle. Only the
    head and tail sequences are considered.

    If you compare a straight spindle to itself, you'll get (len, 0).
    This is probably not what you want, so don't do that. (This is why
    the program rejects loading two identical transcripts.)
    """

    # Note: if I cared about efficiency, I'd cache the results of this
    # call with (spin1,spin2) as the cache key.

    # In a straight spindle, seqhead and seqtail are both copies of the
    # entire spindle sequence. This is for the convenience of this
    # function.

    maxval = min(len(spin1.seqhead), len(spin2.seqhead))
    ix = 0
    while (ix < maxval):
        if (spin1.seqhead[ix] != spin2.seqhead[ix]):
            break
        ix += 1
    sharehead = ix

    maxval = min(len(spin1.seqtail), len(spin2.seqtail))
    ix = 0
    while (ix < maxval):
        if (spin1.seqtail[-1-ix] != spin2.seqtail[-1-ix]):
            break
        ix += 1
    sharetail = ix

    if (sharehead + sharetail > spin1.length):
        sharetail = spin1.length - sharehead
    if (sharehead + sharetail > spin2.length):
        sharetail = spin2.length - sharehead

    return (sharehead, sharetail)
            
def number_spindle(spin, parent=None, num=0):
    """number_spindle(spin) -> None

    Go down a spindle (and all its children). In each one, store the
    choice number -- the position of the spindle in its parent's
    "children" array. (The root spindle's number defaults to 0.)
    """
    spin.parent = parent
    spin.choicenum = num
    spin.extrachoiceset = None
    
    if (not spin.straight):
        ls = []
        for child in spin.children:
            priority = 0
            val = child.getnote('priority')
            if (val):
                try:
                    priority = int(val)
                except:
                    pass
            ls.append( (-priority, child.index, child) )
        ls.sort()
        spin.children = tuple([ nod for (_, _, nod) in ls ])
        
        ix = 0
        for child in spin.children:
            number_spindle(child, spin, ix)
            ix += 1

def emplace_choices(spin):
    """emplace_choices(spin) -> None

    Go down a spindle (and all its children). In each one that forks,
    add a choicelist descriptor at the beginning of each of the forked
    children. (A choicelist descriptor is the information we'll need to
    generate a set of choice links. This consists of a reference to the
    parent node, and the child's number (so we know which link gets
    boldfaced.))

    A complication: a spindle might be totally empty, so there's no
    child node to stick the descriptor on. In that case, we put it
    directly on the spindle.
    """
    
    if (not spin.straight):
        for child in spin.children:
            tup = (spin, child.choicenum)
            ls = child.firstnode()
            if not ls:
                child.extrachoiceset = tup
            else:
                for node in ls:
                    node.choicesets.append(tup)
        for child in spin.children:
            emplace_choices(child)
    
            
def process(spindles):
    """process(spindles) -> Spindle

    Given a list of spindles, merge them all as much as possible. Return
    a single spindle whose branches cover all of the inputs.

    This is not particularly efficient. It compares every spindle to every
    other spindle, in order to find the pair which have the most in common.
    It merges those two, and then starts the whole process over. A lot of
    redundant comparisons, in other words.
    """

    while True:
        results = []
        
        for spin1 in spindles:
            for spin2 in spindles:
                if (spin1 == spin2):
                    break
    
                (sharehead, sharetail) = compare(spin1, spin2)
                sharetotal = sharehead + sharetail
                if (sharetotal == 0):
                    continue
                
                results.append( (sharetotal, sharehead, sharetail, spin1, spin2) )

        if (not results):
            break
        
        results.sort(reverse=True)
        (sharetotal, sharehead, sharetail, spin1, spin2) = results[0]
    
        spindles.remove(spin1)
        spindles.remove(spin2)

        newspin = merge(spin1, spin2, sharehead, sharetail)
        spindles.append(newspin)
    
    if (len(spindles) == 1):
        # We merged down to a single spindle. Yay!
        res = spindles[0]
    else:
        # We were left with several spindles, with nothing in common.
        # Create a single spindle that forks over all of them.
        # (We use merge() for efficient merging.)
        spin1 = spindles.pop()
        spin2 = spindles.pop()
        res = merge(spin1, spin2, 0, 0)
        while spindles:
            spin2 = spindles.pop()
            res = merge(res, spin2, 0, 0)

    number_spindle(res)
    emplace_choices(res)
    return res

class Node:
    """Node: Represents one move from a transcript, including the player's
    command, the game's response, and any annotations added to the transcript
    for that move.

    (For introductory text, which is not preceded by a command, the command
    should be None.)

    In order to make the comparison-and-merge algorithm behave right, two
    Nodes compare as equal if their commands and bodies are equal. The
    annotations are ignored for the purposes of comparison.
    """

    def __init__(self, command, body, notels=[], choicesets=None):
        self.command = command
        self.body = body
        self.tup = (command, body)  # equality key
        
        self.notes = {}
        for note in notels:
            if (':' in note):
                (key, val) = note.split(':', 1)
                key = key.strip()
                val = val.strip()
            else:
                (key, val) = (note, True)
            self.notes[key] = val
            
        # The following is not used until the process() phase.
        if choicesets is None:
            self.choicesets = []
        else:
            self.choicesets = choicesets

    def __repr__(self):
        return '<' + repr(self.command) + '>'
    def __hash__(self):
        return hash(self.tup)
    def __eq__(self, node):
        return (self.tup == node.tup)
    def __ne__(self, node):
        return (self.tup != node.tup)

    def update(self, node):
        """update(node) -> Node
        Copy all the notes from the given node into this one. Returns self.
        """
        self.notes.update(node.notes)
        return self

    def getnote(self, key):
        """getnode(key) -> value

        If this node's notes contain the given key, return its value.
        Otherwise, return None.
        """
        return self.notes.get(key, None)

def parse_transcript(filename):
    """parse_transcript(filename) -> tuple

    Read a transcript file. Parse it into a sequence of Nodes.

    A certain amount of cleanup is done on the transcript, to make it easier
    to compare. Trailing whitespace is stripped from each line. Blank lines
    before and after a command line are ignored (but blank lines in the
    middle of a game response are kept).
    """
    
    result = []
    
    fl = open(filename)
    leadingblanks = True

    command = None
    bodyls = []
    notels = []
    notels2 = []
    
    while True:
        ln = fl.readline()
        if (not ln):
            break
        ln = ln.rstrip()
        if (leadingblanks and not ln):
            continue
        leadingblanks = False

        if (ln.startswith(opts.prompt)):
            if (command or bodyls):
                body = '\n'.join(bodyls)
                body = body.rstrip()
                node = Node(command, body, notels)
                notels = []
                result.append(node)
            notels.extend(notels2)
            notels2 = []
            bodyls = []
            command = ln
        elif (ln.startswith(opts.annotation)):
            ln = ln[ len(opts.annotation) : ].strip()
            notels2.append(ln)
        else:
            if (bodyls or ln):
                bodyls.append(ln)
            if (ln):
                notels.extend(notels2)
                notels2 = []

    if (command or bodyls):
        body = '\n'.join(bodyls)
        body = body.rstrip()
        node = Node(command, body, notels)
        result.append(node)
            
    fl.close()

    if leadingblanks or not result:
        raise Exception('transcript was entirely blank: %s' % (filename,))
    
    return tuple(result)

# TemplateHead and TemplateFoot are the boilerplate of the HTML output.

TemplateHead = """<html>
<head>
<title>$TITLE$</title>
<style type="text/css">

body {
  overflow: scroll;
  overflow-x: hidden;
}

h1 {
  font-size: 20px;
}

.Chunk {
}

table {
  margin-top: 15px;
  margin-bottom: 15px;
  margin-left: auto;
  margin-right: auto;
  border-spacing: 20px 0px;
}

td {
  vertical-align: top;
}

.Control {
  width: 150px;
  min-width: 150px;
  font-size: 12px;
  text-align: right;
  color: #6060C0;
}
.Control a {
  color: #6060FF;
}
.Game {
  width: 400px;
  min-width: 400px;
  font-size: 14px;
}
.Note {
  width: 200px;
  min-width: 200px;
  font-size: 12px;
  font-style: oblique;
  color: #606060;
}

.Command {
  font-style: oblique;
}

</style>

<script type="text/javascript">

function swap(id1, id2) {
  var el = document.getElementById(id1);
  el.style.display = "none";
  el = document.getElementById(id2);
  el.style.display = "block";
}

</script>

</head>
<body>
<h1>$TITLE$</h1>
<p><em>$SUBTITLE$</em></p>

<hr>
"""

TemplateFoot = """
</body>
</html>
"""

headwhite_regex = re.compile('^\\s+')

def nbsp_head(val):
    """nbsp_head(val) -> str
    Convert whitespace at the *beginning* of a string into "&nbsp;".
    (This is necessary to convert indented transcript lines into HTML.)
    """
    match = headwhite_regex.match(val)
    if (not match):
        return val
    count = match.end()
    return ('&nbsp;' * count) + val[ count : ]

def htmlescape(val):
    """htmlescape(val) -> str
    Escape text for HTML output. Newlines are turned to <br>. Indented
    lines get "&nbsp;" to preserve the indentation.
    """
    val = val.replace('&', '&amp;')
    val = val.replace('>', '&gt;')
    val = val.replace('<', '&lt;')
    ls = val.split('\n')
    ls = [ nbsp_head(val) for val in ls ]
    return '<br>'.join(ls)

def generate_sequence(seq):
    """generate_sequence(seq) -> None

    Generate the HTML tables for a sequence of nodes. If the sequence is
    empty, this prints nothing.
    """
        
    for node in seq:
        print '<table><tr>'

        # The biggest, ugliest part of this function is printing the choice
        # links (the left column of the output).

        if (not node.choicesets):
            print '<td class="Control"></td>'
        else:
            print '<td class="Control">'
            firstone = True
            for (cset, thischoice) in node.choicesets:
                if firstone:
                    firstone = False
                else:
                    print '<br>'
                ix = 0
                for spin in cset.children:
                    label = spin.getnote('option')
                    if not label:
                        for diffspin in cset.children:
                            if (diffspin != spin):
                                label = diffspin.getnote('otheroption')
                                if label:
                                    break
                    if not label:
                        label = 'Option %d of %d' % (ix+1, len(cset.children))
                    if (thischoice == ix):
                        print '<strong>%s</strong><br>' % (label,)
                    else:
                        cmd = "javascript:swap('spin_%d_%d', 'spin_%d_%d')" % (cset.index, thischoice, cset.index, ix)
                        print '<a href="%s">%s</a><br>' % (cmd, label)
                    ix += 1
            print '</td>'
        print '<td class="Game">'

        # Print the command and body (middle column).
        
        if (node.command):
            print '<p class="Command">' + htmlescape(node.command) + '</p>'
        print '<p class="Response">' + htmlescape(node.body) + '</p>'

        print '</td>'

        # Print the game notes (right column).

        nodenote = node.getnote('note')
        if (not nodenote):
            print '<td class="Note"></td>'
        else:
            print '<td class="Note">' + htmlescape(nodenote) + '</td>'
        print '</tr></table>'
        
def generate_spindle(spin):
    """generate_spindle(spin) -> None

    Recursively generate the HTML divs and tables for a spindle and its 
    children.
    """

    if (spin.parent):
        parentindex = spin.parent.index
    else:
        parentindex = 0

    # Only the first choice in any set is visible, to begin with.
    disp = ''
    if (spin.choicenum > 0):
        disp = ' style="display: none;"'
    print '<div class="Chunk" id="spin_%d_%d"%s>' % (parentindex, spin.choicenum, disp)

    if (spin.straight):
        generate_sequence(spin.seq)
    else:
        generate_sequence(spin.seqhead)
        for choice in spin.children:
            generate_spindle(choice)
        generate_sequence(spin.seqtail)

    if (spin.extrachoiceset):
        # This is the case where the spindle had no nodes at all. We
        # fake up an empty row to put the choiceset in.
        emptynode = Node(None, ' ', choicesets = [spin.extrachoiceset])
        generate_sequence( (emptynode,) )
        
    print '</div>'

def generate(topspin):
    """generate(topspin) -> None

    Generate the complete dynamic HTML page for the transcripts represented
    by a spindle. The page is printed to stdout.
    """

    title = topspin.getnote('title')
    if not title:
        title = 'TITLE'
    val = TemplateHead.replace('$TITLE$', title)
    
    subtitle = topspin.getnote('subtitle')
    if not subtitle:
        subtitle = 'A slightly interactive excerpt'
    val = val.replace('$SUBTITLE$', subtitle)
    print val

    generate_spindle(topspin)
    
    print TemplateFoot

nodelists = []
nodelist_map = {}

for filename in args:
    ls = parse_transcript(filename)
    if (nodelist_map.has_key(ls)):
        raise Exception('identical transcripts: %s, %s' % (nodelist_map[ls], filename))
    nodelists.append(ls)
    nodelist_map[ls] = filename

spindles = [ Spindle(trans) for trans in nodelists ]

result = process(spindles)

generate(result)
