//Cumulus.java/////////////////////////////////////////////////////////////////////////////////////
/**************************************************************************************************
 * cs550 Design and Translation of Programming Languages
 * Assignment 2: first & follow sets
 * Aaron Hall
 **************************************************************************************************/

package edu.montana.cs.fafnir.cs550.hw2;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import org.jdom.JDOMException;

/**************************************************************************************************
 * Central class of this solution loads the grammar description from its xml-based file into
 * alphabet and ruleset data structures. Then, routines compute the first and follow sets using the
 * given algorithms and the whole mess is displayed.
 **************************************************************************************************/
 
final class Cumulus {
    String title;
    Alphabet alphabet;
    Ruleset ruleset;
    
    HashMap firsts;  //maps each Nonterminal to the HashSet of Symbols in its first set; built by computeFirsts()
    HashMap follows; //maps each Nonterminal to the HashSet of Symbols in its follow set; built by computeFollows()
    
    
    Cumulus() {
        alphabet = new Alphabet();
        ruleset = new Ruleset();
    }
    
    public static void main(String[] args) {
        /* Entry point. Expects the grammar file name as the first argument; loads it, then prints the title,
         * alphabet, ruleset, first sets and follow sets in that order. Any FFException is reported and the
         * program exits with status -1.
         */
        if (args.length < 1) {
            System.out.println("must have file name");
            System.exit(0);
        }
        
        System.out.println();
        Cumulus c = new Cumulus();
        try {
            c.load(new File(args[0]));
            
            System.out.println("\"" + c.title + "\"");
            System.out.println();
            System.out.println();
            System.out.println(c.alphabet);
            System.out.println();
            System.out.println(c.ruleset);
            System.out.println();
            c.computeFirsts();
            c.displayFirsts();
            System.out.println();
            c.computeFollows(); //must come after computeFirsts(): follow sets are built from the first sets
            c.displayFollows();
            System.out.println();
            
        } catch (FFException e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
            System.exit(-1);
        }
    }
    
    
    void load(File source) throws FFException {
        /* Loads the CFG description from the given file, initializing the title, alphabet, and ruleset
         * datastructures. JDOM is used to wander through the xml document tree during loading.
         *
         * Throws FFException if the file cannot be read or parsed, if the root element is not GRAMMAR, if a
         * production has no RIGHTSIDE element, or if a symbol's "Term" attribute is missing or unrecognized.
         */
        try {
            SAXBuilder builder = new SAXBuilder();
            Document doc = builder.build(source);
            Element root = doc.getRootElement();
            //compare by content: reference comparison of strings only works if the parser interns names
            if (!"GRAMMAR".equals(root.getName())) {
                throw new FFException("source file contained incorrect root node, \"" + root.getName() + "\"");
            }
            title = root.getChildText("TITLE");
            
            /* Iterate over every production, loading the rule itself as well as creating any symbols that are used for
             * the first time.
             */
            for (Iterator i = root.getChildren("PRODUCTION").iterator(); i.hasNext(); ) {
                Element production = (Element)i.next();
                Symbol leftside = alphabet.lookupConstructively(production.getChildText("LEFTSIDE"), Alphabet.Namespace.NONTERMINAL);
                
                Element rightsideElement = production.getChild("RIGHTSIDE");
                if (rightsideElement == null) {
                    throw new FFException("production is missing its RIGHTSIDE element");
                }
                
                ArrayList rightside = new ArrayList();
                for (Iterator j = rightsideElement.getChildren("SYMBOL").iterator(); j.hasNext(); ) {
                    Element symbol = (Element)j.next();
                    
                    int namespace = 0;
                    String symbolType = symbol.getAttributeValue("Term");
                    //constant-first equals: a missing attribute (null) falls through to the error branch
                    if ("nonterminal".equals(symbolType)) {
                        namespace = Alphabet.Namespace.NONTERMINAL;
                    } else if ("terminal".equals(symbolType)) {
                        namespace = Alphabet.Namespace.TERMINAL;
                    } else {
                        throw new FFException("Symbol in rule was of unrecognized type \"" + symbolType + "\"");
                    }
                    
                    //the name "lambda" overrides whatever type the attribute declared
                    String symbolName = symbol.getText();
                    if (symbolName.equals("lambda")) {
                        namespace = Alphabet.Namespace.LAMBDA;
                    }
                    
                    rightside.add(alphabet.lookupConstructively(symbolName, namespace));
                }
                
                ruleset.add(new Rule((Nonterminal)leftside, rightside));
            }
            
            /* Only after all rules (and symbols) have been loaded, set the given start symbol.
             */
            alphabet.makeStart((Nonterminal)alphabet.lookupConstructively(root.getChildText("START"), Alphabet.Namespace.NONTERMINAL));
        } catch (JDOMException e) {
            throw new FFException(e.getMessage());
        } catch (IOException e) {
            throw new FFException(e.getMessage());
        }
    }
    
    
    private HashSet firstOf(Symbol symbol) {
        /* Returns a fresh, modifiable copy of the first set of a single symbol: the recorded first set for a
         * nonterminal, or the symbol itself for anything else (terminals and lambda are their own first).
         * Requires the firsts map to have an entry for every nonterminal.
         */
        if (symbol instanceof Nonterminal) {
            return new HashSet((HashSet)firsts.get(symbol));
        }
        return new HashSet(Collections.singleton(symbol));
    }
    
    
    private void computeFirsts() throws FFException {
        /* Computes the first sets for all nonterminal symbols in the grammar using the given algorithm. A slight
         * modification uses a test inside the loop (over right-side symbols of a rule) to see if it's the last one,
         * rather than a separate "continue" variable tested outside of the loop. The effect is the same.
         * The result is stored in the firsts field; passes over the ruleset repeat until a pass adds nothing.
         */
        firsts = new HashMap();
        for (Iterator i = alphabet.getIterator(Alphabet.Namespace.NONTERMINAL); i.hasNext(); ) {
            Nonterminal non = (Nonterminal)i.next();
            firsts.put(non, new HashSet()); //initialize each nonterminals' first to the empty set
        }
        
        boolean changes = true;
        while (changes) {
            changes = false;
            
            for (Iterator i = ruleset.getIterator(); i.hasNext(); ) {
                Rule rule = (Rule)i.next();
                Nonterminal left = rule.getLeftSide();
                HashSet leftFirst = (HashSet)firsts.get(left);
                
                for (int k = 0; k < rule.rightSideSize(); ++k) {
                    Symbol right = (Symbol)rule.getRightSide(k);
                    
                    //nonterminals have a set of firsts, but terminals are their own first
                    HashSet toAdd = firstOf(right);
                    
                    //lambda is only passed on to the left side if every right-side symbol can vanish
                    boolean hadLambda = toAdd.remove(alphabet.lambda());
                    if (leftFirst.addAll(toAdd)) {
                        changes = true;
                    }
                    
                    if (!hadLambda) {
                        break; //this symbol cannot vanish, so later symbols contribute nothing
                    }
                    if (k + 1 == rule.rightSideSize()) { //if this was the last right-side symbol for this rule
                        if (leftFirst.add(alphabet.lambda())) {
                            changes = true;
                        }
                    }
                } //for (int k...
            } //for (Iterator i...
        } //while (changes)...
    }
    
    
    private void computeFollows() throws FFException {
        /* Computes the follow sets for every nonterminal in the alphabet for this grammar. It uses the given algorithm.
         * Note the "end of file" symbol is constructed here and is not found in the alphabet. This is since the grammar
         * description file doesn't apparently include any such thing, and it's therefore only useful here.
         * computeFirsts() must have been run beforehand, since the first sets are read here.
         */
        follows = new HashMap();
        for (Iterator i = alphabet.getIterator(Alphabet.Namespace.NONTERMINAL); i.hasNext(); ) {
            Nonterminal non = (Nonterminal)i.next();
            follows.put(non, new HashSet()); //initialize each nonterminals' follow to the empty set
            if (non == alphabet.getStart()) {
                ((HashSet)follows.get(non)).add(new Terminal("EOF")); //special terminal not expected in file or alphabet
            }
        }
        
        boolean changes = true;
        while (changes) {
            changes = false;
            
            for (Iterator i = ruleset.getIterator(); i.hasNext(); ) {
                Rule rule = (Rule)i.next();
                Nonterminal left = rule.getLeftSide();
                
                for (int k = 0; k < rule.rightSideSize(); k++) {
                    Symbol right = (Symbol)rule.getRightSide(k);
                    
                    if (right instanceof Nonterminal) {
                        HashSet rightFollow = (HashSet)follows.get(right);
                        HashSet assembledFirsts = assemble(rule, k);
                        boolean hadLambda = assembledFirsts.remove(alphabet.lambda());
                        
                        /* Only ever raise the flag. Assigning the addAll result directly would let a later
                         * no-op wipe out an earlier change and end the iteration before the sets converge.
                         */
                        if (rightFollow.addAll(assembledFirsts)) {
                            changes = true;
                        }
                        
                        //everything after this symbol can vanish, so whatever follows the left side follows it too
                        if (hadLambda && rightFollow.addAll((HashSet)follows.get(left))) {
                            changes = true;
                        }
                    } //otherwise do nothing
                }
            }
        }
    }
    
    
    private HashSet assemble(Rule rule, int index) {
        /* Helper function for computeFollows; computes the first set of the right side of the given rule
         * from symbol index + 1 onward, or a set containing only lambda if the index is at the end. It's
         * used to implement the line in the given follow algorithm "add First(Xi+1 Xi+2 ... Xn)..."
         *
         * Symbols are scanned left to right. Each contributes its first set minus lambda, and scanning stops
         * at the first symbol that cannot vanish. Lambda is included in the result only when every remaining
         * symbol can vanish, which includes the case where there are no remaining symbols.
         */
        HashSet result = new HashSet();
        
        for (int k = index + 1; k < rule.rightSideSize(); ++k) {
            HashSet symbolFirst = firstOf((Symbol)rule.getRightSide(k));
            boolean hadLambda = symbolFirst.remove(alphabet.lambda());
            result.addAll(symbolFirst);
            
            if (!hadLambda) {
                return result; //this symbol always produces something, so the remainder is not nullable
            }
        }
        
        result.add(alphabet.lambda()); //ran off the end: the whole remainder can vanish
        return result;
    }
    
    
    private static void displaySets(String heading, HashMap sets) {
        /* Prints the heading, then one line per key of the form "key -- member member ", with a blank line
         * after the heading and after the last entry. Keys and members are printed via their string form.
         */
        System.out.println(heading);
        System.out.println();
        
        for (Iterator i = sets.keySet().iterator(); i.hasNext(); ) {
            Symbol s = (Symbol)i.next();
            System.out.print("" + s + " -- ");
            
            for (Iterator j = ((HashSet)sets.get(s)).iterator(); j.hasNext(); ) {
                System.out.print("" + ((Symbol)j.next()) + " ");
            }
            System.out.println();
        }
        System.out.println();
    }
    
    
    void displayFirsts() {
        /* Prints every nonterminal's first set under the heading "first:". computeFirsts() must have run.
         */
        displaySets("first:", firsts);
    }
    
    
    void displayFollows() {
        /* Prints every nonterminal's follow set under the heading "follow:". computeFollows() must have run.
         */
        displaySets("follow:", follows);
    }
}