/**
 * pdfXtk-Extras - PDF Extraction Toolkit Extras
 * Copyright (c) by the authors/contributors.  All rights reserved.
 * This project includes code from PDFBox and TouchGraph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://pdfxtk.sourceforge.net
 *
 */
package at.ac.tuwien.dbai.pdfwrap.table;

import java.util.HashMap;
import java.util.List;

import at.ac.tuwien.dbai.pdfwrap.analysis.*;
import at.ac.tuwien.dbai.pdfwrap.model.graph.*;
import at.ac.tuwien.dbai.pdfwrap.model.document.*;
import at.ac.tuwien.dbai.pdfwrap.utils.ListUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.SegmentUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.Utils;

/**
 * Segmentation rules that decide (a) whether the two ends of a vertical
 * adjacency edge may be merged into one cluster and (b) whether a finished
 * cluster is acceptable.
 *
 * NOTE(review): judging by the class name these rules are meant for
 * candidate columns of a table -- confirm against the callers.
 *
 * The thresholds below are deliberately mutable statics: according to the
 * original notes, PageProcessor changes LINE_SPACING_TOLERANCE when a page
 * image is used.
 */
public class CandColSegmentationRules implements ISegmentationRules
{
    /** Not referenced in this class. Original note: 5524.pdf i-cite */
    public static float MAX_CLUST_LINE_SPACING = 1.75f;

    /** Not referenced in this class. Original note: Baghdad problem! 30.07.08 */
    public static float MIN_CLUST_LINE_SPACING = 0.25f;

    /**
     * Largest line spacing (edge length divided by the edge's font size)
     * for which {@link #clusterTogether} still accepts an edge.
     */
    public static float MAX_COL_LINE_THRESHOLD = 3.5f;

    /**
     * Tolerance handed to Utils.within when {@link #isValidCluster} compares
     * each consecutive line spacing with the cluster's line spacing.
     * Was 0.25f; changed to 0.05f on 30.10.10.
     *
     * NOTE! This linespacing tolerance does not apply to OCR;
     * 9.01.11 also does not apply to str conversions;
     * PageProcessor changes this value if a page image is used.
     */
    public static float LINE_SPACING_TOLERANCE = 0.05f;

    /*
        Former two-phase dispatcher, kept for reference only:

        protected static boolean clusterTogether(AdjacencyEdge<GenericSegment> ae,
                CandidateCluster clustFrom, CandidateCluster clustTo,
                List<AdjacencyEdge<GenericSegment>> allEdges, HashMap vertNeighbourMap,
                List<? extends GenericSegment> items, int processPhase)
        {
            if (processPhase == 2)
                return clusterTogether2(ae, clustFrom, clustTo, allEdges, items);
            else
                return clusterTogether1(ae, clustFrom, clustTo, allEdges, vertNeighbourMap);
        }
    */

    /**
     * Decides whether the two end points of an adjacency edge may be merged.
     *
     * The edge is accepted only if all of the following hold:
     * it is vertical; the two sides are not already the same cluster;
     * both text segments pass Utils.sameFontSize; the line spacing
     * (edge length / edge font size) does not exceed
     * {@link #MAX_COL_LINE_THRESHOLD}; and no other TextSegment from
     * {@code items} intersects the bounding box the merged result would have.
     *
     * @param ae               edge under consideration; both of its nodes are
     *                         cast to TextSegment (a ClassCastException is
     *                         thrown if they are not)
     * @param clustFrom        cluster on the "from" side, or null if that side
     *                         is still the bare segment
     * @param clustTo          cluster on the "to" side, or null if that side
     *                         is still the bare segment
     * @param allEdges         not used by this implementation
     * @param vertNeighbourMap not used by this implementation
     *                         (original note: 1st level only)
     * @param items            segments searched for intruders inside the
     *                         merged bounding box
     * @return true if the two sides may be clustered together
     */
    public boolean clusterTogether(AdjacencyEdge<GenericSegment> ae,
        CandidateCluster clustFrom, CandidateCluster clustTo, List<AdjacencyEdge<GenericSegment>> allEdges,
        HashMap<GenericSegment, List<GenericSegment>> vertNeighbourMap, List<GenericSegment> items)
    {
        if (!ae.isVertical()) return false;

        // unchecked conversion -- but should always work
        TextSegment segFrom = (TextSegment)ae.getNodeFrom();
        TextSegment segTo = (TextSegment)ae.getNodeTo();

        // don't cluster the same cluster together(!)
        if (clustFrom != null && clustFrom == clustTo)
            return false;

        // THINK: edges are now between clusters and not textlines... :)
        if (!Utils.sameFontSize(segFrom, segTo)) return false;

        // Reject over-long edges before the bounding-box scan below, which
        // has to look at every element of items. Written as a negated "<="
        // so that a NaN spacing (e.g. 0/0) is rejected as well.
        float lineSpacing = ae.physicalLength()/ae.getFontSize();
        if (!(lineSpacing <= MAX_COL_LINE_THRESHOLD))
            return false;

        // 6.08.09 check that we don't go past the table by swallowing:
        // build the bounding box the merged result would occupy ...
        GenericSegment bBox;
        if (clustFrom != null)
            bBox = new GenericSegment(clustFrom.getBoundingBox());
        else
            bBox = new GenericSegment(segFrom.getBoundingBox());
        if (clustTo != null)
            bBox.growBoundingBox(clustTo);
        else
            bBox.growBoundingBox(segTo);

        // ... and refuse the merge if any text segment inside that box
        // belongs to neither side.
        for (GenericSegment gs : ListUtils.findElementsIntersectingBBox(items, bBox))
        {
            if (gs instanceof TextSegment
                && !belongsToEitherSide(gs, clustFrom, segFrom, clustTo, segTo))
            {
                return false;
            }
        }

        return true;
    }

    /**
     * Tells whether {@code gs} is part of one of the two sides being merged.
     * A side is represented by its cluster when one exists (membership via
     * getItems().contains) and by the bare segment otherwise (identity).
     */
    private static boolean belongsToEitherSide(GenericSegment gs,
        CandidateCluster clustFrom, TextSegment segFrom,
        CandidateCluster clustTo, TextSegment segTo)
    {
        boolean onFromSide = (clustFrom != null)
            ? clustFrom.getItems().contains(gs)
            : segFrom == gs;
        if (onFromSide) return true;

        return (clustTo != null)
            ? clustTo.getItems().contains(gs)
            : segTo == gs;
    }

    /**
     * Checks that a cluster has constant line spacing and no chasms.
     *
     * Calls {@code c.setCalculatedFields()} first, so the cluster is modified
     * by this check. Then the spacing between every pair of consecutive found
     * lines (difference of their Y1 values divided by the cluster's font
     * size) is compared with {@code c.getLineSpacing()} using
     * {@link #LINE_SPACING_TOLERANCE}; the first pair that fails makes the
     * cluster invalid. Finally the result of PageSegmenter.checkForChasms is
     * negated and returned.
     *
     * Original note: currently identical to 1st level -- change?
     *
     * @param c cluster to validate; per the original note, its lines and
     *          average line spacing are expected to be available
     * @return true if the line spacing is constant and no chasms were found
     */
    public boolean isValidCluster(CandidateCluster c)
    {
        // prerequisite for calling this method is that the lines have been found ...
        // and that the average linespacing has been found
        c.setCalculatedFields();

        // now, we check that the linespacing is constant by comparing the
        // spacing of each consecutive line with the average linespacing
        CompositeSegment<? extends GenericSegment> prevLine = null;

        for (CompositeSegment<? extends GenericSegment> l : c.getFoundLines())
        {
            if (prevLine != null)
            {
                float lineSpacing = (prevLine.getY1() - l.getY1()) / c.getFontSize();
                if (!Utils.within(lineSpacing, c.getLineSpacing(), LINE_SPACING_TOLERANCE))
                    return false;
                // TODO (from original): fontsize check too
            }
            prevLine = l;
        }

        return !PageSegmenter.checkForChasms(c);
    }
}
