/**
* @file LabeledFileList.cpp
* @brief reads images from directory
* @author Erik Rodner
* @date 17.09.2007

*/
#include "core/image/ImageT.h"
#include "core/vector/VectorT.h"
#include "core/vector/MatrixT.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "core/basics/StringTools.h"
#include "core/basics/FileMgt.h"

#include "vislearning/cbaselib/LabeledFileList.h"

using namespace OBJREC;

using namespace std;
using namespace NICE;

/**
 * @brief Default constructor.
 *
 * Switches the dataset debug output on and resets the labeled-set factory
 * pointer. get() compares m_pLabeledSetFactory against NULL before using it,
 * so the pointer needs a defined value even if no factory is ever assigned.
 */
LabeledFileList::LabeledFileList()
{
  debug_dataset = true;
  // NOTE(review): harmless if the header already initialises this member.
  m_pLabeledSetFactory = NULL;
}

/**
 * @brief Destructor; the body is empty, so no explicit cleanup is performed here.
 */
LabeledFileList::~LabeledFileList()
{
}

/**
 * @brief Looks up the class value stored for an example ID in a comma-separated label file.
 *
 * The file is read line by line and every line is split at ',' into numeric
 * values. The first value is compared with @p exampleID; for the first line
 * that matches, the second value is returned.
 *
 * @param lfile path of the comma-separated label file
 * @param exampleID example ID to search for (compared with the first column)
 * @return second column of the first matching line converted to int,
 *         or -1 if the file could not be read or no line matches
 */
int LabeledFileList::getClassFromNumber (
    const std::string & lfile,
    const int exampleID ) const
{
  std::ifstream file( lfile.c_str() );
  std::string line;
  while ( std::getline(file, line) )
  {
    NICE::Vector valList(2,0);
    NICE::StringTools::splitVector( line, ',', valList );

    // Lines without both columns (e.g. blank or truncated lines) cannot be
    // evaluated; skip them instead of indexing past the end of the vector.
    // NOTE(review): assumes splitVector resizes valList to the number of
    // parsed values - confirm against StringTools.
    if ( valList.size() < 2 )
      continue;

    if ( static_cast<int>( valList[0] ) == exampleID )
      return static_cast<int>( valList[1] );
  }

  // file not readable or ID not listed
  return -1;
}

/**
 * @brief Loads the label information according to a given label file format.
 *
 * The name of the label file is derived from @p file by a regular expression
 * substitution using the config entries "localization_pattern" and
 * "localization_subst" (section "main"). If the substitution fails, an error
 * is printed and the program is terminated with exit(-1).
 *
 * Supported types of label file format (localization_format):
 * - "image": <br>usage of a single channel images containing label regions
 * - "imagergb": <br>usage of a multi channel color images containing label regions;
 *   if the label image cannot be read, the image @p file itself is read and set
 *   to (0,0,0) to serve as label image
 * - "csv": <br>experimental; the example ID is parsed from the characters following
 *   "ID" in @p file and the class value is looked up in the comma-separated label file
 * - "polygon": <br>obtaining bounding boxes from textural label files (used with e.g. PASCAL dataset)
 * - "polygon_siftflow": <br>like "polygon", but read with LocalizationResult::FILEFORMAT_POLYGON_SIFTFLOW
 * - "imagelabeler": <br>obtaining label information (currently only bounding boxes) from the separate label files (XML like) created with the ImageLabeler ( < file name >_labeled.dat ). @see ImageInfo
 *
 * @param classnames class containing all potential class names (label categories)
 * @param classno class number handed to the LocalizationResult for format "image"
 * @param file file name of the image the label information belongs to
 * @param conf configuration structure containing a information from a loaded config file; has to tag "localization_format" in section "main" in order to obtain the correct label information from a file.
 * @return LocalizationResult allocated with new (not freed by this function), or NULL if
 *         no "localization_format" is configured, the label file does not exist
 *         (all formats except "imagergb"), the label image cannot be read (format "image"),
 *         or the file name contains no "ID" marker (format "csv")
 * @throws Exception if the configured format is none of the supported ones
 */
LocalizationResult *LabeledFileList::getLocalizationInfo ( const ClassNames & classnames,
    int classno,
    const std::string & file,
    const Config & conf ) const
{
  /*
  localization_pattern = image
  localization_subst   = mask
  localization_format  = image
  */
  std::string format = conf.gS ( "main", "localization_format", "unknown" );
  if ( format == "unknown" )
    return NULL;

  std::string pattern = conf.gS ( "main", "localization_pattern" );
  std::string subst   = conf.gS ( "main", "localization_subst" );

  // derive the name of the label file from the image file name
  std::string lfile = file;
  if ( ! StringTools::regexSubstitute ( lfile, pattern, subst ) )
  {
    fprintf ( stderr, "Unable to substitute using pattern #%s# and string #%s#\n",
              pattern.c_str(), lfile.c_str() );
    exit ( -1 );
  }

  // "imagergb" has its own fallback for a missing label image (see below)
  if ( ! FileMgt::fileExists ( lfile ) && format != "imagergb" )
    return NULL;

  if ( debug_dataset )
  {
    fprintf ( stderr, "LabeledFileList: reading localization information %s\n", lfile.c_str() );
  }

  LocalizationResult *lr = NULL;

  if ( format == "image" )
  {
    NICE::Image mask;
    try {
      mask.read ( lfile );
    } catch ( ImageException & ) {
      fprintf ( stderr, "WARNING: unable to open file %s (no localization info provided)\n",
                lfile.c_str() );
      return NULL;
    }

    lr = new LocalizationResult ( &classnames, mask, classno );
  }
  else if ( format == "imagergb" )
  {
    NICE::ColorImage mask;
    try {
      mask.read ( lfile );
    } catch ( ImageException & ) {
      fprintf ( stderr, "WARNING: unable to open file %s (no localization info provided) - creating one with background class only!\n",
                lfile.c_str() );
      // Deliberately no "return NULL" here: the image itself is loaded and
      // overwritten with (0,0,0) so that it can be used as label image.
      mask.read ( file );
      mask.set(0,0,0);
    }
    lr = new LocalizationResult ( &classnames, mask );
  }
  else if ( format == "csv" )
  {
    // CAUTION! This is for experimental use only and needs a certain configuration of
    // the csv file and scheme for the image file names!

    // The example ID is expected directly behind the marker "ID" in the file name.
    // Without the marker there is no ID to look up; formerly the unchecked
    // npos result wrapped around and an arbitrary part of the path was parsed.
    const std::size_t found = file.find( "ID" );
    if ( found == std::string::npos )
    {
      fprintf ( stderr, "WARNING: no ID marker found in file name %s (no localization info provided)\n",
                file.c_str() );
      return NULL;
    }

    NICE::ColorImage mask;
    mask.read( file );

    // std::string::substr expects (position, count). The former call passed
    // found+8 as second argument, i.e. an end position; the intended ID
    // therefore consists of the 6 characters following "ID".
    const std::string exampleIDStr = file.substr( found + 2, 6 );
    const int exampleID = std::atoi( exampleIDStr.c_str() );

    // getClassFromNumber yields -1 for an unknown ID, which the cast below
    // turns into 255.
    int g = getClassFromNumber( lfile, exampleID );
    mask.set((uchar)g,(uchar)g,(uchar)g);
    lr = new LocalizationResult ( &classnames, mask );
  }
  else if ( format == "polygon" )
  {
    lr = new LocalizationResult ( &classnames );

    lr->read ( lfile, LocalizationResult::FILEFORMAT_POLYGON );

    if ( debug_dataset )
      fprintf (stderr, "LabeledFileList: object localization %d\n", (int)lr->size() );
  }
  else if ( format == "polygon_siftflow" )
  {
    lr = new LocalizationResult ( &classnames );

    lr->read ( lfile, LocalizationResult::FILEFORMAT_POLYGON_SIFTFLOW );

    if ( debug_dataset )
      fprintf (stderr, "LabeledFileList: object localization %d\n", (int)lr->size() );
  }
  else if ( format == "imagelabeler" )
  {
    lr = new LocalizationResult ( &classnames );
    lr->loadImageInfo(lfile);
  }
  else
  {
    fthrow(Exception, "Localization format not yet supported !!\n");
  }

  if ( debug_dataset && lr != NULL )
    fprintf (stderr, "%s (%d objects)\n", lfile.c_str(), (int)lr->size() );

  return lr;
}

void LabeledFileList::getFromPattern (
  const std::string & dir,
  const Config & datasetconf,
  const ClassNames & classnames,
  LabeledSet & ls,
  bool localizationInfoDisabled ) const
{
  std::string filemask;

  if ( dir.substr ( dir.length() - 1, 1 ) != "/" )
    filemask = dir + "/" + datasetconf.gS ( "main", "pattern" );
  else
    filemask = dir + datasetconf.gS ( "main", "pattern" );

  std::vector<string> files;

  int classnameField = datasetconf.gI ( "main", "classname_field", 1 );
  std::string fixedClassname = datasetconf.gS ( "main", "fixed_classname", "" );

  files.clear();
  FileMgt::DirectoryRecursive ( files, dir );
  fprintf ( stderr, "LabeledFileList: Files: %d\n", ( int ) files.size() );

  sort ( files.begin(), files.end() );

  for ( vector<string>::const_iterator i  = files.begin();
        i != files.end();
        i++ )
  {
    vector<string> submatches;
    // refactor-nice.pl: check this substitution
    // old: const string & file = *i;
    const std::string & file = *i;
    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: next file: %s\n", file.c_str() );

    bool match = StringTools::regexMatch ( file, filemask, submatches );

    if ( ( fixedClassname == "" ) && ( ( int ) submatches.size() <= classnameField ) ) match = false;

    if ( ! match )
    {
      if ( debug_dataset )
        fprintf ( stderr, "LabeledFileList: WARNING: %s does not match filemask: %s!!\n", file.c_str(), filemask.c_str() );
    } else {
      std::string classcode = ( fixedClassname == "" ) ? submatches[classnameField] : fixedClassname;

      if ( classnames.existsClassCode ( classcode ) ) {
        int classno = classnames.classno ( classcode );
        LocalizationResult *lr  = NULL;

        if ( ! localizationInfoDisabled )
          lr = getLocalizationInfo (
                 classnames, classno, file, datasetconf );

        if ( debug_dataset )
          fprintf ( stderr, "LabeledFileList: LabeledSet: add %s (%d)\n", file.c_str(), classno );
        if ( lr == NULL )
        {
          ls.add ( classno, new ImageInfo ( file ) );
        } else {
          ls.add ( classno, new ImageInfo ( file, lr ) );
          if ( debug_dataset )
            fprintf ( stderr, "LabeledFileList: LocalizationResult added!\n" );

        }
      } else {
        if ( debug_dataset )
        {
          for ( vector<string>::iterator i = submatches.begin();
                i != submatches.end();
                i++ )
          {
            fprintf ( stderr, "LabeledFileList: submatch: %s\n", i->c_str() );
          }
          fprintf ( stderr, "LabeledFileList: WARNING: code %s ignored !\n", classcode.c_str() );
        }
      }
    }
    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: filename processed\n" );
  }

  cerr << "directory " << dir << " loaded..." << endl;
  ls.printInformation();

}

/**
 * @brief Fills a labeled set from a whitespace-separated file list.
 *
 * Every entry of the list consists of a class code followed by a file name.
 * If "fixed_classname" (section "main") is set, the list contains file names
 * only and the fixed class code is used for all of them. File names are
 * resolved with Config::getAbsoluteFilenameRelativeToThisConfig. Entries
 * whose class code is unknown to @p classnames are ignored.
 *
 * @param filelist file name of the list
 * @param datasetconf dataset configuration (section "main")
 * @param classnames known class codes and their class numbers
 * @param ls labeled set the files are added to
 * @param localizationInfoDisabled if true, no localization information is loaded
 * @throws IOException if the file list cannot be opened
 */
void LabeledFileList::getFromList (
  const std::string & filelist,
  const Config & datasetconf,
  const ClassNames & classnames,
  LabeledSet & ls,
  bool localizationInfoDisabled ) const
{
  if ( debug_dataset )
    fprintf ( stderr, "Reading file list: %s\n", filelist.c_str() );

  std::ifstream listStream ( filelist.c_str(), std::ios::in );
  if ( ! listStream.good() )
    fthrow ( IOException, "File list " << filelist << " not found !" );

  const std::string fixedClassname = datasetconf.gS ( "main", "fixed_classname", "" );
  const bool codeFromList = fixedClassname.empty();

  // The loop ends with the first extraction that fails, i.e. at the end of
  // the list or at an incomplete entry.
  for ( ;; )
  {
    std::string code = fixedClassname;
    if ( codeFromList && ! ( listStream >> code ) )
      break;

    std::string entry;
    if ( ! ( listStream >> entry ) )
      break;

    const std::string imageFile = datasetconf.getAbsoluteFilenameRelativeToThisConfig(entry);

    // entries with unknown class codes are skipped
    if ( ! classnames.existsClassCode ( code ) )
    {
      if ( debug_dataset )
        fprintf ( stderr, "WARNING: code %s ignored !\n", code.c_str() );
      continue;
    }

    const int classno = classnames.classno ( code );

    LocalizationResult *locInfo = NULL;
    if ( ! localizationInfoDisabled )
      locInfo = getLocalizationInfo ( classnames, classno, imageFile, datasetconf );

    if ( debug_dataset )
      std::cerr << "Adding file " << imageFile << " with classno " << classno << std::endl;

    if ( locInfo != NULL )
      ls.add ( classno, new ImageInfo ( imageFile, locInfo ) );
    else
      ls.add ( classno, new ImageInfo ( imageFile ) );
  }

  if ( debug_dataset )
    ls.printInformation();
}

/**
 * @brief Fills a labeled set using the method selected by the dataset configuration.
 *
 * The entries of section "main" are evaluated in this order:
 * - "pattern": files below @p dir are collected with getFromPattern
 * - "filelist": the list file is read with getFromList; a name that does not
 *   start with '/' is taken relative to @p dir
 * - "factoryxml": the labeled set is created by the labeled-set factory,
 *   provided one is available; a name that does not start with '/' is taken
 *   relative to @p dir
 *
 * If none of these applies, an error message is printed and the program is
 * terminated with exit(-1).
 *
 * @param dir base directory of the dataset
 * @param datasetconf dataset configuration (section "main")
 * @param classnames known class codes and their class numbers
 * @param ls labeled set the files are added to
 * @param localizationInfoDisabled if true, no localization information is loaded
 * @param debugDataset new value of the debug output flag of this object
 */
void LabeledFileList::get (
    const std::string & dir,
    const Config & datasetconf,
    const ClassNames & classnames,
    LabeledSet & ls,
    bool localizationInfoDisabled,
    bool debugDataset )
{
  std::string pattern = datasetconf.gS("main", "pattern", "");
  std::string filelist = datasetconf.gS("main", "filelist", "");
  std::string factoryxmlfile = datasetconf.gS("main", "factoryxml", "");

  this->debug_dataset = debugDataset;

  if ( ! pattern.empty() )
  {
    getFromPattern ( dir, datasetconf, classnames, ls, localizationInfoDisabled );
  }
  else if ( ! filelist.empty() )
  {
    // absolute names are used as given, all others are relative to dir
    std::string filelistPath = ( filelist.substr(0,1) == "/" ) ? filelist : dir + "/" + filelist;

    getFromList ( filelistPath, datasetconf, classnames, ls, localizationInfoDisabled );
  }
  else if ( ! factoryxmlfile.empty() && m_pLabeledSetFactory != NULL )
  {
    factoryxmlfile = ( factoryxmlfile.substr(0,1) == "/" ) ? factoryxmlfile : dir + "/" + factoryxmlfile;
    m_pLabeledSetFactory->createLabeledSetFromXml( factoryxmlfile, datasetconf, classnames, ls );
  }
  else
  {
    fprintf (stderr, "LabeledFileList: Unable to obtain labeled file list\n");
    exit(-1);
  }
}