static char rcsid[] = "$Id: ce3ac83377e82021da78edda0c04d8c7a35123a8 $";
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef HAVE_MEMCPY
#define memcpy(d,s,n) bcopy((s),(d),(n))
#endif
#ifndef HAVE_MEMMOVE
#define memmove(d,s,n) bcopy((s),(d),(n))
#endif

#include "stage1hr.h"
#include "stage1hr-single.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>		/* For rint */

#include "assert.h"
#include "mem.h"
#include "types.h"		/* Needed for HAVE_64_BIT */
#include "univcoord.h"

#include "list.h"
#include "compress.h"
#include "record.h"

#include "genomebits_mismatches.h" /* For MISMATCH_EXTRA */
#include "orderstat.h"
#include "transcriptome-search.h"
#include "tr-extension-search.h"
#include "kmer-search.h"
#include "extension-search.h"
#include "segment-search.h"

/* #include "terminal.h" */
/* #include "distant-rna.h" */
/* #include "distant-dna.h" */

#include "path-solve.h"
#include "path-fusion.h"

#include "transcript-remap.h"
#include "transcript-velocity.h"
#include "path-eval.h"


/* Floor for the value returned by determine_sizelimit() */
#define MIN_SIZELIMIT 100


/* File-scope settings read by the routines below.  Their assignment is
   not visible near these declarations (presumably done by a setup
   routine elsewhere in this file -- confirm). */
static Mode_T mode;
/* Reads shorter than index1part + index1interval - 1 are not searched */
static int index1part;
static int index1interval;

/* When transcriptome is NULL, single_read_search starts with
   *found_transcriptp set to true */
static Transcriptome_T transcriptome;
/* Gate the transcriptome-based (TR_*) and genome-based methods,
   respectively, in single_read_search */
static bool transcriptome_align_p;
static bool genome_align_p;

/* As interpreted in Stage1_single_read: a negative nmismatches value
   means no filter was specified, a value below 1.0 is a fraction of the
   read length, and anything else is an absolute count.  For mincoverage,
   values <= 0.0 give 0, values <= 1.0 are a fraction of the read length,
   and anything else is an absolute count. */
static double user_nmismatches_filter_float;
static double user_mincoverage_filter_float;

/* Values strictly between 0.0 and 1.0 are taken as a fraction of the
   read length; other values are truncated to an integer count */
static double max_middle_insertions_float;
static double max_middle_deletions_float;

static bool splicingp;
/* Starting value for overall_max_distance in Stage1_single_read */
static Chrpos_T shortsplicedist;
/* Lower bound for overall_end_distance in Stage1_single_read */
static Chrpos_T shortsplicedist_novelend;



#ifdef DEBUG0
#define debug0(x) x
#else
#define debug0(x)
#endif


#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif



#define T Stage1_T

/* Computes the sizelimit that single_read_search hands to
   Segment_identify.  Collects the plus- and minus-strand position counts
   of every valid query position, asks Orderstat_int_pct_inplace for the
   value at pct 0.60, and never returns less than MIN_SIZELIMIT.  When
   fewer than 5 counts were collected, the order statistic is skipped and
   MIN_SIZELIMIT is returned directly.

   Requires querylength >= index1part (asserted). */
static int
determine_sizelimit (T this, int querylength) {
  int cutoff, *set;
  int n;
  int query_lastpos, querypos;

  assert(querylength >= index1part);

  query_lastpos = querylength - index1part;

  /* Room for two counts (plus and minus) per query position */
  set = (int *) MALLOC(2*(query_lastpos+1)*sizeof(int));
  n = 0;
  for (querypos = 0; querypos <= query_lastpos; querypos++) {
    if (this->validp[querypos] == true) {
      set[n++] = this->plus_npositions[querypos];
      set[n++] = this->minus_npositions[querypos];
    }
  }

  if (n < 5) {
    cutoff = MIN_SIZELIMIT;
  } else if ((cutoff = Orderstat_int_pct_inplace(set,n,/*pct*/0.60)) < MIN_SIZELIMIT) {
    cutoff = MIN_SIZELIMIT;
  }
  FREE(set);

  return cutoff;
}


#if 0
/* Disabled debugging aid, replaced by Stage1_list_paths.  Prints, one
   per line, the address of every element on the unextended and the
   sense/antisense gplus/gminus path lists of THIS, each tagged with the
   name of the list it came from. */
static void
print_path_contents (T this) {
  List_T p;

  for (p = this->unextended_sense_paths_gplus; p != NULL; p = List_next(p)) {
    printf("%p this->unextended_sense_paths_gplus\n",List_head(p));
  }
  for (p = this->unextended_sense_paths_gminus; p != NULL; p = List_next(p)) {
    printf("%p this->unextended_sense_paths_gminus\n",List_head(p));
  }
  for (p = this->unextended_antisense_paths_gplus; p != NULL; p = List_next(p)) {
    printf("%p this->unextended_antisense_paths_gplus\n",List_head(p));
  }
  for (p = this->unextended_antisense_paths_gminus; p != NULL; p = List_next(p)) {
    printf("%p this->unextended_antisense_paths_gminus\n",List_head(p));
  }
  for (p = this->sense_paths_gplus; p != NULL; p = List_next(p)) {
    printf("%p this->sense_paths_gplus\n",List_head(p));
  }
  for (p = this->sense_paths_gminus; p != NULL; p = List_next(p)) {
    printf("%p this->sense_paths_gminus\n",List_head(p));
  }
  for (p = this->antisense_paths_gplus; p != NULL; p = List_next(p)) {
    printf("%p this->antisense_paths_gplus\n",List_head(p));
  }
  for (p = this->antisense_paths_gminus; p != NULL; p = List_next(p)) {
    printf("%p this->antisense_paths_gminus\n",List_head(p));
  }

  return;
}
#endif



/* Searches for alignments of a single read by running the available
   methods in a fixed order: KMER_EXACT, EXT, TR_ENDS, TR_EXT,
   KMER_APPROX, SEGMENT1, TR_COMPLETE.  A method runs only when
   *last_method is still below it and the matching file-level flag is
   true (genome_align_p for KMER_EXACT, EXT, KMER_APPROX and SEGMENT1;
   transcriptome_align_p for the TR_* methods); *last_method is then set
   to that method.  The search stops after the first method that leaves
   *found_score <= sufficient_score with *found_transcriptp true.  Paths
   are accumulated in the lists of THIS.

   *found_transcriptp is reset on entry: true when the file-level
   transcriptome is NULL, false otherwise.  A read shorter than
   index1part + index1interval - 1 returns at once, before that reset. */
void
single_read_search (int *found_score, bool *found_transcriptp, Method_T *last_method,
                    T this, EF64_T repetitive_ef64, int genestrand,

                    char *queryuc_ptr, char *queryrc, int querylength,
                    Knownsplicing_T knownsplicing, Knownindels_T knownindels,
                    int *mismatch_positions_alloc, Univcoord_T *novel_diagonals_alloc, unsigned short *localdb_alloc,
                    Compress_T query_compress_fwd, Compress_T query_compress_rev,

                    int nmismatches_allowed, int max_insertionlen, int max_deletionlen,
                    Chrpos_T overall_max_distance, Chrpos_T overall_end_distance,

                    Trdiagpool_T trdiagpool, Univdiagpool_T univdiagpool,
                    Intlistpool_T intlistpool, Uintlistpool_T uintlistpool,
                    Univcoordlistpool_T univcoordlistpool, Listpool_T listpool,
                    Trpathpool_T trpathpool, Pathpool_T pathpool, Transcriptpool_T transcriptpool,
                    Vectorpool_T vectorpool, Hitlistpool_T hitlistpool, Spliceendsgen_T spliceendsgen,
                    bool paired_end_p, bool first_read_p,
                    Pass_T pass, int sufficient_score) {

  debug(printf("Entered single_read_search with queryuc_ptr %s and sufficient score %d\n",
               queryuc_ptr,sufficient_score));

  /* Read is too short to search; outputs are left as the caller set them */
  if (querylength < index1part + index1interval - 1) {
    return;
  }

  /* Without a transcriptome, the transcript requirement starts out satisfied */
  *found_transcriptp = (transcriptome == NULL) ? true : false;


  /* Method: KMER_EXACT (genome) */
  if (*last_method < KMER_EXACT && genome_align_p == true) {
    debug(printf("Read %s: Running Kmer_search_exact\n",first_read_p ? "5'" : "3'"));
    Kmer_search_exact(found_score,found_transcriptp,
                      &this->sense_paths_gplus,&this->sense_paths_gminus,
                      &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                      query_compress_fwd,query_compress_rev,querylength,
                      this,genestrand,nmismatches_allowed,
                      intlistpool,uintlistpool,univcoordlistpool,listpool,
                      pathpool,vectorpool,hitlistpool,transcriptpool);
    *last_method = KMER_EXACT;

    debug(printf("After Kmer_search_exact, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }

  /* Stage1_dump(this,querylength); */

  /* Every later method, genome-based or transcriptome-based, needs the oligos */
  Stage1_fill_all_oligos(this,queryuc_ptr,querylength,genestrand);


  /* Method: EXT (genome) */
  if (*last_method < EXT && genome_align_p == true) {
    debug(printf("Read %s: Running Extension_search\n",first_read_p ? "5'" : "3'"));
    Extension_search(found_score,found_transcriptp,
                     &this->unsolved_sense_paths_gplus,&this->unsolved_sense_paths_gminus,
                     &this->unsolved_antisense_paths_gplus,&this->unsolved_antisense_paths_gminus,
                     &this->unextended_sense_paths_gplus,&this->unextended_sense_paths_gminus,
                     &this->unextended_antisense_paths_gplus,&this->unextended_antisense_paths_gminus,
                     &this->sense_paths_gplus,&this->sense_paths_gminus,
                     &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                     this,queryuc_ptr,queryrc,querylength,knownsplicing,knownindels,
                     mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                     query_compress_fwd,query_compress_rev,
                     univdiagpool,intlistpool,uintlistpool,univcoordlistpool,listpool,
                     pathpool,transcriptpool,vectorpool,hitlistpool,spliceendsgen,
                     max_insertionlen,max_deletionlen,nmismatches_allowed,overall_end_distance,
                     genestrand,paired_end_p,first_read_p,pass);
    *last_method = EXT;

    debug(printf("After Extension_search, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }


  /* Method: TR_ENDS (transcriptome) */
  if (*last_method < TR_ENDS && transcriptome_align_p == true) {
    debug(printf("Read %s: Running Transcriptome_search_ends\n",first_read_p ? "5'" : "3'"));
    Transcriptome_search_ends(found_score,found_transcriptp,
                              &this->sense_paths_gplus,&this->sense_paths_gminus,
                              &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                              this,queryuc_ptr,querylength,this->indelinfo,
                              query_compress_fwd,query_compress_rev,
                              nmismatches_allowed,max_insertionlen,max_deletionlen,
                              intlistpool,uintlistpool,univcoordlistpool,listpool,
                              trpathpool,pathpool,vectorpool,transcriptpool,hitlistpool,
                              /*method*/TR_ENDS);
    *last_method = TR_ENDS;
    debug(printf("After Transcriptome_search_ends, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }


  /* Method: TR_EXT (transcriptome) */
  if (*last_method < TR_EXT && transcriptome_align_p == true) {
    debug(printf("Read %s: Running Tr_extension_search\n",first_read_p ? "5'" : "3'"));
    Tr_extension_search(found_score,found_transcriptp,
                        &this->unextended_sense_paths_gplus,&this->unextended_sense_paths_gminus,
                        &this->unextended_antisense_paths_gplus,&this->unextended_antisense_paths_gminus,
                        &this->sense_paths_gplus,&this->sense_paths_gminus,
                        &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                        this,querylength,mismatch_positions_alloc,
                        query_compress_fwd,query_compress_rev,
                        trdiagpool,intlistpool,uintlistpool,univcoordlistpool,listpool,
                        trpathpool,pathpool,transcriptpool,vectorpool,hitlistpool,
                        max_insertionlen,max_deletionlen,nmismatches_allowed,
                        genestrand,/*method*/TR_EXT);
    *last_method = TR_EXT;
    debug(printf("After Tr_extension_search, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }


  /* Method: KMER_APPROX (genome) */
  if (*last_method < KMER_APPROX && genome_align_p == true) {
    debug(printf("Read %s: Running Kmer_search_approx\n",first_read_p ? "5'" : "3'"));
    Kmer_search_approx(found_score,found_transcriptp,
                       &this->unsolved_sense_paths_gplus,&this->unsolved_sense_paths_gminus,
                       &this->unsolved_antisense_paths_gplus,&this->unsolved_antisense_paths_gminus,
                       &this->sense_paths_gplus,&this->sense_paths_gminus,
                       &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                       query_compress_fwd,query_compress_rev,querylength,
                       this,knownsplicing,genestrand,nmismatches_allowed,
                       max_insertionlen,max_deletionlen,overall_max_distance,/*sizelimit*/3000,
                       intlistpool,uintlistpool,univcoordlistpool,listpool,
                       pathpool,vectorpool,hitlistpool,transcriptpool);
    *last_method = KMER_APPROX;

    debug(printf("After Kmer_search_approx, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }


  /* Method: SEGMENT1 (genome) */
  if (*last_method < SEGMENT1 && genome_align_p == true) {
    /* Records and their counts exist only for the duration of this method */
    struct Record_T *plus_records = NULL, *minus_records = NULL;
    int plus_nrecords = 0, minus_nrecords = 0;
    int sizelimit;

    Stage1_fill_all_positions(this,querylength,genestrand);

    /* The segment search is constrained by a sizelimit derived from the position counts */
    sizelimit = determine_sizelimit(this,querylength);

    debug(printf("Starting Segment_identify on plus strand\n"));
    plus_records = Segment_identify(&plus_nrecords,
#ifdef LARGE_GENOMES
                                    this->plus_positions_high,
#endif
                                    this->plus_positions,this->plus_npositions,this->validp,
                                    this->forward_oligos,repetitive_ef64,
#ifdef LARGE_GENOMES
                                    this->stream_high_alloc,this->stream_low_alloc,
#else
                                    this->streamptr_alloc,
#endif
                                    this->streamsize_alloc,this->querypos_diagterm_alloc,this->mergeinfo,
                                    overall_max_distance,querylength,sizelimit,/*plusp*/true);
    debug(printf("Done\n"));

    debug(printf("Starting Segment_identify on minus strand\n"));
    minus_records = Segment_identify(&minus_nrecords,
#ifdef LARGE_GENOMES
                                     this->minus_positions_high,
#endif
                                     this->minus_positions,this->minus_npositions,this->validp,
                                     this->revcomp_oligos,repetitive_ef64,
#ifdef LARGE_GENOMES
                                     this->stream_high_alloc,this->stream_low_alloc,
#else
                                     this->streamptr_alloc,
#endif
                                     this->streamsize_alloc,this->querypos_diagterm_alloc,this->mergeinfo,
                                     overall_max_distance,querylength,sizelimit,/*plusp*/false);
    debug(printf("Done\n"));

    debug(printf("Read %s: Running Segment_search_all\n",first_read_p ? "5'" : "3'"));
    Segment_search_all(found_score,found_transcriptp,
                       &this->unsolved_sense_paths_gplus,&this->unsolved_sense_paths_gminus,
                       &this->unsolved_antisense_paths_gplus,&this->unsolved_antisense_paths_gminus,
                       &this->unextended_sense_paths_gplus,&this->unextended_sense_paths_gminus,
                       &this->unextended_antisense_paths_gplus,&this->unextended_antisense_paths_gminus,
                       &this->sense_paths_gplus,&this->sense_paths_gminus,
                       &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                       plus_records,plus_nrecords,minus_records,minus_nrecords,
                       queryuc_ptr,queryrc,querylength,
                       mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                       this,knownsplicing,knownindels,
                       query_compress_fwd,query_compress_rev,
                       max_insertionlen,max_deletionlen,nmismatches_allowed,
                       overall_max_distance,overall_end_distance,
                       genestrand,paired_end_p,first_read_p,
                       univdiagpool,intlistpool,uintlistpool,univcoordlistpool,listpool,
                       pathpool,transcriptpool,vectorpool,hitlistpool,
                       spliceendsgen,/*method*/SEGMENT1,pass);
    *last_method = SEGMENT1;
    debug(printf("Done\n"));

    FREE(minus_records);
    FREE(plus_records);

    debug(printf("After Segment search, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));

    if (*found_score <= sufficient_score && *found_transcriptp == true) {
      goto done;
    }
  }


  /* Method: TR_COMPLETE (transcriptome).  Transcriptome_search_complete
     can be biased toward some transcripts */
  if (*last_method < TR_COMPLETE && transcriptome_align_p == true) {
    debug(printf("Read %s: Running Transcriptome_search\n",first_read_p ? "5'" : "3'"));
    Transcriptome_search_complete(found_score,found_transcriptp,
                                  &this->unextended_sense_paths_gplus,&this->unextended_sense_paths_gminus,
                                  &this->unextended_antisense_paths_gplus,&this->unextended_antisense_paths_gminus,
                                  &this->sense_paths_gplus,&this->sense_paths_gminus,
                                  &this->antisense_paths_gplus,&this->antisense_paths_gminus,
                                  this,queryuc_ptr,querylength,this->indelinfo,this->mergeinfo_tr,
                                  this->tplus_stream_array,this->tplus_streamsize_array,this->tplus_diagterm_array,
                                  this->tminus_stream_array,this->tminus_streamsize_array,this->tminus_diagterm_array,
                                  mismatch_positions_alloc,query_compress_fwd,query_compress_rev,
                                  nmismatches_allowed,
                                  intlistpool,uintlistpool,univcoordlistpool,listpool,
                                  trpathpool,pathpool,vectorpool,transcriptpool,hitlistpool,
                                  /*method*/TR_COMPLETE);
    *last_method = TR_COMPLETE;
    debug(printf("After Transcriptome_search_complete, we have sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
                 List_length(this->sense_paths_gplus),List_length(this->sense_paths_gminus),
                 List_length(this->antisense_paths_gplus),List_length(this->antisense_paths_gminus)));
  }

  /* Common exit for every path that ran at least part of the pipeline */
 done:
  debug(Stage1_list_paths(this));
  return;
}


/* Populates the extended_* path lists of a Stage1_T object.

   Each of the four unextended lists (sense/antisense x gplus/gminus) is
   first passed through Path_filter.  Every path on the filtered list is
   then handed to Path_extend with extension requested at both the query
   start and the query end.  gplus lists are extended against queryuc_ptr
   and query_compress_fwd; gminus lists against queryrc and
   query_compress_rev.  When Path_extend sets completep, the returned
   paths are joined onto the matching extended_* list; otherwise they are
   joined back onto the unextended list being traversed.

   *found_score and *found_transcriptp are passed through to Path_extend. */
void
single_read_extend (int *found_score, bool *found_transcriptp, T this,

                    char *queryuc_ptr, char *queryrc, int querylength,
                    Knownsplicing_T knownsplicing, Knownindels_T knownindels,
                    int *mismatch_positions_alloc, Univcoord_T *novel_diagonals_alloc, unsigned short *localdb_alloc,
                    Compress_T query_compress_fwd, Compress_T query_compress_rev,

                    int nmismatches_allowed, int max_insertionlen, int max_deletionlen,
                    Chrpos_T overall_end_distance,

                    Intlistpool_T intlistpool,
                    Uintlistpool_T uintlistpool, Univcoordlistpool_T univcoordlistpool,
                    Listpool_T listpool, Pathpool_T pathpool, Transcriptpool_T transcriptpool,
                    Vectorpool_T vectorpool, Hitlistpool_T hitlistpool, Spliceendsgen_T spliceendsgen,
                    Pass_T pass) {
  List_T extensions, cell;
  bool completep;

  /* Filter all four unextended lists before any extension is attempted */
  this->unextended_sense_paths_gplus =
    Path_filter(this->unextended_sense_paths_gplus,
                intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  this->unextended_sense_paths_gminus =
    Path_filter(this->unextended_sense_paths_gminus,
                intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  this->unextended_antisense_paths_gplus =
    Path_filter(this->unextended_antisense_paths_gplus,
                intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);
  this->unextended_antisense_paths_gminus =
    Path_filter(this->unextended_antisense_paths_gminus,
                intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool,hitlistpool);


  /* Sense, genomic plus: forward query */
  debug(printf("Have %d unextended_sense_paths_gplus\n",List_length(this->unextended_sense_paths_gplus)));
  for (cell = this->unextended_sense_paths_gplus; cell != NULL; cell = List_next(cell)) {
    extensions = Path_extend(&completep,found_score,found_transcriptp,
                             /*original_path*/(Path_T) List_head(cell),
                             /*queryptr*/queryuc_ptr,querylength,
                             mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                             this->streamspace_max_alloc,this->streamspace_alloc,
                             this->streamptr_alloc,this->streamsize_alloc,this->mergeinfo,
                             this->indelinfo,this->spliceinfo,
                             knownsplicing,knownindels,
                             /*query_compress*/query_compress_fwd,
                             query_compress_fwd,query_compress_rev,/*genestrand*/0,
                             max_insertionlen,max_deletionlen,overall_end_distance,
                             nmismatches_allowed,/*paired_end_p*/false,/*lowp*/true,
                             intlistpool,uintlistpool,univcoordlistpool,listpool,
                             pathpool,transcriptpool,vectorpool,hitlistpool,
                             spliceendsgen,pass,
                             /*extend_qstart_p*/true,/*extend_qend_p*/true);
    if (completep == true) {
      debug(printf("Found extended_sense_paths_gplus\n"));
      this->extended_sense_paths_gplus = List_append(extensions,this->extended_sense_paths_gplus);
    } else {
      this->unextended_sense_paths_gplus = List_append(extensions,this->unextended_sense_paths_gplus);
    }
  }

  /* Sense, genomic minus: reverse-complement query */
  debug(printf("Have %d unextended_sense_paths_gminus\n",List_length(this->unextended_sense_paths_gminus)));
  for (cell = this->unextended_sense_paths_gminus; cell != NULL; cell = List_next(cell)) {
    extensions = Path_extend(&completep,found_score,found_transcriptp,
                             /*original_path*/(Path_T) List_head(cell),
                             /*queryptr*/queryrc,querylength,
                             mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                             this->streamspace_max_alloc,this->streamspace_alloc,
                             this->streamptr_alloc,this->streamsize_alloc,this->mergeinfo,
                             this->indelinfo,this->spliceinfo,
                             knownsplicing,knownindels,
                             /*query_compress*/query_compress_rev,
                             query_compress_fwd,query_compress_rev,/*genestrand*/0,
                             max_insertionlen,max_deletionlen,overall_end_distance,
                             nmismatches_allowed,/*paired_end_p*/false,/*lowp*/true,
                             intlistpool,uintlistpool,univcoordlistpool,listpool,
                             pathpool,transcriptpool,vectorpool,hitlistpool,
                             spliceendsgen,pass,
                             /*extend_qstart_p*/true,/*extend_qend_p*/true);
    if (completep == true) {
      debug(printf("Found extended_sense_paths_gminus\n"));
      this->extended_sense_paths_gminus = List_append(extensions,this->extended_sense_paths_gminus);
    } else {
      this->unextended_sense_paths_gminus = List_append(extensions,this->unextended_sense_paths_gminus);
    }
  }

  /* Antisense, genomic plus: forward query */
  debug(printf("Have %d unextended_antisense_paths_gplus\n",List_length(this->unextended_antisense_paths_gplus)));
  for (cell = this->unextended_antisense_paths_gplus; cell != NULL; cell = List_next(cell)) {
    extensions = Path_extend(&completep,found_score,found_transcriptp,
                             /*original_path*/(Path_T) List_head(cell),
                             /*queryptr*/queryuc_ptr,querylength,
                             mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                             this->streamspace_max_alloc,this->streamspace_alloc,
                             this->streamptr_alloc,this->streamsize_alloc,this->mergeinfo,
                             this->indelinfo,this->spliceinfo,
                             knownsplicing,knownindels,
                             /*query_compress*/query_compress_fwd,
                             query_compress_fwd,query_compress_rev,/*genestrand*/0,
                             max_insertionlen,max_deletionlen,overall_end_distance,
                             nmismatches_allowed,/*paired_end_p*/false,/*lowp*/true,
                             intlistpool,uintlistpool,univcoordlistpool,listpool,
                             pathpool,transcriptpool,vectorpool,hitlistpool,
                             spliceendsgen,pass,
                             /*extend_qstart_p*/true,/*extend_qend_p*/true);
    if (completep == true) {
      debug(printf("Found extended_antisense_paths_gplus\n"));
      this->extended_antisense_paths_gplus = List_append(extensions,this->extended_antisense_paths_gplus);
    } else {
      this->unextended_antisense_paths_gplus = List_append(extensions,this->unextended_antisense_paths_gplus);
    }
  }

  /* Antisense, genomic minus: reverse-complement query */
  debug(printf("Have %d unextended_antisense_paths_gminus\n",List_length(this->unextended_antisense_paths_gminus)));
  for (cell = this->unextended_antisense_paths_gminus; cell != NULL; cell = List_next(cell)) {
    extensions = Path_extend(&completep,found_score,found_transcriptp,
                             /*original_path*/(Path_T) List_head(cell),
                             /*queryptr*/queryrc,querylength,
                             mismatch_positions_alloc,novel_diagonals_alloc,localdb_alloc,
                             this->streamspace_max_alloc,this->streamspace_alloc,
                             this->streamptr_alloc,this->streamsize_alloc,this->mergeinfo,
                             this->indelinfo,this->spliceinfo,
                             knownsplicing,knownindels,
                             /*query_compress*/query_compress_rev,
                             query_compress_fwd,query_compress_rev,/*genestrand*/0,
                             max_insertionlen,max_deletionlen,overall_end_distance,
                             nmismatches_allowed,/*paired_end_p*/false,/*lowp*/true,
                             intlistpool,uintlistpool,univcoordlistpool,listpool,
                             pathpool,transcriptpool,vectorpool,hitlistpool,
                             spliceendsgen,pass,
                             /*extend_qstart_p*/true,/*extend_qend_p*/true);
    if (completep == true) {
      debug(printf("Found extended_antisense_paths_gminus\n"));
      this->extended_antisense_paths_gminus = List_append(extensions,this->extended_antisense_paths_gminus);
    } else {
      this->unextended_antisense_paths_gminus = List_append(extensions,this->unextended_antisense_paths_gminus);
    }
  }

  return;
}


/* Looks for possible fusions (or combinations of existing paths) among
   the unextended paths of THIS, using code similar to finding outer
   fusions in Pathpair_eval_and_sort.

   The four unextended lists are visited in the order sense gplus,
   antisense gplus, sense gminus, antisense gminus.  For each path, if
   Path_unextended_qstart_p reports true (endtrim_allowed 25, no
   ambiguity allowed), the querystart fusion routine is called with that
   path as the main path; likewise Path_unextended_qend_p and the
   queryend routine.  gplus lists use the _plus routines and gminus lists
   the _minus routines, all with endtrim_allowed 8 and genestrand 0.

   Returns the list built by joining each routine's result with the
   results gathered so far via List_append(result, so_far); NULL if
   no routine was called.  *found_score is passed through to the
   fusion routines. */
static List_T
single_read_fusion (int *found_score, T this, int querylength,

                    Knownsplicing_T knownsplicing,
                    Compress_T query_compress_fwd, Compress_T query_compress_rev,

                    int nmismatches_allowed, int max_insertionlen, int max_deletionlen,

                    Univdiagpool_T univdiagpool, Intlistpool_T intlistpool,
                    Uintlistpool_T uintlistpool, Univcoordlistpool_T univcoordlistpool,
                    Listpool_T listpool, Pathpool_T pathpool, Transcriptpool_T transcriptpool,
                    Vectorpool_T vectorpool, Hitlistpool_T hitlistpool) {

  List_T fusions = NULL, found, cell;
  Path_T main_path;

  /* Sense paths on the genomic plus strand */
  for (cell = this->unextended_sense_paths_gplus; cell != NULL; cell = List_next(cell)) {
    main_path = (Path_T) List_head(cell);

    if (Path_unextended_qstart_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_querystart_plus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                          query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                          nmismatches_allowed,max_insertionlen,max_deletionlen,
                                          intlistpool,uintlistpool,univcoordlistpool,
                                          listpool,univdiagpool,pathpool,vectorpool,
                                          hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }

    if (Path_unextended_qend_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_queryend_plus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                        query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                        nmismatches_allowed,max_insertionlen,max_deletionlen,
                                        intlistpool,uintlistpool,univcoordlistpool,
                                        listpool,univdiagpool,pathpool,vectorpool,
                                        hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }
  }

  /* Antisense paths on the genomic plus strand */
  for (cell = this->unextended_antisense_paths_gplus; cell != NULL; cell = List_next(cell)) {
    main_path = (Path_T) List_head(cell);

    if (Path_unextended_qstart_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_querystart_plus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                          query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                          nmismatches_allowed,max_insertionlen,max_deletionlen,
                                          intlistpool,uintlistpool,univcoordlistpool,
                                          listpool,univdiagpool,pathpool,vectorpool,
                                          hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }

    if (Path_unextended_qend_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_queryend_plus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                        query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                        nmismatches_allowed,max_insertionlen,max_deletionlen,
                                        intlistpool,uintlistpool,univcoordlistpool,
                                        listpool,univdiagpool,pathpool,vectorpool,
                                        hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }
  }

  /* Sense paths on the genomic minus strand */
  for (cell = this->unextended_sense_paths_gminus; cell != NULL; cell = List_next(cell)) {
    main_path = (Path_T) List_head(cell);

    if (Path_unextended_qstart_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_querystart_minus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                           query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                           nmismatches_allowed,max_insertionlen,max_deletionlen,
                                           intlistpool,uintlistpool,univcoordlistpool,
                                           listpool,univdiagpool,pathpool,vectorpool,
                                           hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }

    if (Path_unextended_qend_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_queryend_minus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                         query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                         nmismatches_allowed,max_insertionlen,max_deletionlen,
                                         intlistpool,uintlistpool,univcoordlistpool,
                                         listpool,univdiagpool,pathpool,vectorpool,
                                         hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }
  }

  /* Antisense paths on the genomic minus strand */
  for (cell = this->unextended_antisense_paths_gminus; cell != NULL; cell = List_next(cell)) {
    main_path = (Path_T) List_head(cell);

    if (Path_unextended_qstart_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_querystart_minus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                           query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                           nmismatches_allowed,max_insertionlen,max_deletionlen,
                                           intlistpool,uintlistpool,univcoordlistpool,
                                           listpool,univdiagpool,pathpool,vectorpool,
                                           hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }

    if (Path_unextended_qend_p(main_path,/*endtrim_allowed*/25,/*allow_ambig_p*/false) == true) {
      found = Path_fusion_queryend_minus(found_score,/*main*/main_path,this,querylength,knownsplicing,
                                         query_compress_fwd,query_compress_rev,/*genestrand*/0,
                                         nmismatches_allowed,max_insertionlen,max_deletionlen,
                                         intlistpool,uintlistpool,univcoordlistpool,
                                         listpool,univdiagpool,pathpool,vectorpool,
                                         hitlistpool,transcriptpool,/*endtrim_allowed*/8);
      fusions = List_append(found,fusions);
    }
  }

  return fusions;
}



Path_T *
Stage1_single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *second_absmq,
		    Shortread_T queryseq, EF64_T repetitive_ef64,
		    Knownsplicing_T knownsplicing, Knownindels_T knownindels,
		    Trdiagpool_T trdiagpool, Univdiagpool_T univdiagpool,
		    Intlistpool_T intlistpool, Uintlistpool_T uintlistpool,
		    Univcoordlistpool_T univcoordlistpool, Listpool_T listpool,
		    Trpathpool_T trpathpool, Pathpool_T pathpool, Transcriptpool_T transcriptpool,
		    Vectorpool_T vectorpool, Hitlistpool_T hitlistpool, 
		    Spliceendsgen_T spliceendsgen, bool single_cell_p, Pass_T pass) {
  Path_T *patharray, path;
  T this;
  List_T paths;
  int *mismatch_positions_alloc;
  Univcoord_T *novel_diagonals_alloc;
  unsigned short *localdb_alloc;

  int nmismatches_filter, mincoverage_filter;
  int nmismatches_allowed;
  int max_middle_insertions, max_middle_deletions, max_insertionlen, max_deletionlen;
  Chrpos_T overall_max_distance, overall_end_distance;

  int found_score, sufficient_score;
  bool found_transcriptp;
  Method_T last_method = METHOD_INIT;

  int querylength;
  char *queryuc_ptr, *queryrc;
  Compress_T query_compress_fwd, query_compress_rev;

  bool first_read_p;
  int i;
  

  if (single_cell_p == true) {
    first_read_p = false;
  } else {
    first_read_p = true;
  }

  if ((querylength = Shortread_fulllength(queryseq)) < index1part + index1interval - 1) {
    *npaths_primary = *npaths_altloc = 0;
    return (Path_T *) NULL;
  } else {
    this = Stage1_new(querylength);    
  }

  /* nmismatches_allowed means nmismatches_search and is not specified
     by the user.  The user-specified value for -m represents
     nmismatches_filter */
  /* TODO: make this dependent upon the defect_rate */
  nmismatches_allowed = querylength/20; /* was querylength/index1part */

  if (user_nmismatches_filter_float < 0.0) {
    /* Not specified, so don't filter */
    nmismatches_filter = querylength;
  } else if (user_nmismatches_filter_float < 1.0) {
    nmismatches_filter = (int) rint(user_nmismatches_filter_float * (double) querylength);
  } else {
    nmismatches_filter = (int) user_nmismatches_filter_float;
  }

  if (user_mincoverage_filter_float <= 0.0) {
    mincoverage_filter = 0;
  } else if (user_mincoverage_filter_float <= 1.0) {
    /* Assuming that --min-coverage=1 must mean 1.0 and not a coverage of 1 bp */
    mincoverage_filter = (int) rint(user_mincoverage_filter_float * (double) querylength);
  } else {
    mincoverage_filter = (int) user_mincoverage_filter_float;
  }

  if (max_middle_insertions_float > 0.0 && max_middle_insertions_float < 1.0) {
    max_middle_insertions = (int) rint(max_middle_insertions_float * (double) querylength);
  } else {
    max_middle_insertions = (int) max_middle_insertions_float;
  }
  max_insertionlen = max_middle_insertions;
  if (max_insertionlen > querylength) {
    max_insertionlen = querylength;
  }

  if (max_middle_deletions_float > 0.0 && max_middle_deletions_float < 1.0) {
    max_middle_deletions = (int) rint(max_middle_deletions_float * (double) querylength);
  } else {
    max_middle_deletions = (int) max_middle_deletions_float;
  }
  max_deletionlen = max_middle_deletions;

  overall_max_distance = shortsplicedist;
  if ((Chrpos_T) max_middle_deletions > overall_max_distance) {
    overall_max_distance = (Chrpos_T) max_middle_deletions;
  }
  if ((Chrpos_T) max_middle_insertions > overall_max_distance) {
    overall_max_distance = (Chrpos_T) max_middle_insertions;
  }
  overall_end_distance = shortsplicedist_novelend > (Chrpos_T) max_deletionlen ? shortsplicedist_novelend : (Chrpos_T) max_deletionlen;


  queryuc_ptr = Shortread_queryuc_ptr(queryseq);
  queryrc = Shortread_queryrc(queryseq);

  mismatch_positions_alloc = (int *) MALLOC((querylength+MISMATCH_EXTRA)*sizeof(int));

  /* 2 localdb regions possible if shortsplicedist_novelend < 65536 */
  /* 65536 represents the worst possible case where every position in the localdb region matches the query */
  novel_diagonals_alloc = (Univcoord_T *) MALLOC(2 * 65536 *sizeof(Univcoord_T));
  MALLOC_ALIGN(localdb_alloc,2 * 65536 * sizeof(unsigned short));

  query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
  query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);

  found_score = querylength;
  sufficient_score = querylength/20;

  if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
    Stage1_init(this,queryuc_ptr,querylength,/*genestrand*/0);

    single_read_search(&found_score,&found_transcriptp,&last_method,
		       this,repetitive_ef64,/*genestrand*/0,
		       queryuc_ptr,queryrc,querylength,
		       knownsplicing,knownindels,mismatch_positions_alloc,
		       novel_diagonals_alloc,localdb_alloc,
		       query_compress_fwd,query_compress_rev,
		       nmismatches_allowed,max_insertionlen,max_deletionlen,
		       overall_max_distance,overall_end_distance,
		       trdiagpool,univdiagpool,intlistpool,uintlistpool,univcoordlistpool,
		       listpool,trpathpool,pathpool,transcriptpool,vectorpool,
		       hitlistpool,spliceendsgen,
		       /*paired_end_p*/false,first_read_p,pass,sufficient_score);

    if ((paths = List_append(this->sense_paths_gplus,
			     List_append(this->sense_paths_gminus,
					 List_append(this->antisense_paths_gplus,
						     this->antisense_paths_gminus)))) == NULL) {

      single_read_extend(&found_score,&found_transcriptp,this,
			 queryuc_ptr,queryrc,querylength,
			 knownsplicing,knownindels,mismatch_positions_alloc,
			 novel_diagonals_alloc,localdb_alloc,
			 query_compress_fwd,query_compress_rev,
			 nmismatches_allowed,max_insertionlen,max_deletionlen,
			 overall_end_distance,
			 intlistpool,uintlistpool,univcoordlistpool,
			 listpool,pathpool,transcriptpool,vectorpool,
			 hitlistpool,spliceendsgen,pass);

      if ((paths = List_append(this->extended_sense_paths_gplus,
			       List_append(this->extended_sense_paths_gminus,
					   List_append(this->extended_antisense_paths_gplus,
						       this->extended_antisense_paths_gminus)))) == NULL &&
	  splicingp == true) {

	paths = single_read_fusion(&found_score,this,querylength,
				   knownsplicing,
				   query_compress_fwd,query_compress_rev,
				   nmismatches_allowed,max_insertionlen,max_deletionlen,
				   univdiagpool,intlistpool,uintlistpool,univcoordlistpool,
				   listpool,pathpool,transcriptpool,vectorpool,hitlistpool);
      }
    }

    if (paths != NULL) {
      Path_gc(&this->unextended_sense_paths_gplus,intlistpool,univcoordlistpool,listpool,
	      pathpool,transcriptpool,hitlistpool);
      Path_gc(&this->unextended_sense_paths_gminus,intlistpool,univcoordlistpool,listpool,
	      pathpool,transcriptpool,hitlistpool);
      Path_gc(&this->unextended_antisense_paths_gplus,intlistpool,univcoordlistpool,listpool,
	      pathpool,transcriptpool,hitlistpool);
      Path_gc(&this->unextended_antisense_paths_gminus,intlistpool,univcoordlistpool,listpool,
	      pathpool,transcriptpool,hitlistpool);

    } else {
      /* As last resort, use unextended paths */
      paths = List_append(this->unextended_sense_paths_gplus,
			  List_append(this->unextended_sense_paths_gminus,
				      List_append(this->unextended_antisense_paths_gplus,
						  this->unextended_antisense_paths_gminus)));
    }
    
    if (paths == NULL) {
      *npaths_primary = *npaths_altloc = 0;
      patharray = (Path_T *) NULL;

    } else {
      patharray = (Path_T *) List_to_array_out(paths,NULL);
      patharray = Path_eval_and_sort(&(*npaths_primary),&(*npaths_altloc),
				     &(*first_absmq),&(*second_absmq),patharray,
				     /*npaths*/List_length(paths),
				     query_compress_fwd,query_compress_rev,queryuc_ptr,queryrc,
				     Shortread_quality_string(queryseq),nmismatches_filter,mincoverage_filter,
				     intlistpool,univcoordlistpool,listpool,pathpool,transcriptpool);

      if (transcriptome != NULL && pass == PASS2) {
	for (i = 0; i < (*npaths_primary) + (*npaths_altloc); i++) {
	  path = patharray[i];
	  Transcript_velocity_single(path);
	}
      }

      Hitlistpool_free_list(&paths,hitlistpool
			    hitlistpool_trace(__FILE__,__LINE__));
    }
  }
  
  Compress_free(&query_compress_fwd);
  Compress_free(&query_compress_rev);
  FREE_ALIGN(localdb_alloc);
  FREE(novel_diagonals_alloc);
  FREE(mismatch_positions_alloc);

  /* Do not free paths, since they are now appended to paths */
  Stage1_free(&this,trdiagpool,univdiagpool,intlistpool,univcoordlistpool,listpool,pathpool,
	      transcriptpool,hitlistpool,/*free_paths_p*/false);

  /* FREE(queryrc); -- Now taken from Shortread */

  return patharray;
}


/* Copies the caller's settings into this file's static configuration
   variables.  Each argument is stored exactly as given, with no
   validation or conversion: the floating-point limits are turned into
   integer values later, where they are used, relative to the query
   length.  Nothing is allocated or returned. */
void
Stage1hr_single_setup (Mode_T mode_in, int index1part_in, int index1interval_in,
		       Transcriptome_T transcriptome_in, bool genome_align_p_in,
		       bool transcriptome_align_p_in, 
		       double user_nmismatches_filter_float_in, double user_mincoverage_filter_float_in,
		       double max_middle_insertions_float_in, double max_middle_deletions_float_in,
		       bool splicingp_in, Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_novelend_in) {

  /* Splicing settings */
  shortsplicedist_novelend = shortsplicedist_novelend_in;
  shortsplicedist = shortsplicedist_in;
  splicingp = splicingp_in;

  /* Limits on middle indels (fraction of the read if in (0,1), else a count) */
  max_middle_deletions_float = max_middle_deletions_float_in;
  max_middle_insertions_float = max_middle_insertions_float_in;

  /* User-specified filters on mismatches and coverage */
  user_mincoverage_filter_float = user_mincoverage_filter_float_in;
  user_nmismatches_filter_float = user_nmismatches_filter_float_in;

  /* Transcriptome object and the alignment-target flags */
  transcriptome_align_p = transcriptome_align_p_in;
  genome_align_p = genome_align_p_in;
  transcriptome = transcriptome_in;

  /* Mode and index parameters */
  index1interval = index1interval_in;
  index1part = index1part_in;
  mode = mode_in;
}
