static char rcsid[] = "$Id: ca9064c1434540cf17aef747d705e9cc9c35aef4 $";
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef HAVE_MEMCPY
#define memcpy(d,s,n) bcopy((s),(d),(n))
#endif

#include "kmer-search.h"

#include <string.h>		/* For strlen */
#include "assert.h"
#include "mem.h"
#include "types.h"
#include "chrnum.h"
#include "reader.h"
#include "oligo.h"

#ifdef LARGE_GENOMES
#include "intersect-large.h"
#endif
#include "intersect-simd.h"
#include "intersect.h"
#include "transcript.h"
#include "popcount.h"
#include "junction.h"

#include "univdiag.h"
#include "univdiagdef.h"
#include "genomebits_count.h"
#include "genomebits_kmer.h"
#include "splice.h"
#include "indel.h"
#include "intron.h"
#include "maxent_hr.h"
#include "sedgesort.h"
#include "path-solve.h"


#ifndef LARGE_GENOMES
#include "merge-diagonals-simd-uint4.h"
#elif !defined(HAVE_AVX512) && !defined(HAVE_AVX2)
#include "merge-diagonals-heap.h" /* For Merge_diagonals_large */
#include "merge-diagonals-simd-uint4.h"
#else
#include "merge-diagonals-simd-uint8.h" /* For Merge_diagonals_large */
#include "merge-diagonals-simd-uint4.h"
#endif

#include "simd.h"

#if 0
#if defined(HAVE_SSE2)
#include <emmintrin.h>
#endif
#ifdef HAVE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef HAVE_AVX2
#include <immintrin.h>
#endif
#ifdef HAVE_AVX512
#include <immintrin.h>
#endif
#endif


/* Compile-time switches and constants for this file. */

/* When defined, Kmer_search_approx clamps the low/high coordinates it
   hands to EF64_chrnum to the range [0, genomelength].  Note that low
   and high are assigned only inside this #ifdef, so the macro must
   stay defined for that lookup to receive initialized values. */
#define TRIM_AT_GENOME_BOUNDS 1

/* Causes problems with counting nmismatches */
/* #define TRIM_AT_CHROMOSOME_BOUNDS 1 */

/* Not referenced elsewhere in this file */
#define MIN_SUPPORT_INDEL 6

/* When defined, chrnum/chroffset/chrhigh are looked up with EF64_chrnum
   before a diagonal is solved.  Those values are passed on
   unconditionally, so the macro must stay defined. */
#define NEED_CHRNUM 1


/* General flow */
/* debug(x) expands to x only when DEBUG is defined; otherwise the
   statement disappears entirely */
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif


/* Merging faster than count table */
/* Not referenced elsewhere in this file */
#define USE_MERGE 1

/* The remaining constants are not referenced elsewhere in this file */
#define MAX_NEIGHBORS 3		/* Cannot be 0 */
#define SUBOPT 3

#define LONG_END 6
#define ALLOWED_END_MISMATCHES 2 /* For long ends */
#define ALLOWED_TRANSCRIPTOME_TRIM 3


/* Module-wide parameters.  All of them are assigned by
   Kmer_search_setup and only read by the search procedures. */

/* Kmer size: passed as the kmer argument to the Genomebits_first_kmer_*
   calls and used to derive the 3' sampling phase in Kmer_search_exact */
static int index1part;
/* Number of sampling phases iterated over (mod5 = 0 .. index1interval-1) */
static int index1interval;

/* Passed to EF64_chrnum to obtain chrnum, chroffset and chrhigh for a
   coordinate range */
static EF64_T chromosome_ef64;
/* Genome passed to the Genomebits_first_kmer_* calls */
static Genomebits_T genomebits;
/* Stored by Kmer_search_setup but not read anywhere in this file */
static Genomebits_T genomebits_alt;
/* Upper clamp for coordinates under TRIM_AT_GENOME_BOUNDS */
static Univcoord_T genomelength;

/* When true, combine_ends_plus/minus also solve in the antisense direction */
static bool splicingp;


/* Indexdb: oligo + diagterm -> streams of diagonals */
/* Merge: an array of diagonals with duplicates */
/* Path: genomic endpoints + gaps + trnums */

/* All calls to Substring_new are for transcriptome.  May need to make call to Univ_IIT_update_chrnum */

/* Genome */
/* Ultrafast: check ends only */
/* find_local_sets: merged->diagonals -> middle_diagonal, left_diagonals, right_diagonals */
/* Algorithm 1a: find_best_path_genome -> complete_path (list of Univdiag_T) */
/* Algorithm 1b: Kmer_search_genome (solve_via_segments_genome): complete_path -> hits */
/* Algorithm 2: Stage3end_complete_path_run_gmap: complete_path -> hits */


/* Kmer_search_exact: searches the ends of the read, just like the
   ultrafast transcriptome method, looking for univdiagonals that are
   supported by both the 5' and the 3' end.

   For every 5' sampling phase mod5 in [0, index1interval), the
   corresponding 3' phase mod3 is derived from querylength and
   index1part.  The stage1 position lists for that (mod5, mod3) pair are
   intersected with Intersect_exact (Intersect_exact_large under
   LARGE_GENOMES).  Every resulting univdiagonal that is at least
   querylength gets its chromosome from EF64_chrnum and is handed to
   Path_solve_exact over the whole query (pos5 = 0, pos3 = querylength)
   with method KMER_EXACT.  The genomic plus strand is processed with
   query_compress_fwd and the *_gplus lists, then the minus strand with
   query_compress_rev and the *_gminus lists.

   found_score, found_transcriptp and the four path lists are passed
   through to Path_solve_exact, which is responsible for updating them.

   Previously thought we needed max_hits because repetitive reads can
   give many exact matches in a genome; no such limit is applied now. */
void
Kmer_search_exact (int *found_score, bool *found_transcriptp,

		   List_T *sense_paths_gplus, List_T *sense_paths_gminus,
		   List_T *antisense_paths_gplus, List_T *antisense_paths_gminus,
		   Compress_T query_compress_fwd, Compress_T query_compress_rev,
		   int querylength, Stage1_T stage1,

		   int genestrand, int nmismatches_allowed,
		   Intlistpool_T intlistpool, Uintlistpool_T uintlistpool,
		   Univcoordlistpool_T univcoordlistpool,
		   Listpool_T listpool, Pathpool_T pathpool, Vectorpool_T vectorpool,
		   Hitlistpool_T hitlistpool, Transcriptpool_T transcriptpool) {

  int mod5, mod3, adj;

  Univcoord_T *univdiagonals, univdiagonal;
  int nunivdiagonals, i;
  int pos5, pos3;
#ifdef TRIM_AT_CHROMOSOME_BOUNDS
  Univcoord_T left;
#endif

  Chrnum_T chrnum;
  Univcoord_T chroffset, chrhigh;


  debug(printf("Entered Kmer_search_exact with %d mismatches allowed\n",nmismatches_allowed));

  for (mod5 = 0; mod5 < index1interval; mod5++) {
    /* adj is kept inside the loop so that the modulo is never evaluated
       when index1interval is 0 and the loop body does not run */
    adj = (querylength - index1part) % index1interval;
    mod3 = (index1interval + adj - mod5) % index1interval;

    /* gplus */
    debug(printf("plus mod5 %d, mod3 %d: diagterm5 %d, diagterm3 %d\n",
		 mod5,mod3,stage1->plus_diagterms_end5[mod5],stage1->plus_diagterms_end3[mod3]));

#ifdef LARGE_GENOMES
    univdiagonals = Intersect_exact_large(&nunivdiagonals,
					  stage1->plus_positions_high_end5[mod5],stage1->plus_positions_end5[mod5],
					  stage1->plus_npositions_end5[mod5],stage1->plus_diagterms_end5[mod5],
					  stage1->plus_positions_high_end3[mod3],stage1->plus_positions_end3[mod3],
					  stage1->plus_npositions_end3[mod3],stage1->plus_diagterms_end3[mod3]);
#else
    univdiagonals = Intersect_exact(&nunivdiagonals,
				    stage1->plus_positions_end5[mod5],stage1->plus_npositions_end5[mod5],
				    stage1->plus_diagterms_end5[mod5],
				    stage1->plus_positions_end3[mod3],stage1->plus_npositions_end3[mod3],
				    stage1->plus_diagterms_end3[mod3]);
#endif
    debug(printf("plus mod5 %d, mod3 %d: %d univdiagonals\n",mod5,mod3,nunivdiagonals));

    for (i = 0; i < nunivdiagonals; i++) {
      univdiagonal = univdiagonals[i];
      /* Univcoord_T may be wider than unsigned int (LARGE_GENOMES), so
	 print through unsigned long long */
      debug(printf("ULTRAFAST PLUS DIAGONAL %llu\n",(unsigned long long) univdiagonal));

      if (univdiagonal < (Univcoord_T) querylength) {
	/* Skip: univdiagonal - querylength would fall below coordinate 0 */
	continue;
      }

#ifdef NEED_CHRNUM
      chrnum = EF64_chrnum(&chroffset,&chrhigh,chromosome_ef64,
			   univdiagonal - querylength,univdiagonal);
#endif

#ifdef TRIM_AT_CHROMOSOME_BOUNDS
      /* Disabled at file level (causes problems with counting
	 nmismatches).  NOTE(review): left is taken to be
	 univdiagonal - querylength -- confirm before enabling. */
      left = univdiagonal - (Univcoord_T) querylength;
      pos5 = (univdiagonal >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - left);
      pos3 = (univdiagonal <= chrhigh) ? querylength : (int) (chrhigh - left);
#else
      pos5 = 0;
      pos3 = querylength;
#endif

      Path_solve_exact(found_score,found_transcriptp,
		       sense_paths_gplus,antisense_paths_gplus,
		       univdiagonal,pos5,pos3,/*plusp*/true,genestrand,
		       /*query_compress*/query_compress_fwd,
		       query_compress_fwd,query_compress_rev,querylength,
		       chrnum,chroffset,chrhigh,
		       intlistpool,uintlistpool,univcoordlistpool,
		       listpool,pathpool,vectorpool,hitlistpool,transcriptpool,
		       nmismatches_allowed,/*method*/KMER_EXACT);
    }
    FREE(univdiagonals);


    /* gminus */
    debug(printf("minus mod5 %d, mod3 %d: diagterm5 %d, diagterm3 %d\n",
		 mod5,mod3,stage1->minus_diagterms_end5[mod5],stage1->minus_diagterms_end3[mod3]));

#ifdef LARGE_GENOMES
    univdiagonals = Intersect_exact_large(&nunivdiagonals,
					  stage1->minus_positions_high_end5[mod5],stage1->minus_positions_end5[mod5],
					  stage1->minus_npositions_end5[mod5],stage1->minus_diagterms_end5[mod5],
					  stage1->minus_positions_high_end3[mod3],stage1->minus_positions_end3[mod3],
					  stage1->minus_npositions_end3[mod3],stage1->minus_diagterms_end3[mod3]);
#else
    univdiagonals = Intersect_exact(&nunivdiagonals,
				    stage1->minus_positions_end5[mod5],stage1->minus_npositions_end5[mod5],
				    stage1->minus_diagterms_end5[mod5],
				    stage1->minus_positions_end3[mod3],stage1->minus_npositions_end3[mod3],
				    stage1->minus_diagterms_end3[mod3]);
#endif
    debug(printf("minus mod5 %d, mod3 %d: %d univdiagonals\n",mod5,mod3,nunivdiagonals));

    for (i = 0; i < nunivdiagonals; i++) {
      univdiagonal = univdiagonals[i];
      debug(printf("ULTRAFAST MINUS DIAGONAL %llu\n",(unsigned long long) univdiagonal));

      if (univdiagonal < (Univcoord_T) querylength) {
	/* Skip: univdiagonal - querylength would fall below coordinate 0 */
	continue;
      }

#ifdef NEED_CHRNUM
      chrnum = EF64_chrnum(&chroffset,&chrhigh,chromosome_ef64,
			   univdiagonal - querylength,univdiagonal);
#endif

#ifdef TRIM_AT_CHROMOSOME_BOUNDS
      /* Disabled at file level; see the note in the plus-strand loop */
      left = univdiagonal - (Univcoord_T) querylength;
      pos5 = (univdiagonal >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - left);
      pos3 = (univdiagonal <= chrhigh) ? querylength : (int) (chrhigh - left);
#else
      pos5 = 0;
      pos3 = querylength;
#endif

      Path_solve_exact(found_score,found_transcriptp,
		       sense_paths_gminus,antisense_paths_gminus,
		       univdiagonal,pos5,pos3,/*plusp*/false,genestrand,
		       /*query_compress*/query_compress_rev,
		       query_compress_fwd,query_compress_rev,querylength,
		       chrnum,chroffset,chrhigh,
		       intlistpool,uintlistpool,univcoordlistpool,
		       listpool,pathpool,vectorpool,hitlistpool,transcriptpool,
		       nmismatches_allowed,/*method*/KMER_EXACT);
    }
    FREE(univdiagonals);
  }

  debug(printf("Kmer_search_exact returning sense %d plus and %d minus paths, antisense %d plus and %d minus paths\n",
	       List_length(*sense_paths_gplus),List_length(*sense_paths_gminus),
	       List_length(*antisense_paths_gplus),List_length(*antisense_paths_gminus)));

  return;
}


/* combine_ends_plus: given two different plus-strand univdiagonals,
   asks Path_solve_from_ends to solve the alignment between them.

   univdiagonal0 (with pos5_0..pos3_0) is for the beginning of the
   query; univdiagonal1 (with pos5_1..pos3_1) is for the end.

   Nothing is done when the pair is judged to straddle the chromosome
   bounds [chroffset, chrhigh).  Otherwise the pair is solved in the
   SENSE_FORWARD direction using unsolved_sense_paths/sense_paths and,
   when the module-level splicingp flag is true, also in the SENSE_ANTI
   direction using unsolved_antisense_paths/antisense_paths.  Both
   calls use method KMER_APPROX. */
static void
combine_ends_plus (int *found_score, bool *found_transcriptp,
		   List_T *unsolved_sense_paths, List_T *unsolved_antisense_paths,
		   List_T *sense_paths, List_T *antisense_paths,
		   Univcoord_T univdiagonal0, Univcoord_T univdiagonal1,
		   int pos5_0, int pos3_0, int pos5_1, int pos3_1,

		   Compress_T query_compress, Compress_T query_compress_fwd, Compress_T query_compress_rev,
		   int querylength,
		   Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,

		   Indelinfo_T indelinfo, Spliceinfo_T spliceinfo,
		   Knownsplicing_T knownsplicing,

		   int genestrand,
		   int nmismatches_allowed, int max_insertionlen, int max_deletionlen,
		   Intlistpool_T intlistpool, Uintlistpool_T uintlistpool, Univcoordlistpool_T univcoordlistpool,
		   Listpool_T listpool, Pathpool_T pathpool, Vectorpool_T vectorpool, Transcriptpool_T transcriptpool,
		   Hitlistpool_T hitlistpool) {

  /* Univcoord_T may be wider than unsigned int (LARGE_GENOMES), so the
     debug output goes through unsigned long long */
  debug(printf("Entered combine_ends_plus with univdiagonal0 %llu and univdiagonal1 %llu\n",
	       (unsigned long long) univdiagonal0,(unsigned long long) univdiagonal1));

  /* Caller checked for adj == 0 */
  assert(univdiagonal1 != univdiagonal0);

  if (univdiagonal0 < chroffset) {
    /* Skip.  Straddles chromosome bounds */
    debug(printf("Skipping because univdiagonal0 %llu < chroffset %llu\n",
		 (unsigned long long) univdiagonal0,(unsigned long long) chroffset));
    return;
  }

  /* NOTE(review): assuming Univcoord_T is unsigned, a univdiagonal1
     smaller than querylength wraps around here and is skipped too */
  if (univdiagonal1 - querylength >= chrhigh) {
    /* Skip.  Straddles chromosome bounds */
    debug(printf("Skipping because univdiagonal1 %llu - querylength %d >= chrhigh %llu\n",
		 (unsigned long long) univdiagonal1,querylength,(unsigned long long) chrhigh));
    return;
  }

  Path_solve_from_ends(found_score,found_transcriptp,
		       unsolved_sense_paths,sense_paths,
		       univdiagonal0,pos5_0,pos3_0,univdiagonal1,pos5_1,pos3_1,
		       /*plusp*/true,/*sensedir*/SENSE_FORWARD,genestrand,
		       query_compress,query_compress_fwd,query_compress_rev,
		       querylength,chrnum,chroffset,chrhigh,
		       indelinfo,spliceinfo,knownsplicing,
		       intlistpool,uintlistpool,univcoordlistpool,
		       listpool,pathpool,vectorpool,transcriptpool,
		       hitlistpool,max_insertionlen,max_deletionlen,nmismatches_allowed,
		       /*method*/KMER_APPROX);

  /* The antisense direction is attempted only when splicing is enabled */
  if (splicingp == true) {
    Path_solve_from_ends(found_score,found_transcriptp,
			 unsolved_antisense_paths,antisense_paths,
			 univdiagonal0,pos5_0,pos3_0,univdiagonal1,pos5_1,pos3_1,
			 /*plusp*/true,/*sensedir*/SENSE_ANTI,genestrand,
			 query_compress,query_compress_fwd,query_compress_rev,
			 querylength,chrnum,chroffset,chrhigh,
			 indelinfo,spliceinfo,knownsplicing,
			 intlistpool,uintlistpool,univcoordlistpool,
			 listpool,pathpool,vectorpool,transcriptpool,
			 hitlistpool,max_insertionlen,max_deletionlen,nmismatches_allowed,
			 /*method*/KMER_APPROX);
  }

  return;
}


/* Genomic procedure */
/* combine_ends_minus: minus-strand counterpart of combine_ends_plus.
   Given two different minus-strand univdiagonals, asks
   Path_solve_from_ends (with plusp false) to solve the alignment
   between them.

   univdiagonal0 (with pos5_0..pos3_0) is for the beginning of the
   query; univdiagonal1 (with pos5_1..pos3_1) is for the end.

   Nothing is done when the pair is judged to straddle the chromosome
   bounds [chroffset, chrhigh).  Otherwise the pair is solved in the
   SENSE_FORWARD direction using unsolved_sense_paths/sense_paths and,
   when the module-level splicingp flag is true, also in the SENSE_ANTI
   direction using unsolved_antisense_paths/antisense_paths.  Both
   calls use method KMER_APPROX. */
static void
combine_ends_minus (int *found_score, bool *found_transcriptp,
		    List_T *unsolved_sense_paths, List_T *unsolved_antisense_paths,
		    List_T *sense_paths, List_T *antisense_paths,
		    Univcoord_T univdiagonal0, Univcoord_T univdiagonal1,
		    int pos5_0, int pos3_0, int pos5_1, int pos3_1,

		    Compress_T query_compress, Compress_T query_compress_fwd, Compress_T query_compress_rev,
		    int querylength,
		    Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,

		    Indelinfo_T indelinfo, Spliceinfo_T spliceinfo,
		    Knownsplicing_T knownsplicing,

		    int genestrand,
		    int nmismatches_allowed, int max_insertionlen, int max_deletionlen,
		    Intlistpool_T intlistpool, Uintlistpool_T uintlistpool, Univcoordlistpool_T univcoordlistpool,
		    Listpool_T listpool, Pathpool_T pathpool, Vectorpool_T vectorpool, Transcriptpool_T transcriptpool,
		    Hitlistpool_T hitlistpool) {

  /* Univcoord_T may be wider than unsigned int (LARGE_GENOMES), so the
     debug output goes through unsigned long long */
  debug(printf("Entered combine_ends_minus with univdiagonal0 %llu and univdiagonal1 %llu\n",
	       (unsigned long long) univdiagonal0,(unsigned long long) univdiagonal1));

  /* Caller checked for adj == 0 */
  assert(univdiagonal1 != univdiagonal0);

  if (univdiagonal0 < chroffset) {
    /* Skip.  Straddles chromosome bounds */
    debug(printf("Skipping because univdiagonal0 %llu < chroffset %llu\n",
		 (unsigned long long) univdiagonal0,(unsigned long long) chroffset));
    return;
  }

  /* NOTE(review): assuming Univcoord_T is unsigned, a univdiagonal1
     smaller than querylength wraps around here and is skipped too */
  if (univdiagonal1 - querylength >= chrhigh) {
    /* Skip.  Straddles chromosome bounds */
    debug(printf("Skipping because univdiagonal1 %llu - querylength %d >= chrhigh %llu\n",
		 (unsigned long long) univdiagonal1,querylength,(unsigned long long) chrhigh));
    return;
  }

  Path_solve_from_ends(found_score,found_transcriptp,
		       unsolved_sense_paths,sense_paths,
		       univdiagonal0,pos5_0,pos3_0,univdiagonal1,pos5_1,pos3_1,
		       /*plusp*/false,/*sensedir*/SENSE_FORWARD,genestrand,
		       query_compress,query_compress_fwd,query_compress_rev,
		       querylength,chrnum,chroffset,chrhigh,
		       indelinfo,spliceinfo,knownsplicing,
		       intlistpool,uintlistpool,univcoordlistpool,
		       listpool,pathpool,vectorpool,transcriptpool,
		       hitlistpool,max_insertionlen,max_deletionlen,nmismatches_allowed,
		       /*method*/KMER_APPROX);

  /* The antisense direction is attempted only when splicing is enabled */
  if (splicingp == true) {
    Path_solve_from_ends(found_score,found_transcriptp,
			 unsolved_antisense_paths,antisense_paths,
			 univdiagonal0,pos5_0,pos3_0,univdiagonal1,pos5_1,pos3_1,
			 /*plusp*/false,/*sensedir*/SENSE_ANTI,genestrand,
			 query_compress,query_compress_fwd,query_compress_rev,
			 querylength,chrnum,chroffset,chrhigh,
			 indelinfo,spliceinfo,knownsplicing,
			 intlistpool,uintlistpool,univcoordlistpool,
			 listpool,pathpool,vectorpool,transcriptpool,
			 hitlistpool,max_insertionlen,max_deletionlen,nmismatches_allowed,
			 /*method*/KMER_APPROX);
  }

  return;
}


/* Kmer_search_approx: approximate search that pairs a diagonal
   supported by the 5' end of the read with a diagonal supported by the
   3' end and then tries to solve the region between them.

   Steps, as implemented below:
   1. For each of the index1interval sampling phases, the non-empty
      position lists in stage1 are gathered into per-strand, per-end
      stream arrays (which also live in stage1).
   2. Each set of streams is merged into one array of diagonals with
      Merge_diagonals (Merge_diagonals_large under LARGE_GENOMES).
      Performing a merge is about 10% faster than the alternative of
      trying all combinations of mod5 and mod3.
   3. Intersect_approx_simple is called on the 5' and 3' diagonals of
      each strand with maxdistance = max_gaplen.  The approx distance
      can cover a typical splice distance.
   4. Each returned pair is oriented (which diagonal belongs to the
      start of the query and which to the end) and, unless it is
      filtered out, passed to combine_ends_plus or combine_ends_minus.

   A max_hits limit was once considered necessary, because repetitive
   reads can give many exact matches in a genome, but it is commented
   out in this function. */
void
Kmer_search_approx (int *found_score, bool *found_transcriptp,
		    List_T *unsolved_sense_paths_gplus, List_T *unsolved_sense_paths_gminus,
		    List_T *unsolved_antisense_paths_gplus, List_T *unsolved_antisense_paths_gminus,

		    List_T *sense_paths_gplus, List_T *sense_paths_gminus,
		    List_T *antisense_paths_gplus, List_T *antisense_paths_gminus,
				
		    Compress_T query_compress_fwd, Compress_T query_compress_rev,
		    int querylength, Stage1_T stage1, Knownsplicing_T knownsplicing,
		    int genestrand, int nmismatches_allowed,
		    int max_insertionlen, int max_deletionlen, int max_gaplen, int sizelimit,

		    Intlistpool_T intlistpool, Uintlistpool_T uintlistpool,
		    Univcoordlistpool_T univcoordlistpool,
		    Listpool_T listpool, Pathpool_T pathpool, Vectorpool_T vectorpool,
		    Hitlistpool_T hitlistpool, Transcriptpool_T transcriptpool) {

  /* Number of non-empty streams gathered so far for each strand/end */
  int gplus_streami_5 = 0, gminus_streami_5 = 0, gplus_streami_3 = 0, gminus_streami_3 = 0;

  /* Sum of the stream sizes for each strand/end */
  int total_npositions_gplus_5 = 0, total_npositions_gminus_5 = 0,
    total_npositions_gplus_3 = 0, total_npositions_gminus_3 = 0;

  /* Merged diagonals for each strand/end, and their counts */
  Univcoord_T *gplus_diagonals_5, *gminus_diagonals_5, *gplus_diagonals_3, *gminus_diagonals_3;
  int n_gplus_diagonals_5, n_gminus_diagonals_5, n_gplus_diagonals_3, n_gminus_diagonals_3;

  /* Results of Intersect_approx_simple.  The *_exactp flags are only
     reported in the debug output. */
  bool gplus_exactp, gminus_exactp;
  Univcoord_T *gplus_diagpairs, *gminus_diagpairs;
  int n_gplus_diagpairs, n_gminus_diagpairs;

  int i, k;
  Univcoord_T univdiagonala, univdiagonalb, univdiagonal0, univdiagonal1, low, high;

  int pos5_0, pos3_0, pos5_1, pos3_1, pos5a, pos3a, pos5b, pos3b, adj;
  int trim_a5, trim_a3, trim_b5, trim_b3;
  /* The nmismatches_* outputs are filled by the Genomebits calls but
     not used afterwards */
  int nmismatches_a5, nmismatches_a3, nmismatches_b5, nmismatches_b3;

  Chrnum_T chrnum;
  Univcoord_T chroffset, chrhigh;


  /* max_hits = 1000000; */

  /* Step 1: gather the non-empty position lists of every sampling phase
     into contiguous stream arrays, skipping phases without positions */
  for (i = 0; i < index1interval; i++) {
    if (stage1->plus_npositions_end5[i] > 0) {
#ifdef LARGE_GENOMES
      stage1->gplus_stream_high_array_5[gplus_streami_5] = stage1->plus_positions_high_end5[i];
      stage1->gplus_stream_low_array_5[gplus_streami_5] = stage1->plus_positions_end5[i];
#else
      stage1->gplus_stream_array_5[gplus_streami_5] = stage1->plus_positions_end5[i];
#endif
      stage1->gplus_streamsize_array_5[gplus_streami_5] = stage1->plus_npositions_end5[i];
      stage1->gplus_diagterm_array_5[gplus_streami_5] = stage1->plus_diagterms_end5[i];
      total_npositions_gplus_5 += stage1->plus_npositions_end5[i];
      gplus_streami_5++;
    }

    if (stage1->minus_npositions_end5[i] > 0) {
#ifdef LARGE_GENOMES
      stage1->gminus_stream_high_array_5[gminus_streami_5] = stage1->minus_positions_high_end5[i];
      stage1->gminus_stream_low_array_5[gminus_streami_5] = stage1->minus_positions_end5[i];
#else
      stage1->gminus_stream_array_5[gminus_streami_5] = stage1->minus_positions_end5[i];
#endif
      stage1->gminus_streamsize_array_5[gminus_streami_5] = stage1->minus_npositions_end5[i];
      stage1->gminus_diagterm_array_5[gminus_streami_5] = stage1->minus_diagterms_end5[i];
      total_npositions_gminus_5 += stage1->minus_npositions_end5[i];
      gminus_streami_5++;
    }

    if (stage1->plus_npositions_end3[i] > 0) {
#ifdef LARGE_GENOMES
      stage1->gplus_stream_high_array_3[gplus_streami_3] = stage1->plus_positions_high_end3[i];
      stage1->gplus_stream_low_array_3[gplus_streami_3] = stage1->plus_positions_end3[i];
#else
      stage1->gplus_stream_array_3[gplus_streami_3] = stage1->plus_positions_end3[i];
#endif
      stage1->gplus_streamsize_array_3[gplus_streami_3] = stage1->plus_npositions_end3[i];
      stage1->gplus_diagterm_array_3[gplus_streami_3] = stage1->plus_diagterms_end3[i];
      total_npositions_gplus_3 += stage1->plus_npositions_end3[i];
      gplus_streami_3++;
    }

    if (stage1->minus_npositions_end3[i] > 0) {
#ifdef LARGE_GENOMES
      stage1->gminus_stream_high_array_3[gminus_streami_3] = stage1->minus_positions_high_end3[i];
      stage1->gminus_stream_low_array_3[gminus_streami_3] = stage1->minus_positions_end3[i];
#else
      stage1->gminus_stream_array_3[gminus_streami_3] = stage1->minus_positions_end3[i];
#endif
      stage1->gminus_streamsize_array_3[gminus_streami_3] = stage1->minus_npositions_end3[i];
      stage1->gminus_diagterm_array_3[gminus_streami_3] = stage1->minus_diagterms_end3[i];
      total_npositions_gminus_3 += stage1->minus_npositions_end3[i];
      gminus_streami_3++;
    }
  }

  /* Step 2: merge the streams of each strand/end.
     NOTE: the leading "0 &&" in each of the four conditions below
     disables the sizelimit cutoff, so the merge in the else branch
     always runs and sizelimit is only reported in the debug output. */
  debug(printf("Comparing total_npositions_gplus_5 %d against sizelmit %d\n",total_npositions_gplus_5,sizelimit));
  if (0 && total_npositions_gplus_5 > sizelimit) {
    gplus_diagonals_5 = (Univcoord_T *) NULL;
    n_gplus_diagonals_5 = 0;
  } else {
#ifdef LARGE_GENOMES
    gplus_diagonals_5 = Merge_diagonals_large(&n_gplus_diagonals_5,
					      stage1->gplus_stream_high_array_5,stage1->gplus_stream_low_array_5,
					      stage1->gplus_streamsize_array_5,stage1->gplus_diagterm_array_5,
					      /*nstreams*/gplus_streami_5,stage1->mergeinfo);
#else
    gplus_diagonals_5 = Merge_diagonals(&n_gplus_diagonals_5,stage1->gplus_stream_array_5,
					stage1->gplus_streamsize_array_5,stage1->gplus_diagterm_array_5,
					/*nstreams*/gplus_streami_5,stage1->mergeinfo);
#endif
  }


  debug(printf("Comparing total_npositions_gminus_5 %d against sizelmit %d\n",total_npositions_gminus_5,sizelimit));
  if (0 && total_npositions_gminus_5 > sizelimit) {
    gminus_diagonals_5 = (Univcoord_T *) NULL;
    n_gminus_diagonals_5 = 0;
  } else {
#ifdef LARGE_GENOMES
    gminus_diagonals_5 = Merge_diagonals_large(&n_gminus_diagonals_5,
					       stage1->gminus_stream_high_array_5,stage1->gminus_stream_low_array_5,
					       stage1->gminus_streamsize_array_5,stage1->gminus_diagterm_array_5,
					       /*nstreams*/gminus_streami_5,stage1->mergeinfo);
#else
    gminus_diagonals_5 = Merge_diagonals(&n_gminus_diagonals_5,stage1->gminus_stream_array_5,
					 stage1->gminus_streamsize_array_5,stage1->gminus_diagterm_array_5,
					 /*nstreams*/gminus_streami_5,stage1->mergeinfo);
#endif
  }


  debug(printf("Comparing total_npositions_gplus_3 %d against sizelmit %d\n",total_npositions_gplus_3,sizelimit));
  if (0 && total_npositions_gplus_3 > sizelimit) {
    gplus_diagonals_3 = (Univcoord_T *) NULL;
    n_gplus_diagonals_3 = 0;
  } else {
#ifdef LARGE_GENOMES
    gplus_diagonals_3 = Merge_diagonals_large(&n_gplus_diagonals_3,
					      stage1->gplus_stream_high_array_3,stage1->gplus_stream_low_array_3,
					      stage1->gplus_streamsize_array_3,stage1->gplus_diagterm_array_3,
					      /*nstreams*/gplus_streami_3,stage1->mergeinfo);
#else
    gplus_diagonals_3 = Merge_diagonals(&n_gplus_diagonals_3,stage1->gplus_stream_array_3,
					stage1->gplus_streamsize_array_3,stage1->gplus_diagterm_array_3,
					/*nstreams*/gplus_streami_3,stage1->mergeinfo);
#endif

  }


  debug(printf("Comparing total_npositions_gminus_3 %d against sizelmit %d\n",total_npositions_gminus_3,sizelimit));
  if (0 && total_npositions_gminus_3 > sizelimit) {
    gminus_diagonals_3 = (Univcoord_T *) NULL;
    n_gminus_diagonals_3 = 0;
  } else {
#ifdef LARGE_GENOMES
    gminus_diagonals_3 = Merge_diagonals_large(&n_gminus_diagonals_3,
					       stage1->gminus_stream_high_array_3,stage1->gminus_stream_low_array_3,
					       stage1->gminus_streamsize_array_3,stage1->gminus_diagterm_array_3,
					       /*nstreams*/gminus_streami_3,stage1->mergeinfo);
#else
    gminus_diagonals_3 = Merge_diagonals(&n_gminus_diagonals_3,stage1->gminus_stream_array_3,
					 stage1->gminus_streamsize_array_3,stage1->gminus_diagterm_array_3,
					 /*nstreams*/gminus_streami_3,stage1->mergeinfo);
#endif
  }

  /* Step 3: pair up the 5' and 3' diagonals of each strand */
  gplus_diagpairs = Intersect_approx_simple(&gplus_exactp,&n_gplus_diagpairs,
					    gplus_diagonals_5,n_gplus_diagonals_5,
					    gplus_diagonals_3,n_gplus_diagonals_3,
					    /*maxdistance*/max_gaplen);
  gminus_diagpairs = Intersect_approx_simple(&gminus_exactp,&n_gminus_diagpairs,
					     gminus_diagonals_5,n_gminus_diagonals_5,
					     gminus_diagonals_3,n_gminus_diagonals_3,
					     /*maxdistance*/max_gaplen);
  debug(printf("***Intersect ends approx: exactp %d and %d.  %d plus and %d minus diagpairs\n",
		gplus_exactp,gminus_exactp,n_gplus_diagpairs,n_gminus_diagpairs));

  /* The merged arrays are no longer needed.  This condition mirrors the
     include selection at the top of the file: the SIMD merge headers
     are used in the first case and the heap merge in the second
     (presumably the SIMD merges return aligned memory -- confirm). */
#if !defined(LARGE_GENOMES) || defined(HAVE_AVX512) || defined(HAVE_AVX2)
  FREE_ALIGN(gplus_diagonals_5);
  FREE_ALIGN(gminus_diagonals_5);
  FREE_ALIGN(gplus_diagonals_3);
  FREE_ALIGN(gminus_diagonals_3);
#else
  FREE(gplus_diagonals_5);
  FREE(gminus_diagonals_5);
  FREE(gplus_diagonals_3);
  FREE(gminus_diagonals_3);
#endif

  /* Step 4, plus strand.  Pair i occupies entries k and k+1 of
     gplus_diagpairs, so k advances by 2 for every pair. */
  i = k = 0;
  while (/*nhits <= max_hits && */ i < n_gplus_diagpairs) {
    univdiagonala = gplus_diagpairs[k];
    univdiagonalb = gplus_diagpairs[k+1];

    /* NOTE(review): the disabled TRIM_AT_CHROMOSOME_BOUNDS branches in
       this loop refer to lefta/leftb, which are not declared in this
       function, and read chroffset/chrhigh before EF64_chrnum has set
       them for this pair */
#ifdef TRIM_AT_CHROMOSOME_BOUNDS
    pos5a = (univdiagonala >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - lefta);
    pos3a = (univdiagonala <= chrhigh) ? querylength : (int) (chrhigh - lefta);
#else
    pos5a = 0;
    pos3a = querylength;
#endif
    trim_a5 = Genomebits_first_kmer_left(&nmismatches_a5,genomebits,query_compress_fwd,
					 univdiagonala,querylength,pos5a,pos3a,
					 /*plusp*/true,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
    trim_a3 = querylength - Genomebits_first_kmer_right(&nmismatches_a3,genomebits,query_compress_fwd,
							univdiagonala,querylength,pos5a,pos3a,
							/*plusp*/true,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
      
#ifdef TRIM_AT_CHROMOSOME_BOUNDS
    pos5b = (univdiagonalb >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - leftb);
    pos3b = (univdiagonalb <= chrhigh) ? querylength : (int) (chrhigh - leftb);
#else
    pos5b = 0;
    pos3b = querylength;
#endif
    trim_b5 = Genomebits_first_kmer_left(&nmismatches_b5,genomebits,query_compress_fwd,
					 univdiagonalb,querylength,pos5b,pos3b,
					 /*plusp*/true,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
    trim_b3 = querylength - Genomebits_first_kmer_right(&nmismatches_b3,genomebits,query_compress_fwd,
							univdiagonalb,querylength,pos5b,pos3b,
							/*plusp*/true,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
      
    debug(printf("trimmed a %d..%d, trimmed b %d..%d\n",
		 trim_a5,querylength - trim_a3,trim_b5,querylength - trim_b3));
    /* Decide which diagonal belongs to the start of the query (0) and
       which to the end (1).
       NOTE(review): trim_a3/trim_b3 are querylength minus the value
       returned by Genomebits_first_kmer_right, yet they are compared
       against pos3a/pos3b (querylength in this build) while the debug
       line above prints querylength - trim as the end coordinate.  One
       of the two usages looks inconsistent -- confirm against the
       contract of Genomebits_first_kmer_right. */
    if (trim_a5 == pos5a || trim_b3 == pos3b) {
      univdiagonal0 = univdiagonala; pos5_0 = pos5a; pos3_0 = pos3a;
      univdiagonal1 = univdiagonalb; pos5_1 = pos5b; pos3_1 = pos3b;
      /* low0 = lefta + pos5a; high0 = lefta + pos3a; */
      /* low1 = leftb + pos5b; high1 = leftb + pos3b; */
      adj = (int) (univdiagonalb - univdiagonala);
      debug(printf("Setting a first, b second, adj %d\n",adj));
	
    } else if (trim_a3 == pos3a || trim_b5 == pos5b) {
      univdiagonal0 = univdiagonalb; pos5_0 = pos5b; pos3_0 = pos3b;
      univdiagonal1 = univdiagonala; pos5_1 = pos5a; pos3_1 = pos3a;
      /* low0 = leftb + pos5b; high0 = leftb + pos3b; */
      /* low1 = lefta + pos5a; high1 = lefta + pos3a; */
      adj = (int) (univdiagonala - univdiagonalb);
      debug(printf("Setting b first, a second, adj %d\n",adj));
	
    } else {
      /* Neither orientation applies.  univdiagonal0/1 and pos*_0/1 are
	 left unassigned, which is safe only because adj == 0 skips the
	 pair below. */
      adj = 0;
    }
	  
    /* printf("plus adj %d, univdiagonal0 %llu, univdiagonal1 %llu\n",adj,univdiagonal0,univdiagonal1); */
    if (adj == 0) {
      /* Skip, since would have been found by Kmer exact */

    } else if (adj > max_gaplen) {
      /* Too far apart */

    } else if (adj < -max_insertionlen) {
      /* Impossible */

    } else {
      /* Doesn't matter if univdiagonal0 is less than or greater than univdiagonal1 */
#ifdef TRIM_AT_GENOME_BOUNDS
      low = (univdiagonal0 >= (Univcoord_T) querylength) ? univdiagonal0 - querylength : 0;
      high = (univdiagonal1 <= genomelength) ? univdiagonal1 : genomelength;
#endif
#ifdef NEED_CHRNUM
      chrnum = EF64_chrnum(&chroffset,&chrhigh,chromosome_ef64,low,high);
#endif

      combine_ends_plus(&(*found_score),&(*found_transcriptp),
			&(*unsolved_sense_paths_gplus),&(*unsolved_antisense_paths_gplus),
			&(*sense_paths_gplus),&(*antisense_paths_gplus),
			univdiagonal0,univdiagonal1,pos5_0,pos3_0,pos5_1,pos3_1,
			/*query_compress*/query_compress_fwd,query_compress_fwd,query_compress_rev,
			querylength,chrnum,chroffset,chrhigh,

			stage1->indelinfo,stage1->spliceinfo,knownsplicing,
			genestrand,nmismatches_allowed,
			max_insertionlen,max_deletionlen,
			intlistpool,uintlistpool,univcoordlistpool,listpool,
			pathpool,vectorpool,transcriptpool,hitlistpool);
    }

    i++;
    k += 2;
  }
      

  /* Step 4, minus strand: same procedure as above, using
     query_compress_rev, plusp false and combine_ends_minus.  The
     NOTE(review) remarks in the plus-strand loop apply here as well. */
  i = k = 0;
  while (/*nhits <= max_hits && */ i < n_gminus_diagpairs) {
    univdiagonala = gminus_diagpairs[k];
    univdiagonalb = gminus_diagpairs[k+1];

#ifdef TRIM_AT_CHROMOSOME_BOUNDS
    pos5a = (univdiagonala >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - lefta);
    pos3a = (univdiagonala <= chrhigh) ? querylength : (int) (chrhigh - lefta);
#else
    pos5a = 0;
    pos3a = querylength;
#endif
    trim_a5 = Genomebits_first_kmer_left(&nmismatches_a5,genomebits,query_compress_rev,
					 univdiagonala,querylength,pos5a,pos3a,
					 /*plusp*/false,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
    trim_a3 = querylength - Genomebits_first_kmer_right(&nmismatches_a3,genomebits,query_compress_rev,
							univdiagonala,querylength,pos5a,pos3a,
							/*plusp*/false,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
      
#ifdef TRIM_AT_CHROMOSOME_BOUNDS
    pos5b = (univdiagonalb >= chroffset + (Univcoord_T) querylength) ? 0 : (int) (chroffset - leftb);
    pos3b = (univdiagonalb <= chrhigh) ? querylength : (int) (chrhigh - leftb);
#else
    pos5b = 0;
    pos3b = querylength;
#endif
    trim_b5 = Genomebits_first_kmer_left(&nmismatches_b5,genomebits,query_compress_rev,
					 univdiagonalb,querylength,pos5b,pos3b,
					 /*plusp*/false,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
    trim_b3 = querylength - Genomebits_first_kmer_right(&nmismatches_b3,genomebits,query_compress_rev,
							univdiagonalb,querylength,pos5b,pos3b,
							/*plusp*/false,genestrand,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part);
      
    debug(printf("trimmed a %d..%d, trimmed b %d..%d\n",
		 trim_a5,querylength - trim_a3,trim_b5,querylength - trim_b3));
    if (trim_a5 == pos5a || trim_b3 == pos3b) {
      univdiagonal0 = univdiagonala; pos5_0 = pos5a; pos3_0 = pos3a;
      univdiagonal1 = univdiagonalb; pos5_1 = pos5b; pos3_1 = pos3b;
      /* low0 = lefta + pos5a; high0 = lefta + pos3a; */
      /* low1 = leftb + pos5b; high1 = leftb + pos3b; */
      adj = (int) (univdiagonalb - univdiagonala);
      debug(printf("Setting a first, b second, adj %d\n",adj));
	
    } else if (trim_a3 == pos3a || trim_b5 == pos5b) {
      univdiagonal0 = univdiagonalb; pos5_0 = pos5b; pos3_0 = pos3b;
      univdiagonal1 = univdiagonala; pos5_1 = pos5a; pos3_1 = pos3a;
      /* low0 = leftb + pos5b; high0 = leftb + pos3b; */
      /* low1 = lefta + pos5a; high1 = lefta + pos3a; */
      adj = (int) (univdiagonala - univdiagonalb);
      debug(printf("Setting b first, a second, adj %d\n",adj));
	
    } else {
      adj = 0;
    }
	  
    /* printf("minus adj %d, univdiagonal0 %llu, univdiagonal1 %llu\n",adj,univdiagonal0,univdiagonal1); */
    if (adj == 0) {
      /* Skip, since would have been found by Kmer exact */

    } else if (adj > max_gaplen) {
      /* Too far apart */

    } else if (adj < -max_insertionlen) {
      /* Impossible */

    } else {
      /* Doesn't matter if univdiagonal1 is less than or greater than univdiagonal0 */
#ifdef TRIM_AT_GENOME_BOUNDS
      low = (univdiagonal0 >= (Univcoord_T) querylength) ? univdiagonal0 - querylength : 0;
      high = (univdiagonal1 <= genomelength) ? univdiagonal1 : genomelength;
#endif
#ifdef NEED_CHRNUM
      chrnum = EF64_chrnum(&chroffset,&chrhigh,chromosome_ef64,low,high);
#endif

      combine_ends_minus(&(*found_score),&(*found_transcriptp),
			 &(*unsolved_sense_paths_gminus),&(*unsolved_antisense_paths_gminus),
			 &(*sense_paths_gminus),&(*antisense_paths_gminus),
			 univdiagonal0,univdiagonal1,pos5_0,pos3_0,pos5_1,pos3_1,
			 /*query_compress*/query_compress_rev,query_compress_fwd,query_compress_rev,
			 querylength,chrnum,chroffset,chrhigh,

			 stage1->indelinfo,stage1->spliceinfo,knownsplicing,
			 genestrand,nmismatches_allowed,
			 max_insertionlen,max_deletionlen,
			 intlistpool,uintlistpool,univcoordlistpool,listpool,
			 pathpool,vectorpool,transcriptpool,hitlistpool);
    }

    i++;
    k += 2;
  }

  /* The pair arrays returned by Intersect_approx_simple are released
     with FREE */
  FREE(gminus_diagpairs);
  FREE(gplus_diagpairs);

  debug(printf("Done with Kmer_search_genome_ends_approx\n"));
  return;
}


/* Kmer_search_setup: stores the caller-supplied parameters in the
   file-scope statics read by the search procedures in this file.  It
   only copies its arguments; nothing else is computed or allocated. */
void
Kmer_search_setup (int index1part_in, int index1interval_in, EF64_T chromosome_ef64_in,
		   Genomebits_T genomebits_in, Genomebits_T genomebits_alt_in,
		   Univcoord_T genomelength_in, bool splicingp_in) {

  /* Genome and chromosome lookup */
  genomebits = genomebits_in;
  genomebits_alt = genomebits_alt_in;
  genomelength = genomelength_in;
  chromosome_ef64 = chromosome_ef64_in;

  /* Kmer size and sampling interval */
  index1interval = index1interval_in;
  index1part = index1part_in;

  /* Whether antisense solutions are also attempted */
  splicingp = splicingp_in;
}


