macro-; 
macro*35 1000 ; 
macro=; 
log taxonomy ; 
lquote[; 
lquote=; 
report- ; 
silent = all ; 

/********************************************************\
|                                                        |
|   This is the script used in Goloboff et al's paper,   |
|       (Phylogenetic of 73,060 taxa corroborates...).   |
|   See paper for details of usage.                      |
|   The script checks groups (calculating their fit      |
|   on multiple trees), also finding taxa to prune to    |
|   improve groups.  First argument is max. depht,       |
|   second argument is group to improve (both are        |
|   optional).  It stores taxa to prune in group 31      |
|   (if doing only a group, it stores taxa to cut to     |
|   improve that group, to the level specified)          |
|                                                        |
\********************************************************/

goto =%0 ;

var: 
  maxlevel  curlevel interrupt_it num_malos 
  recording_group record_level found_record_level record_name[60] 
  tmpnam1[60] tmpnam2[60] curnam[60] mostsim  proper intruders
  runaways skip_uniques taxonomy_only skip_genera curspr worstspr index 
  max_proper min_proper avg_proper max_intruders min_intruders avg_intruders
  max_runaways min_runaways avg_runaways min_index max_index avg_index max_spr min_spr avg_spr ; 

set recording_group 0 ; 
set found_record_level 0 ; 
if ( argnumber > 1 ) 
   set recording_group 1 ; 
   set record_name $%2; 
   sil - all ; 
   quote Working only on &34$record_name&34 &10; 
   sil = all ; 
   agroup =0 :%2 ; 
   loop 0 ntax 
       if ( !isinagroup[0 #1] || ( !isactax[ #1 ] ) ) 
          taxn /XXXXX #1 ;
          end 
       stop 
end 

loop 0 ntax
    if ( !isintree[0 #1] )
        taxn /XXXXX #1 ;
    else 
        taxn /$taxon>@(#1) #1 ;
        end 
    stop
agroup -.; 

set maxlevel 10000 ; 
if ( argnumber ) set maxlevel %1 ; end 

set taxonomy_only 0 ; 
set skip_uniques 0; 
set skip_genera 0 ; 

ag = 31 ; 

set curlevel 0 ; 
set interrupt_it 0 ; 

loop 0 ntax
     if ( eqstring [ $taxon(#1) XXXXX ] ) continue ; end 
     set curnam $$taxon<_(#1); 
     goto PROCESS_GROUP #1 ; 
     if ( 'interrupt_it' ) endloop; end 
stop 

if ( 'interrupt_it' ) return 'num_malos' ; end 

ag -0.30 ; 
sil-all ; 
ag * ; 
proc/; 

-------------------------------------------------------
label PROCESS_GROUP ;
var : mylevel mytaxa ; 
set mylevel 'curlevel' ; 
set mytaxa 0 ; 
agroup ='mylevel' $curnam; 

if ( 'curlevel' >= 'maxlevel' )
  loop %1 ntax
    if ( !isinagroup['mylevel' #1] ) continue ; end
    taxname /XXXXX #1 ; 
    set mytaxa ++ ; 
  stop 
  proc/; 
end 

loop %1 ntax
    if ( !isinagroup['mylevel' #1] ) continue ; end 
    set mytaxa ++ ; 
    taxname /$taxon>_(#1) #1 ; 
stop
if ( 'skip_uniques' && ( 'mytaxa' == 1 ) ) proc/; end 

set + ; 
if ( 'recording_group' ) 
   if ( eqstring [ $curnam  $record_name  ] ) 
      set found_record_level 1 ; 
      set record_level 1 ; 
   else 
      if ( 'found_record_level' ) 
         set record_level ++ ; 
      end 
   end 
end 
set - ; 


if ( ( !'recording_group') || 'found_record_level' ) 
  goto DO_STATS ;
  sil - all ;
  loop 0 'mylevel' quote &32 &32 ; stop 
  quote $curnam; 
  if ( !'taxonomy_only' ) 
    quote : TAXA: 'mytaxa' , IN: 'min_proper' , 'max_proper' , '/.2avg_proper' ;    
    quote , PLUS: 'min_intruders' , 'max_intruders' , '/.2avg_intruders' ; 
    quote , MINUS: 'min_runaways', 'max_runaways', '/.2avg_runaways' , INDEX;
    quote : 'min_index', 'max_index' , '/.2avg_index' , SPR: 'min_spr' ;
    quote , 'max_spr' , '/.2avg_spr' , 'worstspr' &10; 
  else 
    quote &10 ; 
  end 
  sil = all ;
end 

set curlevel ++ ; 
loop %1 ntax
    if ( !isinagroup['mylevel' #1] ) continue ; end 
    if ( isinstring [ $taxon(#1) _ ] ) 
         set curnam $$taxon<_(#1)_;
         goto PROCESS_GROUP #1 ;
         if ( 'interrupt_it' ) endloop; end 
    else
         if ( eqstring [ $taxon(#1) XXXXX ] ) continue ; end 
         set curnam $$taxon<_(#1);
         goto DO_GENUS #1 ; 
         end 
stop 
set curlevel -- ;

if ( 'interrupt_it' ) proc/; end 

if ( 'found_record_level' && 'record_level' ) 
   set record_level -- ; 
   if ( !'record_level' ) 
      set found_record_level 0 ; 
      ag - 0.30 ; 
      sil - all ; 
      ag * ; 
      set num_malos 0 ; 
      loop 0 ntax 
          if ( isinagroup[ 31 #1 ] ) set num_malos ++ ; end 
          stop 
      errmsg Went beyond group to be recorded...
             found 'num_malos' bad taxa... ...stopping script ; 
      set interrupt_it 1 ; 
      proc/; 
      end 
   end 

proc/; 

-------------------------------------------------------
label DO_GENUS ; 
var : mylevel mytaxa ; 
set mylevel 'curlevel' ; 
set mytaxa 0 ; 
agroup ='mylevel' $curnam; 
loop %1 ntax
    if ( !isinagroup['mylevel' #1] ) continue ; end
    taxname /XXXXX #1 ; 
    set mytaxa ++ ; 
stop 
if ( 'skip_genera' ) proc/; end 
if ( 'skip_uniques' && ( 'mytaxa' == 1 ) ) proc/; end 

if ( !'taxonomy_only' ) 
goto DO_STATS ; 
end 

sil - all ;
loop 0 'mylevel' quote &32 &32 ; stop 
quote $curnam; 
if ( !'taxonomy_only' ) 
  quote : TAXA: 'mytaxa' , IN: 'min_proper' , 'max_proper' , '/.2avg_proper' , GIVE: 'min_intruders' , 'max_intruders' , '/.2avg_intruders' , TAKE: 'min_runaways', 'max_runaways', '/.2avg_runaways' , INDEX: 'min_index', 'max_index' , '/.2avg_index' , SPR: 'min_spr' , 'max_spr' , '/.2avg_spr' , 'worstspr' &10; 
else 
  quote &10 ; 
end 

sil = all ; 
proc/; 


-------------------------------------------------------
label DO_STATS ;
var : to ; 

set avg_proper    0 ; 
set avg_spr       0 ; 
set avg_runaways  0 ; 
set avg_intruders 0 ; 
set avg_index 0 ; 

set min_proper    100000 ; 
set min_spr       100000 ; 
set min_runaways  100000 ; 
set min_intruders 100000 ; 
set min_index     100000 ; 

set max_proper    0 ; 
set max_spr       0 ; 
set max_runaways  0 ; 
set max_intruders 0 ; 
set max_index     0 ; 

if ( 'mytaxa' < 2 ) 
  set avg_proper    1 ; 
  set max_proper    1 ; 
  set min_proper    1 ; 
  set min_spr       0 ; 
  set min_runaways  0 ; 
  set min_intruders 0 ;  
  set min_index     0 ;  
  proc/; 
end 

set to ntrees + 1 ; 
xr =! 0 [.] 0 ; xr = 0 [ { 'mylevel' } ] 1 ; 

loop 0 ntrees 

  if ( 'mytaxa' > 1 )
    tread ( ( { 'mylevel' } ) ... ) ;  
    set mostsim simgroup[ ntrees (root+1) #1 ] ;
    if ( 'mostsim' > 0 ) 
       set proper regbeta ; 
       set intruders regalfa ;
       set runaways regr ;

if ( 
     ( ( 'mylevel' <= 4 ) && (!'recording_group' ) ) || 
     ( 'recording_group' && ( 'record_level' < 4 ) && 'found_record_level' && 
     ( 'proper' > 50 ) && ( 'intruders' < 30 ) && ( 'runaways' < ( 'proper' / 3 )))
   ) 
if ( 'intruders' < 30 ) 
  ag =30 @#1 'mostsim' <30 [ 'mylevel' ] ;   
  ag >31 [ 30 ] ;    /*  add all intruders  */ 
  end 
if ( 'runaways' < ( 'proper' / 3 ) ) 
  ag =30 [ 'mylevel' ] <30 @#1 'mostsim' ; 
  ag >31 [ 30 ] ;    /*  add all runaways  */ 
  end 
end 

    else 
       set proper 0; 
       set intruders 0 ;
       set runaways 'mytaxa' ;
       end  
    keep 'to' ; 
  else
    set proper 1 ; set intruders 0 ; set runaways 0 ;
    end 

  set curspr length[ #1 0 ] - 1 ; 

  if ( 'proper' < 'min_proper' ) set min_proper 'proper' ; end 
  if ( 'proper' > 'max_proper' ) set max_proper 'proper' ; end 

  if ( 'curspr' < 'min_spr' ) set min_spr 'curspr' ; end 
  if ( 'curspr' > 'max_spr' ) set max_spr 'curspr' ; end 

  if ( 'intruders' < 'min_intruders' ) set min_intruders 'intruders' ; end 
  if ( 'intruders' > 'max_intruders' ) set max_intruders 'intruders' ; end 

  if ( 'runaways' < 'min_runaways' ) set min_runaways 'runaways' ; end 
  if ( 'runaways' > 'max_runaways' ) set max_runaways 'runaways' ; end 

  set index 1 - ( ( 'intruders' + 'runaways' ) / 'mytaxa' ) ; 
  if ( 'index' < 0 ) set index 0 ; end 
  if ( 'index' < 'min_index ) set min_index 'index' ; end 
  if ( 'index' > 'max_index ) set max_index 'index' ; end 

  set avg_proper    += 'proper' ; 
  set avg_spr       += 'curspr' ; 
  set avg_intruders += 'intruders' ; 
  set avg_runaways  += 'runaways' ; 
  set avg_index     += 'index' ; 

  stop 

set worstspr  maxsteps[0] - 1 ; 
set avg_proper     /= 'to' ; 
set avg_spr        /= 'to' ; 
set avg_intruders  /= 'to' ; 
set avg_runaways    /= 'to' ; 
set avg_index      /= 'to' ; 

proc/;

