function [informativemrnas,groups,rho_orig,numgroupseachrow]=transcript_sorter(td,rhothresh,pvalthresh,userank,sdthresh,sdthreshnum,infornasonly)
%description: takes a list of normalized levels of transcripts present in each cell, optionally ranks cells according to their level of each transcript
%finds the correlation between each pair of transcripts in either a) the rank list of those two transcripts across all cells or b) the vector of levels of those two transcripts across all cells
%finds all groups of genes such that each pair within the group is correlated above a set level with confidence of this correlation also meeting a threshold
%td=transcript data, expected as (transcripts,cells)
%called by: command line
%calls: transcript_sorter_subfun
%notes:
%code confirmed date:
groups={};
%related code
%mx=max(td');mxmtx=repmat(mx,size(td,2),1);nltd=td'./mxmtx;nltd=nltd'; %normalize transcript levels
%z=[];for i=1:size(groups,1),if ~isempty(groups{i,1}),z(i)=size(groups{i,1},2);end,end %find the number of elements in the first group of each row
%first decide which transcripts will be included in the analysis -- transcripts that vary little among cells are unlikely to be informative
avgpermrna=mean(td');
stdpermrna=std(td');
mrnainclusionthresh=avgpermrna+(sdthresh*stdpermrna);
threshmat=repmat(mrnainclusionthresh,size(td,2),1);
a=td'>threshmat; %have to use > rather than >= to avoid the transcripts that are zero in every cell
b=sum(a);
c=b>=sdthreshnum;
informativemrnas=find(c);
td=td(informativemrnas,:);
numgroupseachrow=zeros(1,length(informativemrnas));
%for tweaking sdthresh and sdthreshnum to get 'appropriate' numbers of transcripts to work with
if infornasonly, rho_orig=[]; return, end
if userank %was originally thought to be better to sort, but probably not
[~,ind]=sort(td,2,'descend');
else
ind=td;
end
[rho,pval]=corr(ind'); %each transcript is a column at the time the correlation is calculated
rho(diag(rho))=0;
rho(rhopvalthresh)=0;
rho(find(rho))=1;
rho_orig=rho;
for jj=1:size(rho,1) %each successive transcript is the 'fulcrum' for each successive row
for kk=size(rho,2):-1:1 %working backwards
if rho(jj,kk) %the correlated and fulcrum transcripts will now be put into a group together, either alone or with others
if ~numgroupseachrow(jj)%if there are as yet no groups 'hinging on' the fulcrum transcript, whose pairs we're evaluating in this row
groups{jj,1}=[jj,kk];%make a new group with this 'correlated' transcript and the fulcrum transcript
rho(kk,jj)=0; %zero out the fulcrum transcript from the row where this correlated transcript is the fulcrum to eliminate double-counting
numgroupseachrow(jj)=1;
else
[rho,groupz,numgroupsthisrow]=transcript_sorter_subfun1(rho,jj,kk,groups(jj,:),numgroupseachrow(jj));
groups(jj,1:size(groupz,2))=groupz;
numgroupseachrow(jj)=numgroupsthisrow;
end
end
end
end