function [groups,groupsizes,prmut]=transcript_binary_grouper(td,threshstyle,sdthresh,numberlive,groupsizethresh)
%description: take a list of normalized levels of transcripts in each cell, binarize by mean/SD criteria
%use nchoosek to generate list of all possible ways to choose numberlive cells from size(td,2) total cells
%for each of these permutations, find all the transcripts that are high (i.e., equal to 1) in all of those cells, regardless of whether they are high in any other cells
%sort the list of cell groupings and the list of permutations according to which groups have the most cells
%td=transcript data, expected as (transcripts,cells)
%called by:
%calls:
%notes:
%code confirmed date:
if threshstyle(1)==1 %binarize the input matrix by mean/SD criteria -- second component of the variable tells how many columns (of bulk mRNA data) to remove
avgpermrna=mean(td');
stdpermrna=std(td');
mrnainclusionthresh=avgpermrna+(sdthresh*stdpermrna);
threshmat=repmat(mrnainclusionthresh,size(td,2),1);
a=td'>threshmat; %have to use > rather than >= to avoid the transcripts that are zero in every cell
bintd=a';
bintd(:,end-(threshstyle(2)-1):end)=[];
elseif threshstyle(1)==2 %binarize by comparison to the bulk mRNA columns -- second component of the variable tells how many columns to use in calculating the bulk mRNA average
bulk=td(:,end-(threshstyle(2)-1):end)';
bulk=mean(bulk)';
td(:,end-(threshstyle(2)-1):end)=[];
b=repmat(bulk,1,size(td,2));
bintd=td>b;
else %binarize by a simple threshold, given by threshstyle(1), and remove the unneeded columns, given by threshstyle(2)
bintd=td>threshstyle(1);
bintd(:,end-(threshstyle(2)-1):end)=[];
end
%generate the list of permutations
prmut=nchoosek(1:size(bintd,2),numberlive);
numprmuts=size(prmut,1);
%for each permutation, find all transcripts that are high in the cells in question
groups=cell(numprmuts,1);
groupsizes=zeros(numprmuts,1);
for j=1:numprmuts
aa=bintd(:,prmut(j,:))';
bb=all(aa);
cc=find(bb);
groupsize=length(cc);
if groupsize>=groupsizethresh %if the group has too few members it will be ignored
groups{j}=cc;
groupsizes(j)=groupsize;
end
end
%sort the grouping in order of # of transcripts, most populous first, and sort the list of permutations in the same way so they can be directly compared
[groupsizes,ind]=sort(groupsizes,'descend');
groups=groups(ind);
prmut=prmut(ind,:);