Added translation of admix
This commit is contained in:
parent
25cbfe4ed3
commit
76c408613d
1 changed files with 367 additions and 315 deletions
682
R/admix1.R
682
R/admix1.R
|
|
@ -1,49 +1,64 @@
|
||||||
#' @title Admixture analysis
|
#' @title Admixture analysis
|
||||||
#' @param tietue record
|
#' @param tietue a named record list
|
||||||
#' @details If the record == -1, the mixture results file is loaded. Otherwise, will the required variables be retrieved from the record fields?
|
#' @details If the record == -1, the mixture results file is loaded. Otherwise,
|
||||||
|
#' will the required variables be retrieved from the record fields?
|
||||||
|
#' `tietue`should contain the following elements: PARTITION, COUNTS, SUMCOUNTS,
|
||||||
|
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
|
||||||
#' @export
|
#' @export
|
||||||
admix1 <- function(tietue) {
|
admix1 <- function(tietue, PARTITION = matrix(NA, 0, 0),
|
||||||
|
COUNTS = matrix(NA, 0, 0), SUMCOUNTS = NA) {
|
||||||
if (!is.list(tietue)) {
|
if (!is.list(tietue)) {
|
||||||
# c(filename, pathname) = uigetfile('*.mat', 'Load mixture result file');
|
message('Load mixture result file. These are the files in this directory:')
|
||||||
# if (filename==0 & pathname==0), return;
|
print(list.files())
|
||||||
# else
|
pathname_filename <- file.choose()
|
||||||
# disp('---------------------------------------------------');
|
if (!file.exists(pathname_filename)) {
|
||||||
# disp(['Reading mixture result from: ',[pathname filename],'...']);
|
stop(
|
||||||
# end
|
"File ", pathname_filename,
|
||||||
# pause(0.0001);
|
" does not exist. Check spelling and location."
|
||||||
# h0 = findobj('Tag','filename1_text');
|
)
|
||||||
# set(h0,'String',filename); clear h0;
|
} else {
|
||||||
|
cat('---------------------------------------------------\n');
|
||||||
|
message('Reading mixture result from: ', pathname_filename, '...')
|
||||||
|
}
|
||||||
|
sys.sleep(0.0001) #ASK: what for?
|
||||||
|
|
||||||
# struct_array = load([pathname filename]);
|
# ASK: what is this supposed to do? What do graphic obj have to do here?
|
||||||
# if isfield(struct_array,'c') #Matlab versio
|
# h0 = findobj('Tag','filename1_text');
|
||||||
# c = struct_array.c;
|
# set(h0,'String',filename); clear h0;
|
||||||
# if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd')
|
|
||||||
# disp('Incorrect file format');
|
|
||||||
# return
|
|
||||||
# end
|
|
||||||
# elseif isfield(struct_array,'PARTITION') #Mideva versio
|
|
||||||
# c = struct_array;
|
|
||||||
# if ~isfield(c,'rowsFromInd')
|
|
||||||
# disp('Incorrect file format');
|
|
||||||
# return
|
|
||||||
# end
|
|
||||||
# else
|
|
||||||
# disp('Incorrect file format');
|
|
||||||
# return;
|
|
||||||
# end
|
|
||||||
|
|
||||||
# if isfield(c, 'gene_lengths') && ...
|
struct_array <- load(pathname_filename)
|
||||||
# (strcmp(c.mixtureType,'linear_mix') | ...
|
if (isfield(struct_array, 'c')) { #Matlab versio
|
||||||
# strcmp(c.mixtureType,'codon_mix')) # if the mixture is from a linkage model
|
c <- struct_array$c
|
||||||
# # Redirect the call to the linkage admixture function.
|
if (!isfield(c, 'PARTITION') | !isfield(c,'rowsFromInd')) {
|
||||||
# c.data = noIndex(c.data,c.noalle); # call function noindex to remove the index column
|
stop('Incorrect file format')
|
||||||
# linkage_admix(c);
|
}
|
||||||
# return
|
} else if (isfield(struct_array, 'PARTITION')) { #Mideva versio
|
||||||
# end
|
c <- struct_array
|
||||||
|
if (!isfield(c,'rowsFromInd')) stop('Incorrect file format')
|
||||||
|
} else {
|
||||||
|
stop('Incorrect file format')
|
||||||
|
}
|
||||||
|
|
||||||
# PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS;
|
if (isfield(c, 'gene_lengths') &
|
||||||
# alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames;
|
strcmp(c$mixtureType, 'linear_mix') |
|
||||||
# rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle;
|
strcmp(c$mixtureType, 'codon_mix')) { # if the mixture is from a linkage model
|
||||||
|
# Redirect the call to the linkage admixture function.
|
||||||
|
# call function noindex to remove the index column
|
||||||
|
c$data <- noIndex(c$data, c$noalle)
|
||||||
|
# linkage_admix(c) # ASK: translate this function to R or drop?
|
||||||
|
# return
|
||||||
|
stop("linkage_admix not implemented")
|
||||||
|
}
|
||||||
|
PARTITION <- c$PARTITION
|
||||||
|
COUNTS <- c$COUNTS
|
||||||
|
SUMCOUNTS <- c$SUMCOUNTS
|
||||||
|
alleleCodes <- c$alleleCodes
|
||||||
|
adjprior <- c$adjprior
|
||||||
|
popnames <- c$popnames
|
||||||
|
rowsFromInd <- c$rowsFromInd
|
||||||
|
data <- c$data
|
||||||
|
npops <- c$npops
|
||||||
|
noalle <- c$noalle
|
||||||
} else {
|
} else {
|
||||||
PARTITION <- tietue$PARTITION
|
PARTITION <- tietue$PARTITION
|
||||||
COUNTS <- tietue$COUNTS
|
COUNTS <- tietue$COUNTS
|
||||||
|
|
@ -57,297 +72,334 @@ admix1 <- function(tietue) {
|
||||||
noalle <- tietue$noalle
|
noalle <- tietue$noalle
|
||||||
}
|
}
|
||||||
|
|
||||||
# answers = inputdlg({['Input the minimum size of a population that will'...
|
answers <- inputdlg(
|
||||||
# ' be taken into account when admixture is estimated.']},...
|
prompt = paste(
|
||||||
# 'Input minimum population size',[1],...
|
"Input the minimum size of a population that will",
|
||||||
# {'5'});
|
"be taken into account when admixture is estimated."
|
||||||
# if isempty(answers) return; end
|
),
|
||||||
# alaRaja = str2num(answers{1,1});
|
definput = 5
|
||||||
# [npops] = poistaLiianPienet(npops, rowsFromInd, alaRaja);
|
)
|
||||||
|
alaRaja <- as.num(answers)
|
||||||
|
npops <- poistaLiianPienet(npops, rowsFromInd, alaRaja)
|
||||||
|
|
||||||
# nloci = size(COUNTS,2);
|
nloci <- size(COUNTS, 2)
|
||||||
# ninds = size(data,1)/rowsFromInd;
|
ninds <- size(data, 1) / rowsFromInd
|
||||||
|
|
||||||
# answers = inputdlg({['Input number of iterations']},'Input',[1],{'50'});
|
answers <- inputdlg('Input number of iterations', 50)
|
||||||
# if isempty(answers) return; end
|
if (isempty(answers)) return()
|
||||||
# iterationCount = str2num(answers{1,1});
|
iterationCount <- as.numeric(answers[1, 1]) # maybe [[]]?
|
||||||
|
|
||||||
# answers = inputdlg({['Input number of reference individuals from each population']},'Input',[1],{'50'});
|
answers <- inputdlg(
|
||||||
# if isempty(answers) nrefIndsInPop = 50;
|
prompt = 'Input number of reference individuals from each population',
|
||||||
# else nrefIndsInPop = str2num(answers{1,1});
|
definput = 50
|
||||||
# end
|
)
|
||||||
|
if (isempty(answers)) {
|
||||||
|
nrefIndsInPop <- 50
|
||||||
|
} else {
|
||||||
|
nrefIndsInPop <- as.numeric(answers[1, 1])
|
||||||
|
}
|
||||||
|
|
||||||
# answers = inputdlg({['Input number of iterations for reference individuals']},'Input',[1],{'10'});
|
answers <- inputdlg(
|
||||||
# if isempty(answers) return; end
|
prompt = 'Input number of iterations for reference individuals',
|
||||||
# iterationCountRef = str2num(answers{1,1});
|
definput = 10
|
||||||
|
)
|
||||||
|
if (isempty(answers)) return()
|
||||||
|
iterationCountRef <- as.numeric(answers[1, 1])
|
||||||
|
|
||||||
|
# First calculate log-likelihood ratio for all individuals:
|
||||||
|
likelihood <- zeros(ninds, 1)
|
||||||
|
allfreqs <- computeAllFreqs2(noalle)
|
||||||
|
for (ind in 1:ninds) {
|
||||||
|
omaFreqs <- computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd)
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
if (PARTITION[ind] == 0) {
|
||||||
|
# Yksil?on outlier
|
||||||
|
} else if (PARTITION[ind] != 0) {
|
||||||
|
if (PARTITION[ind] > 0) {
|
||||||
|
osuusTaulu(PARTITION[ind]) <- 1
|
||||||
|
} else {
|
||||||
|
# Yksilöt, joita ei ole sijoitettu mihinkään koriin.
|
||||||
|
arvot <- zeros(1, npops)
|
||||||
|
for (q in 1:npops) {
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
osuusTaulu[q] <- 1
|
||||||
|
arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
}
|
||||||
|
iso_arvo <- max(arvot)
|
||||||
|
isoimman_indeksi <- match(max(arvot), arvot)
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
osuusTaulu[isoimman_indeksi] <- 1
|
||||||
|
PARTITION[ind] <- isoimman_indeksi
|
||||||
|
}
|
||||||
|
logml <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
logmlAlku <- logml
|
||||||
|
for (osuus in c(0.5, 0.25, 0.05, 0.01)) {
|
||||||
|
etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml)
|
||||||
|
osuusTaulu <- etsiResult[1]
|
||||||
|
logml <- etsiResult[2]
|
||||||
|
}
|
||||||
|
logmlLoppu <- logml
|
||||||
|
likelihood[ind] <- logmlLoppu - logmlAlku
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Analyze further only individuals who have log-likelihood ratio larger than 3:
|
||||||
|
to_investigate <- t(find(likelihood > 3))
|
||||||
|
cat('Possibly admixed individuals:\n')
|
||||||
|
for (i in 1:length(to_investigate)) {
|
||||||
|
cat(as.character(to_investigate[i]))
|
||||||
|
}
|
||||||
|
cat(' ')
|
||||||
|
cat('Populations for possibly admixed individuals:\n')
|
||||||
|
admix_populaatiot <- unique(PARTITION[to_investigate])
|
||||||
|
for (i in 1:length(admix_populaatiot)) {
|
||||||
|
cat(as.character(admix_populaatiot[i]))
|
||||||
|
}
|
||||||
|
|
||||||
|
# THUS, there are two types of individuals, who will not be analyzed with
|
||||||
|
# simulated allele frequencies: those who belonged to a mini-population
|
||||||
|
# which was removed, and those who have log-likelihood ratio less than 3.
|
||||||
|
# The value in the PARTITION for the first kind of individuals is 0. The
|
||||||
|
# second kind of individuals can be identified, because they do not
|
||||||
|
# belong to "to_investigate" array. When the results are presented, the
|
||||||
|
# first kind of individuals are omitted completely, while the second kind
|
||||||
|
# of individuals are completely put to the population, where they ended up
|
||||||
|
# in the mixture analysis. These second type of individuals will have a
|
||||||
|
# unit p-value.
|
||||||
|
|
||||||
|
|
||||||
# # First calculate log-likelihood ratio for all individuals:
|
# Simulate allele frequencies a given number of times and save the average
|
||||||
# likelihood = zeros(ninds,1);
|
# result to "proportionsIt" array.
|
||||||
# allfreqs = computeAllFreqs2(noalle);
|
|
||||||
# for ind = 1:ninds
|
|
||||||
# omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# if PARTITION(ind)==0
|
|
||||||
# # Yksil?on outlier
|
|
||||||
# elseif PARTITION(ind)~=0
|
|
||||||
# if PARTITION(ind)>0
|
|
||||||
# osuusTaulu(PARTITION(ind)) = 1;
|
|
||||||
# else
|
|
||||||
# # Yksilöt, joita ei ole sijoitettu mihinkään koriin.
|
|
||||||
# arvot = zeros(1,npops);
|
|
||||||
# for q=1:npops
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# osuusTaulu(q) = 1;
|
|
||||||
# arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
|
|
||||||
# end
|
|
||||||
# [iso_arvo, isoimman_indeksi] = max(arvot);
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# osuusTaulu(isoimman_indeksi) = 1;
|
|
||||||
# PARTITION(ind)=isoimman_indeksi;
|
|
||||||
# end
|
|
||||||
# logml = computeIndLogml(omaFreqs, osuusTaulu);
|
|
||||||
# logmlAlku = logml;
|
|
||||||
# for osuus = [0.5 0.25 0.05 0.01]
|
|
||||||
# [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
|
|
||||||
# end
|
|
||||||
# logmlLoppu = logml;
|
|
||||||
# likelihood(ind) = logmlLoppu-logmlAlku;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# # Analyze further only individuals who have log-likelihood ratio larger than 3:
|
proportionsIt <- zeros(ninds, npops)
|
||||||
# to_investigate = (find(likelihood>3))';
|
for (iterationNum in 1:iterationCount) {
|
||||||
# disp('Possibly admixed individuals: ');
|
cat('Iter:', as.character(iterationNum))
|
||||||
# for i = 1:length(to_investigate)
|
allfreqs <- simulateAllFreqs(noalle) # Allele frequencies on this iteration.
|
||||||
# disp(num2str(to_investigate(i)));
|
|
||||||
# end
|
|
||||||
# disp(' ');
|
|
||||||
# disp('Populations for possibly admixed individuals: ');
|
|
||||||
# admix_populaatiot = unique(PARTITION(to_investigate));
|
|
||||||
# for i = 1:length(admix_populaatiot)
|
|
||||||
# disp(num2str(admix_populaatiot(i)));
|
|
||||||
# end
|
|
||||||
|
|
||||||
# # THUS, there are two types of individuals, who will not be analyzed with
|
for (ind in to_investigate) {
|
||||||
# # simulated allele frequencies: those who belonged to a mini-population
|
#disp(num2str(ind));
|
||||||
# # which was removed, and those who have log-likelihood ratio less than 3.
|
omaFreqs <- computePersonalAllFreqs(
|
||||||
# # The value in the PARTITION for the first kind of individuals is 0. The
|
ind, data, allfreqs, rowsFromInd
|
||||||
# # second kind of individuals can be identified, because they do not
|
)
|
||||||
# # belong to "to_investigate" array. When the results are presented, the
|
osuusTaulu = zeros(1, npops)
|
||||||
# # first kind of individuals are omitted completely, while the second kind
|
if (PARTITION[ind] == 0) {
|
||||||
# # of individuals are completely put to the population, where they ended up
|
# Yksil?on outlier
|
||||||
# # in the mixture analysis. These second type of individuals will have a
|
} else if (PARTITION[ind] != 0) {
|
||||||
# # unit p-value.
|
if (PARTITION[ind] > 0) {
|
||||||
|
osuusTaulu(PARTITION[ind]) <- 1
|
||||||
|
} else {
|
||||||
|
# Yksilöt, joita ei ole sijoitettu mihinkään koriin.
|
||||||
|
arvot <- zeros(1, npops)
|
||||||
|
for (q in 1:npops) {
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
osuusTaulu[q] <- 1
|
||||||
|
arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
}
|
||||||
|
iso_arvo <- max(arvot)
|
||||||
|
isoimman_indeksi <- match(max(arvot), arvot)
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
osuusTaulu[isoimman_indeksi] <- 1
|
||||||
|
PARTITION[ind] <- isoimman_indeksi
|
||||||
|
}
|
||||||
|
logml <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
|
||||||
|
for (osuus in c(0.5, 0.25, 0.05, 0.01)) {
|
||||||
|
etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml)
|
||||||
|
osuusTaulu <- etsiResult[1]
|
||||||
|
logml <- etsiResult[2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
proportionsIt[ind, ] <- proportionsIt[ind, ] * (iterationNum - 1) +
|
||||||
|
osuusTaulu
|
||||||
|
proportionsIt[ind, ] <- proportionsIt[ind, ] / iterationNum
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']);
|
||||||
|
#disp('each population.');
|
||||||
|
|
||||||
|
#allfreqs = simulateAllFreqs(noalle); # Simuloidaan alleelifrekvenssisetti
|
||||||
|
allfreqs <- computeAllFreqs2(noalle); # Koitetaan tällaista.
|
||||||
|
|
||||||
|
|
||||||
# # Simulate allele frequencies a given number of times and save the average
|
# Initialize the data structures, which are required in taking the missing
|
||||||
# # result to "proportionsIt" array.
|
# data into account:
|
||||||
|
n_missing_levels <- zeros(npops, 1) # number of different levels of "missingness" in each pop (max 3).
|
||||||
|
missing_levels <- zeros(npops, 3) # the mean values for different levels.
|
||||||
|
missing_level_partition <- zeros(ninds, 1) # level of each individual (one of the levels of its population).
|
||||||
|
for (i in 1:npops) {
|
||||||
|
inds <- find(PARTITION == i)
|
||||||
|
# Proportions of non-missing data for the individuals:
|
||||||
|
non_missing_data <- zeros(length(inds), 1)
|
||||||
|
for (j in 1:length(inds)) {
|
||||||
|
ind <- inds[j]
|
||||||
|
non_missing_data[j] <- length(
|
||||||
|
find(data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0)
|
||||||
|
) / (rowsFromInd * nloci)
|
||||||
|
}
|
||||||
|
if (all(non_missing_data > 0.9)) {
|
||||||
|
n_missing_levels[i] <- 1
|
||||||
|
missing_levels[i, 1] <- mean(non_missing_data)
|
||||||
|
missing_level_partition[inds] <- 1
|
||||||
|
} else {
|
||||||
|
# TODO: fix syntax
|
||||||
|
# [ordered, ordering] = sort(non_missing_data);
|
||||||
|
ordered <- ordering <- sort(non_missing_data)
|
||||||
|
#part = learn_simple_partition(ordered, 0.05);
|
||||||
|
part <- learn_partition_modified(ordered)
|
||||||
|
aux <- sortrows(cbind(part, ordering), 2)
|
||||||
|
part = aux[, 1]
|
||||||
|
missing_level_partition[inds]<- part
|
||||||
|
n_levels <- length(unique(part))
|
||||||
|
n_missing_levels[i] <- n_levels
|
||||||
|
for (j in 1:n_levels) {
|
||||||
|
missing_levels[i, j] <- mean(non_missing_data[find(part == j)])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
# proportionsIt = zeros(ninds,npops);
|
# Create and analyse reference individuals for populations
|
||||||
# for iterationNum = 1:iterationCount
|
# with potentially admixed individuals:
|
||||||
# disp(['Iter: ' num2str(iterationNum)]);
|
refTaulu <- zeros(npops, 100, 3)
|
||||||
# allfreqs = simulateAllFreqs(noalle); # Allele frequencies on this iteration.
|
for (pop in t(admix_populaatiot)) {
|
||||||
|
|
||||||
# for ind=to_investigate
|
for (level in 1:n_missing_levels[pop]) {
|
||||||
# #disp(num2str(ind));
|
|
||||||
# omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# if PARTITION(ind)==0
|
|
||||||
# # Yksil?on outlier
|
|
||||||
# elseif PARTITION(ind)~=0
|
|
||||||
# if PARTITION(ind)>0
|
|
||||||
# osuusTaulu(PARTITION(ind)) = 1;
|
|
||||||
# else
|
|
||||||
# # Yksilöt, joita ei ole sijoitettu mihinkään koriin.
|
|
||||||
# arvot = zeros(1,npops);
|
|
||||||
# for q=1:npops
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# osuusTaulu(q) = 1;
|
|
||||||
# arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
|
|
||||||
# end
|
|
||||||
# [iso_arvo, isoimman_indeksi] = max(arvot);
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# osuusTaulu(isoimman_indeksi) = 1;
|
|
||||||
# PARTITION(ind)=isoimman_indeksi;
|
|
||||||
# end
|
|
||||||
# logml = computeIndLogml(omaFreqs, osuusTaulu);
|
|
||||||
|
|
||||||
# for osuus = [0.5 0.25 0.05 0.01]
|
potential_inds_in_this_pop_and_level <-
|
||||||
# [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
|
find(
|
||||||
# end
|
PARTITION == pop & missing_level_partition == level &
|
||||||
# end
|
likelihood > 3
|
||||||
# proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu;
|
) # Potential admix individuals here.
|
||||||
# proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# #disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']);
|
if (!isempty(potential_inds_in_this_pop_and_level)) {
|
||||||
# #disp('each population.');
|
|
||||||
|
|
||||||
# #allfreqs = simulateAllFreqs(noalle); # Simuloidaan alleelifrekvenssisetti
|
#refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs);
|
||||||
# allfreqs = computeAllFreqs2(noalle); # Koitetaan tällaista.
|
refData <- simulateIndividuals(
|
||||||
|
nrefIndsInPop, rowsFromInd, allfreqs, pop,
|
||||||
|
missing_levels[pop, level]
|
||||||
|
)
|
||||||
|
|
||||||
|
cat(
|
||||||
|
'Analysing the reference individuals from pop', pop,
|
||||||
|
'(level', level, ').'
|
||||||
|
)
|
||||||
|
refProportions <- zeros(nrefIndsInPop, npops)
|
||||||
|
for (iter in 1:iterationCountRef) {
|
||||||
|
#disp(['Iter: ' num2str(iter)]);
|
||||||
|
allfreqs <- simulateAllFreqs(noalle)
|
||||||
|
|
||||||
|
for (ind in 1:nrefIndsInPop) {
|
||||||
|
omaFreqs <- computePersonalAllFreqs(
|
||||||
|
ind, refData, allfreqs, rowsFromInd
|
||||||
|
)
|
||||||
|
osuusTaulu <- zeros(1, npops)
|
||||||
|
osuusTaulu[pop] <- 1
|
||||||
|
logml <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
for (osuus in c(0.5, 0.25, 0.05, 0.01)) {
|
||||||
|
etsiResult <- etsiParas(
|
||||||
|
osuus, osuusTaulu, omaFreqs, logml
|
||||||
|
)
|
||||||
|
osuusTaulu <- etsiResult[1]
|
||||||
|
logml <- etsiResult[2]
|
||||||
|
}
|
||||||
|
refProportions[ind, ] <-
|
||||||
|
refProportions[ind, ] * (iter - 1) + osuusTaulu
|
||||||
|
refProportions[ind, ] <- refProportions[ind, ] / iter
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (ind in 1:nrefIndsInPop) {
|
||||||
|
omanOsuus <- refProportions[ind, pop]
|
||||||
|
if (round(omanOsuus * 100) == 0) {
|
||||||
|
omanOsuus <- 0.01
|
||||||
|
}
|
||||||
|
if (abs(omanOsuus) < 1e-5) {
|
||||||
|
omanOsuus <- 0.01
|
||||||
|
}
|
||||||
|
refTaulu[pop, round(omanOsuus*100), level] <-
|
||||||
|
refTaulu[pop, round(omanOsuus*100),level] + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Rounding of the results:
|
||||||
|
proportionsIt <- proportionsIt * 100
|
||||||
|
proportionsIt <- round(proportionsIt)
|
||||||
|
proportionsIt <- proportionsIt / 100
|
||||||
|
for (ind in 1:ninds) {
|
||||||
|
if (!any(to_investigate == ind)) {
|
||||||
|
if (PARTITION[ind] > 0) {
|
||||||
|
proportionsIt[ind, PARTITION[ind]] <- 1
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
# In case of a rounding error, the sum is made equal to unity by
|
||||||
|
# fixing the largest value.
|
||||||
|
if ((PARTITION[ind] > 0) & (sum(proportionsIt[ind, ]) != 1)) {
|
||||||
|
isoin <- max(proportionsIt[ind, ])
|
||||||
|
indeksi <- match(isoin, max(proportionsIt[ind, ]))
|
||||||
|
erotus <- sum(proportionsIt[ind, ]) - 1
|
||||||
|
proportionsIt[ind, indeksi] <- isoin - erotus
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate p-value for each individual:
|
||||||
|
uskottavuus <- zeros(ninds, 1)
|
||||||
|
for (ind in 1:ninds) {
|
||||||
|
pop <- PARTITION[ind]
|
||||||
|
if (pop == 0) { # Individual is outlier
|
||||||
|
uskottavuus[ind] <- 1
|
||||||
|
} else if (isempty(find(to_investigate == ind))) {
|
||||||
|
# Individual had log-likelihood ratio<3
|
||||||
|
uskottavuus[ind] <- 1
|
||||||
|
} else {
|
||||||
|
omanOsuus <- proportionsIt[ind, pop]
|
||||||
|
if (abs(omanOsuus) < 1e-5) {
|
||||||
|
omanOsuus <- 0.01
|
||||||
|
}
|
||||||
|
if (round(omanOsuus*100)==0) {
|
||||||
|
omanOsuus <- 0.01
|
||||||
|
}
|
||||||
|
level <- missing_level_partition[ind]
|
||||||
|
refPienempia <- sum(refTaulu[pop, 1:round(100*omanOsuus), level])
|
||||||
|
uskottavuus[ind] <- refPienempia / nrefIndsInPop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ASK: Remove? are these plotting functions?
|
||||||
|
tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount)
|
||||||
|
viewPartition(proportionsIt, popnames)
|
||||||
|
|
||||||
|
talle = inputdlg('Do you want to save the admixture results? [Y/n]', 'y')
|
||||||
|
if (talle %in% c('y', 'Y', 'yes', 'Yes')) {
|
||||||
|
#waitALittle;
|
||||||
|
filename <- inputdlg(
|
||||||
|
'Save results as (file name):', 'admixture_results.rda'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# # Initialize the data structures, which are required in taking the missing
|
if (filename == 0) {
|
||||||
# # data into account:
|
# Cancel was pressed
|
||||||
# n_missing_levels = zeros(npops,1); # number of different levels of "missingness" in each pop (max 3).
|
return()
|
||||||
# missing_levels = zeros(npops,3); # the mean values for different levels.
|
} else { # copy 'baps4_output.baps' into the text file with the same name.
|
||||||
# missing_level_partition = zeros(ninds,1); # level of each individual (one of the levels of its population).
|
if (file.exists('baps4_output.baps')) {
|
||||||
# for i=1:npops
|
file.copy('baps4_output.baps', paste0(filename, '.txt'))
|
||||||
# inds = find(PARTITION==i);
|
file.remove('baps4_output.baps')
|
||||||
# # Proportions of non-missing data for the individuals:
|
}
|
||||||
# non_missing_data = zeros(length(inds),1);
|
}
|
||||||
# for j = 1:length(inds)
|
|
||||||
# ind = inds(j);
|
|
||||||
# non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci);
|
|
||||||
# end
|
|
||||||
# if all(non_missing_data>0.9)
|
|
||||||
# n_missing_levels(i) = 1;
|
|
||||||
# missing_levels(i,1) = mean(non_missing_data);
|
|
||||||
# missing_level_partition(inds) = 1;
|
|
||||||
# else
|
|
||||||
# [ordered, ordering] = sort(non_missing_data);
|
|
||||||
# #part = learn_simple_partition(ordered, 0.05);
|
|
||||||
# part = learn_partition_modified(ordered);
|
|
||||||
# aux = sortrows([part ordering],2);
|
|
||||||
# part = aux(:,1);
|
|
||||||
# missing_level_partition(inds) = part;
|
|
||||||
# n_levels = length(unique(part));
|
|
||||||
# n_missing_levels(i) = n_levels;
|
|
||||||
# for j=1:n_levels
|
|
||||||
# missing_levels(i,j) = mean(non_missing_data(find(part==j)));
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# # Create and analyse reference individuals for populations
|
if (!isstruct(tietue)) {
|
||||||
# # with potentially admixed individuals:
|
c$proportionsIt <- proportionsIt
|
||||||
# refTaulu = zeros(npops,100,3);
|
c$pvalue <- uskottavuus # Added by Jing
|
||||||
# for pop = admix_populaatiot'
|
c$mixtureType <- 'admix' # Jing
|
||||||
|
c$admixnpops <- npops;
|
||||||
# for level = 1:n_missing_levels(pop)
|
save(c, file=filename)
|
||||||
|
} else {
|
||||||
# potential_inds_in_this_pop_and_level = ...
|
tietue$proportionsIt <- proportionsIt
|
||||||
# find(PARTITION==pop & missing_level_partition==level &...
|
tietue$pvalue <- uskottavuus; # Added by Jing
|
||||||
# likelihood>3); # Potential admix individuals here.
|
tietue$mixtureType <- 'admix'
|
||||||
|
tietue$admixnpops <- npops
|
||||||
# if ~isempty(potential_inds_in_this_pop_and_level)
|
save(tietue, file=filename)
|
||||||
|
}
|
||||||
# #refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs);
|
}
|
||||||
# refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ...
|
|
||||||
# pop, missing_levels(pop,level));
|
|
||||||
|
|
||||||
# disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']);
|
|
||||||
# refProportions = zeros(nrefIndsInPop,npops);
|
|
||||||
# for iter = 1:iterationCountRef
|
|
||||||
# #disp(['Iter: ' num2str(iter)]);
|
|
||||||
# allfreqs = simulateAllFreqs(noalle);
|
|
||||||
|
|
||||||
# for ind = 1:nrefIndsInPop
|
|
||||||
# omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd);
|
|
||||||
# osuusTaulu = zeros(1,npops);
|
|
||||||
# osuusTaulu(pop)=1;
|
|
||||||
# logml = computeIndLogml(omaFreqs, osuusTaulu);
|
|
||||||
# for osuus = [0.5 0.25 0.05 0.01]
|
|
||||||
# [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
|
|
||||||
# end
|
|
||||||
# refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu;
|
|
||||||
# refProportions(ind,:) = refProportions(ind,:)./iter;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# for ind = 1:nrefIndsInPop
|
|
||||||
# omanOsuus = refProportions(ind,pop);
|
|
||||||
# if round(omanOsuus*100)==0
|
|
||||||
# omanOsuus = 0.01;
|
|
||||||
# end
|
|
||||||
# if abs(omanOsuus)<1e-5
|
|
||||||
# omanOsuus = 0.01;
|
|
||||||
# end
|
|
||||||
# refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# # Rounding of the results:
|
|
||||||
# proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt);
|
|
||||||
# proportionsIt = proportionsIt./100;
|
|
||||||
# for ind = 1:ninds
|
|
||||||
# if ~any(to_investigate==ind)
|
|
||||||
# if PARTITION(ind)>0
|
|
||||||
# proportionsIt(ind,PARTITION(ind))=1;
|
|
||||||
# end
|
|
||||||
# else
|
|
||||||
# # In case of a rounding error, the sum is made equal to unity by
|
|
||||||
# # fixing the largest value.
|
|
||||||
# if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1)
|
|
||||||
# [isoin,indeksi] = max(proportionsIt(ind,:));
|
|
||||||
# erotus = sum(proportionsIt(ind,:))-1;
|
|
||||||
# proportionsIt(ind,indeksi) = isoin-erotus;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# # Calculate p-value for each individual:
|
|
||||||
# uskottavuus = zeros(ninds,1);
|
|
||||||
# for ind = 1:ninds
|
|
||||||
# pop = PARTITION(ind);
|
|
||||||
# if pop==0 # Individual is outlier
|
|
||||||
# uskottavuus(ind)=1;
|
|
||||||
# elseif isempty(find(to_investigate==ind))
|
|
||||||
# # Individual had log-likelihood ratio<3
|
|
||||||
# uskottavuus(ind)=1;
|
|
||||||
# else
|
|
||||||
# omanOsuus = proportionsIt(ind,pop);
|
|
||||||
# if abs(omanOsuus)<1e-5
|
|
||||||
# omanOsuus = 0.01;
|
|
||||||
# end
|
|
||||||
# if round(omanOsuus*100)==0
|
|
||||||
# omanOsuus = 0.01;
|
|
||||||
# end
|
|
||||||
# level = missing_level_partition(ind);
|
|
||||||
# refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level));
|
|
||||||
# uskottavuus(ind) = refPienempia / nrefIndsInPop;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount);
|
|
||||||
|
|
||||||
# viewPartition(proportionsIt, popnames);
|
|
||||||
|
|
||||||
# talle = questdlg(['Do you want to save the admixture results?'], ...
|
|
||||||
# 'Save results?','Yes','No','Yes');
|
|
||||||
# if isequal(talle,'Yes')
|
|
||||||
# #waitALittle;
|
|
||||||
# [filename, pathname] = uiputfile('*.mat','Save results as');
|
|
||||||
|
|
||||||
|
|
||||||
# if (filename == 0) & (pathname == 0)
|
|
||||||
# # Cancel was pressed
|
|
||||||
# return
|
|
||||||
# else # copy 'baps4_output.baps' into the text file with the same name.
|
|
||||||
# if exist('baps4_output.baps','file')
|
|
||||||
# copyfile('baps4_output.baps',[pathname filename '.txt'])
|
|
||||||
# delete('baps4_output.baps')
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
|
|
||||||
# if (~isstruct(tietue))
|
|
||||||
# c.proportionsIt = proportionsIt;
|
|
||||||
# c.pvalue = uskottavuus; # Added by Jing
|
|
||||||
# c.mixtureType = 'admix'; # Jing
|
|
||||||
# c.admixnpops = npops;
|
|
||||||
# # save([pathname filename], 'c');
|
|
||||||
# save([pathname filename], 'c', '-v7.3'); # added by Lu Cheng, 08.06.2012
|
|
||||||
# else
|
|
||||||
# tietue.proportionsIt = proportionsIt;
|
|
||||||
# tietue.pvalue = uskottavuus; # Added by Jing
|
|
||||||
# tietue.mixtureType = 'admix';
|
|
||||||
# tietue.admixnpops = npops;
|
|
||||||
# # save([pathname filename], 'tietue');
|
|
||||||
# save([pathname filename], 'tietue', '-v7.3'); # added by Lu Cheng, 08.06.2012
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
}
|
}
|
||||||
Loading…
Add table
Reference in a new issue