diff --git a/R/addAlleles.R b/R/addAlleles.R index 6dd1c08..8f08341 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -6,47 +6,47 @@ #' @return data (after alleles were added) #' @export addAlleles <- function(data, ind, line, divider) { - # Lisaa BAPS-formaatissa olevaan datataulukkoon - # yksil�� ind vastaavat rivit. Yksil�n alleelit - # luetaan genepop-formaatissa olevasta rivist? - # line. Jos data on 3 digit formaatissa on divider=1000. - # Jos data on 2 digit formaatissa on divider=100. + # Lisaa BAPS-formaatissa olevaan datataulukkoon + # yksil�� ind vastaavat rivit. Yksil�n alleelit + # luetaan genepop-formaatissa olevasta rivist? + # line. Jos data on 3 digit formaatissa on divider=1000. + # Jos data on 2 digit formaatissa on divider=100. - nloci <- size(data, 2) # added 1 from original code - if (size(data, 1) < (2 * ind)) { - data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code - } + nloci <- size(data, 2) # added 1 from original code + if (size(data, 1) < (2 * ind)) { + data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code + } - k <- 1 - merkki <- substring(line, k, k) - while (merkki != ',') { - k <- k + 1 - merkki <- substring(line, k, k) - } - line <- substring(line, k + 1) - # clear k; clear merkki; + k <- 1 + merkki <- substring(line, k, k) + while (merkki != ",") { + k <- k + 1 + merkki <- substring(line, k, k) + } + line <- substring(line, k + 1) + # clear k; clear merkki; - if (grepl(" ", line)) { - alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) - } else if (grepl("\t", line)) { - alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]]) - } + if (grepl(" ", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) + } else if (grepl("\t", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]]) + } - if (length(alleeliTaulu) != nloci) { - stop('Incorrect data format.') - } + if (length(alleeliTaulu) != nloci) { + stop("Incorrect data format.") + } - for (j in seq_len(nloci)) { - ekaAlleeli <- floor(alleeliTaulu[j] / divider) - if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999 - tokaAlleeli <- alleeliTaulu[j] %% divider - if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999 + for (j in seq_len(nloci)) { + ekaAlleeli <- floor(alleeliTaulu[j] / divider) + if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999 + tokaAlleeli <- alleeliTaulu[j] %% divider + if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999 - data[2 * ind - 1, j] <- ekaAlleeli - data[2 * ind, j] <- tokaAlleeli - } + data[2 * ind - 1, j] <- ekaAlleeli + data[2 * ind, j] <- tokaAlleeli + } - data[2 * ind - 1, ncol(data)] <- ind - data[2 * ind, ncol(data)] <- ind - return(data) -} \ No newline at end of file + data[2 * ind - 1, ncol(data)] <- ind + data[2 * ind, ncol(data)] <- ind + return(data) +} diff --git a/R/addToSummary.R b/R/addToSummary.R index 4affbe9..a1861d8 100644 --- a/R/addToSummary.R +++ b/R/addToSummary.R @@ -1,18 +1,18 @@ addToSummary <- function(logml, partitionSummary, worstIndex) { - # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo - # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole - # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja - # nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. + # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo + # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole + # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja + # nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. - apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) - if (isempty(apu)) { - # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. - npops <- length(unique(PARTITION)) - partitionSummary[worstIndex, 1] <- npops - partitionSummary[worstIndex, 2] <- logml - added <- 1 - } else { - added <- 0 - } - return(list(partitionSummary = partitionSummary, added = added)) -} \ No newline at end of file + apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) + if (isempty(apu)) { + # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. + npops <- length(unique(PARTITION)) + partitionSummary[worstIndex, 1] <- npops + partitionSummary[worstIndex, 2] <- logml + added <- 1 + } else { + added <- 0 + } + return(list(partitionSummary = partitionSummary, added = added)) +} diff --git a/R/admix1.R b/R/admix1.R index b1ac383..fb59911 100644 --- a/R/admix1.R +++ b/R/admix1.R @@ -8,398 +8,400 @@ #' @importFrom methods is #' @export admix1 <- function(tietue) { - if (!is.list(tietue)) { - message('Load mixture result file. These are the files in this directory:') - print(list.files()) - pathname_filename <- file.choose() - if (!file.exists(pathname_filename)) { - stop( - "File ", pathname_filename, - " does not exist. Check spelling and location." - ) - } else { - cat('---------------------------------------------------\n'); - message('Reading mixture result from: ', pathname_filename, '...') - } - Sys.sleep(0.0001) #TODO: remove - - # ASK: what is this supposed to do? What do graphic obj have to do here? - # h0 = findobj('Tag','filename1_text'); - # set(h0,'String',filename); clear h0; - - struct_array <- load(pathname_filename) - if (isfield(struct_array, 'c')) { #Matlab versio - c <- struct_array$c - if (!isfield(c, 'PARTITION') | !isfield(c,'rowsFromInd')) { - stop('Incorrect file format') - } - } else if (isfield(struct_array, 'PARTITION')) { #Mideva versio - c <- struct_array - if (!isfield(c,'rowsFromInd')) stop('Incorrect file format') - } else { - stop('Incorrect file format') - } - - if (isfield(c, 'gene_lengths') & - strcmp(c$mixtureType, 'linear_mix') | - strcmp(c$mixtureType, 'codon_mix')) { # if the mixture is from a linkage model - # Redirect the call to the linkage admixture function. - # call function noindex to remove the index column - c$data <- noIndex(c$data, c$noalle) - # linkage_admix(c) # TODO: obsolete. remove. - # return - stop("linkage_admix not implemented") - } - PARTITION <- c$PARTITION - COUNTS <- c$COUNTS - SUMCOUNTS <- c$SUMCOUNTS - alleleCodes <- c$alleleCodes - adjprior <- c$adjprior - popnames <- c$popnames - rowsFromInd <- c$rowsFromInd - data <- c$data - npops <- c$npops - noalle <- c$noalle + if (!is.list(tietue)) { + message("Load mixture result file. These are the files in this directory:") + print(list.files()) + pathname_filename <- file.choose() + if (!file.exists(pathname_filename)) { + stop( + "File ", pathname_filename, + " does not exist. Check spelling and location." + ) } else { - PARTITION <- tietue$PARTITION - COUNTS <- tietue$COUNTS - SUMCOUNTS <- tietue$SUMCOUNTS - alleleCodes <- tietue$alleleCodes - adjprior <- tietue$adjprior - popnames <- tietue$popnames - rowsFromInd <- tietue$rowsFromInd - data <- as.double(tietue$data) - npops <- tietue$npops - noalle <- tietue$noalle + cat("---------------------------------------------------\n") + message("Reading mixture result from: ", pathname_filename, "...") + } + Sys.sleep(0.0001) # TODO: remove + + # ASK: what is this supposed to do? What do graphic obj have to do here? + # h0 = findobj('Tag','filename1_text'); + # set(h0,'String',filename); clear h0; + + struct_array <- load(pathname_filename) + if (isfield(struct_array, "c")) { # Matlab versio + c <- struct_array$c + if (!isfield(c, "PARTITION") | !isfield(c, "rowsFromInd")) { + stop("Incorrect file format") + } + } else if (isfield(struct_array, "PARTITION")) { # Mideva versio + c <- struct_array + if (!isfield(c, "rowsFromInd")) stop("Incorrect file format") + } else { + stop("Incorrect file format") } - answers <- inputdlg( - prompt = paste( - "Input the minimum size of a population that will", - "be taken into account when admixture is estimated." - ), - definput = 5 - ) - alaRaja <- as.numeric(answers) - npops <- poistaLiianPienet(npops, rowsFromInd, alaRaja) - - nloci <- size(COUNTS, 2) - ninds <- size(data, 1) / rowsFromInd - - answers <- inputdlg('Input number of iterations', definput=50) - if (isempty(answers)) return() - iterationCount <- as.numeric(answers[1, 1]) # maybe [[]]? - - answers <- inputdlg( - prompt = 'Input number of reference individuals from each population', - definput = 50 - ) - if (isempty(answers)) { - nrefIndsInPop <- 50 - } else { - nrefIndsInPop <- as.numeric(answers[1, 1]) + if (isfield(c, "gene_lengths") & + strcmp(c$mixtureType, "linear_mix") | + strcmp(c$mixtureType, "codon_mix")) { # if the mixture is from a linkage model + # Redirect the call to the linkage admixture function. + # call function noindex to remove the index column + c$data <- noIndex(c$data, c$noalle) + # linkage_admix(c) # TODO: obsolete. remove. + # return + stop("linkage_admix not implemented") } + PARTITION <- c$PARTITION + COUNTS <- c$COUNTS + SUMCOUNTS <- c$SUMCOUNTS + alleleCodes <- c$alleleCodes + adjprior <- c$adjprior + popnames <- c$popnames + rowsFromInd <- c$rowsFromInd + data <- c$data + npops <- c$npops + noalle <- c$noalle + } else { + PARTITION <- tietue$PARTITION + COUNTS <- tietue$COUNTS + SUMCOUNTS <- tietue$SUMCOUNTS + alleleCodes <- tietue$alleleCodes + adjprior <- tietue$adjprior + popnames <- tietue$popnames + rowsFromInd <- tietue$rowsFromInd + data <- as.double(tietue$data) + npops <- tietue$npops + noalle <- tietue$noalle + } - answers <- inputdlg( - prompt = 'Input number of iterations for reference individuals', - definput = 10 - ) - if (isempty(answers)) return() - iterationCountRef <- as.numeric(answers[1, 1]) + answers <- inputdlg( + prompt = paste( + "Input the minimum size of a population that will", + "be taken into account when admixture is estimated." + ), + definput = 5 + ) + alaRaja <- as.numeric(answers) + npops <- poistaLiianPienet(npops, rowsFromInd, alaRaja) - # First calculate log-likelihood ratio for all individuals: - likelihood <- zeros(ninds, 1) - allfreqs <- computeAllFreqs2(noalle) - for (ind in 1:ninds) { - omaFreqs <- computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd) + nloci <- size(COUNTS, 2) + ninds <- size(data, 1) / rowsFromInd + + answers <- inputdlg("Input number of iterations", definput = 50) + if (isempty(answers)) { + return() + } + iterationCount <- as.numeric(answers[1, 1]) # maybe [[]]? + + answers <- inputdlg( + prompt = "Input number of reference individuals from each population", + definput = 50 + ) + if (isempty(answers)) { + nrefIndsInPop <- 50 + } else { + nrefIndsInPop <- as.numeric(answers[1, 1]) + } + + answers <- inputdlg( + prompt = "Input number of iterations for reference individuals", + definput = 10 + ) + if (isempty(answers)) { + return() + } + iterationCountRef <- as.numeric(answers[1, 1]) + + # First calculate log-likelihood ratio for all individuals: + likelihood <- zeros(ninds, 1) + allfreqs <- computeAllFreqs2(noalle) + for (ind in 1:ninds) { + omaFreqs <- computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd) + osuusTaulu <- zeros(1, npops) + if (PARTITION[ind] == 0) { + # Yksil?on outlier + } else if (PARTITION[ind] != 0) { + if (PARTITION[ind] > 0) { + osuusTaulu[PARTITION[ind]] <- 1 + } else { + # Yksilöt, joita ei ole sijoitettu mihinkään koriin. + arvot <- zeros(1, npops) + for (q in 1:npops) { + osuusTaulu <- zeros(1, npops) + osuusTaulu[q] <- 1 + arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu) + } + iso_arvo <- max(arvot) + isoimman_indeksi <- match(max(arvot), arvot) osuusTaulu <- zeros(1, npops) - if (PARTITION[ind] == 0) { - # Yksil?on outlier - } else if (PARTITION[ind] != 0) { - if (PARTITION[ind] > 0) { - osuusTaulu[PARTITION[ind]] <- 1 - } else { - # Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot <- zeros(1, npops) - for (q in 1:npops) { - osuusTaulu <- zeros(1, npops) - osuusTaulu[q] <- 1 - arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu) - } - iso_arvo <- max(arvot) - isoimman_indeksi <- match(max(arvot), arvot) - osuusTaulu <- zeros(1, npops) - osuusTaulu[isoimman_indeksi] <- 1 - PARTITION[ind] <- isoimman_indeksi - } - logml <- computeIndLogml(omaFreqs, osuusTaulu) - logmlAlku <- logml - for (osuus in c(0.5, 0.25, 0.05, 0.01)) { - etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml) - osuusTaulu <- etsiResult[1] - logml <- etsiResult[2] - } - logmlLoppu <- logml - likelihood[ind] <- logmlLoppu - logmlAlku - } + osuusTaulu[isoimman_indeksi] <- 1 + PARTITION[ind] <- isoimman_indeksi + } + logml <- computeIndLogml(omaFreqs, osuusTaulu) + logmlAlku <- logml + for (osuus in c(0.5, 0.25, 0.05, 0.01)) { + etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml) + osuusTaulu <- etsiResult[1] + logml <- etsiResult[2] + } + logmlLoppu <- logml + likelihood[ind] <- logmlLoppu - logmlAlku } + } - # Analyze further only individuals who have log-likelihood ratio larger than 3: - to_investigate <- t(find(likelihood > 3)) - cat('Possibly admixed individuals:\n') - for (i in 1:length(to_investigate)) { - cat(as.character(to_investigate[i])) - } - cat(' ') - cat('Populations for possibly admixed individuals:\n') - admix_populaatiot <- unique(PARTITION[to_investigate]) - for (i in 1:length(admix_populaatiot)) { - cat(as.character(admix_populaatiot[i])) - } + # Analyze further only individuals who have log-likelihood ratio larger than 3: + to_investigate <- t(find(likelihood > 3)) + cat("Possibly admixed individuals:\n") + for (i in 1:length(to_investigate)) { + cat(as.character(to_investigate[i])) + } + cat(" ") + cat("Populations for possibly admixed individuals:\n") + admix_populaatiot <- unique(PARTITION[to_investigate]) + for (i in 1:length(admix_populaatiot)) { + cat(as.character(admix_populaatiot[i])) + } - # THUS, there are two types of individuals, who will not be analyzed with - # simulated allele frequencies: those who belonged to a mini-population - # which was removed, and those who have log-likelihood ratio less than 3. - # The value in the PARTITION for the first kind of individuals is 0. The - # second kind of individuals can be identified, because they do not - # belong to "to_investigate" array. When the results are presented, the - # first kind of individuals are omitted completely, while the second kind - # of individuals are completely put to the population, where they ended up - # in the mixture analysis. These second type of individuals will have a - # unit p-value. + # THUS, there are two types of individuals, who will not be analyzed with + # simulated allele frequencies: those who belonged to a mini-population + # which was removed, and those who have log-likelihood ratio less than 3. + # The value in the PARTITION for the first kind of individuals is 0. The + # second kind of individuals can be identified, because they do not + # belong to "to_investigate" array. When the results are presented, the + # first kind of individuals are omitted completely, while the second kind + # of individuals are completely put to the population, where they ended up + # in the mixture analysis. These second type of individuals will have a + # unit p-value. - # Simulate allele frequencies a given number of times and save the average - # result to "proportionsIt" array. + # Simulate allele frequencies a given number of times and save the average + # result to "proportionsIt" array. - proportionsIt <- zeros(ninds, npops) - for (iterationNum in 1:iterationCount) { - cat('Iter:', as.character(iterationNum)) - allfreqs <- simulateAllFreqs(noalle) # Allele frequencies on this iteration. + proportionsIt <- zeros(ninds, npops) + for (iterationNum in 1:iterationCount) { + cat("Iter:", as.character(iterationNum)) + allfreqs <- simulateAllFreqs(noalle) # Allele frequencies on this iteration. - for (ind in to_investigate) { - #disp(num2str(ind)); - omaFreqs <- computePersonalAllFreqs( - ind, data, allfreqs, rowsFromInd - ) + for (ind in to_investigate) { + # disp(num2str(ind)); + omaFreqs <- computePersonalAllFreqs( + ind, data, allfreqs, rowsFromInd + ) + osuusTaulu <- zeros(1, npops) + if (PARTITION[ind] == 0) { + # Yksil?on outlier + } else if (PARTITION[ind] != 0) { + if (PARTITION[ind] > 0) { + osuusTaulu[PARTITION[ind]] <- 1 + } else { + # Yksilöt, joita ei ole sijoitettu mihinkään koriin. + arvot <- zeros(1, npops) + for (q in 1:npops) { osuusTaulu <- zeros(1, npops) - if (PARTITION[ind] == 0) { - # Yksil?on outlier - } else if (PARTITION[ind] != 0) { - if (PARTITION[ind] > 0) { - osuusTaulu[PARTITION[ind]] <- 1 - } else { - # Yksilöt, joita ei ole sijoitettu mihinkään koriin. - arvot <- zeros(1, npops) - for (q in 1:npops) { - osuusTaulu <- zeros(1, npops) - osuusTaulu[q] <- 1 - arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu) - } - iso_arvo <- max(arvot) - isoimman_indeksi <- match(max(arvot), arvot) - osuusTaulu <- zeros(1, npops) - osuusTaulu[isoimman_indeksi] <- 1 - PARTITION[ind] <- isoimman_indeksi - } - logml <- computeIndLogml(omaFreqs, osuusTaulu) - - for (osuus in c(0.5, 0.25, 0.05, 0.01)) { - etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml) - osuusTaulu <- etsiResult[1] - logml <- etsiResult[2] - } - } - proportionsIt[ind, ] <- proportionsIt[ind, ] * (iterationNum - 1) + - osuusTaulu - proportionsIt[ind, ] <- proportionsIt[ind, ] / iterationNum + osuusTaulu[q] <- 1 + arvot[q] <- computeIndLogml(omaFreqs, osuusTaulu) + } + iso_arvo <- max(arvot) + isoimman_indeksi <- match(max(arvot), arvot) + osuusTaulu <- zeros(1, npops) + osuusTaulu[isoimman_indeksi] <- 1 + PARTITION[ind] <- isoimman_indeksi } + logml <- computeIndLogml(omaFreqs, osuusTaulu) + + for (osuus in c(0.5, 0.25, 0.05, 0.01)) { + etsiResult <- etsiParas(osuus, osuusTaulu, omaFreqs, logml) + osuusTaulu <- etsiResult[1] + logml <- etsiResult[2] + } + } + proportionsIt[ind, ] <- proportionsIt[ind, ] * (iterationNum - 1) + + osuusTaulu + proportionsIt[ind, ] <- proportionsIt[ind, ] / iterationNum } + } - #disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); - #disp('each population.'); + # disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']); + # disp('each population.'); - #allfreqs = simulateAllFreqs(noalle); # Simuloidaan alleelifrekvenssisetti - allfreqs <- computeAllFreqs2(noalle); # Koitetaan tällaista. + # allfreqs = simulateAllFreqs(noalle); # Simuloidaan alleelifrekvenssisetti + allfreqs <- computeAllFreqs2(noalle) # Koitetaan tällaista. - # Initialize the data structures, which are required in taking the missing - # data into account: - n_missing_levels <- zeros(npops, 1) # number of different levels of "missingness" in each pop (max 3). - missing_levels <- zeros(npops, 3) # the mean values for different levels. - missing_level_partition <- zeros(ninds, 1) # level of each individual (one of the levels of its population). - for (i in 1:npops) { - inds <- find(PARTITION == i) - # Proportions of non-missing data for the individuals: - non_missing_data <- zeros(length(inds), 1) - for (j in 1:length(inds)) { - ind <- inds[j] - non_missing_data[j] <- length( - find(data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0) - ) / (rowsFromInd * nloci) - } - if (all(non_missing_data > 0.9)) { - n_missing_levels[i] <- 1 - missing_levels[i, 1] <- mean(non_missing_data) - missing_level_partition[inds] <- 1 - } else { - # TODO: fix syntax - # [ordered, ordering] = sort(non_missing_data); - ordered <- ordering <- sort(non_missing_data) - #part = learn_simple_partition(ordered, 0.05); - part <- learn_partition_modified(ordered) - aux <- sortrows(cbind(part, ordering), 2) - part = aux[, 1] - missing_level_partition[inds]<- part - n_levels <- length(unique(part)) - n_missing_levels[i] <- n_levels - for (j in 1:n_levels) { - missing_levels[i, j] <- mean(non_missing_data[find(part == j)]) - } - } + # Initialize the data structures, which are required in taking the missing + # data into account: + n_missing_levels <- zeros(npops, 1) # number of different levels of "missingness" in each pop (max 3). + missing_levels <- zeros(npops, 3) # the mean values for different levels. + missing_level_partition <- zeros(ninds, 1) # level of each individual (one of the levels of its population). + for (i in 1:npops) { + inds <- find(PARTITION == i) + # Proportions of non-missing data for the individuals: + non_missing_data <- zeros(length(inds), 1) + for (j in 1:length(inds)) { + ind <- inds[j] + non_missing_data[j] <- length( + find(data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0) + ) / (rowsFromInd * nloci) } - - # Create and analyse reference individuals for populations - # with potentially admixed individuals: - refTaulu <- zeros(npops, 100, 3) - for (pop in t(admix_populaatiot)) { - - for (level in 1:n_missing_levels[pop]) { - - potential_inds_in_this_pop_and_level <- - find( - PARTITION == pop & missing_level_partition == level & - likelihood > 3 - ) # Potential admix individuals here. - - if (!isempty(potential_inds_in_this_pop_and_level)) { - - #refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); - refData <- simulateIndividuals( - nrefIndsInPop, rowsFromInd, allfreqs, pop, - missing_levels[pop, level] - ) - - cat( - 'Analysing the reference individuals from pop', pop, - '(level', level, ').' - ) - refProportions <- zeros(nrefIndsInPop, npops) - for (iter in 1:iterationCountRef) { - #disp(['Iter: ' num2str(iter)]); - allfreqs <- simulateAllFreqs(noalle) - - for (ind in 1:nrefIndsInPop) { - omaFreqs <- computePersonalAllFreqs( - ind, refData, allfreqs, rowsFromInd - ) - osuusTaulu <- zeros(1, npops) - osuusTaulu[pop] <- 1 - logml <- computeIndLogml(omaFreqs, osuusTaulu) - for (osuus in c(0.5, 0.25, 0.05, 0.01)) { - etsiResult <- etsiParas( - osuus, osuusTaulu, omaFreqs, logml - ) - osuusTaulu <- etsiResult[1] - logml <- etsiResult[2] - } - refProportions[ind, ] <- - refProportions[ind, ] * (iter - 1) + osuusTaulu - refProportions[ind, ] <- refProportions[ind, ] / iter - } - } - for (ind in 1:nrefIndsInPop) { - omanOsuus <- refProportions[ind, pop] - if (round(omanOsuus * 100) == 0) { - omanOsuus <- 0.01 - } - if (abs(omanOsuus) < 1e-5) { - omanOsuus <- 0.01 - } - refTaulu[pop, round(omanOsuus*100), level] <- - refTaulu[pop, round(omanOsuus*100),level] + 1 - } - } - } + if (all(non_missing_data > 0.9)) { + n_missing_levels[i] <- 1 + missing_levels[i, 1] <- mean(non_missing_data) + missing_level_partition[inds] <- 1 + } else { + # TODO: fix syntax + # [ordered, ordering] = sort(non_missing_data); + ordered <- ordering <- sort(non_missing_data) + # part = learn_simple_partition(ordered, 0.05); + part <- learn_partition_modified(ordered) + aux <- sortrows(cbind(part, ordering), 2) + part <- aux[, 1] + missing_level_partition[inds] <- part + n_levels <- length(unique(part)) + n_missing_levels[i] <- n_levels + for (j in 1:n_levels) { + missing_levels[i, j] <- mean(non_missing_data[find(part == j)]) + } } + } - # Rounding of the results: - proportionsIt <- proportionsIt * 100 - proportionsIt <- round(proportionsIt) - proportionsIt <- proportionsIt / 100 - for (ind in 1:ninds) { - if (!any(to_investigate == ind)) { - if (PARTITION[ind] > 0) { - proportionsIt[ind, PARTITION[ind]] <- 1 - } - } else { - # In case of a rounding error, the sum is made equal to unity by - # fixing the largest value. - if ((PARTITION[ind] > 0) & (sum(proportionsIt[ind, ]) != 1)) { - isoin <- max(proportionsIt[ind, ]) - indeksi <- match(isoin, max(proportionsIt[ind, ])) - erotus <- sum(proportionsIt[ind, ]) - 1 - proportionsIt[ind, indeksi] <- isoin - erotus - } - } - } + # Create and analyse reference individuals for populations + # with potentially admixed individuals: + refTaulu <- zeros(npops, 100, 3) + for (pop in t(admix_populaatiot)) { + for (level in 1:n_missing_levels[pop]) { + potential_inds_in_this_pop_and_level <- + find( + PARTITION == pop & missing_level_partition == level & + likelihood > 3 + ) # Potential admix individuals here. - # Calculate p-value for each individual: - uskottavuus <- zeros(ninds, 1) - for (ind in 1:ninds) { - pop <- PARTITION[ind] - if (pop == 0) { # Individual is outlier - uskottavuus[ind] <- 1 - } else if (isempty(find(to_investigate == ind))) { - # Individual had log-likelihood ratio<3 - uskottavuus[ind] <- 1 - } else { - omanOsuus <- proportionsIt[ind, pop] - if (abs(omanOsuus) < 1e-5) { - omanOsuus <- 0.01 - } - if (round(omanOsuus*100)==0) { - omanOsuus <- 0.01 - } - level <- missing_level_partition[ind] - refPienempia <- sum(refTaulu[pop, 1:round(100*omanOsuus), level]) - uskottavuus[ind] <- refPienempia / nrefIndsInPop - } - } + if (!isempty(potential_inds_in_this_pop_and_level)) { - tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount) # TODO: textual outputs. probably not necessary. translate nonetheless - viewPartition(proportionsIt, popnames) # TODO: adapt - - talle = inputdlg('Do you want to save the admixture results? [Y/n]', 'y') - if (talle %in% c('y', 'Y', 'yes', 'Yes')) { - #waitALittle; - filename <- inputdlg( - 'Save results as (file name):', 'admixture_results.rda' + # refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs); + refData <- simulateIndividuals( + nrefIndsInPop, rowsFromInd, allfreqs, pop, + missing_levels[pop, level] ) + cat( + "Analysing the reference individuals from pop", pop, + "(level", level, ")." + ) + refProportions <- zeros(nrefIndsInPop, npops) + for (iter in 1:iterationCountRef) { + # disp(['Iter: ' num2str(iter)]); + allfreqs <- simulateAllFreqs(noalle) - if (filename == 0) { - # Cancel was pressed - return() - } else { # copy 'baps4_output.baps' into the text file with the same name. - if (file.exists('baps4_output.baps')) { - file.copy('baps4_output.baps', paste0(filename, '.txt')) - file.remove('baps4_output.baps') + for (ind in 1:nrefIndsInPop) { + omaFreqs <- computePersonalAllFreqs( + ind, refData, allfreqs, rowsFromInd + ) + osuusTaulu <- zeros(1, npops) + osuusTaulu[pop] <- 1 + logml <- computeIndLogml(omaFreqs, osuusTaulu) + for (osuus in c(0.5, 0.25, 0.05, 0.01)) { + etsiResult <- etsiParas( + osuus, osuusTaulu, omaFreqs, logml + ) + osuusTaulu <- etsiResult[1] + logml <- etsiResult[2] } + refProportions[ind, ] <- + refProportions[ind, ] * (iter - 1) + osuusTaulu + refProportions[ind, ] <- refProportions[ind, ] / iter + } } - - if (!is(tietue, "list")) { - c$proportionsIt <- proportionsIt - c$pvalue <- uskottavuus # Added by Jing - c$mixtureType <- 'admix' # Jing - c$admixnpops <- npops; - save(c, file=filename) - } else { - tietue$proportionsIt <- proportionsIt - tietue$pvalue <- uskottavuus; # Added by Jing - tietue$mixtureType <- 'admix' - tietue$admixnpops <- npops - save(tietue, file=filename) + for (ind in 1:nrefIndsInPop) { + omanOsuus <- refProportions[ind, pop] + if (round(omanOsuus * 100) == 0) { + omanOsuus <- 0.01 + } + if (abs(omanOsuus) < 1e-5) { + omanOsuus <- 0.01 + } + refTaulu[pop, round(omanOsuus * 100), level] <- + refTaulu[pop, round(omanOsuus * 100), level] + 1 } + } } -} \ No newline at end of file + } + + # Rounding of the results: + proportionsIt <- proportionsIt * 100 + proportionsIt <- round(proportionsIt) + proportionsIt <- proportionsIt / 100 + for (ind in 1:ninds) { + if (!any(to_investigate == ind)) { + if (PARTITION[ind] > 0) { + proportionsIt[ind, PARTITION[ind]] <- 1 + } + } else { + # In case of a rounding error, the sum is made equal to unity by + # fixing the largest value. + if ((PARTITION[ind] > 0) & (sum(proportionsIt[ind, ]) != 1)) { + isoin <- max(proportionsIt[ind, ]) + indeksi <- match(isoin, max(proportionsIt[ind, ])) + erotus <- sum(proportionsIt[ind, ]) - 1 + proportionsIt[ind, indeksi] <- isoin - erotus + } + } + } + + # Calculate p-value for each individual: + uskottavuus <- zeros(ninds, 1) + for (ind in 1:ninds) { + pop <- PARTITION[ind] + if (pop == 0) { # Individual is outlier + uskottavuus[ind] <- 1 + } else if (isempty(find(to_investigate == ind))) { + # Individual had log-likelihood ratio<3 + uskottavuus[ind] <- 1 + } else { + omanOsuus <- proportionsIt[ind, pop] + if (abs(omanOsuus) < 1e-5) { + omanOsuus <- 0.01 + } + if (round(omanOsuus * 100) == 0) { + omanOsuus <- 0.01 + } + level <- missing_level_partition[ind] + refPienempia <- sum(refTaulu[pop, 1:round(100 * omanOsuus), level]) + uskottavuus[ind] <- refPienempia / nrefIndsInPop + } + } + + tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount) # TODO: textual outputs. probably not necessary. translate nonetheless + viewPartition(proportionsIt, popnames) # TODO: adapt + + talle <- inputdlg("Do you want to save the admixture results? [Y/n]", "y") + if (talle %in% c("y", "Y", "yes", "Yes")) { + # waitALittle; + filename <- inputdlg( + "Save results as (file name):", "admixture_results.rda" + ) + + + if (filename == 0) { + # Cancel was pressed + return() + } else { # copy 'baps4_output.baps' into the text file with the same name. + if (file.exists("baps4_output.baps")) { + file.copy("baps4_output.baps", paste0(filename, ".txt")) + file.remove("baps4_output.baps") + } + } + + if (!is(tietue, "list")) { + c$proportionsIt <- proportionsIt + c$pvalue <- uskottavuus # Added by Jing + c$mixtureType <- "admix" # Jing + c$admixnpops <- npops + save(c, file = filename) + } else { + tietue$proportionsIt <- proportionsIt + tietue$pvalue <- uskottavuus # Added by Jing + tietue$mixtureType <- "admix" + tietue$admixnpops <- npops + save(tietue, file = filename) + } + } +} diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R index c69dd63..a5e98b5 100644 --- a/R/admixture_initialization.R +++ b/R/admixture_initialization.R @@ -3,18 +3,18 @@ #' @param nclusters ncluster #' @param Z Z -admixture_initialization <- function (data_matrix, nclusters, Z) { - size_data <- size(data_matrix) - nloci <- size_data[2] - 1 - n <- max(data_matrix[, ncol(data_matrix)]) - T <- cluster_own(Z, nclusters) - initial_partition <- zeros(size_data[1], 1) - for (i in 1:n) { - kori <- T[i] - here <- find(data_matrix[, ncol(data_matrix)] == i) - for (j in 1:length(here)) { - initial_partition[here[j], 1] <- kori - } - } - return(initial_partition) -} \ No newline at end of file +admixture_initialization <- function(data_matrix, nclusters, Z) { + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, ncol(data_matrix)]) + T <- cluster_own(Z, nclusters) + initial_partition <- zeros(size_data[1], 1) + for (i in 1:n) { + kori <- T[i] + here <- find(data_matrix[, ncol(data_matrix)] == i) + for (j in 1:length(here)) { + initial_partition[here[j], 1] <- kori + } + } + return(initial_partition) +} diff --git a/R/arvoSeuraavaTi.R b/R/arvoSeuraavaTi.R index e276889..ccdfbd5 100644 --- a/R/arvoSeuraavaTi.R +++ b/R/arvoSeuraavaTi.R @@ -1,14 +1,14 @@ arvoSeuraavaTila <- function(muutokset, logml) { - # Suorittaa yksil�n seuraavan tilan arvonnan + # Suorittaa yksil�n seuraavan tilan arvonnan - y <- logml + muutokset # siirron j�lkeiset logml:t - y <- y - max(y) - y <- exp(y) - summa <- sum(y) - y <- y / summa - y <- cumsum(y) + y <- logml + muutokset # siirron j�lkeiset logml:t + y <- y - max(y) + y <- exp(y) + summa <- sum(y) + y <- y / summa + y <- cumsum(y) - i2 <- rand_disc(y) # uusi kori - suurin <- muutokset(i2) - return(list(suurin = suurin, i2 = i2)) + i2 <- rand_disc(y) # uusi kori + suurin <- muutokset(i2) + return(list(suurin = suurin, i2 = i2)) } diff --git a/R/blanks.R b/R/blanks.R index bd1409c..246c6fc 100644 --- a/R/blanks.R +++ b/R/blanks.R @@ -6,9 +6,9 @@ #' @author Waldir Leoncio #' @export blanks <- function(n) { - if (n < 0) { - warning("Negative n passed. Treating as n = 0") - n <- 0 - } - paste(rep(" ", n), collapse="") -} \ No newline at end of file + if (n < 0) { + warning("Negative n passed. Treating as n = 0") + n <- 0 + } + paste(rep(" ", n), collapse = "") +} diff --git a/R/calculatePopLogml.R b/R/calculatePopLogml.R index bcd7a44..2803697 100644 --- a/R/calculatePopLogml.R +++ b/R/calculatePopLogml.R @@ -1,20 +1,19 @@ #' @title Calculate log marginal likelihood -#' @description Calculates fuzzy (log) marginal likelihood for a population of +#' @description Calculates fuzzy (log) marginal likelihood for a population of #' real values using estimate "fii" for the dispersion value, and Jeffreys prior #' for the mean parameter. #' @param points points #' @param fii fii #' @export calculatePopLogml <- function(points, fii) { - n <- length(points) - fuzzy_ones <- sum(points) - fuzzy_zeros <- n - fuzzy_ones - val <- log_gamma(1) - - log_gamma(1 + n / fii) + - log_gamma(0.5 + fuzzy_ones / fii) + - log_gamma(0.5 + fuzzy_zeros / fii) - - log_gamma(0.5) - - log_gamma(0.5) - return(val) + n <- length(points) + fuzzy_ones <- sum(points) + fuzzy_zeros <- n - fuzzy_ones + val <- log_gamma(1) - + log_gamma(1 + n / fii) + + log_gamma(0.5 + fuzzy_ones / fii) + + log_gamma(0.5 + fuzzy_zeros / fii) - + log_gamma(0.5) - + log_gamma(0.5) + return(val) } - diff --git a/R/cell.R b/R/cell.R index a3cdedd..7febb6b 100644 --- a/R/cell.R +++ b/R/cell.R @@ -6,31 +6,31 @@ #' lengths) #' @param ... Other dimensions #' @return An array of zeroes with the dimensions passed on call -cell <- function(n, sz = c(n, n), expandable=FALSE, ...) { +cell <- function(n, sz = c(n, n), expandable = FALSE, ...) { - # Uglyly figuring out if the third arg is an extra dim --- # + # Uglyly figuring out if the third arg is an extra dim --- # - sz3 <- vector() - if (!is.logical(expandable)) { - sz3 <- expandable - expandable <- FALSE - } - args <- c(as.list(environment()), list(...)) - exp <- args$expandable - extra_dims <- c(sz3, args[names(args) == ""]) + sz3 <- vector() + if (!is.logical(expandable)) { + sz3 <- expandable + expandable <- FALSE + } + args <- c(as.list(environment()), list(...)) + exp <- args$expandable + extra_dims <- c(sz3, args[names(args) == ""]) - # Creating output vector --------------------------------- # + # Creating output vector --------------------------------- # - if (exp) { - return(vector("list", length = n)) - } - if (length(sz) == 1 & length(extra_dims) == 0) { - return(array(0, dim = c(n, sz))) - } else if (length(extra_dims) > 0) { - return(array(0, dim = c(n, sz, extra_dims))) - } else if (length(sz) == 2) { - return(array(0, dim = sz)) - } else { - return(array(0, dim = c(n, sz, ...))) - } -} \ No newline at end of file + if (exp) { + return(vector("list", length = n)) + } + if (length(sz) == 1 & length(extra_dims) == 0) { + return(array(0, dim = c(n, sz))) + } else if (length(extra_dims) > 0) { + return(array(0, dim = c(n, sz, extra_dims))) + } else if (length(sz) == 2) { + return(array(0, dim = sz)) + } else { + return(array(0, dim = c(n, sz, ...))) + } +} diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index 3a7746c..4899e17 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -1,7 +1,7 @@ clearGlobalVars <- function() { - COUNTS <- vector() - SUMCOUNTS <- vector() - PARTITION <- vector() - POP_LOGML <- vector() - LOGDIFF <- vector() -} \ No newline at end of file + COUNTS <- vector() + SUMCOUNTS <- vector() + PARTITION <- vector() + POP_LOGML <- vector() + LOGDIFF <- vector() +} diff --git a/R/cluster_own.R b/R/cluster_own.R index bab7bb9..e3a5125 100644 --- a/R/cluster_own.R +++ b/R/cluster_own.R @@ -1,35 +1,35 @@ cluster_own <- function(Z, nclust) { - true <- TRUE - false <- FALSE - maxclust <- nclust - # % Start of algorithm - m <- size(Z, 1) + 1 - T <- zeros(m, 1) - # % maximum number of clusters based on inconsistency - if (m <= maxclust) { - T = t((1:m)) - } else if (maxclust == 1) { - T <- ones(m, 1) - } else { - clsnum <- 1 - for (k in (m - maxclust + 1):(m - 1)) { - i = Z[k, 1] # left tree - if (i <= m) { # original node, no leafs - T[i] = clsnum - clsnum = clsnum + 1 - } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree - T <- clusternum(Z, T, i - m, clsnum) - clsnum <- clsnum + 1 - } - i <- Z[k, 2] # right tree - if (i <= m) { # original node, no leafs - T[i] <- clsnum - clsnum <- clsnum + 1 - } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree - T <- clusternum(Z, T, i - m, clsnum) - clsnum <- clsnum + 1 - } - } - } - return(T) + true <- TRUE + false <- FALSE + maxclust <- nclust + # % Start of algorithm + m <- size(Z, 1) + 1 + T <- zeros(m, 1) + # % maximum number of clusters based on inconsistency + if (m <= maxclust) { + T <- t((1:m)) + } else if (maxclust == 1) { + T <- ones(m, 1) + } else { + clsnum <- 1 + for (k in (m - maxclust + 1):(m - 1)) { + i <- Z[k, 1] # left tree + if (i <= m) { # original node, no leafs + T[i] <- clsnum + clsnum <- clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + i <- Z[k, 2] # right tree + if (i <= m) { # original node, no leafs + T[i] <- clsnum + clsnum <- clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + } + } + return(T) } diff --git a/R/clusternum.R b/R/clusternum.R index 9f9cd5f..1dceb51 100644 --- a/R/clusternum.R +++ b/R/clusternum.R @@ -1,14 +1,14 @@ clusternum <- function(X, T, k, c) { - m <- size(X, 1) + 1 - while (!isempty(k)) { - # Get the children of nodes at this level - children <- X[k, 1:2] + m <- size(X, 1) + 1 + while (!isempty(k)) { + # Get the children of nodes at this level + children <- X[k, 1:2] - # Assign this node number to leaf children - t <- (children <= m) - T[children[t]] <- c - # Move to next level - k <- children[!t] - m - } - return(T) -} \ No newline at end of file + # Assign this node number to leaf children + t <- (children <= m) + T[children[t]] <- c + # Move to next level + k <- children[!t] - m + } + return(T) +} diff --git a/R/colon.R b/R/colon.R index abe7a12..c4c78d2 100644 --- a/R/colon.R +++ b/R/colon.R @@ -4,9 +4,9 @@ #' @param b final number #' @export colon <- function(a, b) { - if (a <= b) { - return(a:b) - } else { - return(vector(mode = "numeric")) - } -} \ No newline at end of file + if (a <= b) { + return(a:b) + } else { + return(vector(mode = "numeric")) + } +} diff --git a/R/computeAllFreqs2.R b/R/computeAllFreqs2.R index f34f2dd..d6f623a 100644 --- a/R/computeAllFreqs2.R +++ b/R/computeAllFreqs2.R @@ -3,28 +3,28 @@ #' j 1/noalle(j) verran. #' @param noalle noalle #' @export -computeAllFreqs2 <- function (noalle) { - COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS) - SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS) - max_noalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - npops <- size(COUNTS, 3) - sumCounts <- SUMCOUNTS + ones(size(SUMCOUNTS)) - sumCounts <- reshape(t(sumCounts), c(1, nloci, npops)) - sumCounts <- repmat(sumCounts, c(max_noalle, 1, 1)) +computeAllFreqs2 <- function(noalle) { + COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS) + SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS) + max_noalle <- size(COUNTS, 1) + nloci <- size(COUNTS, 2) + npops <- size(COUNTS, 3) + sumCounts <- SUMCOUNTS + ones(size(SUMCOUNTS)) + sumCounts <- reshape(t(sumCounts), c(1, nloci, npops)) + sumCounts <- repmat(sumCounts, c(max_noalle, 1, 1)) - prioriAlleelit <- zeros(max_noalle, nloci) - if (nloci > 0) { - for (j in 1:nloci) { - prioriAlleelit[1:noalle[j], j] <- 1 / noalle[j] - } + prioriAlleelit <- zeros(max_noalle, nloci) + if (nloci > 0) { + for (j in 1:nloci) { + prioriAlleelit[1:noalle[j], j] <- 1 / noalle[j] } - prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops)) - counts <- ifelse( - test = isGlobalEmpty(COUNTS), - yes = prioriAlleelit, - no = COUNTS + prioriAlleelit - ) - allFreqs <- counts / drop(sumCounts) - return(allFreqs) -} \ No newline at end of file + } + prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops)) + counts <- ifelse( + test = isGlobalEmpty(COUNTS), + yes = prioriAlleelit, + no = COUNTS + prioriAlleelit + ) + allFreqs <- counts / drop(sumCounts) + return(allFreqs) +} diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R index 40055d8..103261a 100644 --- a/R/computeDiffInCounts.R +++ b/R/computeDiffInCounts.R @@ -1,23 +1,23 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) { - # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien - # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n - # % riveill� rows. rows pit�� olla vaakavektori. + # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien + # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n + # % riveill� rows. rows pit�� olla vaakavektori. - diffInCounts <- zeros(max_noalle, nloci) - for (i in seq_len(nrow(data))) { - row <- data[i, ] - notEmpty <- as.matrix(find(row>=0)) + diffInCounts <- zeros(max_noalle, nloci) + for (i in seq_len(nrow(data))) { + row <- data[i, ] + notEmpty <- as.matrix(find(row >= 0)) - if (length(notEmpty) > 0) { - diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <- - diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1 - } - } - diffInCounts <- matrix( - data = diffInCounts[!is.na(diffInCounts)], - nrow = max_noalle, - ncol = nloci, - byrow = TRUE - ) - return(diffInCounts) + if (length(notEmpty) > 0) { + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <- + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1 + } + } + diffInCounts <- matrix( + data = diffInCounts[!is.na(diffInCounts)], + nrow = max_noalle, + ncol = nloci, + byrow = TRUE + ) + return(diffInCounts) } diff --git a/R/computeIndLogml.R b/R/computeIndLogml.R index 0e6d08c..d928e6b 100644 --- a/R/computeIndLogml.R +++ b/R/computeIndLogml.R @@ -4,24 +4,24 @@ #' @param omaFreqs own Freqs? #' @param osuusTaulu Percentage table? #' @export -computeIndLogml <- function (omaFreqs, osuusTaulu) { - omaFreqs <- as.matrix(omaFreqs) - osuusTaulu <- as.matrix(osuusTaulu) +computeIndLogml <- function(omaFreqs, osuusTaulu) { + omaFreqs <- as.matrix(omaFreqs) + osuusTaulu <- as.matrix(osuusTaulu) - apu <- repmat(t(osuusTaulu), c(1, dim(omaFreqs)[2])) - apu <- times(apu, omaFreqs) # c() avoids deprecation error re. matrix ops - if (length(apu) > 1) { - apu <- colSums(as.matrix(apu)) - } else { - apu <- sum(apu) - } + apu <- repmat(t(osuusTaulu), c(1, dim(omaFreqs)[2])) + apu <- times(apu, omaFreqs) # c() avoids deprecation error re. matrix ops + if (length(apu) > 1) { + apu <- colSums(as.matrix(apu)) + } else { + apu <- sum(apu) + } - if (any(apu < 0)) { - # Workaround for log of a negative number - apu <- as.complex(apu) - } - apu <- log(apu) + if (any(apu < 0)) { + # Workaround for log of a negative number + apu <- as.complex(apu) + } + apu <- log(apu) - loggis <- sum(apu) - return (loggis) -} \ No newline at end of file + loggis <- sum(apu) + return(loggis) +} diff --git a/R/computeLogml.R b/R/computeLogml.R index 795925c..b67fb87 100644 --- a/R/computeLogml.R +++ b/R/computeLogml.R @@ -1,27 +1,27 @@ computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) { - nloci <- size(counts, 2) - npops <- size(counts, 3) - adjnoalle <- zeros(max(noalle), nloci) - for (j in 1:nloci) { - adjnoalle[1:noalle[j], j] <- noalle(j) - if ((noalle(j)= 0) { - if (pointer > ncol(omaFreqs)) omaFreqs <- cbind(omaFreqs, 0) - omaFreqs[, pointer] <- tryCatch( - matrix( - data = as.matrix(t(allFreqs))[rows[all, loc], loc], - nrow = npops - ), - error = function(e) return(NA) - ) - } else { - omaFreqs[, pointer] <- ones(npops, 1) - } - # omaFreqs <- unname(cbind(omaFreqs, new_omaFreqs)) - pointer <- pointer + 1 - } + rows <- as.matrix(t(data))[computeRows(rowsFromInd, ind, 1), , drop = FALSE] + omaFreqs <- zeros(npops, rowsFromInd * nloci) + pointer <- 1 + for (loc in 1:dim(rows)[2]) { + for (all in 1:dim(rows)[1]) { + if (rows[all, loc] >= 0) { + if (pointer > ncol(omaFreqs)) omaFreqs <- cbind(omaFreqs, 0) + omaFreqs[, pointer] <- tryCatch( + matrix( + data = as.matrix(t(allFreqs))[rows[all, loc], loc], + nrow = npops + ), + error = function(e) { + return(NA) + } + ) + } else { + omaFreqs[, pointer] <- ones(npops, 1) + } + # omaFreqs <- unname(cbind(omaFreqs, new_omaFreqs)) + pointer <- pointer + 1 } - omaFreqs <- omaFreqs[, !is.na(omaFreqs)] - return(omaFreqs) -} \ No newline at end of file + } + omaFreqs <- omaFreqs[, !is.na(omaFreqs)] + return(omaFreqs) +} diff --git a/R/computePopulationLogml.R b/R/computePopulationLogml.R index e598e05..afe3a5e 100644 --- a/R/computePopulationLogml.R +++ b/R/computePopulationLogml.R @@ -1,36 +1,36 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) { - # Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset + # Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset - # ======================================================== # - # Limiting COUNTS size # - # ======================================================== # - COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop=FALSE] + # ======================================================== # + # Limiting COUNTS size # + # ======================================================== # + COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE] - x <- size(COUNTS, 1) - y <- size(COUNTS, 2) - z <- length(pops) + x <- size(COUNTS, 1) + y <- size(COUNTS, 2) + z <- length(pops) - # ======================================================== # - # Computation # - # ======================================================== # - isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2 - # FIXME: 3rd dimension of COUNTS getting dropped - term1 <- squeeze( - sum( - sum( - reshape( - lgamma( - repmat(adjprior, c(1, 1, length(pops))) + - COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop=!isarray] - ), - c(x, y, z) - ), - 1 - ), - 2 - ) - ) - if (is.null(priorTerm)) priorTerm <- 0 - popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm - return(popLogml) -} \ No newline at end of file + # ======================================================== # + # Computation # + # ======================================================== # + isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2 + # FIXME: 3rd dimension of COUNTS getting dropped + term1 <- squeeze( + sum( + sum( + reshape( + lgamma( + repmat(adjprior, c(1, 1, length(pops))) + + COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = !isarray] + ), + c(x, y, z) + ), + 1 + ), + 2 + ) + ) + if (is.null(priorTerm)) priorTerm <- 0 + popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm + return(popLogml) +} diff --git a/R/computeRows.R b/R/computeRows.R index 51b4c0f..ab95da9 100644 --- a/R/computeRows.R +++ b/R/computeRows.R @@ -6,18 +6,19 @@ #' @param ninds ninds #' @export computeRows <- function(rowsFromInd, inds, ninds) { - if (!is(inds, "matrix")) inds <- as.matrix(inds) - if (identical(dim(inds), c(nrow(inds), 1L))) { - # Special treatment for vectors because R has col vectors by default, - # whereas Matlab has row vectors by default. - inds <- t(inds) - if (ninds == 0) return(matrix(, 1, 0)) + if (!is(inds, "matrix")) inds <- as.matrix(inds) + if (identical(dim(inds), c(nrow(inds), 1L))) { + # Special treatment for vectors because R has col vectors by default, + # whereas Matlab has row vectors by default. + inds <- t(inds) + if (ninds == 0) { + return(matrix(, 1, 0)) } - rows <- inds[, rep(1, rowsFromInd)] - rows <- rows * rowsFromInd - miinus <- repmat(t((rowsFromInd - 1):0), c(ninds, 1)) - rows <- rows - miinus - rows <- matrix(t(rows), c(1, rowsFromInd * ninds)) - return(t(rows)) + } + rows <- inds[, rep(1, rowsFromInd)] + rows <- rows * rowsFromInd + miinus <- repmat(t((rowsFromInd - 1):0), c(ninds, 1)) + rows <- rows - miinus + rows <- matrix(t(rows), c(1, rowsFromInd * ninds)) + return(t(rows)) } - diff --git a/R/displine.R b/R/displine.R index 145d558..3d17878 100644 --- a/R/displine.R +++ b/R/displine.R @@ -1,4 +1,4 @@ #' @title Print a separator line dispLine <- function() { - cat('---------------------------------------------------\n') -} \ No newline at end of file + cat("---------------------------------------------------\n") +} diff --git a/R/etsiParas.R b/R/etsiParas.R index 98606e7..6de1c0d 100644 --- a/R/etsiParas.R +++ b/R/etsiParas.R @@ -5,26 +5,26 @@ #' @param omaFreqs own Freqs? #' @param osuusTaulu Percentage table? #' @param logml log maximum likelihood -etsiParas <- function (osuus, osuusTaulu, omaFreqs, logml) { - ready <- 0 - while (ready != 1) { - muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) +etsiParas <- function(osuus, osuusTaulu, omaFreqs, logml) { + ready <- 0 + while (ready != 1) { + muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) - # Work around R's max() limitation on complex numbers - if (any(sapply(muutokset, class) == "complex")) { - maxRe <- max(Re(as.vector(muutokset))) - maxIm <- max(Im(as.vector(muutokset))) - maxMuutos <- complex(real = maxRe, imaginary = maxIm) - } else { - maxMuutos <- max(as.vector(muutokset)) - } - indeksi <- which(muutokset == maxMuutos) - if (Re(maxMuutos) > 0) { - osuusTaulu <- suoritaMuutos(osuusTaulu, osuus, indeksi) - logml <- logml + maxMuutos - } else { - ready <- 1 - } + # Work around R's max() limitation on complex numbers + if (any(sapply(muutokset, class) == "complex")) { + maxRe <- max(Re(as.vector(muutokset))) + maxIm <- max(Im(as.vector(muutokset))) + maxMuutos <- complex(real = maxRe, imaginary = maxIm) + } else { + maxMuutos <- max(as.vector(muutokset)) } - return (c(osuusTaulu, logml)) -} \ No newline at end of file + indeksi <- which(muutokset == maxMuutos) + if (Re(maxMuutos) > 0) { + osuusTaulu <- suoritaMuutos(osuusTaulu, osuus, indeksi) + logml <- logml + maxMuutos + } else { + ready <- 1 + } + } + return(c(osuusTaulu, logml)) +} diff --git a/R/fgetl-fopen.R b/R/fgetl-fopen.R index 176a330..d8b4740 100644 --- a/R/fgetl-fopen.R +++ b/R/fgetl-fopen.R @@ -6,15 +6,17 @@ #' @seealso fopen #' @export fgetl <- function(file) { - # ========================================================================== - # Validation - # ========================================================================== - if (length(file) <= 1) return(-1) - # ========================================================================== - # Returning file minus the first line - # ========================================================================== - out <- file[-1] - return(out) + # ========================================================================== + # Validation + # ========================================================================== + if (length(file) <= 1) { + return(-1) + } + # ========================================================================== + # Returning file minus the first line + # ========================================================================== + out <- file[-1] + return(out) } #' @title Open file @@ -24,4 +26,4 @@ fgetl <- function(file) { #' @author Waldir Leoncio #' @seealso fgetl #' @export -fopen <- function(filename) readLines(filename) \ No newline at end of file +fopen <- function(filename) readLines(filename) diff --git a/R/find.R b/R/find.R index 5e5efff..4c7e42f 100644 --- a/R/find.R +++ b/R/find.R @@ -2,14 +2,14 @@ #' @description Emulates behavior of `find` #' @param x object or logic operation on an object #' @param sort sort output? -find <- function(x, sort=TRUE) { - if (is.logical(x)) { - out <- which(x) - } else { - out <- which(x > 0) - } - if (sort) { - out <- sort(out) - } - return(out) -} \ No newline at end of file +find <- function(x, sort = TRUE) { + if (is.logical(x)) { + out <- which(x) + } else { + out <- which(x > 0) + } + if (sort) { + out <- sort(out) + } + return(out) +} diff --git a/R/findEmptyPop.R b/R/findEmptyPop.R index e62b5e3..8d57702 100644 --- a/R/findEmptyPop.R +++ b/R/findEmptyPop.R @@ -1,12 +1,12 @@ findEmptyPop <- function(npops) { - # % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� - # % populaatioita ei ole, palauttaa -1:n. - pops <- t(unique(PARTITION)) - if (length(pops) == npops) { - emptyPop <- -1 - } else { - popDiff <- diff(c(0, pops, npops + 1)) - emptyPop <- min(find(popDiff > 1)) - } - return(list(emptyPop = emptyPop, pops = pops)) + # % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� + # % populaatioita ei ole, palauttaa -1:n. + pops <- t(unique(PARTITION)) + if (length(pops) == npops) { + emptyPop <- -1 + } else { + popDiff <- diff(c(0, pops, npops + 1)) + emptyPop <- min(find(popDiff > 1)) + } + return(list(emptyPop = emptyPop, pops = pops)) } diff --git a/R/fix.R b/R/fix.R index 50ec46e..988b6e4 100644 --- a/R/fix.R +++ b/R/fix.R @@ -2,4 +2,4 @@ #' @description Rounds each element of input to the nearest integer towards zero. Basically the same as trunc() #' @param X input element #' @author Waldir Leoncio -fix <- function(X) trunc(X) \ No newline at end of file +fix <- function(X) trunc(X) diff --git a/R/getDistances.R b/R/getDistances.R index 853be3b..1919b17 100644 --- a/R/getDistances.R +++ b/R/getDistances.R @@ -1,43 +1,43 @@ getDistances <- function(data_matrix, nclusters) { - # %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance - # %gives partition in 8 - bit format - # %allocates all alleles of a single individual into the same basket - # %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row, - # %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row - # %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. + # %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance + # %gives partition in 8 - bit format + # %allocates all alleles of a single individual into the same basket + # %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row, + # %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row + # %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - size_data <- size(data_matrix) - nloci <- size_data[2] - 1 - n <- max(data_matrix[, ncol(data_matrix)]) - distances <- zeros(choose(n, 2), 1) - pointer <- 1 - for (i in 1:n - 1) { - i_data <- data_matrix[ - find(data_matrix[, ncol(data_matrix)] == i), - 1:nloci - ] - for (j in (i + 1):n) { - d_ij <- 0 - j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci] - vertailuja <- 0 - for (k in 1:size(i_data, 1)) { - for (l in 1:size(j_data, 1)) { - here_i <- find(i_data[k, ] >= 0) - here_j <- find(j_data[l, ] >= 0) - here_joint <- intersect(here_i, here_j) - vertailuja <- vertailuja + length(here_joint) - d_ij <- d_ij + length( - find(i_data[k, here_joint] != j_data[l, here_joint]) - ) - } - } - d_ij <- d_ij / vertailuja - distances[pointer] <- d_ij - pointer <- pointer + 1 - } - } + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, ncol(data_matrix)]) + distances <- zeros(choose(n, 2), 1) + pointer <- 1 + for (i in 1:n - 1) { + i_data <- data_matrix[ + find(data_matrix[, ncol(data_matrix)] == i), + 1:nloci + ] + for (j in (i + 1):n) { + d_ij <- 0 + j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci] + vertailuja <- 0 + for (k in 1:size(i_data, 1)) { + for (l in 1:size(j_data, 1)) { + here_i <- find(i_data[k, ] >= 0) + here_j <- find(j_data[l, ] >= 0) + here_joint <- intersect(here_i, here_j) + vertailuja <- vertailuja + length(here_joint) + d_ij <- d_ij + length( + find(i_data[k, here_joint] != j_data[l, here_joint]) + ) + } + } + d_ij <- d_ij / vertailuja + distances[pointer] <- d_ij + pointer <- pointer + 1 + } + } - Z <- linkage(t(distances)) - return(list(Z = Z, distances = distances)) + Z <- linkage(t(distances)) + return(list(Z = Z, distances = distances)) } diff --git a/R/globals.R b/R/globals.R index 281a3f4..300a358 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,12 +1,12 @@ -COUNTS <- array(0, dim=c(100, 100, 100)) -SUMCOUNTS <- array(0, dim=c(100, 100)) -PARTITION <- array(1, dim=c(100)) -POP_LOGML <- array(1, dim=c(100)) -LOGDIFF <- array(1, dim=c(100, 100)) +COUNTS <- array(0, dim = c(100, 100, 100)) +SUMCOUNTS <- array(0, dim = c(100, 100)) +PARTITION <- array(1, dim = c(100)) +POP_LOGML <- array(1, dim = c(100)) +LOGDIFF <- array(1, dim = c(100, 100)) # If handling globas break, try other ideas from https://stackoverflow.com/a/65252740/1169233 #' @import utils utils::globalVariables( - c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN") + c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN") ) diff --git a/R/greedyMix.R b/R/greedyMix.R index 4be9387..3988510 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -9,34 +9,34 @@ #' with high-throughput sequencing data. #' @export greedyMix <- function(data, format, verbose = TRUE) { - # Parsing data format ------------------------------------------------------ + # Parsing data format ------------------------------------------------------ - if (missing(format)) { - format <- gsub(".*\\.(.+)$", "\\1", data) - message("Format not provided. Guessing from file extension: ", format) - } - format <- tolower(format) + if (missing(format)) { + format <- gsub(".*\\.(.+)$", "\\1", data) + message("Format not provided. Guessing from file extension: ", format) + } + format <- tolower(format) - # Dispatching to proper loading function ----------------------------------- + # Dispatching to proper loading function ----------------------------------- - if (format == "fasta") { - out <- load_fasta(data) - } else if (format == "vcf") { - out <- vcfR::read.vcfR(data, verbose = verbose) - } else if (format == "sam") { - stop( - "SAM files not directly supported. ", - "Install the samtools software and execute\n\n", - "samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ", - "and try running this function again with 'format=BAM'" - ) - } else if (format == "bam") { - out <- Rsamtools::scanBam(data) - } else if (format == "genepop") { - # TODO #19: implement load_genepop() - stop("GenePop files not yet supported." ) - } else { - stop("Format not supported.") - } - return(out) -} \ No newline at end of file + if (format == "fasta") { + out <- load_fasta(data) + } else if (format == "vcf") { + out <- vcfR::read.vcfR(data, verbose = verbose) + } else if (format == "sam") { + stop( + "SAM files not directly supported. ", + "Install the samtools software and execute\n\n", + "samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ", + "and try running this function again with 'format=BAM'" + ) + } else if (format == "bam") { + out <- Rsamtools::scanBam(data) + } else if (format == "genepop") { + # TODO #19: implement load_genepop() + stop("GenePop files not yet supported.") + } else { + stop("Format not supported.") + } + return(out) +} diff --git a/R/handleData.R b/R/handleData.R index 0cbe633..a65781e 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -10,93 +10,93 @@ #' codes get values between? 1, ..., Noah (j). #' @export handleData <- function(raw_data) { - # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt? - # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako - # rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan - # tiet?? onko kyseess?haploidi, diploidi jne... T?m?n j?lkeen funktio - # lis?? tyhji?rivej?niille yksil?ille, joilta on per?isin v?hemm?n - # rivej?kuin maksimim??r? - # Mik?li jonkin alleelin koodi on =0, funktio muuttaa t?m?n alleelin - # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi. - # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j - # koodit saavat arvoja v?lill?1,...,noalle(j). - data <- as.matrix(raw_data) - nloci <- size(raw_data, 2) - 1 + # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt? + # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako + # rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan + # tiet?? onko kyseess?haploidi, diploidi jne... T?m?n j?lkeen funktio + # lis?? tyhji?rivej?niille yksil?ille, joilta on per?isin v?hemm?n + # rivej?kuin maksimim??r? + # Mik?li jonkin alleelin koodi on =0, funktio muuttaa t?m?n alleelin + # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi. + # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j + # koodit saavat arvoja v?lill?1,...,noalle(j). + data <- as.matrix(raw_data) + nloci <- size(raw_data, 2) - 1 - dataApu <- data[, 1:nloci] - nollat <- find(dataApu == 0) - if (!isempty(nollat)) { - isoinAlleeli <- max(max(dataApu)) - dataApu[nollat] <- isoinAlleeli + 1 - data[, 1:nloci] <- dataApu - } - # dataApu <- [] - # nollat <- [] - # isoinAlleeli <- [] + dataApu <- data[, 1:nloci] + nollat <- find(dataApu == 0) + if (!isempty(nollat)) { + isoinAlleeli <- max(max(dataApu)) + dataApu[nollat] <- isoinAlleeli + 1 + data[, 1:nloci] <- dataApu + } + # dataApu <- [] + # nollat <- [] + # isoinAlleeli <- [] - noalle <- zeros(1, nloci) - alleelitLokuksessa <- cell(nloci, 1, expandable=TRUE) - for (i in 1:nloci) { - alleelitLokuksessaI <- unique(data[, i]) - alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[ - find( - alleelitLokuksessaI >= 0 - ) - ]) - noalle[i] <- length(alleelitLokuksessa[[i]]) - } - alleleCodes <- zeros(max(noalle), nloci) - for (i in 1:nloci) { - alleelitLokuksessaI <- alleelitLokuksessa[[i]] - puuttuvia <- max(noalle) - length(alleelitLokuksessaI) - alleleCodes[, i] <- as.matrix( - c(alleelitLokuksessaI, zeros(puuttuvia, 1)) - ) - } + noalle <- zeros(1, nloci) + alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) + for (i in 1:nloci) { + alleelitLokuksessaI <- unique(data[, i]) + alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[ + find( + alleelitLokuksessaI >= 0 + ) + ]) + noalle[i] <- length(alleelitLokuksessa[[i]]) + } + alleleCodes <- zeros(max(noalle), nloci) + for (i in 1:nloci) { + alleelitLokuksessaI <- alleelitLokuksessa[[i]] + puuttuvia <- max(noalle) - length(alleelitLokuksessaI) + alleleCodes[, i] <- as.matrix( + c(alleelitLokuksessaI, zeros(puuttuvia, 1)) + ) + } - for (loc in seq_len(nloci)) { - for (all in seq_len(noalle[loc])) { - data[find(data[, loc] == alleleCodes[all, loc]), loc] <- all - } - } + for (loc in seq_len(nloci)) { + for (all in seq_len(noalle[loc])) { + data[find(data[, loc] == alleleCodes[all, loc]), loc] <- all + } + } - nind <- max(data[, ncol(data)]) - nrows <- size(data, 1) - ncols <- size(data, 2) - rowsFromInd <- zeros(nind, 1) - for (i in 1:nind) { - rowsFromInd[i] <- length(find(data[, ncol(data)] == i)) - } - maxRowsFromInd <- max(rowsFromInd) - a <- -999 - emptyRow <- repmat(a, c(1, ncols)) - lessThanMax <- find(rowsFromInd < maxRowsFromInd) - missingRows <- maxRowsFromInd * nind - nrows - data <- rbind(data, zeros(missingRows, ncols)) - pointer <- 1 - for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej? - miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm. - } - data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti - newData <- data - rowsFromInd <- maxRowsFromInd + nind <- max(data[, ncol(data)]) + nrows <- size(data, 1) + ncols <- size(data, 2) + rowsFromInd <- zeros(nind, 1) + for (i in 1:nind) { + rowsFromInd[i] <- length(find(data[, ncol(data)] == i)) + } + maxRowsFromInd <- max(rowsFromInd) + a <- -999 + emptyRow <- repmat(a, c(1, ncols)) + lessThanMax <- find(rowsFromInd < maxRowsFromInd) + missingRows <- maxRowsFromInd * nind - nrows + data <- rbind(data, zeros(missingRows, ncols)) + pointer <- 1 + for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej? + miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm. + } + data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti + newData <- data + rowsFromInd <- maxRowsFromInd - adjprior <- zeros(max(noalle), nloci) - priorTerm <- 0 - for (j in 1:nloci) { - adjprior[, j] <- as.matrix(c( - repmat(1 / noalle[j], c(noalle[j], 1)), - ones(max(noalle) - noalle[j], 1) - )) - priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j]) - } - out <- list( - newData = newData, - rowsFromInd = rowsFromInd, - alleleCodes = alleleCodes, - noalle = noalle, - adjprior = adjprior, - priorTerm = priorTerm - ) - return(out) -} \ No newline at end of file + adjprior <- zeros(max(noalle), nloci) + priorTerm <- 0 + for (j in 1:nloci) { + adjprior[, j] <- as.matrix(c( + repmat(1 / noalle[j], c(noalle[j], 1)), + ones(max(noalle) - noalle[j], 1) + )) + priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j]) + } + out <- list( + newData = newData, + rowsFromInd = rowsFromInd, + alleleCodes = alleleCodes, + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm + ) + return(out) +} diff --git a/R/indMix.R b/R/indMix.R index 99ac0ad..5cc9811 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -1,564 +1,556 @@ -indMix <- function(c, npops, dispText=TRUE) { - # Greedy search algorithm with unknown number of classes for regular - # clustering. - # Input npops is not used if called by greedyMix or greedyPopMix. - - logml <- 1 - clearGlobalVars() - - noalle <- c$noalle - rows <- c$rows - data <- c$data - - adjprior <- c$adjprior - priorTerm <- c$priorTerm - rowsFromInd <- c$rowsFromInd - - if (isfield(c, 'dist')) { - dist <- c$dist - Z <- c$Z - } - - rm(c) - nargin <- length(as.list(match.call())) - 1 - if (nargin < 2) { - dispText <- 1 - npopstext <- matrix() - ready <- FALSE - teksti <- 'Input upper bound to the number of populations (possibly multiple values)' - while (!ready) { - npopstextExtra <- inputdlg(teksti, 1, '20') - if (isempty(npopstextExtra)) { # Painettu Cancel:ia - warnings("Empty value provided") - return() - } - npopstextExtra <- npopstextExtra[[1]] - if (length(npopstextExtra)>=255) { - npopstextExtra <- npopstextExtra[1:255] - npopstext <- c(npopstext, ' ', npopstextExtra) - teksti <- 'The input field length limit (255 characters) was reached. Input more values: ' - } else { - npopstext <- as.numeric(strsplit(as.character(npopstextExtra), " ")[[1]]) - ready <- TRUE - } - } - rm(ready, teksti) - if (isempty(npopstext) | length(npopstext) == 1) { - warning("Empty or 1-length vector provided") - return() - } else { - npopsTaulu <- as.numeric(npopstext) - ykkoset <- find(npopsTaulu == 1) - npopsTaulu[ykkoset] <- NA # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan (if ones are given as an upper limit, they are deleted) - if (isempty(npopsTaulu)) { - logml <- 1 - partitionSummary <- 1 - npops <- 1 - return() - } - rm(ykkoset) - } - } else { - npopsTaulu <- npops - } - - nruns <- length(npopsTaulu) - - initData <- data - data <- data[,1:(ncol(data) - 1)] - - logmlBest <- -1e50 - partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) - partitionSummary[,1] <- zeros(30, 1) - worstLogml <- -1e50 - worstIndex <- 1 - for (run in 1:nruns) { - npops <- npopsTaulu[[run]] - if (dispText) { - dispLine() - cat( - 'Run ', as.character(run), '/', as.character(nruns), - ', maximum number of populations ', as.character(npops), - '.\n', - sep = "" - ) - } - ninds <- size(rows, 1) - initialPartition <- admixture_initialization(initData, npops, Z) - sumcounts_counts_logml <- initialCounts( - initialPartition, data, npops, rows, noalle, adjprior - ) - sumcounts <- sumcounts_counts_logml$sumcounts - counts <- sumcounts_counts_logml$counts - logml <- sumcounts_counts_logml$logml - - PARTITION <- zeros(ninds, 1) - for (i in 1:ninds) { - apu <- rows[i] - PARTITION[i] <- initialPartition[apu[1]] - } - - COUNTS <- counts - SUMCOUNTS <- sumcounts - POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) - LOGDIFF <- repmat(-Inf, c(ninds, npops)) - rm(initialPartition, counts, sumcounts) - - # PARHAAN MIXTURE-PARTITION ETSIMINEN - nRoundTypes <- 7 - kokeiltu <- zeros(nRoundTypes, 1) - roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan. - ready <- 0 - vaihe <- 1 - - if (dispText) { - message( - paste0( - '\nMixture analysis started with initial ', - as.character(npops), ' populations.' - ) - ) - } - - while (ready != 1) { - # FIXME: loop caught in here - muutoksia <- 0 - - if (dispText) { - message(paste('\nPerforming steps:', as.character(roundTypes))) - } - - for (n in 1:length(roundTypes)) { - - round <- roundTypes[n] - kivaluku <- 0 - - if (kokeiltu[round] == 1) { #Askelta kokeiltu viime muutoksen j�lkeen - - } else if (round == 0 | round == 1) { #Yksil�n siirt�minen toiseen populaatioon. - inds <- 1:ninds - aputaulu <- cbind(inds, rand(ninds, 1)) - aputaulu <- sortrows(aputaulu, 2) - inds <- t(aputaulu[, 1]) - muutosNyt <- 0 - - for (ind in inds) { - i1 <- PARTITION[ind] - muutokset_diffInCounts <- laskeMuutokset( - #FIXME: using 100-length global variables instead of the ones in this function - ind, rows, data, adjprior, priorTerm - ) - muutokset <- muutokset_diffInCounts$muutokset - diffInCounts <- muutokset_diffInCounts$diffInCounts - - if (round == 1) { - maxMuutos <- max_MATLAB(muutokset)$max - i2 <- max_MATLAB(muutokset)$idx - } - - if (i1 != i2 & maxMuutos > 1e-5) { - # Tapahtui muutos - muutoksia <- 1 - if (muutosNyt == 0) { - muutosNyt <- 1 - if (dispText) message('Action 1') - } - kokeiltu <- zeros(nRoundTypes, 1) - kivaluku <- kivaluku + 1 - updateGlobalVariables( - ind, i2, diffInCounts, adjprior, priorTerm - ) - logml <- logml + maxMuutos - if (logml > worstLogml) { - temp_addToSum <- addToSummary( - logml, partitionSummary, worstIndex - ) - partitionSummary <- temp_addToSum$partitionSummary - added <- temp_addToSum$added - if (added == 1) { - temp_minMATLAB <- min_MATLAB( - partitionSummary[, 2] - ) - worstLogml <- temp_minMATLAB$mins - worstIndex <- temp_minMATLAB$idx - } - } - } - } - - if (muutosNyt == 0) { - kokeiltu[round] <- 1 - } - } else if (round == 2) { # Populaation yhdist�minen toiseen. - maxMuutos <- 0 - for (pop in 1:npops) { - muutokset_diffInCounts <- laskeMuutokset2( - pop, rows, data, adjprior, priorTerm - ) - muutokset <- muutokset_diffInCounts$muutokset - diffInCounts <- muutokset_diffInCounts$diffInCounts - isoin <- max_MATLAB(muutokset)[[1]] - indeksi <- max_MATLAB(muutokset)[[2]] - if (isoin > maxMuutos) { - maxMuutos <- isoin - i1 <- pop - i2 <- indeksi - diffInCountsBest <- diffInCounts - } - } - - if (maxMuutos > 1e-5) { - muutoksia <- 1 - kokeiltu <- zeros(nRoundTypes, 1) - updateGlobalVariables2( - i1, i2, diffInCountsBest, adjprior, priorTerm - ) - logml <- logml + maxMuutos - if (dispText) { - cat('Action 2') - } - if (logml > worstLogml) { - temp_addToSum <- addToSummary( - logml, partitionSummary, worstIndex - ) - partitionSummary <- temp_addToSum$partitionSummary - added <- temp_addToSum$added - if (added==1) { - worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] - worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] - } - } - } else { - kokeiltu[round] <- 1 - } - - - } else if (round == 3 || round == 4) { #Populaation jakaminen osiin. - maxMuutos <- 0 - ninds <- size(rows, 1) - for (pop in 1:npops) { - inds2 <- find(PARTITION == pop) - ninds2 <- length(inds2) - if (ninds2 > 2) { - dist2 <- laskeOsaDist(inds2, dist, ninds) - Z2 <- linkage(t(dist2)) - if (round == 3) { - npops2 <- max(min(20, floor(ninds2 / 5)), 2) - } else if (round == 4) { - npops2 <- 2 # Moneenko osaan jaetaan - } - T2 <- cluster_own(Z2, npops2) - muutokset <- laskeMuutokset3( - T2, inds2, rows, data, adjprior, priorTerm, pop - ) - isoin <- max_MATLAB(muutokset)[[1]] - indeksi <- max_MATLAB(muutokset)[[2]] - if (isoin > maxMuutos) { - maxMuutos <- isoin - muuttuvaPop2 <- indeksi %% npops2 - if (muuttuvaPop2==0) muuttuvaPop2 <- npops2 - muuttuvat <- inds2[find(T2 == muuttuvaPop2)] - i2 <- ceiling(indeksi / npops2) - } - } - } - if (maxMuutos > 1e-5) { - muutoksia <- 1 - kokeiltu <- zeros(nRoundTypes, 1) - rivit <- list() - for (i in 1:length(muuttuvat)) { - ind <- muuttuvat[i] - lisa <- rows[ind, 1]:rows[ind, 2] - rivit <- rbind(rivit, t(lisa)) - } - diffInCounts <- computeDiffInCounts( - t(rivit), size(COUNTS, 1), size(COUNTS, 2), data - ) - i1 <- PARTITION(muuttuvat[1]) - updateGlobalVariables3( - muuttuvat, diffInCounts, adjprior, priorTerm, i2 - ) - logml <- logml + maxMuutos - if (dispText) { - if (round == 3) { - cat('Action 3') - } else { - cat('Action 4') - } - } - if (logml > worstLogml) { - temp_addToSum <- addToSummary( - logml, partitionSummary, worstIndex - ) - partitionSummary <- temp_addToSum$partitionSummary - added <- temp_addToSum$added - if (added==1) { - worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] - worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] - } - } - - } else { - kokeiltu[round] <- 1 - } - } else if (round == 5 || round == 6) { - j <- 0 - muutettu <- 0 - poplogml <- POP_LOGML - partition <- PARTITION - counts <- COUNTS - sumcounts <- SUMCOUNTS - logdiff <- LOGDIFF - - pops <- sample(npops) - while (j < npops & muutettu == 0) { - j <- j + 1 - pop <- pops[j] - totalMuutos <- 0 - inds <- find(PARTITION == pop) - if (round == 5) { - aputaulu <- c(inds, rand(length(inds), 1)) - aputaulu <- sortrows(aputaulu, 2) - inds <- t(aputaulu[, 1]) - } else if (round == 6) { - inds <- returnInOrder( - inds, pop, rows, data, adjprior, priorTerm - ) - } - - i <- 0 - - while (length(inds) > 0 & i < length(inds)) { - i <- i + 1 - ind <- inds[i] - - muutokset_diffInCounts <- laskeMuutokset( - ind, rows, data, adjprior, priorTerm - ) - muutokset <- muutokset_diffInCounts$muutokset - diffInCounts <- muutokset_diffInCounts$diffInCounts - - muutokset[pop] <- -1e50 # Varmasti ei suurin!!! - maxMuutos <- max_MATLAB(muutokset)[[1]] - i2 <- max_MATLAB(muutokset)[[2]] - updateGlobalVariables( - ind, i2, diffInCounts, adjprior, priorTerm - ) - - totalMuutos <- totalMuutos+maxMuutos - logml <- logml + maxMuutos - if (round == 6) { - # Lopetetaan heti kun muutos on positiivinen. - if (totalMuutos > 1e-5) { - i <- length(inds) - } - } - } - - if (totalMuutos > 1e-5) { - kokeiltu <- zeros(nRoundTypes, 1) - muutettu <- 1 - if (muutoksia == 0) { - muutoksia <- 1 # Ulompi kirjanpito. - if (dispText) { - if (round == 5) { - cat('Action 5') - } else { - cat('Action 6') - } - } - } - if (logml > worstLogml) { - temp_addToSum <- addToSummary( - logml, partitionSummary, worstIndex - ) - partitionSummary <- temp_addToSum$partitionSummary - added <- temp_addToSum$added - if (added==1) { - worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] - worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] - } - } - } else { - # Miss��n vaiheessa tila ei parantunut. - # Perutaan kaikki muutokset. - PARTITION <- partition - SUMCOUNTS <- sumcounts - POP_LOGML <- poplogml - COUNTS <- counts - logml <- logml - totalMuutos - LOGDIFF <- logdiff - kokeiltu[round] <- 1 - } - } - rm(partition, sumcounts, counts, poplogml) - - } else if (round == 7) { - emptyPop <- findEmptyPop(npops) - j <- 0 - pops <- sample(npops) - muutoksiaNyt <- 0 - if (emptyPop == -1) { - j <- npops - } - while (j < npops) { - j <- j + 1 - pop <- pops[j] - inds2 <- find(PARTITION == pop) - ninds2 <- length(inds2) - if (ninds2 > 5) { - partition <- PARTITION - sumcounts <- SUMCOUNTS - counts <- COUNTS - poplogml <- POP_LOGML - logdiff <- LOGDIFF - - dist2 <- laskeOsaDist(inds2, dist, ninds); - Z2 <- linkage(t(dist2)) - T2 <- cluster_own(Z2, 2) - muuttuvat <- inds2[find(T2 == 1)] - - muutokset <- laskeMuutokset3( - T2, inds2, rows, data, adjprior, priorTerm, pop - ) - totalMuutos <- muutokset(1, emptyPop) - - rivit <- list() - for (i in 1:length(muuttuvat)) { - ind <- muuttuvat[i] - lisa <- rows[ind, 1]:rows[ind, 2] - rivit <- c(rivit, lisa) - } - diffInCounts <- computeDiffInCounts( - rivit, size(COUNTS, 1), size(COUNTS, 2), data - ) - - updateGlobalVariables3( - muuttuvat, diffInCounts, adjprior, priorTerm, - emptyPop - ) - - muutettu <- 1 - while (muutettu == 1) { - muutettu <- 0 - # Siirret��n yksil�it� populaatioiden v�lill� - muutokset <- laskeMuutokset5( - inds2, rows, data, adjprior, priorTerm, - pop, emptyPop - ) - - maxMuutos <- indeksi <- max_MATLAB(muutokset) - - muuttuva <- inds2(indeksi) - if (PARTITION(muuttuva) == pop) { - i2 <- emptyPop - } else { - i2 <- pop - } - - if (maxMuutos > 1e-5) { - rivit <- rows[muuttuva, 1]:rows[muuttuva, 2] - diffInCounts <- computeDiffInCounts( - rivit, size(COUNTS, 1), size(COUNTS, 2), - data - ) - updateGlobalVariables3( - muuttuva,diffInCounts, adjprior, - priorTerm, i2 - ) - muutettu <- 1 - totalMuutos <- totalMuutos + maxMuutos - } - } - if (totalMuutos > 1e-5) { - muutoksia <- 1 - logml <- logml + totalMuutos - if (logml > worstLogml) { - temp_addToSum <- addToSummary( - logml, partitionSummary, worstIndex - ) - partitionSummary <- temp_addToSum$partitionSummary - added <- temp_addToSum$added - if (added == 1) { - worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] - worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] - } - } - if (muutoksiaNyt == 0) { - if (dispText) { - cat('Action 7') - } - muutoksiaNyt <- 1 - } - kokeiltu <- zeros(nRoundTypes, 1) - j <- npops - } else { - # palutetaan vanhat arvot - PARTITION <- partition - SUMCOUNTS <- sumcounts - COUNTS <- counts - POP_LOGML <- poplogml - LOGDIFF <- logdiff - } - } - - } - - if (muutoksiaNyt == 0) { - kokeiltu[round] <- 1 - } - - } - } - # FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1 - print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos))#TEMP - if (muutoksia == 0) { - if (vaihe <= 4) { - vaihe <= vaihe + 1 - } else if (vaihe == 5) { - ready <- 1 - } - } else { - muutoksia <- 0 - } - - if (ready == 0) { - if (vaihe == 1) { - roundTypes <- c(1) - } else if (vaihe == 2) { - roundTypes <- c(2, 1) - } else if (vaihe == 3) { - roundTypes <- c(5, 5, 7) - } else if (vaihe == 4) { - roundTypes <- c(4, 3, 1) - } else if (vaihe == 5) { - roundTypes <- c(6, 7, 2, 3, 4, 1) - } - } - } - - # TALLENNETAAN - - npops <- poistaTyhjatPopulaatiot(npops) - POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) - if (dispText) { - print(paste('Found partition with', as.character(npops), 'populations.')) - print(paste('Log(ml) =', as.character(logml))) - print(' ') - } - - if (logml > logmlBest) { - # P�ivitet��n parasta l�ydetty� partitiota. - logmlBest <- logml - npopsBest <- npops - partitionBest <- PARTITION - countsBest <- COUNTS - sumCountsBest <- SUMCOUNTS - pop_logmlBest <- POP_LOGML - logdiffbest <- LOGDIFF - } - } - return( - list(logml = logml, npops = npops, partitionSummary = partitionSummary) - ) -} \ No newline at end of file +indMix <- function(c, npops, dispText = TRUE) { + # Greedy search algorithm with unknown number of classes for regular + # clustering. + # Input npops is not used if called by greedyMix or greedyPopMix. + + logml <- 1 + clearGlobalVars() + + noalle <- c$noalle + rows <- c$rows + data <- c$data + + adjprior <- c$adjprior + priorTerm <- c$priorTerm + rowsFromInd <- c$rowsFromInd + + if (isfield(c, "dist")) { + dist <- c$dist + Z <- c$Z + } + + rm(c) + nargin <- length(as.list(match.call())) - 1 + if (nargin < 2) { + dispText <- 1 + npopstext <- matrix() + ready <- FALSE + teksti <- "Input upper bound to the number of populations (possibly multiple values)" + while (!ready) { + npopstextExtra <- inputdlg(teksti, 1, "20") + if (isempty(npopstextExtra)) { # Painettu Cancel:ia + warnings("Empty value provided") + return() + } + npopstextExtra <- npopstextExtra[[1]] + if (length(npopstextExtra) >= 255) { + npopstextExtra <- npopstextExtra[1:255] + npopstext <- c(npopstext, " ", npopstextExtra) + teksti <- "The input field length limit (255 characters) was reached. Input more values: " + } else { + npopstext <- as.numeric(strsplit(as.character(npopstextExtra), " ")[[1]]) + ready <- TRUE + } + } + rm(ready, teksti) + if (isempty(npopstext) | length(npopstext) == 1) { + warning("Empty or 1-length vector provided") + return() + } else { + npopsTaulu <- as.numeric(npopstext) + ykkoset <- find(npopsTaulu == 1) + npopsTaulu[ykkoset] <- NA # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan (if ones are given as an upper limit, they are deleted) + if (isempty(npopsTaulu)) { + logml <- 1 + partitionSummary <- 1 + npops <- 1 + return() + } + rm(ykkoset) + } + } else { + npopsTaulu <- npops + } + + nruns <- length(npopsTaulu) + + initData <- data + data <- data[, 1:(ncol(data) - 1)] + + logmlBest <- -1e50 + partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) + partitionSummary[, 1] <- zeros(30, 1) + worstLogml <- -1e50 + worstIndex <- 1 + for (run in 1:nruns) { + npops <- npopsTaulu[[run]] + if (dispText) { + dispLine() + cat( + "Run ", as.character(run), "/", as.character(nruns), + ", maximum number of populations ", as.character(npops), + ".\n", + sep = "" + ) + } + ninds <- size(rows, 1) + initialPartition <- admixture_initialization(initData, npops, Z) + sumcounts_counts_logml <- initialCounts( + initialPartition, data, npops, rows, noalle, adjprior + ) + sumcounts <- sumcounts_counts_logml$sumcounts + counts <- sumcounts_counts_logml$counts + logml <- sumcounts_counts_logml$logml + + PARTITION <- zeros(ninds, 1) + for (i in 1:ninds) { + apu <- rows[i] + PARTITION[i] <- initialPartition[apu[1]] + } + + COUNTS <- counts + SUMCOUNTS <- sumcounts + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) + LOGDIFF <- repmat(-Inf, c(ninds, npops)) + rm(initialPartition, counts, sumcounts) + + # PARHAAN MIXTURE-PARTITION ETSIMINEN + nRoundTypes <- 7 + kokeiltu <- zeros(nRoundTypes, 1) + roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan. + ready <- 0 + vaihe <- 1 + + if (dispText) { + message( + paste0( + "\nMixture analysis started with initial ", + as.character(npops), " populations." + ) + ) + } + + while (ready != 1) { + # FIXME: loop caught in here + muutoksia <- 0 + + if (dispText) { + message(paste("\nPerforming steps:", as.character(roundTypes))) + } + + for (n in 1:length(roundTypes)) { + round <- roundTypes[n] + kivaluku <- 0 + + if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j�lkeen + } else if (round == 0 | round == 1) { # Yksil�n siirt�minen toiseen populaatioon. + inds <- 1:ninds + aputaulu <- cbind(inds, rand(ninds, 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + muutosNyt <- 0 + + for (ind in inds) { + i1 <- PARTITION[ind] + muutokset_diffInCounts <- laskeMuutokset( + # FIXME: using 100-length global variables instead of the ones in this function + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + if (round == 1) { + maxMuutos <- max_MATLAB(muutokset)$max + i2 <- max_MATLAB(muutokset)$idx + } + + if (i1 != i2 & maxMuutos > 1e-5) { + # Tapahtui muutos + muutoksia <- 1 + if (muutosNyt == 0) { + muutosNyt <- 1 + if (dispText) message("Action 1") + } + kokeiltu <- zeros(nRoundTypes, 1) + kivaluku <- kivaluku + 1 + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + logml <- logml + maxMuutos + if (logml > worstLogml) { + temp_addToSum <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- temp_addToSum$partitionSummary + added <- temp_addToSum$added + if (added == 1) { + temp_minMATLAB <- min_MATLAB( + partitionSummary[, 2] + ) + worstLogml <- temp_minMATLAB$mins + worstIndex <- temp_minMATLAB$idx + } + } + } + } + + if (muutosNyt == 0) { + kokeiltu[round] <- 1 + } + } else if (round == 2) { # Populaation yhdist�minen toiseen. + maxMuutos <- 0 + for (pop in 1:npops) { + muutokset_diffInCounts <- laskeMuutokset2( + pop, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + i1 <- pop + i2 <- indeksi + diffInCountsBest <- diffInCounts + } + } + + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + updateGlobalVariables2( + i1, i2, diffInCountsBest, adjprior, priorTerm + ) + logml <- logml + maxMuutos + if (dispText) { + cat("Action 2") + } + if (logml > worstLogml) { + temp_addToSum <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- temp_addToSum$partitionSummary + added <- temp_addToSum$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + kokeiltu[round] <- 1 + } + } else if (round == 3 || round == 4) { # Populaation jakaminen osiin. + maxMuutos <- 0 + ninds <- size(rows, 1) + for (pop in 1:npops) { + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 2) { + dist2 <- laskeOsaDist(inds2, dist, ninds) + Z2 <- linkage(t(dist2)) + if (round == 3) { + npops2 <- max(min(20, floor(ninds2 / 5)), 2) + } else if (round == 4) { + npops2 <- 2 # Moneenko osaan jaetaan + } + T2 <- cluster_own(Z2, npops2) + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + muuttuvaPop2 <- indeksi %% npops2 + if (muuttuvaPop2 == 0) muuttuvaPop2 <- npops2 + muuttuvat <- inds2[find(T2 == muuttuvaPop2)] + i2 <- ceiling(indeksi / npops2) + } + } + } + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- rbind(rivit, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rivit), size(COUNTS, 1), size(COUNTS, 2), data + ) + i1 <- PARTITION(muuttuvat[1]) + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 + ) + logml <- logml + maxMuutos + if (dispText) { + if (round == 3) { + cat("Action 3") + } else { + cat("Action 4") + } + } + if (logml > worstLogml) { + temp_addToSum <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- temp_addToSum$partitionSummary + added <- temp_addToSum$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + kokeiltu[round] <- 1 + } + } else if (round == 5 || round == 6) { + j <- 0 + muutettu <- 0 + poplogml <- POP_LOGML + partition <- PARTITION + counts <- COUNTS + sumcounts <- SUMCOUNTS + logdiff <- LOGDIFF + + pops <- sample(npops) + while (j < npops & muutettu == 0) { + j <- j + 1 + pop <- pops[j] + totalMuutos <- 0 + inds <- find(PARTITION == pop) + if (round == 5) { + aputaulu <- c(inds, rand(length(inds), 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + } else if (round == 6) { + inds <- returnInOrder( + inds, pop, rows, data, adjprior, priorTerm + ) + } + + i <- 0 + + while (length(inds) > 0 & i < length(inds)) { + i <- i + 1 + ind <- inds[i] + + muutokset_diffInCounts <- laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + muutokset[pop] <- -1e50 # Varmasti ei suurin!!! + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + + totalMuutos <- totalMuutos + maxMuutos + logml <- logml + maxMuutos + if (round == 6) { + # Lopetetaan heti kun muutos on positiivinen. + if (totalMuutos > 1e-5) { + i <- length(inds) + } + } + } + + if (totalMuutos > 1e-5) { + kokeiltu <- zeros(nRoundTypes, 1) + muutettu <- 1 + if (muutoksia == 0) { + muutoksia <- 1 # Ulompi kirjanpito. + if (dispText) { + if (round == 5) { + cat("Action 5") + } else { + cat("Action 6") + } + } + } + if (logml > worstLogml) { + temp_addToSum <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- temp_addToSum$partitionSummary + added <- temp_addToSum$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + # Miss��n vaiheessa tila ei parantunut. + # Perutaan kaikki muutokset. + PARTITION <- partition + SUMCOUNTS <- sumcounts + POP_LOGML <- poplogml + COUNTS <- counts + logml <- logml - totalMuutos + LOGDIFF <- logdiff + kokeiltu[round] <- 1 + } + } + rm(partition, sumcounts, counts, poplogml) + } else if (round == 7) { + emptyPop <- findEmptyPop(npops) + j <- 0 + pops <- sample(npops) + muutoksiaNyt <- 0 + if (emptyPop == -1) { + j <- npops + } + while (j < npops) { + j <- j + 1 + pop <- pops[j] + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 5) { + partition <- PARTITION + sumcounts <- SUMCOUNTS + counts <- COUNTS + poplogml <- POP_LOGML + logdiff <- LOGDIFF + + dist2 <- laskeOsaDist(inds2, dist, ninds) + Z2 <- linkage(t(dist2)) + T2 <- cluster_own(Z2, 2) + muuttuvat <- inds2[find(T2 == 1)] + + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + totalMuutos <- muutokset(1, emptyPop) + + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- c(rivit, lisa) + } + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), data + ) + + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, + emptyPop + ) + + muutettu <- 1 + while (muutettu == 1) { + muutettu <- 0 + # Siirret��n yksil�it� populaatioiden v�lill� + muutokset <- laskeMuutokset5( + inds2, rows, data, adjprior, priorTerm, + pop, emptyPop + ) + + maxMuutos <- indeksi <- max_MATLAB(muutokset) + + muuttuva <- inds2(indeksi) + if (PARTITION(muuttuva) == pop) { + i2 <- emptyPop + } else { + i2 <- pop + } + + if (maxMuutos > 1e-5) { + rivit <- rows[muuttuva, 1]:rows[muuttuva, 2] + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), + data + ) + updateGlobalVariables3( + muuttuva, diffInCounts, adjprior, + priorTerm, i2 + ) + muutettu <- 1 + totalMuutos <- totalMuutos + maxMuutos + } + } + if (totalMuutos > 1e-5) { + muutoksia <- 1 + logml <- logml + totalMuutos + if (logml > worstLogml) { + temp_addToSum <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- temp_addToSum$partitionSummary + added <- temp_addToSum$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + if (muutoksiaNyt == 0) { + if (dispText) { + cat("Action 7") + } + muutoksiaNyt <- 1 + } + kokeiltu <- zeros(nRoundTypes, 1) + j <- npops + } else { + # palutetaan vanhat arvot + PARTITION <- partition + SUMCOUNTS <- sumcounts + COUNTS <- counts + POP_LOGML <- poplogml + LOGDIFF <- logdiff + } + } + } + + if (muutoksiaNyt == 0) { + kokeiltu[round] <- 1 + } + } + } + # FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1 + print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos)) # TEMP + if (muutoksia == 0) { + if (vaihe <= 4) { + vaihe <= vaihe + 1 + } else if (vaihe == 5) { + ready <- 1 + } + } else { + muutoksia <- 0 + } + + if (ready == 0) { + if (vaihe == 1) { + roundTypes <- c(1) + } else if (vaihe == 2) { + roundTypes <- c(2, 1) + } else if (vaihe == 3) { + roundTypes <- c(5, 5, 7) + } else if (vaihe == 4) { + roundTypes <- c(4, 3, 1) + } else if (vaihe == 5) { + roundTypes <- c(6, 7, 2, 3, 4, 1) + } + } + } + + # TALLENNETAAN + + npops <- poistaTyhjatPopulaatiot(npops) + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) + if (dispText) { + print(paste("Found partition with", as.character(npops), "populations.")) + print(paste("Log(ml) =", as.character(logml))) + print(" ") + } + + if (logml > logmlBest) { + # P�ivitet��n parasta l�ydetty� partitiota. + logmlBest <- logml + npopsBest <- npops + partitionBest <- PARTITION + countsBest <- COUNTS + sumCountsBest <- SUMCOUNTS + pop_logmlBest <- POP_LOGML + logdiffbest <- LOGDIFF + } + } + return( + list(logml = logml, npops = npops, partitionSummary = partitionSummary) + ) +} diff --git a/R/initPopNames.R b/R/initPopNames.R index 47dd2f8..3a01e15 100644 --- a/R/initPopNames.R +++ b/R/initPopNames.R @@ -3,35 +3,35 @@ #' @param indexFile indexFile #' @export initPopNames <- function(nameFile, indexFile) { - # Palauttaa tyhj�n, mik�li nimitiedosto ja indeksitiedosto - # eiv�t olleet yht?pitki? + # Palauttaa tyhj�n, mik�li nimitiedosto ja indeksitiedosto + # eiv�t olleet yht?pitki? - indices <- load(indexFile) + indices <- load(indexFile) - fid = load(nameFile) - if (fid == -1) { - # File didn't exist - stop('Loading of the population names was unsuccessful') - } - line <- readLines(fid)[1] - counter <- 1 - names <- vector() - while ((line != -1) & (line != "")) { - names[counter] <- line - line <- readLines(fid)[counter] - counter <- counter + 1 - } + fid <- load(nameFile) + if (fid == -1) { + # File didn't exist + stop("Loading of the population names was unsuccessful") + } + line <- readLines(fid)[1] + counter <- 1 + names <- vector() + while ((line != -1) & (line != "")) { + names[counter] <- line + line <- readLines(fid)[counter] + counter <- counter + 1 + } - if (length(names) != length(indices)) { - cat('The number of population names must be equal to the number of') - cat('entries in the file specifying indices of the first individuals') - cat('of each population.') - } + if (length(names) != length(indices)) { + cat("The number of population names must be equal to the number of") + cat("entries in the file specifying indices of the first individuals") + cat("of each population.") + } - popnames <- cell(length(names), 2) - for (i in 1:length(names)) { - popnames[i, 1] <- names[i] - popnames[i, 2] <- indices[i] - } - return(popnames) -} \ No newline at end of file + popnames <- cell(length(names), 2) + for (i in 1:length(names)) { + popnames[i, 1] <- names[i] + popnames[i, 2] <- indices[i] + } + return(popnames) +} diff --git a/R/initialCounts.R b/R/initialCounts.R index 6efe2e9..fabc7b5 100644 --- a/R/initialCounts.R +++ b/R/initialCounts.R @@ -1,25 +1,25 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) { - nloci <- size(data, 2) - ninds <- size(rows, 1) + nloci <- size(data, 2) + ninds <- size(rows, 1) - koot <- rows[, 1] - rows[, 2] + 1 - maxSize <- max(koot) + koot <- rows[, 1] - rows[, 2] + 1 + maxSize <- max(koot) - counts <- zeros(max(noalle), nloci, npops) - sumcounts <- zeros(npops, nloci) - for (i in 1:npops) { - for (j in 1:nloci) { - havainnotLokuksessa <- find(partition == i & data[, j] >= 0) - sumcounts[i, j] <- length(havainnotLokuksessa) - for (k in 1:noalle[j]) { - alleleCode <- k - N_ijk <- length( - find(data[havainnotLokuksessa, j] == alleleCode) - ) - counts[k, j, i] <- N_ijk - } - } - } - logml <- laskeLoggis(counts, sumcounts, adjprior) - return(list(sumcounts = sumcounts, counts = counts, logml = logml)) -} \ No newline at end of file + counts <- zeros(max(noalle), nloci, npops) + sumcounts <- zeros(npops, nloci) + for (i in 1:npops) { + for (j in 1:nloci) { + havainnotLokuksessa <- find(partition == i & data[, j] >= 0) + sumcounts[i, j] <- length(havainnotLokuksessa) + for (k in 1:noalle[j]) { + alleleCode <- k + N_ijk <- length( + find(data[havainnotLokuksessa, j] == alleleCode) + ) + counts[k, j, i] <- N_ijk + } + } + } + logml <- laskeLoggis(counts, sumcounts, adjprior) + return(list(sumcounts = sumcounts, counts = counts, logml = logml)) +} diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R index 2bd08ed..bc6b62d 100644 --- a/R/initialPopCounts.R +++ b/R/initialPopCounts.R @@ -1,20 +1,20 @@ initialPopCounts <- function(data, npops, rows, noalle, adjprior) { - nloci <- size(data, 2) - counts <- zeros(max(noalle), nloci, npops) - sumcounts <- zeros(npops, nloci) + nloci <- size(data, 2) + counts <- zeros(max(noalle), nloci, npops) + sumcounts <- zeros(npops, nloci) - for (i in 1:npops) { - for (j in 1:nloci) { - i_rivit <- rows(i, 1):rows(i, 2) - havainnotLokuksessa <- find(data[i_rivit, j] >= 0) - sumcounts[i, j] <- length(havainnotLokuksessa) - for (k in 1:noalle[j]) { - alleleCode <- k - N_ijk <- length(find(data[i_rivit, j] == alleleCode)) - counts[k, j, i] <- N_ijk - } - } - } - logml <- laskeLoggis(counts, sumcounts, adjprior) - return(sumcounts = sumcounts, counts = counts, logml = logml) + for (i in 1:npops) { + for (j in 1:nloci) { + i_rivit <- rows(i, 1):rows(i, 2) + havainnotLokuksessa <- find(data[i_rivit, j] >= 0) + sumcounts[i, j] <- length(havainnotLokuksessa) + for (k in 1:noalle[j]) { + alleleCode <- k + N_ijk <- length(find(data[i_rivit, j] == alleleCode)) + counts[k, j, i] <- N_ijk + } + } + } + logml <- laskeLoggis(counts, sumcounts, adjprior) + return(sumcounts = sumcounts, counts = counts, logml = logml) } diff --git a/R/initializeGammaln.R b/R/initializeGammaln.R index ac1864f..37d96aa 100644 --- a/R/initializeGammaln.R +++ b/R/initializeGammaln.R @@ -1,9 +1,9 @@ initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) { - #Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j) - GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles) - for (i in 1:(ninds + 1) * rowsFromInd) { - for (j in 1:maxAlleles) { - GAMMA_LN[i, j] <- log_gamma((i - 1) + 1/j) - } - } + # Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j) + GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles) + for (i in 1:(ninds + 1) * rowsFromInd) { + for (j in 1:maxAlleles) { + GAMMA_LN[i, j] <- log_gamma((i - 1) + 1 / j) + } + } } diff --git a/R/inputdlg.R b/R/inputdlg.R index 52d7495..cf59589 100644 --- a/R/inputdlg.R +++ b/R/inputdlg.R @@ -4,14 +4,15 @@ #' @param dims number of dimensions in the answwers #' @param definput default value of the input #' @export -inputdlg <- function(prompt, dims=1, definput=NULL) { - if (!is.null(definput)) { - prompt <- append(prompt, paste0(" (default: ", definput, ")")) - } - input_chr <- readline(paste0(prompt, ": ")) - if (input_chr == "") input_chr <- definput - input_chr_or_num <- tryCatch( - as.numeric(input_chr), warning = function(w) input_chr - ) - return(input_chr_or_num) -} \ No newline at end of file +inputdlg <- function(prompt, dims = 1, definput = NULL) { + if (!is.null(definput)) { + prompt <- append(prompt, paste0(" (default: ", definput, ")")) + } + input_chr <- readline(paste0(prompt, ": ")) + if (input_chr == "") input_chr <- definput + input_chr_or_num <- tryCatch( + as.numeric(input_chr), + warning = function(w) input_chr + ) + return(input_chr_or_num) +} diff --git a/R/isGlobalEmpty.R b/R/isGlobalEmpty.R index 9334507..1bd9995 100644 --- a/R/isGlobalEmpty.R +++ b/R/isGlobalEmpty.R @@ -6,5 +6,5 @@ #' @importFrom stats sd #' @author Waldir Leoncio isGlobalEmpty <- function(g) { - return(sum(g) == 0 & sd(g) == 0) -} \ No newline at end of file + return(sum(g) == 0 & sd(g) == 0) +} diff --git a/R/isempty.R b/R/isempty.R index fe1730d..e1ef187 100644 --- a/R/isempty.R +++ b/R/isempty.R @@ -4,10 +4,10 @@ #' @param x array #' isempty <- function(x) { - if (class(x)[1] %in% c("array", "matrix")) { - dim_mat_x <- dim(x) - } else { - dim_mat_x <- dim(matrix(x)) - } - return(any(dim_mat_x == 0) | is.null(dim_mat_x)) -} \ No newline at end of file + if (class(x)[1] %in% c("array", "matrix")) { + dim_mat_x <- dim(x) + } else { + dim_mat_x <- dim(matrix(x)) + } + return(any(dim_mat_x == 0) | is.null(dim_mat_x)) +} diff --git a/R/isfield.R b/R/isfield.R index dc6404a..2120b85 100644 --- a/R/isfield.R +++ b/R/isfield.R @@ -6,5 +6,5 @@ #' @references https://se.mathworks.com/help/matlab/ref/isfield.html #' @export isfield <- function(x, field) { - sapply(field, function(f) f %in% names(x)) -} \ No newline at end of file + sapply(field, function(f) f %in% names(x)) +} diff --git a/R/isspace.R b/R/isspace.R index a4ee2d7..c00b5f0 100644 --- a/R/isspace.R +++ b/R/isspace.R @@ -5,7 +5,7 @@ #' @note Recognized whitespace characters are ` ` and `\\t`. #' @author Waldir Leoncio isspace <- function(A) { - A_split <- unlist(strsplit(A, '')) - TF <- A_split %in% c(' ', '\t') - return(as.numeric(TF)) -} \ No newline at end of file + A_split <- unlist(strsplit(A, "")) + TF <- A_split %in% c(" ", "\t") + return(as.numeric(TF)) +} diff --git a/R/kldiv2str.R b/R/kldiv2str.R index 9e1e31e..1a88a12 100644 --- a/R/kldiv2str.R +++ b/R/kldiv2str.R @@ -1,24 +1,23 @@ kldiv2str <- function(div) { - mjono <- ' ' - if (abs(div) < 100) { - # Ei tarvita e-muotoa - mjono[6] <- as.character((floor(div * 1000)) %% 10) - mjono[5] <- as.character((floor(div * 100)) %% 10) - mjono[4] <- as.character((floor(div * 10)) %% 10) - mjono[3] <- '.' - mjono[2] <- as.character((floor(div)) %% 10) - arvo <- (floor(div / 10)) %% 10 - if (arvo > 0) { - mjono[1] <- as.character(arvo) - } - - } else { - suurinYks <- floor(log10(div)) - mjono[6] <- as.character(suurinYks) - mjono[5] <- 'e' - mjono[4] <- palautaYks(abs(div), suurinYks - 1) - mjono[3] <- '.' - mjono[2] <- palautaYks(abs(div), suurinYks) - } - return(mjono) -} \ No newline at end of file + mjono <- " " + if (abs(div) < 100) { + # Ei tarvita e-muotoa + mjono[6] <- as.character((floor(div * 1000)) %% 10) + mjono[5] <- as.character((floor(div * 100)) %% 10) + mjono[4] <- as.character((floor(div * 10)) %% 10) + mjono[3] <- "." + mjono[2] <- as.character((floor(div)) %% 10) + arvo <- (floor(div / 10)) %% 10 + if (arvo > 0) { + mjono[1] <- as.character(arvo) + } + } else { + suurinYks <- floor(log10(div)) + mjono[6] <- as.character(suurinYks) + mjono[5] <- "e" + mjono[4] <- palautaYks(abs(div), suurinYks - 1) + mjono[3] <- "." + mjono[2] <- palautaYks(abs(div), suurinYks) + } + return(mjono) +} diff --git a/R/laskeLoggis.R b/R/laskeLoggis.R index 672dec9..aa147eb 100644 --- a/R/laskeLoggis.R +++ b/R/laskeLoggis.R @@ -1,9 +1,9 @@ laskeLoggis <- function(counts, sumcounts, adjprior) { - npops <- size(counts, 3) + npops <- size(counts, 3) - sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) - sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) - logml2 <- sum1 - npops * sum3 - loggis <- logml2 - return(loggis) -} \ No newline at end of file + sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) + sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) + logml2 <- sum1 - npops * sum3 + loggis <- logml2 + return(loggis) +} diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index 795f6ab..cbe724b 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -8,36 +8,36 @@ #' @param osuusTaulu Percentage table? #' @param logml log maximum likelihood #' @export -laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml) { - if (isGlobalEmpty(COUNTS)) { - npops <- 1 - } else { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - } - notEmpty <- which(osuusTaulu > 0.005) - muutokset <- zeros(npops) - empties <- !notEmpty +laskeMuutokset4 <- function(osuus, osuusTaulu, omaFreqs, logml) { + if (isGlobalEmpty(COUNTS)) { + npops <- 1 + } else { + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + } + notEmpty <- which(osuusTaulu > 0.005) + muutokset <- zeros(npops) + empties <- !notEmpty - for (i1 in notEmpty) { - osuusTaulu[i1] <- osuusTaulu[i1] - osuus - for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { - osuusTaulu[i2] <- osuusTaulu[i2] + osuus - loggis <- computeIndLogml(omaFreqs, osuusTaulu) + for (i1 in notEmpty) { + osuusTaulu[i1] <- osuusTaulu[i1] - osuus + for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { + osuusTaulu[i2] <- osuusTaulu[i2] + osuus + loggis <- computeIndLogml(omaFreqs, osuusTaulu) - # Work around Matlab OOB bug - if (i1 > nrow(muutokset)) { - muutokset <- rbind(muutokset, muutokset * 0) - } - if (i2 > ncol(muutokset)) { - muutokset <- cbind(muutokset, muutokset * 0) - } + # Work around Matlab OOB bug + if (i1 > nrow(muutokset)) { + muutokset <- rbind(muutokset, muutokset * 0) + } + if (i2 > ncol(muutokset)) { + muutokset <- cbind(muutokset, muutokset * 0) + } - muutokset[i1, i2] <- loggis - logml - osuusTaulu[i2] <- osuusTaulu[i2] - osuus - } - osuusTaulu[i1] <- osuusTaulu[i1] + osuus - } - return (muutokset) + muutokset[i1, i2] <- loggis - logml + osuusTaulu[i2] <- osuusTaulu[i2] - osuus + } + osuusTaulu[i1] <- osuusTaulu[i1] + osuus + } + return(muutokset) } # Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi @@ -49,179 +49,179 @@ laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml) { # Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset # logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { - npops <- size(COUNTS, 3) - muutokset <- LOGDIFF[ind, ] + npops <- size(COUNTS, 3) + muutokset <- LOGDIFF[ind, ] - i1 <- PARTITION[ind] - i1_logml <- POP_LOGML[i1] - muutokset[i1] <- 0 + i1 <- PARTITION[ind] + i1_logml <- POP_LOGML[i1] + muutokset[i1] <- 0 - rows <- globalRows[ind, 1]:globalRows[ind, 2] - diffInCounts <- computeDiffInCounts( - rows, size(COUNTS, 1), size(COUNTS, 2), data - ) - diffInSumCounts <- colSums(diffInCounts) + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- colSums(diffInCounts) - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts - new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) - COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. (Searching for populations that have changed since the last time) - i2 <- setdiff(i2, i1) - i2_logml <- POP_LOGML[i2] + i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. (Searching for populations that have changed since the last time) + i2 <- setdiff(i2, i1) + i2_logml <- POP_LOGML[i2] - ni2 <- length(i2) + ni2 <- length(i2) - COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) - new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) - COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) - muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml - LOGDIFF[ind, ] <- muutokset - return(list(muutokset = muutokset, diffInCounts = diffInCounts)) + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + LOGDIFF[ind, ] <- muutokset + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) } laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) { - # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi - # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n - # % koriin i. + # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n + # % koriin i. - npops <- size(COUNTS, 3) - muutokset <- zeros(npops, 1) + npops <- size(COUNTS, 3) + muutokset <- zeros(npops, 1) - i1_logml <- POP_LOGML[i1] + i1_logml <- POP_LOGML[i1] - inds <- find(PARTITION == i1) - ninds <- length(inds) + inds <- find(PARTITION == i1) + ninds <- length(inds) - if (ninds == 0) { - diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) - return() - } + if (ninds == 0) { + diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) + return() + } - rows = list() - for (i in 1:ninds) { - ind <- inds(i) - lisa <- globalRows(ind, 1):globalRows(ind, 2) - rows <- c(rows, t(lisa)) - } + rows <- list() + for (i in 1:ninds) { + ind <- inds(i) + lisa <- globalRows(ind, 1):globalRows(ind, 2) + rows <- c(rows, t(lisa)) + } - diffInCounts <- computeDiffInCounts( - t(rows), size(COUNTS, 1), size(COUNTS, 2), data - ) - diffInSumCounts <- sum(diffInCounts) + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts - new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) - COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - i2 <- c(1:i1-1, i1+1:npops) - i2_logml <- POP_LOGML[i2] + i2 <- c(1:i1 - 1, i1 + 1:npops) + i2_logml <- POP_LOGML[i2] - COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) - new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) - COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) - muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml - return(list(muutokset = muutokset, diffInCounts = diffInCounts)) + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) } laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { - # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio - # kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio - # inds2(find(T2==i)) siirret��n koriin j. + # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio + # kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio + # inds2(find(T2==i)) siirret��n koriin j. - npops <- size(COUNTS, 3) - npops2 <- length(unique(T2)) - muutokset <- zeros(npops2, npops) + npops <- size(COUNTS, 3) + npops2 <- length(unique(T2)) + muutokset <- zeros(npops2, npops) - i1_logml = POP_LOGML[i1] - for (pop2 in 1:npops2) { - inds <- inds2[find(T2==pop2)] - ninds <- length(inds); - if (ninds > 0) { - rows <- list() - for (i in 1:ninds) { - ind <- inds[i] - lisa <- globalRows[ind, 1]:globalRows[ind, 2] - rows <- c(rows, t(lisa)) - } - diffInCounts <- computeDiffInCounts( - t(rows), size(COUNTS, 1), size(COUNTS, 2), data - ) - diffInSumCounts <- sum(diffInCounts) + i1_logml <- POP_LOGML[i1] + for (pop2 in 1:npops2) { + inds <- inds2[find(T2 == pop2)] + ninds <- length(inds) + if (ninds > 0) { + rows <- list() + for (i in 1:ninds) { + ind <- inds[i] + lisa <- globalRows[ind, 1]:globalRows[ind, 2] + rows <- c(rows, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts - new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) - COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - i2 <- c(1:i1-1, i1+1:npops) - i2_logml <- t(POP_LOGML[i2]) + i2 <- c(1:i1 - 1, i1 + 1:npops) + i2_logml <- t(POP_LOGML[i2]) - COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) - new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm)) - COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm)) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) - muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml - } - } - return(muutokset) + muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + } + } + return(muutokset) } laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { - # Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi - # muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + # Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi + # muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. - ninds <- length(inds) - muutokset <- zeros(ninds, 1) + ninds <- length(inds) + muutokset <- zeros(ninds, 1) - i1_logml <- POP_LOGML[i1] - i2_logml <- POP_LOGML[i2] + i1_logml <- POP_LOGML[i1] + i2_logml <- POP_LOGML[i2] - for (i in 1:ninds) { - ind <- inds[i] - if (PARTITION[ind] == i1) { - pop1 <- i1 #mist� - pop2 <- i2 #mihin - } else { - pop1 <- i2 - pop2 <- i1 - } - rows <- globalRows[ind, 1]:globalRows[ind, 2] - diffInCounts <- computeDiffInCounts( - rows, size(COUNTS, 1), size(COUNTS, 2), data - ) - diffInSumCounts <- sum(diffInCounts) + for (i in 1:ninds) { + ind <- inds[i] + if (PARTITION[ind] == i1) { + pop1 <- i1 # mist� + pop2 <- i2 # mihin + } else { + pop1 <- i2 + pop2 <- i1 + } + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts - SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts - COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts - SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts + COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts - new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm) - muutokset[i] <- sum(new_logmls) + new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm) + muutokset[i] <- sum(new_logmls) - COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts - SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts - COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts - SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts - } + COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts + } - muutokset <- muutokset - i1_logml - i2_logml - return(muutokset) -} \ No newline at end of file + muutokset <- muutokset - i1_logml - i2_logml + return(muutokset) +} diff --git a/R/laskeOsaDist.R b/R/laskeOsaDist.R index 018614d..4f76b8b 100644 --- a/R/laskeOsaDist.R +++ b/R/laskeOsaDist.R @@ -5,21 +5,21 @@ #' @param ninds ninds #' @author Waldir Leoncio laskeOsaDist <- function(inds2, dist, ninds) { - # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 - # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. + # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 + # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. - ninds2 <- length(inds2) - apu <- zeros(choose(ninds2, 2), 2) - rivi <- 1 - for (i in 1:ninds2-1) { - for (j in i+1:ninds2) { - apu[rivi, 1] <- inds2[i] - apu[rivi, 2] <- inds2[j] - rivi <- rivi + 1 - } - } - apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 * - (apu[, 1]-1) + (apu[, 2] - apu[, 1]) - dist2 <- dist(apu) - return(dist2) -} \ No newline at end of file + ninds2 <- length(inds2) + apu <- zeros(choose(ninds2, 2), 2) + rivi <- 1 + for (i in 1:ninds2 - 1) { + for (j in i + 1:ninds2) { + apu[rivi, 1] <- inds2[i] + apu[rivi, 2] <- inds2[j] + rivi <- rivi + 1 + } + } + apu <- (apu[, 1] - 1) * ninds - apu[, 1] / 2 * + (apu[, 1] - 1) + (apu[, 2] - apu[, 1]) + dist2 <- dist(apu) + return(dist2) +} diff --git a/R/learn_partition_modified.R b/R/learn_partition_modified.R index 71d1a38..76b78c9 100644 --- a/R/learn_partition_modified.R +++ b/R/learn_partition_modified.R @@ -8,22 +8,22 @@ #' found clusters are such that all the values are >0.9, then those two are #' further combined. learn_partition_modified <- function(ordered) { - part <- learn_simple_partition(ordered, 0.05) - nclust <- length(unique(part)) - if (nclust == 3) { - mini_1 <- min(ordered(which(part == 1))) - mini_2 <- min(ordered(which(part == 2))) - mini_3 <- min(ordered(which(part == 3))) - if (mini_1 > 0.9 & mini_2 > 0.9) { - part[part == 2] <- 1 - part[part == 3] <- 2 - } else if (mini_1 > 0.9 & mini_3 > 0.9) { - part[part == 3] <- 1 - } else if (mini_2 > 0.9 & mini_3 > 0.9) { - # This is the one happening in practice, since the values are - # ordered, leading to mini_1 <= mini_2 <= mini_3 - part[part == 3] <- 2 - } + part <- learn_simple_partition(ordered, 0.05) + nclust <- length(unique(part)) + if (nclust == 3) { + mini_1 <- min(ordered(which(part == 1))) + mini_2 <- min(ordered(which(part == 2))) + mini_3 <- min(ordered(which(part == 3))) + if (mini_1 > 0.9 & mini_2 > 0.9) { + part[part == 2] <- 1 + part[part == 3] <- 2 + } else if (mini_1 > 0.9 & mini_3 > 0.9) { + part[part == 3] <- 1 + } else if (mini_2 > 0.9 & mini_3 > 0.9) { + # This is the one happening in practice, since the values are + # ordered, leading to mini_1 <= mini_2 <= mini_3 + part[part == 3] <- 2 } - return(part) -} \ No newline at end of file + } + return(part) +} diff --git a/R/learn_simple_partition.R b/R/learn_simple_partition.R index d2999e6..d85d4e3 100644 --- a/R/learn_simple_partition.R +++ b/R/learn_simple_partition.R @@ -1,61 +1,61 @@ #' @title Learn simple partition #' @param ordered_points ordered_points #' @param fii fii -#' @description Goes through all the ways to divide the points into two or +#' @description Goes through all the ways to divide the points into two or #' three groups. Chooses the partition which obtains highest logml. #' @export learn_simple_partition <- function(ordered_points, fii) { - npoints <- length(ordered_points) - - # One cluster: - val <- calculatePopLogml(ordered_points, fii) - bestValue <- val - best_type <- 'single' - - # Two clusters: - for (i in 1:(npoints - 1)) { - # The right endpoint of the first cluster. - val_1 <- calculatePopLogml(ordered_points[1:i], fii) - val_2 <- calculatePopLogml(ordered_points[(i + 1):length(ordered_points)], fii) - total <- val_1 + val_2 - if (total > bestValue) { - bestValue <- total - best_type <- 'double' - best_i <- i - } - } + npoints <- length(ordered_points) - # Three clusters: - for (i in 1:(npoints - 2)) { - for (j in (i + 1):(npoints - 1)) { - val_1 <- calculatePopLogml(ordered_points[1:i], fii) - val_2 <- calculatePopLogml(ordered_points[(i + 1):j], fii) - val_3 <- calculatePopLogml(ordered_points[(j + 1):length(ordered_points)], fii) - total <- val_1 + val_2 + val_3 - if (total > bestValue) { - bestValue <- total - best_type <- 'triple' - best_i <- i - best_j <- j - } - } - } - - part = matrix(0, npoints, 1) + # One cluster: + val <- calculatePopLogml(ordered_points, fii) + bestValue <- val + best_type <- "single" - switch(best_type, - 'single' = { - part <- matrix(1, npoints, 1) - }, - 'double' = { - part[1:best_i] <- 1 - part[(best_i + 1):length(part)] <- 2 - }, - 'triple' = { - part[1:best_i] <- 1 - part[(best_i + 1):best_j] <- 2 - part[(best_j + 1):length(part)] <- 3 - } - ) - return(part) -} \ No newline at end of file + # Two clusters: + for (i in 1:(npoints - 1)) { + # The right endpoint of the first cluster. + val_1 <- calculatePopLogml(ordered_points[1:i], fii) + val_2 <- calculatePopLogml(ordered_points[(i + 1):length(ordered_points)], fii) + total <- val_1 + val_2 + if (total > bestValue) { + bestValue <- total + best_type <- "double" + best_i <- i + } + } + + # Three clusters: + for (i in 1:(npoints - 2)) { + for (j in (i + 1):(npoints - 1)) { + val_1 <- calculatePopLogml(ordered_points[1:i], fii) + val_2 <- calculatePopLogml(ordered_points[(i + 1):j], fii) + val_3 <- calculatePopLogml(ordered_points[(j + 1):length(ordered_points)], fii) + total <- val_1 + val_2 + val_3 + if (total > bestValue) { + bestValue <- total + best_type <- "triple" + best_i <- i + best_j <- j + } + } + } + + part <- matrix(0, npoints, 1) + + switch(best_type, + "single" = { + part <- matrix(1, npoints, 1) + }, + "double" = { + part[1:best_i] <- 1 + part[(best_i + 1):length(part)] <- 2 + }, + "triple" = { + part[1:best_i] <- 1 + part[(best_i + 1):best_j] <- 2 + part[(best_j + 1):length(part)] <- 3 + } + ) + return(part) +} diff --git a/R/linkage.R b/R/linkage.R index ccaf9b5..fdd1e6d 100644 --- a/R/linkage.R +++ b/R/linkage.R @@ -10,84 +10,84 @@ #' @param method either 'si', 'av', 'co' 'ce' or 'wa' #' @note This is also a base Matlab function. The reason why the source code is also present here is unclear. #' @export -linkage <- function(Y, method = 'co') { - #TODO: compare R output with MATLAB output - k <- size(Y)[1] - n <- size(Y)[2] - m <- (1 + sqrt(1 + 8 * n)) / 2 - if ((k != 1) | (m != trunc(m))) { - stop( - 'The first input has to match the output', - 'of the PDIST function in size.' - ) - } - method <- tolower(substr(method, 1, 2)) # simplify the switch string. - monotonic <- 1 - Z <- zeros(m - 1, 3) # allocate the output matrix. - N <- zeros(1, 2 * m - 1) - N[1:m] <- 1 - n <- m; # since m is changing, we need to save m in n. - R <- 1:n - for (s in 1:(n - 1)) { - X <- as.matrix(as.vector(Y), ncol=1) +linkage <- function(Y, method = "co") { + # TODO: compare R output with MATLAB output + k <- size(Y)[1] + n <- size(Y)[2] + m <- (1 + sqrt(1 + 8 * n)) / 2 + if ((k != 1) | (m != trunc(m))) { + stop( + "The first input has to match the output", + "of the PDIST function in size." + ) + } + method <- tolower(substr(method, 1, 2)) # simplify the switch string. + monotonic <- 1 + Z <- zeros(m - 1, 3) # allocate the output matrix. + N <- zeros(1, 2 * m - 1) + N[1:m] <- 1 + n <- m # since m is changing, we need to save m in n. + R <- 1:n + for (s in 1:(n - 1)) { + X <- as.matrix(as.vector(Y), ncol = 1) - v <- min_MATLAB(X)$mins - k <- min_MATLAB(X)$idx + v <- min_MATLAB(X)$mins + k <- min_MATLAB(X)$idx - i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1))) - j <- k - (i - 1) * (m - i / 2) + i + i <- floor(m + 1 / 2 - sqrt(m^2 - m + 1 / 4 - 2 * (k - 1))) + j <- k - (i - 1) * (m - i / 2) + i - Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A + Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A - # Temp variables - if (i > 1) { - I1 <- 1:(i - 1) - } else { - I1 <- NULL - } - if (i + 1 <= j - 1) { - I2 <- (i + 1):(j - 1) - } else { - I2 <- NULL - } - if (j + 1 <= m) { - I3 <- (j + 1):m - } else { - I3 <- NULL - } - U <- c(I1, I2, I3) - I <- c( - I1 * (m - (I1 + 1) / 2) - m + i, - i * (m - (i + 1) / 2) - m + I2, - i * (m - (i + 1) / 2) - m + I3 - ) - J <- c( - I1 * (m - (I1 + 1) / 2) - m + j, - I2 * (m - (I2 + 1) / 2) - m + j, - j * (m - (j + 1) / 2) - m + I3 - ) - # Workaround in R for negative values in I and J - # I <- I[I > 0 & I <= length(Y)] - # J <- J[J > 0 & J <= length(Y)] - switch(method, - 'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage - 'av' = Y[I] <- Y[I] + Y[J], # average linkage - 'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage - 'ce' = { - K <- N[R[i]] + N[R[j]] # centroid linkage - Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] - - (N[R[i]] * N[R[j]] * v ^ 2) / K) / K - }, - 'wa' = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) * - Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]]) - ) - J <- c(J, i * (m - (i + 1) / 2) - m + j) - Y <- Y[-J] # no need for the cluster information about j - # update m, N, R - m <- m - 1 - N[n + s] <- N[R[i]] + N[R[j]] - R[i] <- n + s - R[j:(n - 1)] <- R[(j + 1):n] - } - return(Z) -} \ No newline at end of file + # Temp variables + if (i > 1) { + I1 <- 1:(i - 1) + } else { + I1 <- NULL + } + if (i + 1 <= j - 1) { + I2 <- (i + 1):(j - 1) + } else { + I2 <- NULL + } + if (j + 1 <= m) { + I3 <- (j + 1):m + } else { + I3 <- NULL + } + U <- c(I1, I2, I3) + I <- c( + I1 * (m - (I1 + 1) / 2) - m + i, + i * (m - (i + 1) / 2) - m + I2, + i * (m - (i + 1) / 2) - m + I3 + ) + J <- c( + I1 * (m - (I1 + 1) / 2) - m + j, + I2 * (m - (I2 + 1) / 2) - m + j, + j * (m - (j + 1) / 2) - m + I3 + ) + # Workaround in R for negative values in I and J + # I <- I[I > 0 & I <= length(Y)] + # J <- J[J > 0 & J <= length(Y)] + switch(method, + "si" = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage + "av" = Y[I] <- Y[I] + Y[J], # average linkage + "co" = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), # complete linkage + "ce" = { + K <- N[R[i]] + N[R[j]] # centroid linkage + Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] - + (N[R[i]] * N[R[j]] * v^2) / K) / K + }, + "wa" = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) * + Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]]) + ) + J <- c(J, i * (m - (i + 1) / 2) - m + j) + Y <- Y[-J] # no need for the cluster information about j + # update m, N, R + m <- m - 1 + N[n + s] <- N[R[i]] + N[R[j]] + R[i] <- n + s + R[j:(n - 1)] <- R[(j + 1):n] + } + return(Z) +} diff --git a/R/load_fasta.R b/R/load_fasta.R index 209a7c3..ce68820 100644 --- a/R/load_fasta.R +++ b/R/load_fasta.R @@ -9,29 +9,28 @@ #' @return A character matrix with filtered SNP data #' #' @examples -#' msa <- system.file("ext", "seqs.fa", package="rBAPS") +#' msa <- system.file("ext", "seqs.fa", package = "rBAPS") #' snp.matrix <- load_fasta(msa) -#' #' @author Gerry Tonkin-Hill, Waldir Leoncio #' @seealso rhierbaps::load_fasta #' @importFrom ape read.FASTA as.DNAbin #' @export -load_fasta <- function(msa, keep.singletons=FALSE) { +load_fasta <- function(msa, keep.singletons = FALSE) { - #Check inputs - if(class(msa)=="character"){ + # Check inputs + if (class(msa) == "character") { if (!file.exists(msa)) stop("Invalid msa or the file does not exist!") seqs <- ape::read.FASTA(msa) - } else if(class(msa)=="matrix"){ + } else if (class(msa) == "matrix") { seqs <- ape::as.DNAbin(msa) - } else if(class(msa)=="DNAbin"){ + } else if (class(msa) == "DNAbin") { seqs <- msa - } else{ + } else { stop("incorrect input for msa!") } if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.") - #Load sequences using ape. This does a lot of the checking for us. + # Load sequences using ape. This does a lot of the checking for us. seq_names <- labels(seqs) seqs <- as.character(as.matrix(seqs)) rownames(seqs) <- seq_names @@ -43,21 +42,21 @@ load_fasta <- function(msa, keep.singletons=FALSE) { warning("Characters not in acgtnACGTN- will be treated as missing (-)...") } - #Remove conserved columns - conserved <- colSums(t(t(seqs)==seqs[1,]))==nrow(seqs) + # Remove conserved columns + conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs) seqs <- seqs[, !conserved] - if(!keep.singletons){ - #remove singletons as they are uninformative in the algorithm - is_singleton <- apply(seqs, 2, function(x){ + if (!keep.singletons) { + # remove singletons as they are uninformative in the algorithm + is_singleton <- apply(seqs, 2, function(x) { tab <- table(x) - return(x %in% names(tab)[tab==1]) + return(x %in% names(tab)[tab == 1]) }) seqs[is_singleton] <- "-" } - #Convert gaps and unknowns to same symbol - seqs[seqs=="n"] <- "-" + # Convert gaps and unknowns to same symbol + seqs[seqs == "n"] <- "-" return(seqs) } diff --git a/R/log_gamma.R b/R/log_gamma.R index 48b1ad6..67b4351 100644 --- a/R/log_gamma.R +++ b/R/log_gamma.R @@ -1,8 +1,8 @@ #' @title Log Gamma -#' @description Equal to log(gamma(x)) with special handling of x < 0 for +#' @description Equal to log(gamma(x)) with special handling of x < 0 for #' Matlab compatibility #' @param x number #' @return log(gamma(x)) for x > 0, Inf otherwise log_gamma <- function(x) { - ifelse(x > 0, log(gamma(x)), Inf) + ifelse(x > 0, log(gamma(x)), Inf) } diff --git a/R/logml2String.R b/R/logml2String.R index 7e2c1dc..f1ce392 100644 --- a/R/logml2String.R +++ b/R/logml2String.R @@ -4,54 +4,54 @@ #' @return String version of logml #' @export logml2String <- function(logml) { - # Palauttaa logml:n string-esityksen. - mjono = ' ' + # Palauttaa logml:n string-esityksen. + mjono <- " " - if (logml == -Inf) { - mjono[7] <- '-' - return(mjono) - } + if (logml == -Inf) { + mjono[7] <- "-" + return(mjono) + } - if (abs(logml) < 10000) { - # Ei tarvita e-muotoa - mjono[7] <- palautaYks(abs(logml), -1) - mjono[6] <- '.' - mjono[5] <- palautaYks(abs(logml), 0) - mjono[4] <- palautaYks(abs(logml), 1) - mjono[3] <- palautaYks(abs(logml), 2) - mjono[2] <- palautaYks(abs(logml), 3) - pointer <- 2 - while (mjono[pointer] == '0' & pointer < 7) { - mjono[pointer] <- ' ' - pointer <- pointer + 1 - } - if (logml < 0) { - mjono[pointer - 1] <- '-' - } - } else { - suurinYks <- 4 - while (abs(logml) / (10 ^ (suurinYks + 1)) >= 1) { - suurinYks <- suurinYks + 1 - } - if (suurinYks < 10) { - mjono[7] <- as.character(suurinYks) - mjono[6] <- 'e' - mjono[5] <- palautaYks(abs(logml), suurinYks - 1) - mjono[4] <- '.' - mjono[3] <- palautaYks(abs(logml), suurinYks) - if (logml < 0) { - mjono[2] <- '-' - } - } else if (suurinYks >= 10) { - mjono[6:7] <- as.character(suurinYks) - mjono[5] <- 'e' - mjono[4] <- palautaYks(abs(logml), suurinYks - 1) - mjono[3] <- '.' - mjono[2] <- palautaYks(abs(logml), suurinYks) - if (logml < 0) { - mjono[1] <- '-' - } - } - } - return(mjono) -} \ No newline at end of file + if (abs(logml) < 10000) { + # Ei tarvita e-muotoa + mjono[7] <- palautaYks(abs(logml), -1) + mjono[6] <- "." + mjono[5] <- palautaYks(abs(logml), 0) + mjono[4] <- palautaYks(abs(logml), 1) + mjono[3] <- palautaYks(abs(logml), 2) + mjono[2] <- palautaYks(abs(logml), 3) + pointer <- 2 + while (mjono[pointer] == "0" & pointer < 7) { + mjono[pointer] <- " " + pointer <- pointer + 1 + } + if (logml < 0) { + mjono[pointer - 1] <- "-" + } + } else { + suurinYks <- 4 + while (abs(logml) / (10^(suurinYks + 1)) >= 1) { + suurinYks <- suurinYks + 1 + } + if (suurinYks < 10) { + mjono[7] <- as.character(suurinYks) + mjono[6] <- "e" + mjono[5] <- palautaYks(abs(logml), suurinYks - 1) + mjono[4] <- "." + mjono[3] <- palautaYks(abs(logml), suurinYks) + if (logml < 0) { + mjono[2] <- "-" + } + } else if (suurinYks >= 10) { + mjono[6:7] <- as.character(suurinYks) + mjono[5] <- "e" + mjono[4] <- palautaYks(abs(logml), suurinYks - 1) + mjono[3] <- "." + mjono[2] <- palautaYks(abs(logml), suurinYks) + if (logml < 0) { + mjono[1] <- "-" + } + } + } + return(mjono) +} diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R index 7276537..e85cd8a 100644 --- a/R/lueGenePopData.R +++ b/R/lueGenePopData.R @@ -3,59 +3,59 @@ #' @param tiedostonNimi Name of the file #' @return list containing data and popnames #' @export -lueGenePopData <- function (tiedostonNimi) { - fid <- readLines(tiedostonNimi) - line <- fid[1] # ensimmäinen rivi - line <- fid[2] # toinen rivi - count <- rivinSisaltamienMjonojenLkm(line) +lueGenePopData <- function(tiedostonNimi) { + fid <- readLines(tiedostonNimi) + line <- fid[1] # ensimmäinen rivi + line <- fid[2] # toinen rivi + count <- rivinSisaltamienMjonojenLkm(line) - line <- fid[3] - lokusRiveja <- 1 - while (testaaPop(line) == 0) { - lokusRiveja <- lokusRiveja + 1 # locus row - line <- fid[2 + lokusRiveja] - } + line <- fid[3] + lokusRiveja <- 1 + while (testaaPop(line) == 0) { + lokusRiveja <- lokusRiveja + 1 # locus row + line <- fid[2 + lokusRiveja] + } - if (lokusRiveja > 1) { - nloci <- lokusRiveja - } else { - nloci <- count - } + if (lokusRiveja > 1) { + nloci <- lokusRiveja + } else { + nloci <- count + } - popnames <- cell(10, 2) - data <- zeros(100, nloci + 1) - nimienLkm <- 0 - ninds <- 0 - poimiNimi <- 1 - digitFormat <- -1 - while (lokusRiveja < length(fid) - 2) { - lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along - line <- fid[lokusRiveja + 2] - if (poimiNimi == 1) { - # Edellinen rivi oli 'pop' (previous line was pop) - nimienLkm <- nimienLkm + 1 - ninds <- ninds + 1 - if (nimienLkm > size(popnames, 1)) { - popnames <- rbind(popnames, cell(10, 2)) - } - nimi <- lueNimi(line) - if (digitFormat == -1) { - digitFormat <- selvitaDigitFormat(line) - divider <- 10 ^ digitFormat - } - popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? - popnames[nimienLkm, 2] <- ninds - poimiNimi <- 0 - data <- addAlleles(data, ninds, line, divider) - } else if (testaaPop(line)) { - poimiNimi <- 1 - } else if (!is.na(line)) { - ninds <- ninds + 1 - data <- addAlleles(data, ninds, line, divider) - } - } + popnames <- cell(10, 2) + data <- zeros(100, nloci + 1) + nimienLkm <- 0 + ninds <- 0 + poimiNimi <- 1 + digitFormat <- -1 + while (lokusRiveja < length(fid) - 2) { + lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along + line <- fid[lokusRiveja + 2] + if (poimiNimi == 1) { + # Edellinen rivi oli 'pop' (previous line was pop) + nimienLkm <- nimienLkm + 1 + ninds <- ninds + 1 + if (nimienLkm > size(popnames, 1)) { + popnames <- rbind(popnames, cell(10, 2)) + } + nimi <- lueNimi(line) + if (digitFormat == -1) { + digitFormat <- selvitaDigitFormat(line) + divider <- 10^digitFormat + } + popnames[nimienLkm, 1] <- nimi # N�in se on greedyMix:iss�kin?!? + popnames[nimienLkm, 2] <- ninds + poimiNimi <- 0 + data <- addAlleles(data, ninds, line, divider) + } else if (testaaPop(line)) { + poimiNimi <- 1 + } else if (!is.na(line)) { + ninds <- ninds + 1 + data <- addAlleles(data, ninds, line, divider) + } + } - data <- data[1:(ninds * 2), ] - popnames <- popnames[seq_len(nimienLkm), ] - return(list(data = data, popnames = popnames)) -} \ No newline at end of file + data <- data[1:(ninds * 2), ] + popnames <- popnames[seq_len(nimienLkm), ] + return(list(data = data, popnames = popnames)) +} diff --git a/R/lueNimi.R b/R/lueNimi.R index 377e791..0154ddc 100644 --- a/R/lueNimi.R +++ b/R/lueNimi.R @@ -4,20 +4,20 @@ #' @return nimi #' @export lueNimi <- function(line) { - # ========================================================================== - # Validation - # ========================================================================== - if (!grepl(",", line)) { - stop("There are no commas in this line") - } - # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. - n <- 1 - merkki <- substring(line, n, n) - nimi <- '' - while (merkki != ',') { - nimi <- c(nimi, merkki) - n <- n + 1 - merkki <- substring(line, n, n) - } - return(paste(nimi, collapse="")) -} \ No newline at end of file + # ========================================================================== + # Validation + # ========================================================================== + if (!grepl(",", line)) { + stop("There are no commas in this line") + } + # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. + n <- 1 + merkki <- substring(line, n, n) + nimi <- "" + while (merkki != ",") { + nimi <- c(nimi, merkki) + n <- n + 1 + merkki <- substring(line, n, n) + } + return(paste(nimi, collapse = "")) +} diff --git a/R/matlab2r.R b/R/matlab2r.R index 092897e..8e1db09 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -19,142 +19,140 @@ #' It is also advised to do a dry-run with `output = "clean"` and only switching #' to `output = "save"` when you are confident that no important code will be #' lost (for shorter functions, a careful visual inspection should suffice). -matlab2r <- function( - filename, output = "diff", improve_formatting=TRUE, change_assignment=TRUE, - append=FALSE -) { - # TODO: this function is too long! Split into subfunctions - # (say, by rule and/or section) - # ======================================================== # - # Verification # - # ======================================================== # - if (!file.exists(filename)) stop("File not found") +matlab2r <- function(filename, output = "diff", improve_formatting = TRUE, change_assignment = TRUE, + append = FALSE) { + # TODO: this function is too long! Split into subfunctions + # (say, by rule and/or section) + # ======================================================== # + # Verification # + # ======================================================== # + if (!file.exists(filename)) stop("File not found") - # ======================================================== # - # Reading file into R # - # ======================================================== # - txt <- readLines(filename) - original <- txt + # ======================================================== # + # Reading file into R # + # ======================================================== # + txt <- readLines(filename) + original <- txt - # ======================================================== # - # Replacing text # - # ======================================================== # + # ======================================================== # + # Replacing text # + # ======================================================== # - # Uncommenting ------------------------------------------- # - txt <- gsub("^#\\s?(.+)", "\\1", txt) + # Uncommenting ------------------------------------------- # + txt <- gsub("^#\\s?(.+)", "\\1", txt) - # Output variable ---------------------------------------- # - out <- gsub( - pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)", - replacement = "\\1", - x = txt[1] - ) # TODO: improve by detecting listed outputs - if (substring(out, 1, 1) == "[") { - out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]] - out <- out[which(out != "")] - out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x])) - out <- paste0("list(", paste(out, collapse=", "), ")") - } + # Output variable ---------------------------------------- # + out <- gsub( + pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)", + replacement = "\\1", + x = txt[1] + ) # TODO: improve by detecting listed outputs + if (substring(out, 1, 1) == "[") { + out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]] + out <- out[which(out != "")] + out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x])) + out <- paste0("list(", paste(out, collapse = ", "), ")") + } - # Function header ---------------------------------------- # - txt <- gsub( - pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", - replacement = "\\2 <- function(\\3) {", - x = txt - ) - txt <- gsub( - pattern = "function (.+)\\((.+)\\)", - replacement = "\\1 <- function(\\2) {", - x = txt - ) + # Function header ---------------------------------------- # + txt <- gsub( + pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + replacement = "\\2 <- function(\\3) {", + x = txt + ) + txt <- gsub( + pattern = "function (.+)\\((.+)\\)", + replacement = "\\1 <- function(\\2) {", + x = txt + ) - # Function body ------------------------------------------ # - txt <- gsub("(.+)\\.\\.\\.", "\\1", txt) - txt <- gsub(";", "", txt) + # Function body ------------------------------------------ # + txt <- gsub("(.+)\\.\\.\\.", "\\1", txt) + txt <- gsub(";", "", txt) - # Loops and if-statements - txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) - txt <- gsub("end$", "}", txt) - txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { - txt <- gsub("else$", "} else {", txt) - txt <- gsub("elseif", "} else if", txt) - txt <- gsub("while (.+)", "while \\1 {", txt) + # Loops and if-statements + txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) + txt <- gsub("end$", "}", txt) + txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { + txt <- gsub("else$", "} else {", txt) + txt <- gsub("elseif", "} else if", txt) + txt <- gsub("while (.+)", "while \\1 {", txt) - # MATLAB-equivalent functions in R - txt <- gsub("gamma_ln", "log_gamma", txt) - txt <- gsub("nchoosek", "choose", txt) - txt <- gsub("isempty", "is.null", txt) - # txt <- gsub("(.+)\\'", "t(\\1)", txt) + # MATLAB-equivalent functions in R + txt <- gsub("gamma_ln", "log_gamma", txt) + txt <- gsub("nchoosek", "choose", txt) + txt <- gsub("isempty", "is.null", txt) + # txt <- gsub("(.+)\\'", "t(\\1)", txt) - # Subsets ------------------------------------------------ # - ass_op <- ifelse(change_assignment, "<-", "=") - txt <- gsub( - pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)", - replacement = paste0("\\1[\\2] ", ass_op, "\\3"), - x = txt - ) - txt <- gsub("\\(:\\)", "[, ]", txt) - txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt) + # Subsets ------------------------------------------------ # + ass_op <- ifelse(change_assignment, "<-", "=") + txt <- gsub( + pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)", + replacement = paste0("\\1[\\2] ", ass_op, "\\3"), + x = txt + ) + txt <- gsub("\\(:\\)", "[, ]", txt) + txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt) - # Formatting --------------------------------------------- # - if (improve_formatting) { - txt <- gsub("(.),(\\S)", "\\1, \\2", txt) - # Math operators - txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) - txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) - txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) - txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt) - # Logic operators - txt <- gsub("~", "!", txt) - txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) - txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt) - txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt) - # Assignment - txt <- gsub( - pattern = "(\\w)(\\s?)=(\\s?)(\\w)", - replacement = paste0("\\1 ", ass_op, " \\4"), - x = txt - ) - # txt <- gsub( - # pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)", - # replacement = paste0("\\1 ", ass_op, "\\5"), - # x = txt - # ) - txt <- gsub("%(\\s?)(\\w)", "# \\2", txt) - } + # Formatting --------------------------------------------- # + if (improve_formatting) { + txt <- gsub("(.),(\\S)", "\\1, \\2", txt) + # Math operators + txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) + txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) + txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) + txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt) + # Logic operators + txt <- gsub("~", "!", txt) + txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) + txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt) + txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt) + # Assignment + txt <- gsub( + pattern = "(\\w)(\\s?)=(\\s?)(\\w)", + replacement = paste0("\\1 ", ass_op, " \\4"), + x = txt + ) + # txt <- gsub( + # pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)", + # replacement = paste0("\\1 ", ass_op, "\\5"), + # x = txt + # ) + txt <- gsub("%(\\s?)(\\w)", "# \\2", txt) + } - # Adding output and end-of-file brace -------------------- # - txt <- append(txt, paste0("\treturn(", out, ")\n}")) + # Adding output and end-of-file brace -------------------- # + txt <- append(txt, paste0("\treturn(", out, ")\n}")) - # Returning converted code ------------------------------- # - if (output == "asis") { - return(txt) - } else if (output == "clean") { - return(cat(txt, sep="\n")) - } else if (output == "save") { - return( - write.table( - x = txt, - file = filename, - quote = FALSE, - row.names = FALSE, - col.names = FALSE, - append = append - ) - ) - } else if (output == "diff") { - diff_text <- vector(mode="character", length=(2 * length(original) + 1)) - for (i in seq_along(txt)) { - new_i <- (2 * i) + i - 2 - diff_text[new_i] <- paste( - "-----------------------", "line", i, "-----------------------" - ) - diff_text[new_i + 1] <- original[i] - diff_text[new_i + 2] <- txt[i] - } - message("Displaying line number, original content and modified content") - return(cat(diff_text, sep="\n")) - } else { - stop ("Invalid output argument") - } + # Returning converted code ------------------------------- # + if (output == "asis") { + return(txt) + } else if (output == "clean") { + return(cat(txt, sep = "\n")) + } else if (output == "save") { + return( + write.table( + x = txt, + file = filename, + quote = FALSE, + row.names = FALSE, + col.names = FALSE, + append = append + ) + ) + } else if (output == "diff") { + diff_text <- vector(mode = "character", length = (2 * length(original) + 1)) + for (i in seq_along(txt)) { + new_i <- (2 * i) + i - 2 + diff_text[new_i] <- paste( + "-----------------------", "line", i, "-----------------------" + ) + diff_text[new_i + 1] <- original[i] + diff_text[new_i + 2] <- txt[i] + } + message("Displaying line number, original content and modified content") + return(cat(diff_text, sep = "\n")) + } else { + stop("Invalid output argument") + } } diff --git a/R/min_max_MATLAB.R b/R/min_max_MATLAB.R index 83162d2..2d93a15 100644 --- a/R/min_max_MATLAB.R +++ b/R/min_max_MATLAB.R @@ -5,14 +5,14 @@ #' @return Either a list or a vector #' @author Waldir Leoncio min_MATLAB <- function(X, indices = TRUE) { - if (!is(X, "matrix")) X <- as.matrix(X) - mins <- apply(X, 2, min) - idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) - if (indices) { - return(list(mins = mins, idx = idx)) - } else { - return(mins) - } + if (!is(X, "matrix")) X <- as.matrix(X) + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } } #' @title Maximum (MATLAB version) @@ -22,12 +22,12 @@ min_MATLAB <- function(X, indices = TRUE) { #' @return Either a list or a vector #' @author Waldir Leoncio max_MATLAB <- function(X, indices = TRUE) { - if (!is(X, "matrix")) X <- as.matrix(X) - maxs <- apply(X, 2, max) - idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) - if (indices) { - return(list(maxs = maxs, idx = idx)) - } else { - return(maxs) - } -} \ No newline at end of file + if (!is(X, "matrix")) X <- as.matrix(X) + maxs <- apply(X, 2, max) + idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) + if (indices) { + return(list(maxs = maxs, idx = idx)) + } else { + return(maxs) + } +} diff --git a/R/nargin.R b/R/nargin.R index 6ff20cc..0d48ab2 100644 --- a/R/nargin.R +++ b/R/nargin.R @@ -5,6 +5,6 @@ #' @note This function only makes sense inside another function #' @references https://stackoverflow.com/q/64422780/1169233 nargin <- function() { - if(sys.nframe() < 2) stop("must be called from inside a function") + if (sys.nframe() < 2) stop("must be called from inside a function") length(as.list(sys.call(-1))) - 1 } diff --git a/R/newGetDistances.R b/R/newGetDistances.R index b386afa..bd07623 100644 --- a/R/newGetDistances.R +++ b/R/newGetDistances.R @@ -1,62 +1,62 @@ newGetDistances <- function(data, rowsFromInd) { - ninds <- max(data[, ncol(data)]) - nloci <- size(data, 2) - 1 - riviLkm <- choose(ninds, 2) + ninds <- max(data[, ncol(data)]) + nloci <- size(data, 2) - 1 + riviLkm <- choose(ninds, 2) - empties <- find(data < 0) - data[empties] <- 0 - data <- apply(data, 2, as.numeric) # max(noalle) oltava <256 + empties <- find(data < 0) + data[empties] <- 0 + data <- apply(data, 2, as.numeric) # max(noalle) oltava <256 - pariTaulu <- zeros(riviLkm, 2) - aPointer <- 1 - for (a in 1:(ninds - 1)) { - pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a) - pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a - pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds) - aPointer <- aPointer + ninds - a - } + pariTaulu <- zeros(riviLkm, 2) + aPointer <- 1 + for (a in 1:(ninds - 1)) { + pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a) + pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a + pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds) + aPointer <- aPointer + ninds - a + } - eka <- pariTaulu[, ones(1, rowsFromInd)] - eka <- eka * rowsFromInd - miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1)) - eka <- eka - miinus + eka <- pariTaulu[, ones(1, rowsFromInd)] + eka <- eka * rowsFromInd + miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1)) + eka <- eka - miinus - toka <- pariTaulu[, ones(1, rowsFromInd) * 2] - toka <- toka * rowsFromInd - toka <- toka - miinus + toka <- pariTaulu[, ones(1, rowsFromInd) * 2] + toka <- toka * rowsFromInd + toka <- toka - miinus - summa <- zeros(riviLkm, 1) - vertailuja <- zeros(riviLkm, 1) + summa <- zeros(riviLkm, 1) + vertailuja <- zeros(riviLkm, 1) - rm(pariTaulu, miinus) + rm(pariTaulu, miinus) - x <- zeros(size(eka)) - x <- apply(x, 2, as.integer) - y <- zeros(size(toka)) - y <- apply(y, 2, as.integer) + x <- zeros(size(eka)) + x <- apply(x, 2, as.integer) + y <- zeros(size(toka)) + y <- apply(y, 2, as.integer) - for (j in 1:nloci) { - for (k in 1:rowsFromInd) { - x[, k] <- data[eka[, k], j] - y[, k] <- data[toka[, k], j] - } - for (a in 1:rowsFromInd) { - for (b in 1:rowsFromInd) { - vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0) - vertailuja <- vertailuja + vertailutNyt - lisays <- (x[, a] != y[, b] & vertailutNyt) - summa <- summa + as.double(lisays) - } - } - } + for (j in 1:nloci) { + for (k in 1:rowsFromInd) { + x[, k] <- data[eka[, k], j] + y[, k] <- data[toka[, k], j] + } + for (a in 1:rowsFromInd) { + for (b in 1:rowsFromInd) { + vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0) + vertailuja <- vertailuja + vertailutNyt + lisays <- (x[, a] != y[, b] & vertailutNyt) + summa <- summa + as.double(lisays) + } + } + } - rm(x, y, vertailutNyt) - nollat <- find(vertailuja == 0) - dist <- zeros(length(vertailuja), 1) - dist[nollat] <- 1 - muut <- find(vertailuja > 0) - dist[muut] <- summa[muut] / vertailuja[muut] - rm(summa, vertailuja) - Z <- linkage(t(dist)) - return(list(Z = Z, dist = dist)) -} \ No newline at end of file + rm(x, y, vertailutNyt) + nollat <- find(vertailuja == 0) + dist <- zeros(length(vertailuja), 1) + dist[nollat] <- 1 + muut <- find(vertailuja > 0) + dist[muut] <- summa[muut] / vertailuja[muut] + rm(summa, vertailuja) + Z <- linkage(t(dist)) + return(list(Z = Z, dist = dist)) +} diff --git a/R/noIndex.R b/R/noIndex.R index 1fb52b0..3159238 100644 --- a/R/noIndex.R +++ b/R/noIndex.R @@ -6,16 +6,16 @@ #' @param data data #' @param noalle noalle #' @export -noIndex <- function (data, noalle) { - limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle)) - if (size(data, 2) == limit + 1) { - if (is(data, "matrix")) { - puredata <- data[, -ncol(data)] # remove the index column - } else { - puredata <- data[-length(data)] - } +noIndex <- function(data, noalle) { + limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle)) + if (size(data, 2) == limit + 1) { + if (is(data, "matrix")) { + puredata <- data[, -ncol(data)] # remove the index column } else { - puredata <- data + puredata <- data[-length(data)] } - return(puredata) -} \ No newline at end of file + } else { + puredata <- data + } + return(puredata) +} diff --git a/R/omaRound.R b/R/omaRound.R index 2c67f1d..0535bb5 100644 --- a/R/omaRound.R +++ b/R/omaRound.R @@ -1,7 +1,7 @@ omaRound <- function(num) { - # Py�rist�� luvun num 1 desimaalin tarkkuuteen - num <- num * 10 - num <- round(num) - num2 <- num / 10 - return(num2) -} \ No newline at end of file + # Py�rist�� luvun num 1 desimaalin tarkkuuteen + num <- num * 10 + num <- round(num) + num2 <- num / 10 + return(num2) +} diff --git a/R/ownNum2Str.R b/R/ownNum2Str.R index e063df3..df7ac1e 100644 --- a/R/ownNum2Str.R +++ b/R/ownNum2Str.R @@ -4,37 +4,37 @@ #' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors. #' @export ownNum2Str <- function(number) { - absolute <- abs(number) - if (absolute < 1000) { - str <- as.character(number) - } else if (absolute < 10000000) { - first_three <- number %% 1000 - next_four <- (number - first_three) /1000 - first_three <- abs(first_three) - if (first_three < 10) { - first_three <- paste0('00', as.character(first_three)) - } else if (first_three < 100) { - first_three <- paste0('0', as.character(first_three)) - } else { - first_three <- as.character(first_three) - } - str <- paste0(as.character(next_four), first_three) - } else if (absolute < 100000000) { - first_four <- number %% 10000 - next_four <- (number - first_four) / 10000 - first_four <- abs(first_four) - if (first_four < 10) { - first_four <- paste0('000', as.character(first_four)) - } else if (first_four < 100) { - first_four <- paste0('00', as.character(first_four)) - } else if (first_four < 1000) { - first_four <- paste0('0', as.character(first_four)) - } else { - first_four <- as.character(first_four) - } - str <- paste0(as.character(next_four), first_four) + absolute <- abs(number) + if (absolute < 1000) { + str <- as.character(number) + } else if (absolute < 10000000) { + first_three <- number %% 1000 + next_four <- (number - first_three) / 1000 + first_three <- abs(first_three) + if (first_three < 10) { + first_three <- paste0("00", as.character(first_three)) + } else if (first_three < 100) { + first_three <- paste0("0", as.character(first_three)) } else { - str <- as.character(number) + first_three <- as.character(first_three) } - return(str) + str <- paste0(as.character(next_four), first_three) + } else if (absolute < 100000000) { + first_four <- number %% 10000 + next_four <- (number - first_four) / 10000 + first_four <- abs(first_four) + if (first_four < 10) { + first_four <- paste0("000", as.character(first_four)) + } else if (first_four < 100) { + first_four <- paste0("00", as.character(first_four)) + } else if (first_four < 1000) { + first_four <- paste0("0", as.character(first_four)) + } else { + first_four <- as.character(first_four) + } + str <- paste0(as.character(next_four), first_four) + } else { + str <- as.character(number) + } + return(str) } diff --git a/R/palautaYks.R b/R/palautaYks.R index e64cde7..0b9db2d 100644 --- a/R/palautaYks.R +++ b/R/palautaYks.R @@ -1,17 +1,17 @@ palautaYks <- function(num, yks) { - # palauttaa luvun num 10^yks termin kertoimen - # string:in? - # yks t�ytyy olla kokonaisluku, joka on - # v�hint��n -1:n suuruinen. Pienemmill? - # luvuilla tapahtuu jokin py�ristysvirhe. + # palauttaa luvun num 10^yks termin kertoimen + # string:in? + # yks t�ytyy olla kokonaisluku, joka on + # v�hint��n -1:n suuruinen. Pienemmill? + # luvuilla tapahtuu jokin py�ristysvirhe. - if (yks >= 0) { - digit <- num %% 10 ^ (yks + 1) - digit <- floor(digit / (10 ^ yks)) - } else { - digit <- num * 10 - digit <- floor(digit %% 10) - } - digit <- as.character(digit) - return(digit) -} \ No newline at end of file + if (yks >= 0) { + digit <- num %% 10^(yks + 1) + digit <- floor(digit / (10^yks)) + } else { + digit <- num * 10 + digit <- floor(digit %% 10) + } + digit <- as.character(digit) + return(digit) +} diff --git a/R/poistaLiianPienet.R b/R/poistaLiianPienet.R index 22ecfac..a07c700 100644 --- a/R/poistaLiianPienet.R +++ b/R/poistaLiianPienet.R @@ -6,41 +6,41 @@ #' @param rowsFromInd rowsFromInd #' @param alaraja alaraja #' @export -poistaLiianPienet <- function (npops, rowsFromInd, alaraja) { - popSize <- zeros(1,npops) - if (npops > 0) { - for (i in 1:npops) { - popSize[i] <- length(which(PARTITION == i)) - } +poistaLiianPienet <- function(npops, rowsFromInd, alaraja) { + popSize <- zeros(1, npops) + if (npops > 0) { + for (i in 1:npops) { + popSize[i] <- length(which(PARTITION == i)) } - miniPops <- which(popSize < alaraja) - - if (length(miniPops) == 0) { - return(npops) - } - - outliers <- matrix(NA, 0, 0) - for (pop in miniPops) { - inds <- which(PARTITION == pop) - cat('Removed individuals: ') - cat(as.character(inds)) - outliers = matrix(c(outliers, inds), ncol=1) - } - - ninds <- length(PARTITION) - PARTITION[outliers] <- 0 - korit <- unique(PARTITION(which(PARTITION > 0))) - for (n in 1:length(korit)) { - kori <- korit[n] - yksilot <- which(PARTITION == kori) - PARTITION[yksilot] == n - } - - # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <- - COUNTS[, , miniPops] <- NA - SUMCOUNTS[miniPops, ] <- NA - - npops <- npops - length(miniPops) + } + miniPops <- which(popSize < alaraja) + if (length(miniPops) == 0) { return(npops) -} \ No newline at end of file + } + + outliers <- matrix(NA, 0, 0) + for (pop in miniPops) { + inds <- which(PARTITION == pop) + cat("Removed individuals: ") + cat(as.character(inds)) + outliers <- matrix(c(outliers, inds), ncol = 1) + } + + ninds <- length(PARTITION) + PARTITION[outliers] <- 0 + korit <- unique(PARTITION(which(PARTITION > 0))) + for (n in 1:length(korit)) { + kori <- korit[n] + yksilot <- which(PARTITION == kori) + PARTITION[yksilot] == n + } + + # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <- + COUNTS[, , miniPops] <- NA + SUMCOUNTS[miniPops, ] <- NA + + npops <- npops - length(miniPops) + + return(npops) +} diff --git a/R/poistaTyhjatPopulaatiot.R b/R/poistaTyhjatPopulaatiot.R index badef3e..c48a139 100644 --- a/R/poistaTyhjatPopulaatiot.R +++ b/R/poistaTyhjatPopulaatiot.R @@ -1,15 +1,15 @@ poistaTyhjatPopulaatiot <- function(npops) { - # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja - # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. - notEmpty <- find(any(SUMCOUNTS, 2)) - COUNTS <- COUNTS[, , notEmpty] - SUMCOUNTS <- SUMCOUNTS[notEmpty, ] - LOGDIFF <- LOGDIFF[, notEmpty] + # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja + # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. + notEmpty <- find(any(SUMCOUNTS, 2)) + COUNTS <- COUNTS[, , notEmpty] + SUMCOUNTS <- SUMCOUNTS[notEmpty, ] + LOGDIFF <- LOGDIFF[, notEmpty] - for (n in 1:length(notEmpty)) { - apu <- find(PARTITION == notEmpty(n)) - PARTITION[apu] <- n - } - npops <- length(notEmpty) - return(npops) -} \ No newline at end of file + for (n in 1:length(notEmpty)) { + apu <- find(PARTITION == notEmpty(n)) + PARTITION[apu] <- n + } + npops <- length(notEmpty) + return(npops) +} diff --git a/R/proportion2str.R b/R/proportion2str.R index 2dfe9df..bac1ed4 100644 --- a/R/proportion2str.R +++ b/R/proportion2str.R @@ -1,21 +1,21 @@ #' @title Convert proportion to string #' @param prob belongs to [0.00, 0.01, ... ,1] #' @return a 4-mark presentation of proportion -#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The +#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The #' Matlab equivalent rounds it to 9. #' @export -proportion2str <- function (prob) { - if (abs(prob) < 1e-3) { - str <- '0.00' - } else if (abs(prob - 1) < 1e-3) { - str <- '1.00' +proportion2str <- function(prob) { + if (abs(prob) < 1e-3) { + str <- "0.00" + } else if (abs(prob - 1) < 1e-3) { + str <- "1.00" + } else { + prob <- round(100 * prob) + if (prob < 10) { + str <- paste0("0.0", as.character(prob)) } else { - prob <- round(100 * prob) - if (prob < 10) { - str <- paste0('0.0', as.character(prob)) - } else { - str <- paste0('0.', as.character(prob)) - } + str <- paste0("0.", as.character(prob)) } - return(str) -} \ No newline at end of file + } + return(str) +} diff --git a/R/questdlg.R b/R/questdlg.R index 826f6b8..01a5beb 100644 --- a/R/questdlg.R +++ b/R/questdlg.R @@ -7,35 +7,33 @@ #' @description This function aims to loosely mimic the behavior of the #' questdlg function on Matlab #' @export -questdlg <- function( - quest, - dlgtitle = "", - btn = c('y', 'n'), - defbtn = 'n', - accepted_ans = c('y', 'yes', 'n', 'no') -) { - message(dlgtitle) - # ========================================================================== - # Replacing the default option with a capitalized version on btn - # ========================================================================== - btn[match(tolower(defbtn), tolower(btn))] <- toupper(defbtn) - # ========================================================================== - # Creating prompt - # ========================================================================== - option_char <- paste0(' [', paste(btn, collapse = ', '), ']') - answer <- readline(paste0(quest, option_char, ": ")) - # ========================================================================== - # Processing answer - # ========================================================================== - answer <- tolower(answer) - if (!(answer %in% tolower(c(btn, accepted_ans)))) { - if (answer != "") { - warning( - "'", answer, "' is not a valid alternative. Defaulting to ", - defbtn - ) - } - answer <- defbtn - } - return(answer) -} \ No newline at end of file +questdlg <- function(quest, + dlgtitle = "", + btn = c("y", "n"), + defbtn = "n", + accepted_ans = c("y", "yes", "n", "no")) { + message(dlgtitle) + # ========================================================================== + # Replacing the default option with a capitalized version on btn + # ========================================================================== + btn[match(tolower(defbtn), tolower(btn))] <- toupper(defbtn) + # ========================================================================== + # Creating prompt + # ========================================================================== + option_char <- paste0(" [", paste(btn, collapse = ", "), "]") + answer <- readline(paste0(quest, option_char, ": ")) + # ========================================================================== + # Processing answer + # ========================================================================== + answer <- tolower(answer) + if (!(answer %in% tolower(c(btn, accepted_ans)))) { + if (answer != "") { + warning( + "'", answer, "' is not a valid alternative. Defaulting to ", + defbtn + ) + } + answer <- defbtn + } + return(answer) +} diff --git a/R/rand.R b/R/rand.R index 917f35a..394447c 100644 --- a/R/rand.R +++ b/R/rand.R @@ -6,5 +6,5 @@ #' @importFrom stats runif #' @export rand <- function(r = 1, c = 1) { - matrix(runif(r * c), r, c) -} \ No newline at end of file + matrix(runif(r * c), r, c) +} diff --git a/R/rand_disc.R b/R/rand_disc.R index e75f1c4..dfd46d7 100644 --- a/R/rand_disc.R +++ b/R/rand_disc.R @@ -1,7 +1,7 @@ rand_disc <- function(CDF) { - # %returns an index of a value from a discrete distribution using inversion method - slump <- rand - har <- find(CDF > slump) - svar <- har(1) - return(svar) + # %returns an index of a value from a discrete distribution using inversion method + slump <- rand + har <- find(CDF > slump) + svar <- har(1) + return(svar) } diff --git a/R/randdir.R b/R/randdir.R index 4164e9b..19eceae 100644 --- a/R/randdir.R +++ b/R/randdir.R @@ -5,11 +5,11 @@ #' @param nc number of rows on output #' @seealso randga #' @export -randdir <- function (counts, nc) { - svar <- zeros(nc, 1) - for (i in 1:nc) { - svar[i, 1] = randga(counts[i, 1], 1) - } - svar <- svar / sum(svar) - return(svar) -} \ No newline at end of file +randdir <- function(counts, nc) { + svar <- zeros(nc, 1) + for (i in 1:nc) { + svar[i, 1] <- randga(counts[i, 1], 1) + } + svar <- svar / sum(svar) + return(svar) +} diff --git a/R/randga.R b/R/randga.R index d3d9473..cd098bf 100644 --- a/R/randga.R +++ b/R/randga.R @@ -4,55 +4,55 @@ #' @param b rate #' @return One realization of Gamma(a, b) #' @details The generated random variable has mean a / b. It will be positively-skewed for small values, but converges to a symmetric distribution for very large numbers of a and b. -randga <- function (a, b) { - flag <- 0 - if (a > 1) { - c1 <- a - 1 - c2 <- (a - (1 / (6 * a))) / c1 - c3 <- 2 / c1 - c4 <- c3 + 2 - c5 <- 1 / sqrt(a) - U1 <- 1 - while (flag == 0) { - if (a <= 2.5) { - U1 <- rand() - U2 <- rand() - } else { - while (!(U1 > 0 & U1 < 1)) { - U1 <- rand() - U2 <- rand() - U1 <- U2 + c5 * (1 - 1.86 * U1) - } - } - W <- c2 * U2 / U1 - if (c3 * U1 + W + (1 / W) <= c4) { - flag <- 1 - g <- c1 * W / b - } else if (c3 * log(U1) - log(W) + W < 1) { - flag <- 1 - g <- c1 * W / b - } else { - U1 <- -1 - } +randga <- function(a, b) { + flag <- 0 + if (a > 1) { + c1 <- a - 1 + c2 <- (a - (1 / (6 * a))) / c1 + c3 <- 2 / c1 + c4 <- c3 + 2 + c5 <- 1 / sqrt(a) + U1 <- 1 + while (flag == 0) { + if (a <= 2.5) { + U1 <- rand() + U2 <- rand() + } else { + while (!(U1 > 0 & U1 < 1)) { + U1 <- rand() + U2 <- rand() + U1 <- U2 + c5 * (1 - 1.86 * U1) } - } else if (a == 1) { - g <- sum(-(1 / b) * log(rand(a, 1))) - } else { - while (flag == 0) { - U <- rand(2, 1) - if (U[1] > exp(1) / (a + exp(1))) { - g <- -log(((a + exp(1)) * (1 - U[1])) / (a * exp(1))) - if (U[2] <= g ^ (a - 1)) { - flag <- 1 - } - } else { - g <- ((a + exp(1)) * U[1] / ((exp(1)) ^ (1 / a))) - if (U[2] <= exp(-g)) { - flag <- 1 - } - } - } - g <- g / b + } + W <- c2 * U2 / U1 + if (c3 * U1 + W + (1 / W) <= c4) { + flag <- 1 + g <- c1 * W / b + } else if (c3 * log(U1) - log(W) + W < 1) { + flag <- 1 + g <- c1 * W / b + } else { + U1 <- -1 + } } - return(g) -} \ No newline at end of file + } else if (a == 1) { + g <- sum(-(1 / b) * log(rand(a, 1))) + } else { + while (flag == 0) { + U <- rand(2, 1) + if (U[1] > exp(1) / (a + exp(1))) { + g <- -log(((a + exp(1)) * (1 - U[1])) / (a * exp(1))) + if (U[2] <= g^(a - 1)) { + flag <- 1 + } + } else { + g <- ((a + exp(1)) * U[1] / ((exp(1))^(1 / a))) + if (U[2] <= exp(-g)) { + flag <- 1 + } + } + } + g <- g / b + } + return(g) +} diff --git a/R/repmat.R b/R/repmat.R index 2845b7e..87ee5f9 100644 --- a/R/repmat.R +++ b/R/repmat.R @@ -10,29 +10,29 @@ #' #' It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset. #' @export -repmat <- function (mx, n) { - # Validation - if (length(n) > 3) warning("Extra dimensions of n ignored") - if (!is(mx, "matrix")) mx <- t(as.matrix(mx)) - if (length(n) == 1) n <- rep(n, 2) - if (any(n == 0)) { - n_zero <- which(n == 0) - out_dim <- dim(mx) - out_dim[n_zero] <- 0 - return(array(dim=out_dim)) - } +repmat <- function(mx, n) { + # Validation + if (length(n) > 3) warning("Extra dimensions of n ignored") + if (!is(mx, "matrix")) mx <- t(as.matrix(mx)) + if (length(n) == 1) n <- rep(n, 2) + if (any(n == 0)) { + n_zero <- which(n == 0) + out_dim <- dim(mx) + out_dim[n_zero] <- 0 + return(array(dim = out_dim)) + } - # Replicating cols - out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx)) + # Replicating cols + out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx)) - # Replicating rows - if (n[1] > 1) { - for (i in seq(n[1] - 1)) out <- rbind(out, mx_col) - } + # Replicating rows + if (n[1] > 1) { + for (i in seq(n[1] - 1)) out <- rbind(out, mx_col) + } - # Replicating 3rd dimension - if (!is.na(n[3]) & n[3] > 1) out <- array(out, c(dim(out), n[3])) + # Replicating 3rd dimension + if (!is.na(n[3]) & n[3] > 1) out <- array(out, c(dim(out), n[3])) - # Output - return(unname(as.array(out))) -} \ No newline at end of file + # Output + return(unname(as.array(out))) +} diff --git a/R/reshape.R b/R/reshape.R index 824bbae..cb78c59 100644 --- a/R/reshape.R +++ b/R/reshape.R @@ -10,15 +10,15 @@ #' @note The Matlab function also accepts as input the dismemberment of sz as #' scalars. reshape <- function(A, sz) { - # Validation - if (prod(sz) != prod(dim(A))) { - stop("To RESHAPE the number of elements must not change.") - } - if (length(sz) == 1) { - stop("Size vector must have at least two elements.") - } + # Validation + if (prod(sz) != prod(dim(A))) { + stop("To RESHAPE the number of elements must not change.") + } + if (length(sz) == 1) { + stop("Size vector must have at least two elements.") + } - # Reshaping A - A <- array(A, sz) - return(A) -} \ No newline at end of file + # Reshaping A + A <- array(A, sz) + return(A) +} diff --git a/R/returnInOrder.R b/R/returnInOrder.R index c6a48d8..5287007 100644 --- a/R/returnInOrder.R +++ b/R/returnInOrder.R @@ -1,26 +1,26 @@ returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) { - # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on - # % se, jonka poistaminen populaatiosta pop nostaisi logml:n - # % arvoa eniten. + # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on + # % se, jonka poistaminen populaatiosta pop nostaisi logml:n + # % arvoa eniten. - ninds <- length(inds) - apuTaulu <- c(inds, zeros(ninds, 1)) + ninds <- length(inds) + apuTaulu <- c(inds, zeros(ninds, 1)) - for (i in 1:ninds) { - ind <- inds[i] - rows <- globalRows[i, 1]:globalRows[i, 2] - diffInCounts <- computeDiffInCounts( - rows, size[COUNTS, 1], size[COUNTS, 2], data - ) - diffInSumCounts <- sum(diffInCounts) + for (i in 1:ninds) { + ind <- inds[i] + rows <- globalRows[i, 1]:globalRows[i, 2] + diffInCounts <- computeDiffInCounts( + rows, size[COUNTS, 1], size[COUNTS, 2], data + ) + diffInSumCounts <- sum(diffInCounts) - COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts - SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts - apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm) - COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts - SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts - } - apuTaulu <- sortrows(apuTaulu, 2) - inds <- apuTaulu[ninds:1, 1] - return(inds) -} \ No newline at end of file + COUNTS[, , pop] <- COUNTS[, , pop] - diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts + apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm) + COUNTS[, , pop] <- COUNTS[, , pop] + diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts + } + apuTaulu <- sortrows(apuTaulu, 2) + inds <- apuTaulu[ninds:1, 1] + return(inds) +} diff --git a/R/rivinSisaltamienMjonojenLkm.R b/R/rivinSisaltamienMjonojenLkm.R index 24abaeb..88b4248 100644 --- a/R/rivinSisaltamienMjonojenLkm.R +++ b/R/rivinSisaltamienMjonojenLkm.R @@ -4,23 +4,23 @@ #' @description Returns the number of queues contained in the line. There must be a space between the queues. #' @export rivinSisaltamienMjonojenLkm <- function(line) { - # Palauttaa line:n sis�lt�mien mjonojen lukum��r�n. - # Mjonojen v�liss?t�ytyy olla v�lily�nti. - count <- 0 - pit <- nchar(line) - tila <- 0 # 0, jos odotetaan v�lily�ntej? 1 jos odotetaan muita merkkej? - for (i in seq_len(pit)) { - merkki <- substring(line, i, i) - if (isspace(merkki) & tila == 0) { - # Ei tehd?mit��n. - } else if (isspace(merkki) & tila == 1) { - tila <- 0 - } else if (!isspace(merkki) & tila == 0) { - tila <- 1 - count <- count + 1 - } else if (!isspace(merkki) & tila == 1) { - # %Ei tehd?mit��n - } - } - return(count) -} \ No newline at end of file + # Palauttaa line:n sis�lt�mien mjonojen lukum��r�n. + # Mjonojen v�liss?t�ytyy olla v�lily�nti. + count <- 0 + pit <- nchar(line) + tila <- 0 # 0, jos odotetaan v�lily�ntej? 1 jos odotetaan muita merkkej? + for (i in seq_len(pit)) { + merkki <- substring(line, i, i) + if (isspace(merkki) & tila == 0) { + # Ei tehd?mit��n. + } else if (isspace(merkki) & tila == 1) { + tila <- 0 + } else if (!isspace(merkki) & tila == 0) { + tila <- 1 + count <- count + 1 + } else if (!isspace(merkki) & tila == 1) { + # %Ei tehd?mit��n + } + } + return(count) +} diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R index 32af592..ac179f1 100644 --- a/R/selvitaDigitFormat.R +++ b/R/selvitaDigitFormat.R @@ -3,28 +3,28 @@ #' @return df #' @export selvitaDigitFormat <- function(line) { - # line on ensimm�inen pop-sanan j�lkeinen rivi - # Genepop-formaatissa olevasta datasta. funktio selvitt�� - # rivin muodon perusteella, ovatko datan alleelit annettu - # 2 vai 3 numeron avulla. - n <- 1 - merkki <- substring(line, n, n) - while (merkki != ',') { - n <- n + 1 - merkki <- substring(line, n, n) - } + # line on ensimm�inen pop-sanan j�lkeinen rivi + # Genepop-formaatissa olevasta datasta. funktio selvitt�� + # rivin muodon perusteella, ovatko datan alleelit annettu + # 2 vai 3 numeron avulla. + n <- 1 + merkki <- substring(line, n, n) + while (merkki != ",") { + n <- n + 1 + merkki <- substring(line, n, n) + } - while (!any(merkki %in% as.character(0:9))) { - n <- n + 1 - merkki <- substring(line, n, n) - } - numeroja <- 0 - while (any(merkki %in% as.character(0:9))) { - numeroja <- numeroja + 1 - n <- n + 1 - merkki <- substring(line, n, n) - } + while (!any(merkki %in% as.character(0:9))) { + n <- n + 1 + merkki <- substring(line, n, n) + } + numeroja <- 0 + while (any(merkki %in% as.character(0:9))) { + numeroja <- numeroja + 1 + n <- n + 1 + merkki <- substring(line, n, n) + } - df <- numeroja / 2 - return(df) -} \ No newline at end of file + df <- numeroja / 2 + return(df) +} diff --git a/R/setdiff_MATLAB.R b/R/setdiff_MATLAB.R index 69742b9..2d00f8f 100644 --- a/R/setdiff_MATLAB.R +++ b/R/setdiff_MATLAB.R @@ -5,21 +5,21 @@ #' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported) #' @author Waldir Leoncio setdiff_MATLAB <- function(A, B, legacy = FALSE) { - if (legacy) message("legacy=TRUE not supported. Ignoring.") - if (is(A, "numeric") & is(B, "numeric")) { - values <- sort(unique(A[is.na(match(A, B))])) - } else if (is(A, "data.frame") & is(B, "data.frame")) { - C <- A - exclude_rows <- vector() - for (r1 in seq_len(nrow(A))) { - for (r2 in seq_len(nrow(B))) { - if (all(A[r1, ] == B[r2, ])) { - exclude_rows <- append(exclude_rows, r1) - } - } - } - values <- C[-exclude_rows, ] - } - # TODO: add support for indices (if necessary) - return(values) -} \ No newline at end of file + if (legacy) message("legacy=TRUE not supported. Ignoring.") + if (is(A, "numeric") & is(B, "numeric")) { + values <- sort(unique(A[is.na(match(A, B))])) + } else if (is(A, "data.frame") & is(B, "data.frame")) { + C <- A + exclude_rows <- vector() + for (r1 in seq_len(nrow(A))) { + for (r2 in seq_len(nrow(B))) { + if (all(A[r1, ] == B[r2, ])) { + exclude_rows <- append(exclude_rows, r1) + } + } + } + values <- C[-exclude_rows, ] + } + # TODO: add support for indices (if necessary) + return(values) +} diff --git a/R/simulateAllFreqs.R b/R/simulateAllFreqs.R index 2c7e0b0..7d2e138 100644 --- a/R/simulateAllFreqs.R +++ b/R/simulateAllFreqs.R @@ -5,37 +5,37 @@ #' @export simulateAllFreqs <- function(noalle) { - if (isGlobalEmpty(COUNTS)) { - max_noalle <- 0 - nloci <- 0 - npops <- 1 - } else { - max_noalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - npops <- size(COUNTS, 3) - } + if (isGlobalEmpty(COUNTS)) { + max_noalle <- 0 + nloci <- 0 + npops <- 1 + } else { + max_noalle <- size(COUNTS, 1) + nloci <- size(COUNTS, 2) + npops <- size(COUNTS, 3) + } - prioriAlleelit <- zeros(max_noalle, nloci) + prioriAlleelit <- zeros(max_noalle, nloci) + if (nloci > 0) { + for (j in 1:nloci) { + prioriAlleelit[1:noalle[j], j] <- 1 / noalle[j] + } + } + prioriAlleelit <- repmat(prioriAlleelit, matrix(c(1, 1, npops), 1)) + counts <- ifelse( + test = isGlobalEmpty(COUNTS), + yes = prioriAlleelit, + no = COUNTS + prioriAlleelit + ) + allfreqs <- zeros(size(counts)) + + for (i in 1:npops) { if (nloci > 0) { - for (j in 1:nloci) { - prioriAlleelit[1:noalle[j], j] <- 1 / noalle[j] - } + for (j in 1:nloci) { + simuloidut <- randdir(counts[1:noalle[j], j, i], noalle[j]) + allfreqs[1:noalle[j], j, i] <- simuloidut + } } - prioriAlleelit <- repmat(prioriAlleelit, matrix(c(1, 1, npops), 1)) - counts <- ifelse( - test = isGlobalEmpty(COUNTS), - yes = prioriAlleelit, - no = COUNTS + prioriAlleelit - ) - allfreqs <- zeros(size(counts)) - - for (i in 1:npops) { - if (nloci > 0) { - for (j in 1:nloci) { - simuloidut <- randdir(counts[1:noalle[j], j, i] , noalle[j]) - allfreqs[1:noalle[j], j, i] <- simuloidut - } - } - } - return(allfreqs) -} \ No newline at end of file + } + return(allfreqs) +} diff --git a/R/simulateIndividuals.R b/R/simulateIndividuals.R index 94878aa..a630ac1 100644 --- a/R/simulateIndividuals.R +++ b/R/simulateIndividuals.R @@ -9,24 +9,24 @@ #' @export simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) { - nloci <- size(allfreqs, 2) + nloci <- size(allfreqs, 2) - refData <- zeros(n * rowsFromInd, nloci) - counter <- 1 # which row will be generated next. + refData <- zeros(n * rowsFromInd, nloci) + counter <- 1 # which row will be generated next. - for (ind in 1:n) { - for (loc in 1:nloci) { - for (k in 0:(rowsFromInd - 1)) { - if (runif(1) < missing_level) { - refData[counter + k, loc] <- simuloiAlleeli( - allfreqs, pop, loc - ) - } else { - refData[counter + k, loc] <- -999 - } - } + for (ind in 1:n) { + for (loc in 1:nloci) { + for (k in 0:(rowsFromInd - 1)) { + if (runif(1) < missing_level) { + refData[counter + k, loc] <- simuloiAlleeli( + allfreqs, pop, loc + ) + } else { + refData[counter + k, loc] <- -999 } - counter <- counter + rowsFromInd + } } - return(refData) -} \ No newline at end of file + counter <- counter + rowsFromInd + } + return(refData) +} diff --git a/R/simuloiAlleeli.R b/R/simuloiAlleeli.R index a1ec88a..e711cdd 100644 --- a/R/simuloiAlleeli.R +++ b/R/simuloiAlleeli.R @@ -7,25 +7,25 @@ #' @export simuloiAlleeli <- function(allfreqs, pop, loc) { - if (length(dim(allfreqs)) == 0) { - freqs <- 1 + if (length(dim(allfreqs)) == 0) { + freqs <- 1 + } else { + if (length(dim(allfreqs)) == 3) { # distinguish between array and matrix + freqs <- allfreqs[, loc, pop] } else { - if (length(dim(allfreqs)) == 3) { # distinguish between array and matrix - freqs <- allfreqs[, loc, pop] - } else { - freqs <- allfreqs[, loc] - } + freqs <- allfreqs[, loc] } - # freqs <- ifelse(is.null(length(dim(allfreqs)), allfreqs[loc], 0) - # freqs <- switch() + 1, - # allfreqs[, loc], - # allfreqs[, loc, pop] - # ) + } + # freqs <- ifelse(is.null(length(dim(allfreqs)), allfreqs[loc], 0) + # freqs <- switch() + 1, + # allfreqs[, loc], + # allfreqs[, loc, pop] + # ) - cumsumma <- cumsum(freqs) - arvo <- runif(1) - isommat <- which(cumsumma > arvo) - all <- min(isommat) - return(all) -} \ No newline at end of file + cumsumma <- cumsum(freqs) + arvo <- runif(1) + isommat <- which(cumsumma > arvo) + all <- min(isommat) + return(all) +} diff --git a/R/size.R b/R/size.R index 219193e..0192a1c 100644 --- a/R/size.R +++ b/R/size.R @@ -9,30 +9,30 @@ #' (bugs and questionable behaviors included), this function also does this. #' @export size <- function(x, d) { - # Determining the number of dimensions - if (all(is.na(x))) { - if (missing(d)) { - return(c(0, 0)) - } else { - return(ifelse(d <= 2, 0, 1)) - } - } - if (length(x) == 1) { - # x is surely a scalar - return(1) + # Determining the number of dimensions + if (all(is.na(x))) { + if (missing(d)) { + return(c(0, 0)) } else { - # x is a vector, a matrix or an array - n_dim <- ifelse(is.null(dim(x)), 1, length(dim(x))) - if (missing(d)) { - if (n_dim == 1) { - out <- c(1, length(x)) - } else { - out <- dim(x) - } - } else { - out <- ifelse(n_dim == 1, c(1, length(x))[d], dim(x)[d]) - if (is.na(out)) out <- 1 # for MATLAB compatibility - } - return(out) + return(ifelse(d <= 2, 0, 1)) } -} \ No newline at end of file + } + if (length(x) == 1) { + # x is surely a scalar + return(1) + } else { + # x is a vector, a matrix or an array + n_dim <- ifelse(is.null(dim(x)), 1, length(dim(x))) + if (missing(d)) { + if (n_dim == 1) { + out <- c(1, length(x)) + } else { + out <- dim(x) + } + } else { + out <- ifelse(n_dim == 1, c(1, length(x))[d], dim(x)[d]) + if (is.na(out)) out <- 1 # for MATLAB compatibility + } + return(out) + } +} diff --git a/R/sortrows.R b/R/sortrows.R index 28c4d25..c17fc2a 100644 --- a/R/sortrows.R +++ b/R/sortrows.R @@ -3,13 +3,13 @@ #' @param A matrix #' @param column ordering column sortrows <- function(A, column = 1) { - if (length(column) == 1) { - new_row_order <- order(A[, column]) - } else if (length(column) == 2) { - new_row_order <- order(A[, column[1]], A[, column[2]]) - } else { - stop("Not yet implemented for 2+ tie-breakers") - } - A_reordered <- A[new_row_order, ] - return(A_reordered) -} \ No newline at end of file + if (length(column) == 1) { + new_row_order <- order(A[, column]) + } else if (length(column) == 2) { + new_row_order <- order(A[, column[1]], A[, column[2]]) + } else { + stop("Not yet implemented for 2+ tie-breakers") + } + A_reordered <- A[new_row_order, ] + return(A_reordered) +} diff --git a/R/squeeze.R b/R/squeeze.R index 8030894..fc8e654 100644 --- a/R/squeeze.R +++ b/R/squeeze.R @@ -12,4 +12,4 @@ #' @return An array with the same elements as the input array, but with #' dimensions of length 1 removed. #' @author Waldir Leoncio -squeeze <- function(A) as.matrix(drop(A)) \ No newline at end of file +squeeze <- function(A) as.matrix(drop(A)) diff --git a/R/strcmp.R b/R/strcmp.R index 6637e7b..4522dd3 100644 --- a/R/strcmp.R +++ b/R/strcmp.R @@ -5,24 +5,24 @@ #' @return a logical element of the same type as the input #' @export strcmp <- function(s1, s2) { - if (length(s1) == 1 & length(s2) == 1) { - # Both are scalars, comparison is straightforward - return(identical(s1, s2)) - } else if (length(s1) == 1 & length(s2) > 1) { - # s1 is a scalar and s2 is a vector or a matrix - checks <- sapply(s2, function(s) s1 %in% s) - if (is(s2, "matrix")) checks <- matrix(checks, nrow(s2)) - } else if (length(s1) > 1 & length(s2) == 1) { - # s1 is a vector/matrix, s2 is a scalar - checks <- sapply(s1, function(s) s2 %in% s) - if (is(s1, "matrix")) checks <- matrix(checks, nrow(s1)) + if (length(s1) == 1 & length(s2) == 1) { + # Both are scalars, comparison is straightforward + return(identical(s1, s2)) + } else if (length(s1) == 1 & length(s2) > 1) { + # s1 is a scalar and s2 is a vector or a matrix + checks <- sapply(s2, function(s) s1 %in% s) + if (is(s2, "matrix")) checks <- matrix(checks, nrow(s2)) + } else if (length(s1) > 1 & length(s2) == 1) { + # s1 is a vector/matrix, s2 is a scalar + checks <- sapply(s1, function(s) s2 %in% s) + if (is(s1, "matrix")) checks <- matrix(checks, nrow(s1)) + } else { + # s1 and s2 are vectors/matrices + if (identical(dim(s1), dim(s2))) { + checks <- as.matrix(s1 == s2) } else { - # s1 and s2 are vectors/matrices - if (identical(dim(s1), dim(s2))) { - checks <- as.matrix(s1 == s2) - } else { - stop("Inputs must be the same size or either one can be a scalar.") - } + stop("Inputs must be the same size or either one can be a scalar.") } - return(checks) -} \ No newline at end of file + } + return(checks) +} diff --git a/R/suoritaMuutos.R b/R/suoritaMuutos.R index dc495f1..f45a397 100644 --- a/R/suoritaMuutos.R +++ b/R/suoritaMuutos.R @@ -4,19 +4,19 @@ #' @param osuus percentage? #' @param indeksi index #' @export -suoritaMuutos <- function (osuusTaulu, osuus, indeksi) { - if (isGlobalEmpty(COUNTS)) { - npops <- 1 - } else { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - } +suoritaMuutos <- function(osuusTaulu, osuus, indeksi) { + if (isGlobalEmpty(COUNTS)) { + npops <- 1 + } else { + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + } - i1 <- indeksi %% npops - if (is.na(i1) | i1 == 0) i1 <- npops - i2 <- ceiling(indeksi / npops) + i1 <- indeksi %% npops + if (is.na(i1) | i1 == 0) i1 <- npops + i2 <- ceiling(indeksi / npops) - osuusTaulu[i1] <- osuusTaulu[i1] - osuus - osuusTaulu[i2] <- osuusTaulu[i2] + osuus + osuusTaulu[i1] <- osuusTaulu[i1] - osuus + osuusTaulu[i2] <- osuusTaulu[i2] + osuus - return (osuusTaulu) -} \ No newline at end of file + return(osuusTaulu) +} diff --git a/R/takeLine.R b/R/takeLine.R index 8e32b80..8208eaf 100644 --- a/R/takeLine.R +++ b/R/takeLine.R @@ -5,13 +5,13 @@ #' @return newline #' @export takeLine <- function(description, width) { - # Returns one line from the description: line ends to the first - # space after width:th mark. - newLine <- description[1:width] - n <- width + 1 - while ((description[n] != " ") & (n < length(description))) { - n <- n + 1 - } - newline <- description[1:n] - return(newline) -} \ No newline at end of file + # Returns one line from the description: line ends to the first + # space after width:th mark. + newLine <- description[1:width] + n <- width + 1 + while ((description[n] != " ") & (n < length(description))) { + n <- n + 1 + } + newline <- description[1:n] + return(newline) +} diff --git a/R/testaaGenePopData.R b/R/testaaGenePopData.R index d62bff4..137d849 100644 --- a/R/testaaGenePopData.R +++ b/R/testaaGenePopData.R @@ -3,66 +3,66 @@ #' @return kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1. #' @details GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop. testaaGenePopData <- function(tiedostonNimi) { - # kunnossa == 0, jos data ei ole kelvollinen genePop data. - # Muussa tapauksessa kunnossa == 1. + # kunnossa == 0, jos data ei ole kelvollinen genePop data. + # Muussa tapauksessa kunnossa == 1. - kunnossa <- 0 - if (file.exists(tiedostonNimi)) { - fid <- readLines(tiedostonNimi) - line1 <- fid[1] # ensimmäinen rivi - line2 <- fid[2] # toinen rivi - line3 <- fid[3] # kolmas - } else { - fid <- line1 <- line2 <- line3 <- -1 - } + kunnossa <- 0 + if (file.exists(tiedostonNimi)) { + fid <- readLines(tiedostonNimi) + line1 <- fid[1] # ensimmäinen rivi + line2 <- fid[2] # toinen rivi + line3 <- fid[3] # kolmas + } else { + fid <- line1 <- line2 <- line3 <- -1 + } - if (line1 == -1 | line2 == -1 | line3 == -1) { - stop('Incorrect file format 1168') - } - if (testaaPop(line1) == 1 | testaaPop(line2) == 1) { - stop('Incorrect file format 1172') - } - if (testaaPop(line3) == 1) { - # 2 rivi t�ll�in lokusrivi (2 rows then locus row) - nloci <- rivinSisaltamienMjonojenLkm(line2) - line4 <- fid[4] - if (line4 == -1) stop('Incorrect file format 1180') - if (!grepl(',', line4)) { - # Rivin nelj?t�ytyy sis�lt�� pilkku. - stop('Incorrect file format 1185') - } - pointer <- 1 - while (substring(line4, pointer, pointer) != ',') { - # Tiedet��n, ett?pys�htyy - pointer <- pointer + 1 - } - line4 <- substring(line4, pointer + 1) # pilkun j�lkeinen osa (the part after the comma) - nloci2 <- rivinSisaltamienMjonojenLkm(line4) - if (nloci2 != nloci) stop('Incorrect file format 1195') - } else { - line <- fid[4] - lineNumb <- 4 - while (testaaPop(line) != 1 & line != -1) { - line <- fid[lineNumb + 1] - lineNumb <- lineNumb + 1 - } - if (line == -1) stop('Incorrect file format 1206') - nloci <- lineNumb - 2 - line4 <- fid[lineNumb + 1] # Eka rivi pop sanan j�lkeen - if (line4 == -1) stop('Incorrect file format 1212') - if (!grepl(',', line4)) { - # Rivin t�ytyy sis�lt�� pilkku. (The line must contain a comma) - stop('Incorrect file format 1217') - } - pointer <- 1 - while (substring(line4, pointer, pointer) != ',') { - # Tiedet��n, ett?pys�htyy - pointer <- pointer + 1 - } - line4 <- substring(line4, pointer + 1) # pilkun j�lkeinen osa (the part after the comma) - nloci2 <- rivinSisaltamienMjonojenLkm(line4) - if (nloci2 != nloci) stop('Incorrect file format 1228') - } - kunnossa <- 1 - return(kunnossa) -} \ No newline at end of file + if (line1 == -1 | line2 == -1 | line3 == -1) { + stop("Incorrect file format 1168") + } + if (testaaPop(line1) == 1 | testaaPop(line2) == 1) { + stop("Incorrect file format 1172") + } + if (testaaPop(line3) == 1) { + # 2 rivi t�ll�in lokusrivi (2 rows then locus row) + nloci <- rivinSisaltamienMjonojenLkm(line2) + line4 <- fid[4] + if (line4 == -1) stop("Incorrect file format 1180") + if (!grepl(",", line4)) { + # Rivin nelj?t�ytyy sis�lt�� pilkku. + stop("Incorrect file format 1185") + } + pointer <- 1 + while (substring(line4, pointer, pointer) != ",") { + # Tiedet��n, ett?pys�htyy + pointer <- pointer + 1 + } + line4 <- substring(line4, pointer + 1) # pilkun j�lkeinen osa (the part after the comma) + nloci2 <- rivinSisaltamienMjonojenLkm(line4) + if (nloci2 != nloci) stop("Incorrect file format 1195") + } else { + line <- fid[4] + lineNumb <- 4 + while (testaaPop(line) != 1 & line != -1) { + line <- fid[lineNumb + 1] + lineNumb <- lineNumb + 1 + } + if (line == -1) stop("Incorrect file format 1206") + nloci <- lineNumb - 2 + line4 <- fid[lineNumb + 1] # Eka rivi pop sanan j�lkeen + if (line4 == -1) stop("Incorrect file format 1212") + if (!grepl(",", line4)) { + # Rivin t�ytyy sis�lt�� pilkku. (The line must contain a comma) + stop("Incorrect file format 1217") + } + pointer <- 1 + while (substring(line4, pointer, pointer) != ",") { + # Tiedet��n, ett?pys�htyy + pointer <- pointer + 1 + } + line4 <- substring(line4, pointer + 1) # pilkun j�lkeinen osa (the part after the comma) + nloci2 <- rivinSisaltamienMjonojenLkm(line4) + if (nloci2 != nloci) stop("Incorrect file format 1228") + } + kunnossa <- 1 + return(kunnossa) +} diff --git a/R/testaaOnkoKunnollinenBapsData.R b/R/testaaOnkoKunnollinenBapsData.R index 0bf5c61..446258a 100644 --- a/R/testaaOnkoKunnollinenBapsData.R +++ b/R/testaaOnkoKunnollinenBapsData.R @@ -4,18 +4,18 @@ #' @return ninds #' @export testaaOnkoKunnollinenBapsData <- function(data) { - # Tarkastaa onko viimeisess?sarakkeessa kaikki - # luvut 1,2,...,n johonkin n:��n asti. - # Tarkastaa lis�ksi, ett?on v�hint��n 2 saraketta. - if (size(data, 1) < 2) { - ninds <- 0 - return(ninds) - } - lastCol <- data[, ncol(data)] - ninds <- max(lastCol) - if (any(1:ninds != unique(lastCol))) { - ninds <- 0 - return(ninds) - } - return(ninds) -} \ No newline at end of file + # Tarkastaa onko viimeisess?sarakkeessa kaikki + # luvut 1,2,...,n johonkin n:��n asti. + # Tarkastaa lis�ksi, ett?on v�hint��n 2 saraketta. + if (size(data, 1) < 2) { + ninds <- 0 + return(ninds) + } + lastCol <- data[, ncol(data)] + ninds <- max(lastCol) + if (any(1:ninds != unique(lastCol))) { + ninds <- 0 + return(ninds) + } + return(ninds) +} diff --git a/R/testaaPop.R b/R/testaaPop.R index 119a63b..91f9a82 100644 --- a/R/testaaPop.R +++ b/R/testaaPop.R @@ -5,15 +5,15 @@ # letter combinations: Pop, pop, POP. In all others cases, pal = 0 #' @export testaaPop <- function(rivi) { - # pal=1, mik�li rivi alkaa jollain seuraavista - # kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa - # tapauksissa pal=0. + # pal=1, mik�li rivi alkaa jollain seuraavista + # kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa + # tapauksissa pal=0. - if (nchar(rivi) < 3) { - pal <- 0 - } else { - rivi_start <- substring(rivi, 1, 3) - pal <- ifelse(rivi_start %in% c('Pop', 'pop', 'POP'), 1, 0) - } - return(pal) -} \ No newline at end of file + if (nchar(rivi) < 3) { + pal <- 0 + } else { + rivi_start <- substring(rivi, 1, 3) + pal <- ifelse(rivi_start %in% c("Pop", "pop", "POP"), 1, 0) + } + return(pal) +} diff --git a/R/times.R b/R/times.R index e935cfc..7c58fee 100644 --- a/R/times.R +++ b/R/times.R @@ -6,46 +6,46 @@ #' @export #' @returns matrix with dimensions equal to the larger of the two factors times <- function(a, b) { - # Converting everything to matrix because Matlab looooooves the matrix - a <- as.matrix(a) - b <- as.matrix(b) + # Converting everything to matrix because Matlab looooooves the matrix + a <- as.matrix(a) + b <- as.matrix(b) - dominant_mx <- NULL - if (!all(dim(a) == dim(b))) { - if (all(dim(a) >= dim(b))) { - dominant_mx <- a - dominated_mx <- b - } else if (all(dim(b) >= dim(a))) { - dominant_mx <- b - dominated_mx <- a - } else { - dominant_mx <- "neither" - dominant_dim <- c(max(nrow(b), nrow(a)), max(ncol(b), ncol(a))) - } - } - - if (is.null(dominant_mx)) { - out <- a * b - } else if (dominant_mx[1] == "neither") { - a <- repmat( - mx = a, - n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1) - ) - b <- repmat( - mx = b, - n = c(dominant_dim[1] - nrow(b) + 1, dominant_dim[2] - ncol(b) + 1) - ) - out <- a * b + dominant_mx <- NULL + if (!all(dim(a) == dim(b))) { + if (all(dim(a) >= dim(b))) { + dominant_mx <- a + dominated_mx <- b + } else if (all(dim(b) >= dim(a))) { + dominant_mx <- b + dominated_mx <- a } else { - # Expanding dominated matrix - dominated_mx <- repmat( - mx = dominated_mx, - n = c( - nrow(dominant_mx) - nrow(dominated_mx) + 1, - ncol(dominant_mx) - ncol(dominated_mx) + 1 - ) - ) - out <- dominant_mx * dominated_mx + dominant_mx <- "neither" + dominant_dim <- c(max(nrow(b), nrow(a)), max(ncol(b), ncol(a))) } - return(out) -} \ No newline at end of file + } + + if (is.null(dominant_mx)) { + out <- a * b + } else if (dominant_mx[1] == "neither") { + a <- repmat( + mx = a, + n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1) + ) + b <- repmat( + mx = b, + n = c(dominant_dim[1] - nrow(b) + 1, dominant_dim[2] - ncol(b) + 1) + ) + out <- a * b + } else { + # Expanding dominated matrix + dominated_mx <- repmat( + mx = dominated_mx, + n = c( + nrow(dominant_mx) - nrow(dominated_mx) + 1, + ncol(dominant_mx) - ncol(dominated_mx) + 1 + ) + ) + out <- dominant_mx * dominated_mx + } + return(out) +} diff --git a/R/tulostaAdmixtureTiedot.R b/R/tulostaAdmixtureTiedot.R index de4af77..a68bf4b 100644 --- a/R/tulostaAdmixtureTiedot.R +++ b/R/tulostaAdmixtureTiedot.R @@ -1,60 +1,60 @@ tulostaAdmixtureTiedot <- function(proportions, uskottavuus, alaRaja, niter) { - # ASK: what does this function does. Plotting? Get examples? - # h0 <- findobj('Tag','filename1_text') - # inputf = get(h0,'String'); - # h0 = findobj('Tag','filename2_text'); - # outf = get(h0,'String'); clear h0; + # ASK: what does this function does. Plotting? Get examples? + # h0 <- findobj('Tag','filename1_text') + # inputf = get(h0,'String'); + # h0 = findobj('Tag','filename2_text'); + # outf = get(h0,'String'); clear h0; - # if length(outf)>0 - # fid = fopen(outf,'a'); - # else - # fid = -1; - # diary('baps4_output.baps'); % save in text anyway. - # end + # if length(outf)>0 + # fid = fopen(outf,'a'); + # else + # fid = -1; + # diary('baps4_output.baps'); % save in text anyway. + # end - # ninds = length(uskottavuus); - # npops = size(proportions,2); - # disp(' '); - # dispLine; - # disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); - # disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); - # disp(['Data file: ' inputf]); - # disp(['Number of individuals: ' num2str(ninds)]); - # disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); - # disp(' '); - # if fid ~= -1 - # fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); - # fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); - # fprintf(fid, '\n'); - # end + # ninds = length(uskottavuus); + # npops = size(proportions,2); + # disp(' '); + # dispLine; + # disp('RESULTS OF ADMIXTURE ANALYSIS BASED'); + # disp('ON MIXTURE CLUSTERING OF INDIVIDUALS'); + # disp(['Data file: ' inputf]); + # disp(['Number of individuals: ' num2str(ninds)]); + # disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); + # disp(' '); + # if fid ~= -1 + # fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n'); + # fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n'); + # fprintf(fid, '\n'); + # end - # ekaRivi = blanks(6); - # for pop = 1:npops - # ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; - # end - # ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 - # disp(ekaRivi); - # for ind = 1:ninds - # rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; - # if any(proportions(ind,:)>0) - # for pop = 1:npops-1 - # rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; - # end - # rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; - # rivi = [rivi ownNum2Str(uskottavuus(ind))]; - # end - # disp(rivi); - # if fid ~= -1 - # fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); - # end - # end - # if fid ~= -1 - # fclose(fid); - # else - # diary off -} \ No newline at end of file + # ekaRivi = blanks(6); + # for pop = 1:npops + # ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)]; + # end + # ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06 + # disp(ekaRivi); + # for ind = 1:ninds + # rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))]; + # if any(proportions(ind,:)>0) + # for pop = 1:npops-1 + # rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)]; + # end + # rivi = [rivi proportion2str(proportions(ind,npops)) ': ']; + # rivi = [rivi ownNum2Str(uskottavuus(ind))]; + # end + # disp(rivi); + # if fid ~= -1 + # fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n'); + # end + # end + # if fid ~= -1 + # fclose(fid); + # else + # diary off +} diff --git a/R/uigetfile.R b/R/uigetfile.R index ccddb99..bfcb72c 100644 --- a/R/uigetfile.R +++ b/R/uigetfile.R @@ -5,27 +5,27 @@ #' @param filter Filter listed files #' @param title Pre-prompt message #' @export -uigetfile <- function(filter = "", title = "") { - # ========================================================================== - # Pre-prompt message - # ========================================================================== - message(title) - # ========================================================================== - # Reading file path and name - # ========================================================================== - filepath <- readline( - paste0("Enter file path (leave empty for ", getwd(), "): ") - ) - if (filepath == "") filepath <- getwd() - # ========================================================================== - # Presenting possible files - # ========================================================================== - message("Files present on that directory:") - print(list.files(path = filepath, pattern = filter, ignore.case = TRUE)) - filename <- file.choose() - # ========================================================================== - # Organizing output - # ========================================================================== - out <- list(name = filename, path = filepath) - return(out) -} \ No newline at end of file +uigetfile <- function(filter = "", title = "") { + # ========================================================================== + # Pre-prompt message + # ========================================================================== + message(title) + # ========================================================================== + # Reading file path and name + # ========================================================================== + filepath <- readline( + paste0("Enter file path (leave empty for ", getwd(), "): ") + ) + if (filepath == "") filepath <- getwd() + # ========================================================================== + # Presenting possible files + # ========================================================================== + message("Files present on that directory:") + print(list.files(path = filepath, pattern = filter, ignore.case = TRUE)) + filename <- file.choose() + # ========================================================================== + # Organizing output + # ========================================================================== + out <- list(name = filename, path = filepath) + return(out) +} diff --git a/R/uiputfile.R b/R/uiputfile.R index cb9da66..7a0ba92 100644 --- a/R/uiputfile.R +++ b/R/uiputfile.R @@ -5,17 +5,17 @@ #' homonymous Matlab function. #' @export uiputfile <- function(filter = ".rda", title = "Save file") { - # ========================================================================== - # Processing input - # ========================================================================== - message(title) - filename <- readline(paste0('File name (end with ', filter, '): ')) - filepath <- readline(paste0('File path (leave empty for ', getwd(), '): ')) - if (filename == "") filename <- 0 - if (filepath == "") filepath <- getwd() - # ========================================================================== - # Processing output - # ========================================================================== - out <- list(name = filename, path = filepath) - return(out) -} \ No newline at end of file + # ========================================================================== + # Processing input + # ========================================================================== + message(title) + filename <- readline(paste0("File name (end with ", filter, "): ")) + filepath <- readline(paste0("File path (leave empty for ", getwd(), "): ")) + if (filename == "") filename <- 0 + if (filepath == "") filepath <- getwd() + # ========================================================================== + # Processing output + # ========================================================================== + out <- list(name = filename, path = filepath) + return(out) +} diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R index 94e5261..9a3f831 100644 --- a/R/updateGlobalVariables.R +++ b/R/updateGlobalVariables.R @@ -1,63 +1,61 @@ updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { - # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind - # % on siirret��n koriin i2. - i1 <- PARTITION[ind] - PARTITION[ind] <- i2 + # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind + # % on siirret��n koriin i2. + i1 <- PARTITION[ind] + PARTITION[ind] <- i2 - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) - POP_LOGML[c(i1, i2)] <- computePopulationLogml( - c(i1, i2), adjprior, priorTerm - ) + POP_LOGML[c(i1, i2)] <- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) - LOGDIFF[, c(i1, i2)] <- -Inf - inx <- c(find(PARTITION == i1), find(PARTITION==i2)) - LOGDIFF[inx, ] <- -Inf + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf } updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { - # % Suorittaa globaalien muuttujien muutokset, kun kaikki - # % korissa i1 olevat yksil�t siirret��n koriin i2. + # % Suorittaa globaalien muuttujien muutokset, kun kaikki + # % korissa i1 olevat yksil�t siirret��n koriin i2. - inds <- find(PARTITION == i1) - PARTITION[inds] <- i2 + inds <- find(PARTITION == i1) + PARTITION[inds] <- i2 - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) - POP_LOGML[i1] <- 0 - POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) + POP_LOGML[i1] <- 0 + POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) - LOGDIFF[, c(i1, i2)] <- -Inf - inx <- c(find(PARTITION == i1), find(PARTITION == i2)) - LOGDIFF[inx, ] <- -Inf + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf } -updateGlobalVariables3 <- function( - muuttuvat, diffInCounts, adjprior, priorTerm, i2 -) { - # % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' - # % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan - # % koriin. +updateGlobalVariables3 <- function(muuttuvat, diffInCounts, adjprior, priorTerm, i2) { + # % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' + # % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan + # % koriin. - i1 <- PARTITION[muuttuvat(1)] - PARTITION[muuttuvat] <- i2 + i1 <- PARTITION[muuttuvat(1)] + PARTITION[muuttuvat] <- i2 - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) - POP_LOGML[c(i1, i2)] <- computePopulationLogml( - c(i1, i2), adjprior, priorTerm - ) + POP_LOGML[c(i1, i2)] <- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) - LOGDIFF[, c(i1, i2)] <- -Inf - inx <- c(find(PARTITION == i1), find(PARTITION == i2)) - LOGDIFF[inx, ] <- -Inf + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf } diff --git a/R/viewPartition.R b/R/viewPartition.R index b478219..6dfb14f 100644 --- a/R/viewPartition.R +++ b/R/viewPartition.R @@ -1,121 +1,120 @@ viewPartition <- function(osuudet, popnames) { + npops <- size(COUNTS, 3) + nind <- size(osuudet, 1) - npops <- size(COUNTS, 3) - nind <- size(osuudet,1) - -# TODO: translate if necessary. Remove if this function won't be used -# disp(['Number of populations: ' num2str(npops)]); -# if npops>30 -# disp(' '); -# disp('Figure can be drawn only if the number of populations'); -# disp('is less or equal to 30.'); -# disp(' '); -# return; -# end + # TODO: translate if necessary. Remove if this function won't be used + # disp(['Number of populations: ' num2str(npops)]); + # if npops>30 + # disp(' '); + # disp('Figure can be drawn only if the number of populations'); + # disp('is less or equal to 30.'); + # disp(' '); + # return; + # end -# varit = givecolors(npops); -# korkeinviiva = 1.05; -# pieninarvo = -korkeinviiva; + # varit = givecolors(npops); + # korkeinviiva = 1.05; + # pieninarvo = -korkeinviiva; -# h0 = figure; -# set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu -# tiedot.popnames = popnames; -# tiedot.info = osuudet; -# set(h0,'UserData',tiedot); + # h0 = figure; + # set(h0, 'NumberTitle', 'off'); %image_figure; %Muutettu + # tiedot.popnames = popnames; + # tiedot.info = osuudet; + # set(h0,'UserData',tiedot); -# set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... -# 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); + # set(gca, 'Xlim', [-.5 ,nind+.5], 'YLim', [pieninarvo ,korkeinviiva], ... + # 'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []); -# for i=1:nind + # for i=1:nind -# if any(osuudet(i,:)>0) -# cumOsuudet = cumsum(osuudet(i,:)); + # if any(osuudet(i,:)>0) + # cumOsuudet = cumsum(osuudet(i,:)); -# % Pylv��n piirt�minen -# for j=1:npops -# if j==1 -# if cumOsuudet(1)>0 -# h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(j,:)); -# set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! -# end -# else -# if (cumOsuudet(j)>cumOsuudet(j-1)) -# h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... -# cumOsuudet(j), cumOsuudet(j)], varit(j,:)); -# set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! -# end -# end -# end -# end -# end + # % Pylv��n piirt�minen + # for j=1:npops + # if j==1 + # if cumOsuudet(1)>0 + # h0 =patch([i-1, i, i, i-1], [0, 0, cumOsuudet(1), cumOsuudet(1)], varit(j,:)); + # set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! + # end + # else + # if (cumOsuudet(j)>cumOsuudet(j-1)) + # h0 = patch([i-1, i, i, i-1], [cumOsuudet(j-1), cumOsuudet(j-1), ... + # cumOsuudet(j), cumOsuudet(j)], varit(j,:)); + # set(h0,'EdgeColor','none'); % Midevaa varten kommentoitava! + # end + # end + # end + # end + # end -# if ~isempty(popnames) -# npops = size(popnames,1); -# for i=1:npops -# firstInd = popnames{i,2}; -# line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat + # if ~isempty(popnames) + # npops = size(popnames,1); + # for i=1:npops + # firstInd = popnames{i,2}; + # line([firstInd-1, firstInd-1], [0,1], 'Color', 'k'); %Populaatioiden rajat -# if i 36) stop('Maximum number of colors 36') - colors <- matrix( - data = c( - 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, - 0.4, 0, 0, 0, 0.4, 0, 0, 0, 0.4, 0.4, 0.4, 0, 0.4, 0, - 0.4, 0, 0.4, 0.4, 0.2, 0, 0, 0, 0.2, 0, 0, 0, 0.2, 0.2, - 0.2, 0, 0.2, 0, 0.2, 0, 0.2, 0.2, 0.8, 0, 0, 0, 0.8, 0, - 0, 0, 0.8, 0.8, 0.8, 0, 0.8, 0, 0.8, 0, 0.8, 0.8, - 0.6, 0, 0, 0, 0.6, 0, 0, 0, 0.6, 0.6, 0.6, 0, 0.6, 0, - 0.6, 0, 0.6, 0.6, 0.6, 0.2, 0.4, 0.2, 0.4, 0.8, 0.8, - 0.4, 0.2, 0, 0.6, 0.2, 0.2, 0.8, 0.6, 0.5, 0.2, 0.1, - 0.6, 0.3, 0.1 - ), - ncol = 3, - byrow = TRUE - ) - colors = colors[1:n, ] - # red; green; blue; yellow - # RGB format: [red green blue] - return(colors) -} \ No newline at end of file + if (n > 36) stop("Maximum number of colors 36") + colors <- matrix( + data = c( + 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, + 0.4, 0, 0, 0, 0.4, 0, 0, 0, 0.4, 0.4, 0.4, 0, 0.4, 0, + 0.4, 0, 0.4, 0.4, 0.2, 0, 0, 0, 0.2, 0, 0, 0, 0.2, 0.2, + 0.2, 0, 0.2, 0, 0.2, 0, 0.2, 0.2, 0.8, 0, 0, 0, 0.8, 0, + 0, 0, 0.8, 0.8, 0.8, 0, 0.8, 0, 0.8, 0, 0.8, 0.8, + 0.6, 0, 0, 0, 0.6, 0, 0, 0, 0.6, 0.6, 0.6, 0, 0.6, 0, + 0.6, 0, 0.6, 0.6, 0.6, 0.2, 0.4, 0.2, 0.4, 0.8, 0.8, + 0.4, 0.2, 0, 0.6, 0.2, 0.2, 0.8, 0.6, 0.5, 0.2, 0.1, + 0.6, 0.3, 0.1 + ), + ncol = 3, + byrow = TRUE + ) + colors <- colors[1:n, ] + # red; green; blue; yellow + # RGB format: [red green blue] + return(colors) +} diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index ec5120b..d04cb19 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -11,319 +11,317 @@ #' @param popnames popnames #' @param fixedK fixedK #' @export -writeMixtureInfo <- function( - logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK -) { - changesInLogml <- list() - ninds <- size(data, 1) / rowsFromInd - npops <- size(COUNTS, 3) - # Check that the names refer to individuals - names <- (size(popnames, 1) == ninds) #Tarkistetaan ett?nimet viittaavat yksil�ihin +writeMixtureInfo <- function(logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) { + changesInLogml <- list() + ninds <- size(data, 1) / rowsFromInd + npops <- size(COUNTS, 3) + # Check that the names refer to individuals + names <- (size(popnames, 1) == ninds) # Tarkistetaan ett?nimet viittaavat yksil�ihin - if (length(outPutFile) > 0) { - fid <- load(outPutFile) - } else { - fid <- -1 - # TODO: replace sink with option that will record input and output - sink('baps4_output.baps', split=TRUE) # save in text anyway. - } + if (length(outPutFile) > 0) { + fid <- load(outPutFile) + } else { + fid <- -1 + # TODO: replace sink with option that will record input and output + sink("baps4_output.baps", split = TRUE) # save in text anyway. + } - dispLine() - cat('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:') - cat(c('Data file: ', inputFile)) - cat('Model: independent') - cat(c('Number of clustered individuals: ', ownNum2Str(ninds))) - cat(c('Number of groups in optimal partition: ', ownNum2Str(npops))) - cat(c('Log(marginal likelihood) of optimal partition: ', ownNum2Str(logml))) - cat(' ') - if (fid != -1) { - append(fid, 'RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n') - append(fid, c('Data file: ', inputFile, '\n')) - append( - fid, - c('Number of clustered individuals: ', ownNum2Str(ninds), '\n') - ) - append( - fid, - c( - 'Number of groups in optimal partition: ', - ownNum2Str(npops), '\n' - ) - ) - append( - fid, - c( - 'Log(marginal likelihood) of optimal partition: ', - ownNum2Str(logml), - '\n' - ) - ) - } + dispLine() + cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:") + cat(c("Data file: ", inputFile)) + cat("Model: independent") + cat(c("Number of clustered individuals: ", ownNum2Str(ninds))) + cat(c("Number of groups in optimal partition: ", ownNum2Str(npops))) + cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml))) + cat(" ") + if (fid != -1) { + append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n") + append(fid, c("Data file: ", inputFile, "\n")) + append( + fid, + c("Number of clustered individuals: ", ownNum2Str(ninds), "\n") + ) + append( + fid, + c( + "Number of groups in optimal partition: ", + ownNum2Str(npops), "\n" + ) + ) + append( + fid, + c( + "Log(marginal likelihood) of optimal partition: ", + ownNum2Str(logml), + "\n" + ) + ) + } - cluster_count <- length(unique(PARTITION)) - cat('Best Partition: ') - if (fid != -1) { - append(fid, c('Best Partition: ', '\n')) - } - for (m in 1:cluster_count) { - indsInM <- find(PARTITION == m) - length_of_beginning <- 11 + floor(log10(m)) - cluster_size <- length(indsInM) + cluster_count <- length(unique(PARTITION)) + cat("Best Partition: ") + if (fid != -1) { + append(fid, c("Best Partition: ", "\n")) + } + for (m in 1:cluster_count) { + indsInM <- find(PARTITION == m) + length_of_beginning <- 11 + floor(log10(m)) + cluster_size <- length(indsInM) - if (names) { - text <- c( - 'Cluster ', - as.character(m), - ': {', - as.character(popnames[[indsInM[1]]]) - ) - for (k in 2:cluster_size) { - text <- c(text, ', ', as.character(popnames[[indsInM[k]]])) - } - } else { - text <- c( - 'Cluster ', as.character(m), ': {', as.character(indsInM[1]) - ) - for (k in 2:cluster_size) { - text <- c(text, ', ', as.character(indsInM[k])) - } - } - text <- c(text, '}') - while (length(text) > 58) { - # Take one line and display it. - new_line <- takeLine(text, 58) - text <- (length(new_line) + 1):length(text) - cat(new_line) - if (fid != -1) { - append(fid, new_line) - append(fid,'\n') - } - if (length(text) > 0) { - text <- c(blanks(length_of_beginning), text) - } else { - text <- "" - } - } - if (text != "") { - cat(text) - if (fid != -1) { - append(fid, text) - append(fid,'\n') - } - } - } + if (names) { + text <- c( + "Cluster ", + as.character(m), + ": {", + as.character(popnames[[indsInM[1]]]) + ) + for (k in 2:cluster_size) { + text <- c(text, ", ", as.character(popnames[[indsInM[k]]])) + } + } else { + text <- c( + "Cluster ", as.character(m), ": {", as.character(indsInM[1]) + ) + for (k in 2:cluster_size) { + text <- c(text, ", ", as.character(indsInM[k])) + } + } + text <- c(text, "}") + while (length(text) > 58) { + # Take one line and display it. + new_line <- takeLine(text, 58) + text <- (length(new_line) + 1):length(text) + cat(new_line) + if (fid != -1) { + append(fid, new_line) + append(fid, "\n") + } + if (length(text) > 0) { + text <- c(blanks(length_of_beginning), text) + } else { + text <- "" + } + } + if (text != "") { + cat(text) + if (fid != -1) { + append(fid, text) + append(fid, "\n") + } + } + } - if (npops > 1) { - cat(' ') - cat(' ') - cat( - 'Changes in log(marginal likelihood)', - ' if indvidual i is moved to group j:' - ) - if (fid != -1) { - append(fid, ' ') - append(fid, '\n') - append(fid, ' ') - append(fid, '\n') - append( - fid, - c( - 'Changes in log(marginal likelihood)', - 'if indvidual i is moved to group j:' - ) - ) - append(fid, '\n') - } + if (npops > 1) { + cat(" ") + cat(" ") + cat( + "Changes in log(marginal likelihood)", + " if indvidual i is moved to group j:" + ) + if (fid != -1) { + append(fid, " ") + append(fid, "\n") + append(fid, " ") + append(fid, "\n") + append( + fid, + c( + "Changes in log(marginal likelihood)", + "if indvidual i is moved to group j:" + ) + ) + append(fid, "\n") + } - if (names) { - nameSizes <- zeros(ninds, 1) - for (i in 1:ninds) { - nimi <- as.character(popnames[i]) - nameSizes[i] <- length(nimi) - } - maxSize <- max(nameSizes) - maxSize <- max(maxSize, 5) - erotus <- maxSize - 5 - alku <- blanks(erotus) - ekarivi <- c(alku, ' ind', blanks(6 + erotus)) - } else { - ekarivi <- ' ind ' - } - for (i in 1:cluster_count) { - ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i)))) - } - cat(ekarivi) - if (fid != -1) { - append(fid, ekarivi) - append(fid, '\n') - } + if (names) { + nameSizes <- zeros(ninds, 1) + for (i in 1:ninds) { + nimi <- as.character(popnames[i]) + nameSizes[i] <- length(nimi) + } + maxSize <- max(nameSizes) + maxSize <- max(maxSize, 5) + erotus <- maxSize - 5 + alku <- blanks(erotus) + ekarivi <- c(alku, " ind", blanks(6 + erotus)) + } else { + ekarivi <- " ind " + } + for (i in 1:cluster_count) { + ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i)))) + } + cat(ekarivi) + if (fid != -1) { + append(fid, ekarivi) + append(fid, "\n") + } - # %ninds = size(data,1)/rowsFromInd; - changesInLogml <- t(LOGDIFF) - for (ind in 1:ninds) { - muutokset <- changesInLogml[, ind] + # %ninds = size(data,1)/rowsFromInd; + changesInLogml <- t(LOGDIFF) + for (ind in 1:ninds) { + muutokset <- changesInLogml[, ind] - if (names) { - nimi <- as.character(popnames[ind]) - rivi <- c(blanks(maxSize - length(nimi)), nimi, ':') - } else { - rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ':') - } - for (j in 1:npops) { - rivi <- c(rivi, ' ', logml2String(omaRound(muutokset[j]))) - } - cat(rivi) - if (fid != -1) { - append(fid, rivi) - append(fid, '\n') - } - } + if (names) { + nimi <- as.character(popnames[ind]) + rivi <- c(blanks(maxSize - length(nimi)), nimi, ":") + } else { + rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":") + } + for (j in 1:npops) { + rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j]))) + } + cat(rivi) + if (fid != -1) { + append(fid, rivi) + append(fid, "\n") + } + } - cat(' ') - cat(' ') - cat('KL-divergence matrix in PHYLIP format:') + cat(" ") + cat(" ") + cat("KL-divergence matrix in PHYLIP format:") - dist_mat <- zeros(npops, npops) - if (fid != -1) { - append(fid, ' ') - append(fid, ' ') - append(fid, c('KL-divergence matrix in PHYLIP format:')) - append(fid, '\n') - } + dist_mat <- zeros(npops, npops) + if (fid != -1) { + append(fid, " ") + append(fid, " ") + append(fid, c("KL-divergence matrix in PHYLIP format:")) + append(fid, "\n") + } - maxnoalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - d <- zeros(maxnoalle, nloci, npops) - prior <- adjprior - prior[find(prior == 1)] <- 0 - nollia <- find(all(prior == 0)) # Loci in which only one allele was detected. - prior[1, nollia] <- 1 - for (pop1 in 1:npops) { - d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / - repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) - } - ekarivi <- as.character(npops) - cat(ekarivi) - if (fid != -1) { - append(fid, ekarivi) - append(fid, '\n') - } + maxnoalle <- size(COUNTS, 1) + nloci <- size(COUNTS, 2) + d <- zeros(maxnoalle, nloci, npops) + prior <- adjprior + prior[find(prior == 1)] <- 0 + nollia <- find(all(prior == 0)) # Loci in which only one allele was detected. + prior[1, nollia] <- 1 + for (pop1 in 1:npops) { + d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / + repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) + } + ekarivi <- as.character(npops) + cat(ekarivi) + if (fid != -1) { + append(fid, ekarivi) + append(fid, "\n") + } - for (pop1 in 1:npops) { - for (pop2 in 1:(pop1 - 1)) { - dist1 <- d[, , pop1] - dist2 <- d[, , pop2] - div12 <- sum( - sum(dist1 * log2((dist1 + 10 ^ -10) / (dist2 + 10 ^ -10))) - ) / nloci - div21 <- sum( - sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10))) - ) / nloci - div <- (div12 + div21) / 2 - dist_mat[pop1, pop2] <- div - } - } + for (pop1 in 1:npops) { + for (pop2 in 1:(pop1 - 1)) { + dist1 <- d[, , pop1] + dist2 <- d[, , pop2] + div12 <- sum( + sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10))) + ) / nloci + div21 <- sum( + sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10))) + ) / nloci + div <- (div12 + div21) / 2 + dist_mat[pop1, pop2] <- div + } + } - dist_mat <- dist_mat + t(dist_mat) # make it symmetric - for (pop1 in 1:npops) { - rivi <- c('Cluster_', as.character(pop1), ' ') - for (pop2 in 1:npops) { - rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), ' ') - } - cat(rivi) - if (fid != -1) { - append(fid, rivi) - append(fid, '\n') - } - } - } + dist_mat <- dist_mat + t(dist_mat) # make it symmetric + for (pop1 in 1:npops) { + rivi <- c("Cluster_", as.character(pop1), " ") + for (pop2 in 1:npops) { + rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ") + } + cat(rivi) + if (fid != -1) { + append(fid, rivi) + append(fid, "\n") + } + } + } - cat(' ') - cat(' '); - cat( - 'List of sizes of 10 best visited partitions', - 'and corresponding log(ml) values' - ) + cat(" ") + cat(" ") + cat( + "List of sizes of 10 best visited partitions", + "and corresponding log(ml) values" + ) - if (fid != -1) { - append(fid, ' ') - append(fid, '\n') - append(fid, ' ') - append(fid, '\n') - append( - fid, - c( - 'List of sizes of 10 best visited partitions', - 'and corresponding log(ml) values' - ) - ) - append(fid, '\n') - } + if (fid != -1) { + append(fid, " ") + append(fid, "\n") + append(fid, " ") + append(fid, "\n") + append( + fid, + c( + "List of sizes of 10 best visited partitions", + "and corresponding log(ml) values" + ) + ) + append(fid, "\n") + } - partitionSummary <- sortrows(partitionSummary, 2) - partitionSummary <- partitionSummary[size(partitionSummary, 1):1, ] - partitionSummary <- partitionSummary[find(partitionSummary[, 2] > -1e49), ] - if (size(partitionSummary, 1) > 10) { - vikaPartitio <- 10 - } else { - vikaPartitio <- size(partitionSummary, 1) - } - for (part in 1:vikaPartitio) { - line <- c( - as.character(partitionSummary[part, 1]), - ' ', - as.character(partitionSummary(part, 2)) - ) - cat(line) - if (fid != -1) { - append(fid, line) - append(fid, '\n') - } - } + partitionSummary <- sortrows(partitionSummary, 2) + partitionSummary <- partitionSummary[size(partitionSummary, 1):1, ] + partitionSummary <- partitionSummary[find(partitionSummary[, 2] > -1e49), ] + if (size(partitionSummary, 1) > 10) { + vikaPartitio <- 10 + } else { + vikaPartitio <- size(partitionSummary, 1) + } + for (part in 1:vikaPartitio) { + line <- c( + as.character(partitionSummary[part, 1]), + " ", + as.character(partitionSummary(part, 2)) + ) + cat(line) + if (fid != -1) { + append(fid, line) + append(fid, "\n") + } + } - if (!fixedK) { - cat(' ') - cat(' ') - cat('Probabilities for number of clusters') + if (!fixedK) { + cat(" ") + cat(" ") + cat("Probabilities for number of clusters") - if (fid != -1) { - append(fid, ' ') - append(fid, '\n') - append(fid, ' ') - append(fid, '\n') - append(fid, c('Probabilities for number of clusters')) - append(fid, '\n') - } + if (fid != -1) { + append(fid, " ") + append(fid, "\n") + append(fid, " ") + append(fid, "\n") + append(fid, c("Probabilities for number of clusters")) + append(fid, "\n") + } - npopsTaulu <- unique(partitionSummary[, 1]) - len <- length(npopsTaulu) - probs <- zeros(len, 1) - partitionSummary[, 2] <- partitionSummary[, 2] - - max(partitionSummary[, 2]) - sumtn <- sum(exp(partitionSummary[, 2])) - for (i in 1:len) { - npopstn <- sum( - exp( - partitionSummary[find( - partitionSummary[, 1] == npopsTaulu[i] - ), 2] - ) - ) - probs[i] <- npopstn / sumtn - } - for (i in 1:len) { - if (probs[i] > 1e-5) { - line <- c( - as.character(npopsTaulu[i]), ' ', as.character(probs[i]) - ) - cat(line) - if (fid != -1) { - append(fid, line) - append(fid, '\n') - } - } - } - } - return(changesInLogml) -} \ No newline at end of file + npopsTaulu <- unique(partitionSummary[, 1]) + len <- length(npopsTaulu) + probs <- zeros(len, 1) + partitionSummary[, 2] <- partitionSummary[, 2] - + max(partitionSummary[, 2]) + sumtn <- sum(exp(partitionSummary[, 2])) + for (i in 1:len) { + npopstn <- sum( + exp( + partitionSummary[find( + partitionSummary[, 1] == npopsTaulu[i] + ), 2] + ) + ) + probs[i] <- npopstn / sumtn + } + for (i in 1:len) { + if (probs[i] > 1e-5) { + line <- c( + as.character(npopsTaulu[i]), " ", as.character(probs[i]) + ) + cat(line) + if (fid != -1) { + append(fid, line) + append(fid, "\n") + } + } + } + } + return(changesInLogml) +} diff --git a/R/zeros_ones.R b/R/zeros_ones.R index 954754c..4ac8d63 100644 --- a/R/zeros_ones.R +++ b/R/zeros_ones.R @@ -8,15 +8,15 @@ #' @note Actually works for any `x`, but there's no need to bother imposing #' validation controls here. zeros_or_ones <- function(n, x) { - # Expanding n to length 2 if necessary - if (length(n) == 1) n <- c(n, n) + # Expanding n to length 2 if necessary + if (length(n) == 1) n <- c(n, n) - # Returning a matrix or an array - if (length(n) == 2) { - return(matrix(x, n[1], n[2])) - } else { - return(array(x, dim=n)) - } + # Returning a matrix or an array + if (length(n) == 2) { + return(matrix(x, n[1], n[2])) + } else { + return(array(x, dim = n)) + } } #' @title Matrix of zeros @@ -26,12 +26,12 @@ zeros_or_ones <- function(n, x) { #' @param n2 number of columns #' @param ... extra dimensions zeros <- function(n1, n2 = n1, ...) { - if (length(n1) == 1) { - n <- c(n1, n2, ...) - } else { - n <- n1 - } - return(zeros_or_ones(n, 0)) + if (length(n1) == 1) { + n <- c(n1, n2, ...) + } else { + n <- n1 + } + return(zeros_or_ones(n, 0)) } #' @title Matrix of ones @@ -41,10 +41,10 @@ zeros <- function(n1, n2 = n1, ...) { #' @param n2 number of columns #' @param ... extra dimensions ones <- function(n1, n2 = n1, ...) { - if (length(n1) == 1) { - n <- c(n1, n2, ...) - } else { - n <- n1 - } - return(zeros_or_ones(n, 1)) -} \ No newline at end of file + if (length(n1) == 1) { + n <- c(n1, n2, ...) + } else { + n <- n1 + } + return(zeros_or_ones(n, 1)) +} diff --git a/man/calculatePopLogml.Rd b/man/calculatePopLogml.Rd index 9817fad..bc57076 100644 --- a/man/calculatePopLogml.Rd +++ b/man/calculatePopLogml.Rd @@ -12,7 +12,7 @@ calculatePopLogml(points, fii) \item{fii}{fii} } \description{ -Calculates fuzzy (log) marginal likelihood for a population of +Calculates fuzzy (log) marginal likelihood for a population of real values using estimate "fii" for the dispersion value, and Jeffreys prior for the mean parameter. } diff --git a/man/learn_simple_partition.Rd b/man/learn_simple_partition.Rd index 179bc14..5b6cd83 100644 --- a/man/learn_simple_partition.Rd +++ b/man/learn_simple_partition.Rd @@ -12,6 +12,6 @@ learn_simple_partition(ordered_points, fii) \item{fii}{fii} } \description{ -Goes through all the ways to divide the points into two or +Goes through all the ways to divide the points into two or three groups. Chooses the partition which obtains highest logml. } diff --git a/man/load_fasta.Rd b/man/load_fasta.Rd index 9b4c84c..3a0c455 100644 --- a/man/load_fasta.Rd +++ b/man/load_fasta.Rd @@ -19,9 +19,8 @@ Loads a fasta file into matrix format ready for running the hierBAPS algorithm. } \examples{ -msa <- system.file("ext", "seqs.fa", package="rBAPS") +msa <- system.file("ext", "seqs.fa", package = "rBAPS") snp.matrix <- load_fasta(msa) - } \seealso{ rhierbaps::load_fasta diff --git a/man/log_gamma.Rd b/man/log_gamma.Rd index e0957ea..6788d15 100644 --- a/man/log_gamma.Rd +++ b/man/log_gamma.Rd @@ -13,6 +13,6 @@ log_gamma(x) log(gamma(x)) for x > 0, Inf otherwise } \description{ -Equal to log(gamma(x)) with special handling of x < 0 for +Equal to log(gamma(x)) with special handling of x < 0 for Matlab compatibility } diff --git a/man/proportion2str.Rd b/man/proportion2str.Rd index 1263d77..a641926 100644 --- a/man/proportion2str.Rd +++ b/man/proportion2str.Rd @@ -16,6 +16,6 @@ a 4-mark presentation of proportion Convert proportion to string } \note{ -The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The +The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The Matlab equivalent rounds it to 9. } diff --git a/tests/testthat/test-admix1.R b/tests/testthat/test-admix1.R index d755a07..a8f377f 100644 --- a/tests/testthat/test-admix1.R +++ b/tests/testthat/test-admix1.R @@ -1,249 +1,249 @@ context("Admixture analysis") test_that("learn*partition behaves like on Matlab", { - # Test data - p1 <- c(0, .5, 1, 1.5) - p2 <- c(seq(0, .5, .1), 1, 1, 1, 2) - p3 <- c(.1, .1, .1, .5, .5, .5, 1, 1, 1) - p4 <- c(.7, 1, 1, 1) + # Test data + p1 <- c(0, .5, 1, 1.5) + p2 <- c(seq(0, .5, .1), 1, 1, 1, 2) + p3 <- c(.1, .1, .1, .5, .5, .5, 1, 1, 1) + p4 <- c(.7, 1, 1, 1) - # Testing learn_simple_partition - expect_equal( - object = learn_simple_partition(p1, 2), - expected = matrix(c(1, 1, 2, 2)) - ) - expect_equal( - object = learn_simple_partition(p2, 2), - expected = matrix(c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2)) - ) - expect_equal( - object = learn_simple_partition(p3, .5), - expected = matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3)) - ) - expect_equal( - object = learn_simple_partition(p4, 5), - expected = matrix(c(1, 1, 1, 1)) - ) - expect_equal( - object = learn_simple_partition(p4, .1), - expected = matrix(c(1, 2, 2, 2)) - ) + # Testing learn_simple_partition + expect_equal( + object = learn_simple_partition(p1, 2), + expected = matrix(c(1, 1, 2, 2)) + ) + expect_equal( + object = learn_simple_partition(p2, 2), + expected = matrix(c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2)) + ) + expect_equal( + object = learn_simple_partition(p3, .5), + expected = matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3)) + ) + expect_equal( + object = learn_simple_partition(p4, 5), + expected = matrix(c(1, 1, 1, 1)) + ) + expect_equal( + object = learn_simple_partition(p4, .1), + expected = matrix(c(1, 2, 2, 2)) + ) - # Testing learn_partition_modified - expect_equal( - object = learn_partition_modified(p4), - expected = matrix(c(1, 2, 2, 2)) - ) + # Testing learn_partition_modified + expect_equal( + object = learn_partition_modified(p4), + expected = matrix(c(1, 2, 2, 2)) + ) }) test_that("type convertions behave like on Matlab", { - expect_equal(ownNum2Str(1), "1") - expect_equal(ownNum2Str(-123456789), "-123456789") - expect_equal(ownNum2Str(0), "0") - expect_error(ownNum2Str("a")) - expect_equal(proportion2str(1), "1.00") - expect_equal(proportion2str(0), "0.00") - expect_equal(proportion2str(0.4), "0.40") - expect_equal(proportion2str(0.89), "0.89") - expect_equal(proportion2str(-0.4), "0.0-40") # also bugged in original - # TODO: fix after release, as long as it doesn't break anything else + expect_equal(ownNum2Str(1), "1") + expect_equal(ownNum2Str(-123456789), "-123456789") + expect_equal(ownNum2Str(0), "0") + expect_error(ownNum2Str("a")) + expect_equal(proportion2str(1), "1.00") + expect_equal(proportion2str(0), "0.00") + expect_equal(proportion2str(0.4), "0.40") + expect_equal(proportion2str(0.89), "0.89") + expect_equal(proportion2str(-0.4), "0.0-40") # also bugged in original + # TODO: fix after release, as long as it doesn't break anything else }) test_that("computeRows behaves like on Matlab", { - # Matrices - X <- matrix(1:9, 3, byrow = TRUE) - Y <- matrix(9:1, 3, byrow = TRUE) - Z <- matrix(c(-8, 2, -4, 0), byrow = TRUE) - expect_equal( - object = computeRows(1, X, 3), - expected = matrix(c(1, 4, 7)) - ) - expect_equal( - object = computeRows(2, X, 3), - expected = matrix(c(1, 2, 7, 8, 13, 14)) - ) - expect_equal( - object = computeRows(10, X, 3), - expected = matrix(c(1:10, 31:40, 61:70)) - ) - expect_equal( - object = computeRows(100, X, 3), - expected = matrix(c(1:100, 301:400, 601:700)) - ) - expect_equal( - object = computeRows(1, Y, 3), - expected = matrix(c(9, 6, 3)) - ) - expect_equal( - object = computeRows(2, Y, 3), - expected = matrix(c(17, 18, 11, 12, 5, 6)) - ) - expect_equal( - object = computeRows(10, Y, 3), - expected = matrix(c(81:90, 51:60, 21:30)) - ) - expect_equal( - object = computeRows(1, Z, 0), - expected = matrix(, 1, 0) - ) - expect_equal( - object = computeRows(1, Z, 5), - expected = matrix(rep(-8, 5)) - ) - expect_equal( - object = computeRows(2, Z, 1), - expected = matrix(rep(c(-17, -16), 1)) - ) - expect_equal( - object = computeRows(2, Z, 3), - expected = matrix(rep(c(-17, -16), 3)) - ) - expect_equal( - object = computeRows(3, Z, 1), - expected = matrix(rep(-26:-24, 1)) - ) - expect_equal( - object = computeRows(3, Z, 10), - expected = matrix(rep(-26:-24, 10)) - ) + # Matrices + X <- matrix(1:9, 3, byrow = TRUE) + Y <- matrix(9:1, 3, byrow = TRUE) + Z <- matrix(c(-8, 2, -4, 0), byrow = TRUE) + expect_equal( + object = computeRows(1, X, 3), + expected = matrix(c(1, 4, 7)) + ) + expect_equal( + object = computeRows(2, X, 3), + expected = matrix(c(1, 2, 7, 8, 13, 14)) + ) + expect_equal( + object = computeRows(10, X, 3), + expected = matrix(c(1:10, 31:40, 61:70)) + ) + expect_equal( + object = computeRows(100, X, 3), + expected = matrix(c(1:100, 301:400, 601:700)) + ) + expect_equal( + object = computeRows(1, Y, 3), + expected = matrix(c(9, 6, 3)) + ) + expect_equal( + object = computeRows(2, Y, 3), + expected = matrix(c(17, 18, 11, 12, 5, 6)) + ) + expect_equal( + object = computeRows(10, Y, 3), + expected = matrix(c(81:90, 51:60, 21:30)) + ) + expect_equal( + object = computeRows(1, Z, 0), + expected = matrix(, 1, 0) + ) + expect_equal( + object = computeRows(1, Z, 5), + expected = matrix(rep(-8, 5)) + ) + expect_equal( + object = computeRows(2, Z, 1), + expected = matrix(rep(c(-17, -16), 1)) + ) + expect_equal( + object = computeRows(2, Z, 3), + expected = matrix(rep(c(-17, -16), 3)) + ) + expect_equal( + object = computeRows(3, Z, 1), + expected = matrix(rep(-26:-24, 1)) + ) + expect_equal( + object = computeRows(3, Z, 10), + expected = matrix(rep(-26:-24, 10)) + ) }) test_that("computeIndLogml works like on Matlab", { - expect_equivalent(computeIndLogml(10, 1), 2.3026, tol = .0001) - expect_equivalent(computeIndLogml(0, 1), -Inf) - expect_equivalent(computeIndLogml(1, 0), -Inf) - expect_equivalent(computeIndLogml(0, 0), -Inf) - expect_equivalent(computeIndLogml(-pi, -8), 3.2242, tol = .0001) - expect_equivalent(computeIndLogml(2:3, 2), 2.3026, tol = .0001) - expect_equivalent(computeIndLogml(matrix(8:5, 2), 100), 14.316, tol = .001) - expect_equivalent( - object = computeIndLogml(matrix(8:5, 2), matrix(c(1, 3), 1)), - expected = 6.4118, - tol = .001 - ) - expect_equivalent( - object = computeIndLogml(matrix(8:5, 1), matrix(c(1, 3), 1)), - expected = 12.9717, - tol = .001 - ) - expect_equivalent( - object = computeIndLogml(c(8, 1), c(-1.6, 5)), - expected = complex(real = 6.4739, imaginary = pi), - tol = .001 - ) + expect_equivalent(computeIndLogml(10, 1), 2.3026, tol = .0001) + expect_equivalent(computeIndLogml(0, 1), -Inf) + expect_equivalent(computeIndLogml(1, 0), -Inf) + expect_equivalent(computeIndLogml(0, 0), -Inf) + expect_equivalent(computeIndLogml(-pi, -8), 3.2242, tol = .0001) + expect_equivalent(computeIndLogml(2:3, 2), 2.3026, tol = .0001) + expect_equivalent(computeIndLogml(matrix(8:5, 2), 100), 14.316, tol = .001) + expect_equivalent( + object = computeIndLogml(matrix(8:5, 2), matrix(c(1, 3), 1)), + expected = 6.4118, + tol = .001 + ) + expect_equivalent( + object = computeIndLogml(matrix(8:5, 1), matrix(c(1, 3), 1)), + expected = 12.9717, + tol = .001 + ) + expect_equivalent( + object = computeIndLogml(c(8, 1), c(-1.6, 5)), + expected = complex(real = 6.4739, imaginary = pi), + tol = .001 + ) }) test_that("suoritaMuutos works like on Matlab", { - mx1 <- c(10, 5, 8) - mx2 <- matrix(c(10, 9, 5, 8, 8, -7), 2) - expect_equal(suoritaMuutos(10, 3, 1), 10) - expect_equal(suoritaMuutos(mx1, 3, 1), c(10, 5, 8)) - expect_equal(suoritaMuutos(mx1, 3, 2), c(7, 8, 8)) - expect_equal(suoritaMuutos(mx1, 3, 3), c(7, 5, 11)) - expect_equal(suoritaMuutos(mx1, 2, 3), c(8, 5, 10)) - expect_equal(suoritaMuutos(mx1, -7, 3), c(17, 5, 1)) - expect_equal(suoritaMuutos(mx2, 0, 5), mx2) - expect_equal(suoritaMuutos(mx2, 0, 5), mx2) - expect_equal(suoritaMuutos(mx2, -3, 6), matrix(c(13, 9, 5, 8, 8, -10), 2)) + mx1 <- c(10, 5, 8) + mx2 <- matrix(c(10, 9, 5, 8, 8, -7), 2) + expect_equal(suoritaMuutos(10, 3, 1), 10) + expect_equal(suoritaMuutos(mx1, 3, 1), c(10, 5, 8)) + expect_equal(suoritaMuutos(mx1, 3, 2), c(7, 8, 8)) + expect_equal(suoritaMuutos(mx1, 3, 3), c(7, 5, 11)) + expect_equal(suoritaMuutos(mx1, 2, 3), c(8, 5, 10)) + expect_equal(suoritaMuutos(mx1, -7, 3), c(17, 5, 1)) + expect_equal(suoritaMuutos(mx2, 0, 5), mx2) + expect_equal(suoritaMuutos(mx2, 0, 5), mx2) + expect_equal(suoritaMuutos(mx2, -3, 6), matrix(c(13, 9, 5, 8, 8, -10), 2)) }) test_that("laskeMuutokset4 works like on Matlab", { - mx1 <- t(c(.4, 7)) - expect_equivalent( - object = laskeMuutokset4(2, mx1, c(8, 2), 3), - expected = t(c(0, .3742)), - tol = .0001 - ) + mx1 <- t(c(.4, 7)) + expect_equivalent( + object = laskeMuutokset4(2, mx1, c(8, 2), 3), + expected = t(c(0, .3742)), + tol = .0001 + ) }) test_that("etsiParas works like on Matlab", { - mx1 <- t(c(.4, 7)) - expect_equal(etsiParas(2, mx1, c(8, 1), 8), c(.4, 7, 8)) - expect_equivalent(etsiParas(2, mx1, c(8, 1), 1), c(-1.6, 9, 3.1864), .0001) - expect_equivalent( - object = etsiParas(5, mx1, c(8, 1), -pi), - expected = c(-4.6, 12, 3.8111), - tol = .001 - ) - expect_equivalent( - object = etsiParas(-.5, mx1, c(-1, 0), -10), - expected = c(7.4, 0, complex(real = 1.8563, imaginary = 3.1416)), - tol = .0001 - ) + mx1 <- t(c(.4, 7)) + expect_equal(etsiParas(2, mx1, c(8, 1), 8), c(.4, 7, 8)) + expect_equivalent(etsiParas(2, mx1, c(8, 1), 1), c(-1.6, 9, 3.1864), .0001) + expect_equivalent( + object = etsiParas(5, mx1, c(8, 1), -pi), + expected = c(-4.6, 12, 3.8111), + tol = .001 + ) + expect_equivalent( + object = etsiParas(-.5, mx1, c(-1, 0), -10), + expected = c(7.4, 0, complex(real = 1.8563, imaginary = 3.1416)), + tol = .0001 + ) }) test_that("computePersonalAllFreqs works like on Matlab", { - expect_equal(computePersonalAllFreqs(1, 1:4, c(15, 5, 10, 40), 1), 15) - mx <- matrix(c(15, 10, 5, 40), 2) - expect_equal(computePersonalAllFreqs(1, 1:4, mx, 1), c(15, 40)) - expect_equal(computePersonalAllFreqs(1, 1:3, mx, 1), c(15, 40)) - expect_equal(computePersonalAllFreqs(1, 1:2, mx, 1), c(15, 40)) + expect_equal(computePersonalAllFreqs(1, 1:4, c(15, 5, 10, 40), 1), 15) + mx <- matrix(c(15, 10, 5, 40), 2) + expect_equal(computePersonalAllFreqs(1, 1:4, mx, 1), c(15, 40)) + expect_equal(computePersonalAllFreqs(1, 1:3, mx, 1), c(15, 40)) + expect_equal(computePersonalAllFreqs(1, 1:2, mx, 1), c(15, 40)) }) test_that("simuloiAlleeli works like on Matlab", { - sk <- 2 - vk <- 1:3 - ra <- array(1:12, c(2, 2, 3)) - mx1 <- matrix(c(3, 5, 0, 9), 2) - mx2 <- matrix(c(3, 5, 0, 9, 5, 8), 2) - expect_equal(simuloiAlleeli(sk, 1, 1), 1) - expect_equal(simuloiAlleeli(vk, 1, 2), 1) - expect_equal(simuloiAlleeli(ra, 2, 1), 1) - expect_equal(simuloiAlleeli(mx1, 1, 2), 2) - expect_equal(simuloiAlleeli(mx2, 1, 3), 1) + sk <- 2 + vk <- 1:3 + ra <- array(1:12, c(2, 2, 3)) + mx1 <- matrix(c(3, 5, 0, 9), 2) + mx2 <- matrix(c(3, 5, 0, 9, 5, 8), 2) + expect_equal(simuloiAlleeli(sk, 1, 1), 1) + expect_equal(simuloiAlleeli(vk, 1, 2), 1) + expect_equal(simuloiAlleeli(ra, 2, 1), 1) + expect_equal(simuloiAlleeli(mx1, 1, 2), 2) + expect_equal(simuloiAlleeli(mx2, 1, 3), 1) }) test_that("simulateIndividuals works like on Matlab", { - set.seed(2) - expect_equal( - object = simulateIndividuals(1, 3, 2, 0, .2), - expected = matrix(c(1, -999, 1), ncol = 1) - ) - expect_equal( - object = simulateIndividuals(5, 3, 1:3, 4, 0), - expected = matrix(rep(-999, 15 * 3), 15) - ) - expect_equal( - object = simulateIndividuals(3, 3, 2, 1, 1), - expected = matrix(rep(1, 9), 9) - ) - set.seed(2) - expect_equal( - object = sum(simulateIndividuals(3, 3, 2, 1, .5) == 1), - expected = 6 - ) + set.seed(2) + expect_equal( + object = simulateIndividuals(1, 3, 2, 0, .2), + expected = matrix(c(1, -999, 1), ncol = 1) + ) + expect_equal( + object = simulateIndividuals(5, 3, 1:3, 4, 0), + expected = matrix(rep(-999, 15 * 3), 15) + ) + expect_equal( + object = simulateIndividuals(3, 3, 2, 1, 1), + expected = matrix(rep(1, 9), 9) + ) + set.seed(2) + expect_equal( + object = sum(simulateIndividuals(3, 3, 2, 1, .5) == 1), + expected = 6 + ) }) test_that("simulateAllFreqs works as expected", { - empty_mt <- matrix(NA, 0, 0) - expect_equivalent(suppressWarnings(simulateAllFreqs(3)), empty_mt) - expect_equivalent(suppressWarnings(simulateAllFreqs(3:5)), empty_mt) - expect_equivalent( - object = suppressWarnings(simulateAllFreqs(matrix(1:4, 2))), - expected = empty_mt - ) + empty_mt <- matrix(NA, 0, 0) + expect_equivalent(suppressWarnings(simulateAllFreqs(3)), empty_mt) + expect_equivalent(suppressWarnings(simulateAllFreqs(3:5)), empty_mt) + expect_equivalent( + object = suppressWarnings(simulateAllFreqs(matrix(1:4, 2))), + expected = empty_mt + ) }) test_that("computeAllFreqs2 works as expected", { - expect_equivalent(computeAllFreqs2(10), matrix(NA, 0, 0)) + expect_equivalent(computeAllFreqs2(10), matrix(NA, 0, 0)) }) test_that("poistaLiianPienet works as expected", { - expect_equal(poistaLiianPienet(100, matrix(1:4, 2), 0), 100) - expect_equal(poistaLiianPienet(100, matrix(1:4, 2), -5), 100) + expect_equal(poistaLiianPienet(100, matrix(1:4, 2), 0), 100) + expect_equal(poistaLiianPienet(100, matrix(1:4, 2), -5), 100) }) test_that("noIndex works properly", { - abcd_vec <- letters[1:4] - abcd_mat <- matrix(abcd_vec, 2) - abcdef_mat <- matrix(letters[1:6], 2) - efg_vec <- letters[5:7] - expect_equal(noIndex(abcd_vec, 1:6), abcd_vec) - expect_equal(noIndex(abcd_vec, 1:3), abcd_vec[-4]) - expect_equal(noIndex(abcd_vec, 1:2), abcd_vec) - expect_equal(noIndex(abcd_vec, efg_vec), abcd_vec[-4]) - expect_equal(noIndex(abcd_mat, 1), abcd_mat[, 1]) - expect_equal(noIndex(abcd_mat, 2), abcd_mat[, 1]) - expect_equal(noIndex(abcdef_mat, 1:2), abcdef_mat[, 1:2]) - expect_equal(noIndex(abcdef_mat, abcd_mat), abcdef_mat[, 1:2]) -}) \ No newline at end of file + abcd_vec <- letters[1:4] + abcd_mat <- matrix(abcd_vec, 2) + abcdef_mat <- matrix(letters[1:6], 2) + efg_vec <- letters[5:7] + expect_equal(noIndex(abcd_vec, 1:6), abcd_vec) + expect_equal(noIndex(abcd_vec, 1:3), abcd_vec[-4]) + expect_equal(noIndex(abcd_vec, 1:2), abcd_vec) + expect_equal(noIndex(abcd_vec, efg_vec), abcd_vec[-4]) + expect_equal(noIndex(abcd_mat, 1), abcd_mat[, 1]) + expect_equal(noIndex(abcd_mat, 2), abcd_mat[, 1]) + expect_equal(noIndex(abcdef_mat, 1:2), abcdef_mat[, 1:2]) + expect_equal(noIndex(abcdef_mat, abcd_mat), abcdef_mat[, 1:2]) +}) diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index c972153..b5c51af 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -1,245 +1,245 @@ context("Basic Matlab functions") test_that("rand works properly", { - expect_equal(dim(rand()), c(1, 1)) - expect_equal(dim(rand(1, 2)), c(1, 2)) - expect_equal(dim(rand(3, 2)), c(3, 2)) + expect_equal(dim(rand()), c(1, 1)) + expect_equal(dim(rand(1, 2)), c(1, 2)) + expect_equal(dim(rand(3, 2)), c(3, 2)) }) test_that("repmat works properly", { - mx0 <- c(1:4) # when converted to matrix, results in a column vector - mx1 <- matrix(5:8) - mx2 <- matrix(0:-3, 2) - expect_error(repmat(mx0)) - expect_equal(repmat(mx0, 1), t(as.matrix(mx0))) - expect_equal( - object = repmat(mx0, 2), - expected = unname(cbind(rbind(mx0, mx0), rbind(mx0, mx0))) - ) - expect_equal( - object = repmat(mx1, 2), - expected = unname(cbind(rbind(mx1, mx1), rbind(mx1, mx1))) - ) - expect_equal( - object = repmat(mx2, c(2, 3)), - expected = cbind(rbind(mx2, mx2), rbind(mx2, mx2), rbind(mx2, mx2)) - ) - expect_equal( - object = repmat(mx2, c(4, 1)), - expected = rbind(mx2, mx2, mx2, mx2) - ) - expect_equal( - object = repmat(mx2, c(1, 1, 2)), - expected = array(mx2, c(2, 2, 2)) - ) - expect_equal(repmat(1:2, 3), matrix(rep(1:2, 9), 3, 6, byrow=TRUE)) - expect_equal(repmat(10, c(3, 2)), matrix(10, 3, 2)) + mx0 <- c(1:4) # when converted to matrix, results in a column vector + mx1 <- matrix(5:8) + mx2 <- matrix(0:-3, 2) + expect_error(repmat(mx0)) + expect_equal(repmat(mx0, 1), t(as.matrix(mx0))) + expect_equal( + object = repmat(mx0, 2), + expected = unname(cbind(rbind(mx0, mx0), rbind(mx0, mx0))) + ) + expect_equal( + object = repmat(mx1, 2), + expected = unname(cbind(rbind(mx1, mx1), rbind(mx1, mx1))) + ) + expect_equal( + object = repmat(mx2, c(2, 3)), + expected = cbind(rbind(mx2, mx2), rbind(mx2, mx2), rbind(mx2, mx2)) + ) + expect_equal( + object = repmat(mx2, c(4, 1)), + expected = rbind(mx2, mx2, mx2, mx2) + ) + expect_equal( + object = repmat(mx2, c(1, 1, 2)), + expected = array(mx2, c(2, 2, 2)) + ) + expect_equal(repmat(1:2, 3), matrix(rep(1:2, 9), 3, 6, byrow = TRUE)) + expect_equal(repmat(10, c(3, 2)), matrix(10, 3, 2)) }) test_that("zeros and ones work as expected", { - expect_equal(zeros(1), matrix(0, 1)) - expect_equal(zeros(2), matrix(0, 2, 2)) - expect_equal(zeros(2, 1), matrix(0, 2, 1)) - expect_equal(zeros(1, 10), matrix(0, 1, 10)) - expect_equal(zeros(3, 2, 4), array(0, c(3, 2, 4))) - expect_equal(ones(8), matrix(1, 8, 8)) - expect_equal(ones(5, 2), matrix(1, 5, 2)) - expect_equal(ones(2, 100), matrix(1, 2, 100)) - expect_equal(ones(3, 2, 4, 2), array(1, c(3, 2, 4, 2))) + expect_equal(zeros(1), matrix(0, 1)) + expect_equal(zeros(2), matrix(0, 2, 2)) + expect_equal(zeros(2, 1), matrix(0, 2, 1)) + expect_equal(zeros(1, 10), matrix(0, 1, 10)) + expect_equal(zeros(3, 2, 4), array(0, c(3, 2, 4))) + expect_equal(ones(8), matrix(1, 8, 8)) + expect_equal(ones(5, 2), matrix(1, 5, 2)) + expect_equal(ones(2, 100), matrix(1, 2, 100)) + expect_equal(ones(3, 2, 4, 2), array(1, c(3, 2, 4, 2))) }) test_that("times works as expected", { - expect_equal(times(9, 6), as.matrix(54)) - expect_equal(times(9, c(.8, 9)), as.matrix(c(7.2, 81))) - expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45))) - expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2)) - expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2)) - expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 6, 30, 12), 2)) - expect_equal( - object = times(matrix(1:4, 2), matrix(c(10, 3), 1)), - expected = matrix(c(10, 20, 9, 12), 2) - ) - expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 6, 30, 12), 2)) - expect_equal( - object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)), - expected = matrix(c(10, -10, 9, 36), 2) - ) - expect_equal( - object = times(matrix(c(-1.6, 5), 1), c(8, 1)), - expected = matrix(c(-12.8, -1.6, 40, 5), 2) - ) + expect_equal(times(9, 6), as.matrix(54)) + expect_equal(times(9, c(.8, 9)), as.matrix(c(7.2, 81))) + expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45))) + expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2)) + expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2)) + expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 6, 30, 12), 2)) + expect_equal( + object = times(matrix(1:4, 2), matrix(c(10, 3), 1)), + expected = matrix(c(10, 20, 9, 12), 2) + ) + expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 6, 30, 12), 2)) + expect_equal( + object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)), + expected = matrix(c(10, -10, 9, 36), 2) + ) + expect_equal( + object = times(matrix(c(-1.6, 5), 1), c(8, 1)), + expected = matrix(c(-12.8, -1.6, 40, 5), 2) + ) }) test_that("colon works as expected (hee hee)", { - expect_equal(colon(1, 4), 1:4) - expect_length(colon(4, 1), 0) + expect_equal(colon(1, 4), 1:4) + expect_length(colon(4, 1), 0) }) test_that("size works as on MATLAB", { - sk <- 10 - vk <- 1:4 - mx <- matrix(1:6, 2) - ra <- array(1:24, c(2, 3, 4)) - expect_equal(size(sk), 1) - expect_equal(size(vk), c(1, 4)) - expect_equal(size(mx), c(2, 3)) - expect_equal(size(ra), c(2, 3, 4)) - expect_equal(size(sk, 199), 1) - expect_equal(size(vk, 199), 1) - expect_equal(size(mx, 199), 1) - expect_equal(size(ra, 199), 1) - expect_equal(size(vk, 2), 4) - expect_equal(size(mx, 2), 3) - expect_equal(size(ra, 2), 3) - expect_equal(size(ra, 3), 4) + sk <- 10 + vk <- 1:4 + mx <- matrix(1:6, 2) + ra <- array(1:24, c(2, 3, 4)) + expect_equal(size(sk), 1) + expect_equal(size(vk), c(1, 4)) + expect_equal(size(mx), c(2, 3)) + expect_equal(size(ra), c(2, 3, 4)) + expect_equal(size(sk, 199), 1) + expect_equal(size(vk, 199), 1) + expect_equal(size(mx, 199), 1) + expect_equal(size(ra, 199), 1) + expect_equal(size(vk, 2), 4) + expect_equal(size(mx, 2), 3) + expect_equal(size(ra, 2), 3) + expect_equal(size(ra, 3), 4) }) test_that("reshape reshapes properly", { - mx <- matrix(1:4, 2) - ra <- array(1:12, c(2, 3, 2)) - expect_equal(reshape(mx, c(1, 4)), matrix(1:4, 1)) - expect_equal(reshape(mx, c(2, 2)), mx) - expect_equal(reshape(mx, c(1, 1, 4)), array(mx, c(1, 1, 4))) - expect_error(reshape(mx, c(1, 2, 3))) - expect_error(reshape(ra, c(1, 2, 3))) - expect_equal(reshape(ra, c(3, 2, 2)), array(ra, c(3, 2, 2))) + mx <- matrix(1:4, 2) + ra <- array(1:12, c(2, 3, 2)) + expect_equal(reshape(mx, c(1, 4)), matrix(1:4, 1)) + expect_equal(reshape(mx, c(2, 2)), mx) + expect_equal(reshape(mx, c(1, 1, 4)), array(mx, c(1, 1, 4))) + expect_error(reshape(mx, c(1, 2, 3))) + expect_error(reshape(ra, c(1, 2, 3))) + expect_equal(reshape(ra, c(3, 2, 2)), array(ra, c(3, 2, 2))) }) test_that("isfield works as on Matlab", { - S <- list() - S$x <- rnorm(100) - S$y <- sin(S$x) - S$title <- "y = sin(x)" - expect_true(isfield(S, "title")) - expect_equivalent( - object = isfield(S, c("x", "y", "z", "title", "error")), - expected = c(TRUE, TRUE, FALSE, TRUE, FALSE) - ) + S <- list() + S$x <- rnorm(100) + S$y <- sin(S$x) + S$title <- "y = sin(x)" + expect_true(isfield(S, "title")) + expect_equivalent( + object = isfield(S, c("x", "y", "z", "title", "error")), + expected = c(TRUE, TRUE, FALSE, TRUE, FALSE) + ) }) test_that("strcmp works as expected", { - yes <- 'Yes' - no <- 'No' - ja <- 'Yes' - expect_false(strcmp(yes, no)) - expect_true(strcmp(yes, ja)) - s1 <- 'upon' - s2 <- matrix(c('Once', 'upon', 'a', 'time'), 2, byrow=TRUE) - s3 <- c('Once', 'upon', 'a', 'time') - s4 <- matrix(c("A", "bc", "def", "G"), 2, byrow=TRUE) - s5 <- matrix(c("B", "c", "def", "G"), 2, byrow=TRUE) - expect_equal(strcmp(s1, s2), matrix(c(FALSE, FALSE, TRUE, FALSE), 2)) - expect_equivalent(strcmp(s1, s3), c(FALSE, TRUE, FALSE, FALSE)) - expect_error(strcmp(s2, s3)) - expect_equal(strcmp(s4, s5), matrix(c(FALSE, TRUE, FALSE, TRUE), 2)) + yes <- "Yes" + no <- "No" + ja <- "Yes" + expect_false(strcmp(yes, no)) + expect_true(strcmp(yes, ja)) + s1 <- "upon" + s2 <- matrix(c("Once", "upon", "a", "time"), 2, byrow = TRUE) + s3 <- c("Once", "upon", "a", "time") + s4 <- matrix(c("A", "bc", "def", "G"), 2, byrow = TRUE) + s5 <- matrix(c("B", "c", "def", "G"), 2, byrow = TRUE) + expect_equal(strcmp(s1, s2), matrix(c(FALSE, FALSE, TRUE, FALSE), 2)) + expect_equivalent(strcmp(s1, s3), c(FALSE, TRUE, FALSE, FALSE)) + expect_error(strcmp(s2, s3)) + expect_equal(strcmp(s4, s5), matrix(c(FALSE, TRUE, FALSE, TRUE), 2)) }) test_that("isempty works as expected", { - A <- array(dim=c(0, 2, 2)) - B <- matrix(rep(NA, 4), 2) - C <- matrix(rep(0, 4), 2) - cat1 <- as.factor(c(NA, NA)) - cat2 <- as.factor(c()) - str1 <- matrix(rep("", 3)) - expect_true(isempty(A)) - expect_false(isempty(B)) - expect_false(isempty(C)) - expect_false(isempty(cat1)) - expect_true(isempty(cat2)) - expect_false(isempty(str1)) + A <- array(dim = c(0, 2, 2)) + B <- matrix(rep(NA, 4), 2) + C <- matrix(rep(0, 4), 2) + cat1 <- as.factor(c(NA, NA)) + cat2 <- as.factor(c()) + str1 <- matrix(rep("", 3)) + expect_true(isempty(A)) + expect_false(isempty(B)) + expect_false(isempty(C)) + expect_false(isempty(cat1)) + expect_true(isempty(cat2)) + expect_false(isempty(str1)) }) test_that("find works as expected", { - X <- matrix(c(1, 0, 2, 0, 1, 1, 0, 0, 4), 3, byrow=TRUE) - Y <- seq(1, 19, 2) - expect_equal(find(X), c(1, 5, 7, 8, 9)) - expect_equal(find(!X), c(2, 3, 4, 6)) - expect_equal(find(Y == 13), 7) + X <- matrix(c(1, 0, 2, 0, 1, 1, 0, 0, 4), 3, byrow = TRUE) + Y <- seq(1, 19, 2) + expect_equal(find(X), c(1, 5, 7, 8, 9)) + expect_equal(find(!X), c(2, 3, 4, 6)) + expect_equal(find(Y == 13), 7) }) test_that("sortrows works as expected", { - mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) - expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) - expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) - expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) + mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) + expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) + expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) + expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) test_that("cell works as expected", { - expect_equivalent(cell(0), array(0, dim = c(0, 0))) - expect_equivalent(cell(1), array(0, dim = c(1, 1))) - expect_equivalent(cell(2), array(0, dim = c(2, 2))) - expect_equivalent(cell(3, 4), array(0, dim = c(3, 4))) - expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6))) + expect_equivalent(cell(0), array(0, dim = c(0, 0))) + expect_equivalent(cell(1), array(0, dim = c(1, 1))) + expect_equivalent(cell(2), array(0, dim = c(2, 2))) + expect_equivalent(cell(3, 4), array(0, dim = c(3, 4))) + expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6))) }) test_that("blanks works as expected", { - expect_warning(blanks(-1)) - expect_equal(suppressWarnings(blanks(-1)), "") - expect_equal(blanks(0), "") - expect_equal(blanks(1), " ") - expect_equal(blanks(10), " ") + expect_warning(blanks(-1)) + expect_equal(suppressWarnings(blanks(-1)), "") + expect_equal(blanks(0), "") + expect_equal(blanks(1), " ") + expect_equal(blanks(10), " ") }) test_that("squeeze works as expected", { - A <- array(dim = c(2, 1, 2)) - A[, , 1] <- c(1, 2) - A[, , 2] <- c(3, 4) - expect_equal(squeeze(A), matrix(1:4, 2)) - A <- array(0, dim = c(1, 1, 3)) - A[, , 1:3] <- 1:3 - expect_equal(squeeze(A), matrix(1:3, 3)) + A <- array(dim = c(2, 1, 2)) + A[, , 1] <- c(1, 2) + A[, , 2] <- c(3, 4) + expect_equal(squeeze(A), matrix(1:4, 2)) + A <- array(0, dim = c(1, 1, 3)) + A[, , 1:3] <- 1:3 + expect_equal(squeeze(A), matrix(1:3, 3)) }) test_that("fix works as expected", { - X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow=TRUE) - Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow=TRUE) - expect_identical(fix(X), Y) + X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow = TRUE) + Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow = TRUE) + expect_identical(fix(X), Y) }) test_that("isspace works as expected", { - chr <- '123 Main St.' - X <- '\t a b\tcde f' - expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)) - expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0)) + chr <- "123 Main St." + X <- "\t a b\tcde f" + expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)) + expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0)) }) test_that("nargin works correctly", { - addme <- function(a, b) { - if (nargin() == 2) { - c <- a + b - } else if (nargin() == 1) { - c <- a + a - } else { - c <- 0 - } - return(c) - } - expect_equal(addme(13, 42), 55) - expect_equal(addme(13), 26) - expect_equal(addme(), 0) + addme <- function(a, b) { + if (nargin() == 2) { + c <- a + b + } else if (nargin() == 1) { + c <- a + a + } else { + c <- 0 + } + return(c) + } + expect_equal(addme(13, 42), 55) + expect_equal(addme(13), 26) + expect_equal(addme(), 0) }) test_that("setdiff works as expected", { - A <- c(3, 6, 2, 1, 5, 1, 1) - B <- c(2, 4, 6) - C <- c(1, 3, 5) - expect_equal(setdiff_MATLAB(A, B), C) - A <- data.frame( - Var1 = 1:5, - Var2 = LETTERS[1:5], - Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE) - ) - B <- data.frame( - Var1 = seq(1, 9, by = 2), - Var2 = LETTERS[seq(1, 9, by = 2)], - Var3 = rep(FALSE, 5) - ) - C <- data.frame( - Var1 = c(2, 4), - Var2 = c('B', 'D'), - Var3 = c(TRUE, TRUE) - ) - row.names(C) <- c(2L, 4L) - expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames - # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 -}) \ No newline at end of file + A <- c(3, 6, 2, 1, 5, 1, 1) + B <- c(2, 4, 6) + C <- c(1, 3, 5) + expect_equal(setdiff_MATLAB(A, B), C) + A <- data.frame( + Var1 = 1:5, + Var2 = LETTERS[1:5], + Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE) + ) + B <- data.frame( + Var1 = seq(1, 9, by = 2), + Var2 = LETTERS[seq(1, 9, by = 2)], + Var3 = rep(FALSE, 5) + ) + C <- data.frame( + Var1 = c(2, 4), + Var2 = c("B", "D"), + Var3 = c(TRUE, TRUE) + ) + row.names(C) <- c(2L, 4L) + expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames + # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 +}) diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index dbb9395..0daccf7 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -2,73 +2,73 @@ context("Auxiliary functions to greedyMix") # Defining the relative path to current inst ----------------------------------- if (interactive()) { - path_inst <- "../../inst/ext" + path_inst <- "../../inst/ext" } else { - path_inst <- system.file("ext", "", package="rBAPS") + path_inst <- system.file("ext", "", package = "rBAPS") } # Reading datasets ------------------------------------------------------------- baps_diploid <- read.delim( - file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep="/"), - sep = " ", - header = FALSE + file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep = "/"), + sep = " ", + header = FALSE ) test_that("handleData works as expected", { - data_obs <- handleData(baps_diploid)$newData - data_exp <- matrix( - c( - -9, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, - -9, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, - 3, 2, 2, 3, 2, -9, 3, 1, 2, 1, 2, - 2, 1, 2, 1, 2, -9, 1, 1, 1, 1, 2, - 3, 1, 1, 1, 2, 1, 1, 2, -9, 1, 3, - 3, 1, 2, 1, 1, 1, 2, 1, -9, 2, 3, - 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 4, - 3, 2, 2, 3, 2, 2, 3, 1, 2, 1, 4, - 2, 1, 2, 1, -9, 1, 1, 1, 1, 1, 5, - 3, 1, 1, 1, -9, 1, 1, 2, 1, 1, 5 - ), - nrow = 10, byrow = TRUE - ) - colnames(data_exp) <- colnames(data_obs) - expect_equal(data_obs, data_exp) + data_obs <- handleData(baps_diploid)$newData + data_exp <- matrix( + c( + -9, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, + -9, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, + 3, 2, 2, 3, 2, -9, 3, 1, 2, 1, 2, + 2, 1, 2, 1, 2, -9, 1, 1, 1, 1, 2, + 3, 1, 1, 1, 2, 1, 1, 2, -9, 1, 3, + 3, 1, 2, 1, 1, 1, 2, 1, -9, 2, 3, + 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 4, + 3, 2, 2, 3, 2, 2, 3, 1, 2, 1, 4, + 2, 1, 2, 1, -9, 1, 1, 1, 1, 1, 5, + 3, 1, 1, 1, -9, 1, 1, 2, 1, 1, 5 + ), + nrow = 10, byrow = TRUE + ) + colnames(data_exp) <- colnames(data_obs) + expect_equal(data_obs, data_exp) }) context("Opening files on greedyMix") df_fasta <- greedyMix( - data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), - format = "FASTA" + data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), + format = "FASTA" ) df_vcf <- greedyMix( - data = file.path(path_inst, "vcf_example.vcf"), - format = "VCF", - verbose = FALSE + data = file.path(path_inst, "vcf_example.vcf"), + format = "VCF", + verbose = FALSE ) df_bam <- greedyMix( - data = file.path(path_inst, "bam_example.bam"), - format = "BAM", + data = file.path(path_inst, "bam_example.bam"), + format = "BAM", ) # TODO #19: add example reading Genpop test_that("Files are imported correctly", { - expect_equal(dim(df_fasta), c(5, 99)) - expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3)) - expect_error( - greedyMix( - data = paste(path_inst, "sam_example.sam", sep="/"), - format = "SAM", - ) - ) - expect_equal(length(df_bam[[1]]), 13) + expect_equal(dim(df_fasta), c(5, 99)) + expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3)) + expect_error( + greedyMix( + data = paste(path_inst, "sam_example.sam", sep = "/"), + format = "SAM", + ) + ) + expect_equal(length(df_bam[[1]]), 13) }) context("Linkage") test_that("Linkages are properly calculated", { - Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5) - expect_equal( - object = linkage(Y), - expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3) - ) + Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5) + expect_equal( + object = linkage(Y), + expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol = 3) + ) })