diff --git a/DESCRIPTION b/DESCRIPTION index 39d4f81..ebfa455 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Description: Partial R implementation of the BAPS software computationally-efficient method for the identification of admixture events in genetic population history. License: GPL-3 +BugReports: https://github.com/ocbe-uio/rBAPS/issues Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 8808d38..8d78b00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(laskeMuutokset4) export(learn_partition_modified) export(learn_simple_partition) export(linkage) +export(load_fasta) export(logml2String) export(lueGenePopData) export(lueNimi) diff --git a/R/greedyMix.R b/R/greedyMix.R index c5acfe8..83472e4 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -11,386 +11,10 @@ greedyMix <- function( savePreProcessed = NULL, filePreProcessed = NULL ) { - # ASK: Unclear when fixedk == TRUE. Remove? - # check whether fixed k mode is selected - # h0 <- findobj('Tag','fixk_menu') - # fixedK = get(h0, 'userdata'); - fixedK <- FALSE - - # if fixedK - # if ~(fixKWarning == 1) % call function fixKWarning - # return - # end - # end - - # ASK: ditto - # % check whether partition compare mode is selected - # h1 = findobj('Tag','partitioncompare_menu'); - # partitionCompare = get(h1, 'userdata'); - partitionCompare <- FALSE - - if (is(tietue, "list") | is(tietue, "character")) { - # ---------------------------------------------------------------------- - # Defining type of file - # ---------------------------------------------------------------------- - if (is.null(format)) { - input_type <- inputdlg( - paste( - 'Specify the format of your data:\n', - '1) BAPS-format\n', - '2) GenePop-format\n', - '3) Preprocessed data\n' - ) - ) - # Converting from number into name - input_type_name <- switch( - input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data' - ) - } else { - input_type_name <- paste0(format, "-format") - } - if (length(input_type_name) == 0) { - stop('Invalid alternative') - } else if (input_type_name == 'BAPS-format') { - # ------------------------------------------------------------------ - # Treating BAPS-formatted files - # ------------------------------------------------------------------ - if (!is(tietue, "character")) { - pathname_filename <- uigetfile( - "*.txt", "Load data in BAPS-format" - ) - } else { - pathname_filename <- tietue - } - - # ASK: remove? - # if ~isempty(partitionCompare) - # fprintf(1,'Data: %s\n',[pathname filename]); - # end - - data <- read.delim(pathname_filename, header = FALSE, sep = " ") - data <- as.matrix(data) - ninds <- testaaOnkoKunnollinenBapsData(data) # testing - if (ninds == 0) stop('Incorrect Data-file') - - # ASK: remove? - # h0 = findobj('Tag','filename1_text'); - # set(h0,'String',filename); clear h0; - message( - 'When using data which are in BAPS-format, ', - 'you can specify the sampling populations of the ', - 'individuals by giving two additional files: ', - 'one containing the names of the populations, ', - 'the other containing the indices of the first ', - 'individuals of the populations.' - ) - input_pops <- inputdlg( - prompt = 'Do you wish to specify the sampling populations? [y/N]', - definput = 'N' - ) - if (tolower(input_pops) %in% c('yes', 'y')) { - popfile <- uigetfile('*.txt', 'Load population names') - kysyToinen <- ifelse(popfile$name == 0, 0, 1) - if (kysyToinen == 1) { - indicesfile <- uigetfile('*.txt', 'Load population indices') - if (indicesfile == 0) { - popnames = "" - } else { - # popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun - } - } else { - popnames <- "" - } - } else { - popnames <- "" - } - - temp_handleData <- handleData(data) - data <- temp_handleData$newData - rowsFromInd <- temp_handleData$rowsFromInd - alleleCodes <- temp_handleData$alleleCodes - noalle <- temp_handleData$noalle - adjprior <- temp_handleData$adjprior - priorTerm <- temp_handleData$priorTerm - Z_dist <- newGetDistances(data,rowsFromInd) - Z <- Z_dist$Z - dist <- Z_dist$dist - rm(temp_handleData, Z_dist) - if (is.null(savePreProcessed)) { - save_preproc <- questdlg( - quest = 'Do you wish to save pre-processed data?', - dlgtitle = 'Save pre-processed data?', - defbtn = 'y' - ) - } else { - save_preproc <- savePreProcessed - } - if (save_preproc %in% c('y', 'yes', TRUE)) { - file_out <- uiputfile('.rda','Save pre-processed data as') - kokonimi <- paste0(file_out$path, file_out$name) - c <- list() - c$data <- data - c$rowsFromInd <- rowsFromInd - c$alleleCodes <- alleleCodes - c$noalle <- noalle - c$adjprior <- adjprior - c$priorTerm <- priorTerm - c$dist <- dist - c$popnames <- popnames - c$Z <- Z - save(c, file = kokonimi) - rm(c) - } - } else if (input_type_name == 'GenePop-format') { - # ------------------------------------------------------------------ - # Treating GenePop-formatted files - # ------------------------------------------------------------------ - if (!is(tietue, "character")) { - filename_pathname <- uigetfile( - filter = '*.txt', - title = 'Load data in GenePop-format' - ) - if (filename_pathname$name == 0) stop("No name provided") - } else { - filename_pathname <- tietue - } - - # ASK: remove? - # if ~isempty(partitionCompare) - # fprintf(1,'Data: %s\n',[pathname filename]); - # end - - kunnossa <- testaaGenePopData(filename_pathname) - if (kunnossa == 0) stop("testaaGenePopData returned 0") - data_popnames <- lueGenePopData(filename_pathname) - data <- data_popnames$data - popnames <- data_popnames$popnames - -# h0 = findobj('Tag','filename1_text'); -# set(h0,'String',filename); clear h0; - - list_dranap <- handleData(data) - data <- list_dranap$newData - rowsFromInd <- list_dranap$rowsFromInd - alleleCodes <- list_dranap$alleleCodes - noalle <- list_dranap$noalle - adjprior <- list_dranap$adjprior - priorTerm <- list_dranap$prioterm - - list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug - Z <- list_Zd$Z - dist <- list_Zd$dist - - if (is.null(savePreProcessed)) { - save_preproc <- questdlg( - quest = 'Do you wish to save pre-processed data?', - dlgtitle = 'Save pre-processed data?', - defbtn = 'y' - ) - } else { - save_preproc <- savePreProcessed - } - if (save_preproc %in% c('y', 'Yes', TRUE)) { - file_out <- uiputfile('.rda','Save pre-processed data as') - kokonimi <- paste0(file_out$path, file_out$name) - # FIXME: translate functions above so the objects below exist - c$data <- data - c$rowsFromInd <- rowsFromInd - c$alleleCodes <- alleleCodes - c$noalle <- noalle - c$adjprior <- adjprior - c$priorTerm <- priorTerm - c$dist <- dist - c$popnames <- popnames - c$Z <- Z - save(c, file = kokonimi) - rm(c) - } - } else if (input_type_name == 'Preprocessed data') { - # ------------------------------------------------------------------ - # Handling Pre-processed data - # ------------------------------------------------------------------ - file_in <- uigetfile( - filter = '*.txt', - title = 'Load pre-processed data in GenePop-format' - ) - if (file_in$name == 0) stop("No name provided") - - # ASK: remove? - # h0 = findobj('Tag','filename1_text'); - # set(h0,'String',filename); clear h0; - # if ~isempty(partitionCompare) - # fprintf(1,'Data: %s\n',[pathname filename]); - # end - - struct_array <- readRDS(paste0(file_in$path, file_in$name)) - if (isfield(struct_array,'c')) { # Matlab versio - c <- struct_array$c - if (!isfield(c,'dist')) stop('Incorrect file format') - } else if (isfield(struct_array,'dist')) { #Mideva versio - c <- struct_array - } else { - stop('Incorrect file format') - } - data <- double(c$data) - rowsFromInd <- c$rowsFromInd - alleleCodes <- c$alleleCodes - noalle <- c$noalle - adjprior <- c$adjprior - priorTerm <- c$priorTerm - dist <- c$dist - popnames <- c$popnames - Z <- c$Z - rm(c) - } - } else { - data <- double(tietue$data) - rowsFromInd <- tietue$rowsFromInd - alleleCodes <- tietue$alleleCodes - noalle <- tietue$noalle - adjprior <- tietue$adjprior - priorTerm <- tietue$priorTerm - dist <- tietue$dist - popnames <- tietue$popnames - Z <- tietue$Z - rm(tietue) - } - - # ========================================================================== - # Declaring global variables and changing environment of children functions - # ========================================================================== - PARTITION <- vector() - COUNTS <- vector() - SUMCOUNTS <- vector() - POP_LOGML <- vector() - clearGlobalVars() - - environment(writeMixtureInfo) <- environment() - # ========================================================================== - c <- list() - c$data <- data - c$noalle <- noalle - c$adjprior <- adjprior - c$priorTerm <- priorTerm - c$dist <- dist - c$Z <- Z - c$rowsFromInd <- rowsFromInd - - ninds <- length(unique(data[, ncol(data)])) - ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd) - c$rows <- t(rbind(ekat, ekat + rowsFromInd - 1)) - - # ASK remove? - # partition compare - # if (!is.null(partitionCompare)) { - # nsamplingunits <- size(c$rows, 1) - # partitions <- partitionCompare$partitions - # npartitions <- size(partitions, 2) - # partitionLogml <- zeros(1, npartitions) - # for (i in seq_len(npartitions)) { - # # number of unique partition lables - # npops <- length(unique(partitions[, i])) - - # partitionInd <- zeros(ninds * rowsFromInd, 1) - # partitionSample <- partitions[, i] - # for (j in seq_len(nsamplingunits)) { - # partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] - # } - # # partitionLogml[i] = initialCounts( - # # partitionInd, - # # data[, seq_len(end - 1)], - # # npops, - # # c$rows, - # # noalle, - # # adjprior - # # ) #TODO translate - # } - # # return the logml result - # partitionCompare$logmls <- partitionLogml - # # set(h1, 'userdata', partitionCompare) - # return() - # } - - if (fixedK) { - # logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate - # logml <- logml_npops_partitionSummary$logml - # npops <- logml_npops_partitionSummary$npops - # partitionSummary <- logml_npops_partitionSummary$partitionSummary - } else { - logml_npops_partitionSummary <- indMix(c) - logml <- logml_npops_partitionSummary$logml - npops <- logml_npops_partitionSummary$npops - partitionSummary <- logml_npops_partitionSummary$partitionSummary - } - if (logml == 1) { - warning("logml == 1") - return() - } - - data <- data[, seq_len(ncol(data) - 1)] - - # ASK: remove? - # h0 = findobj('Tag','filename1_text') - # inp = get(h0,'String'); - # h0 = findobj('Tag','filename2_text') - # outp = get(h0,'String'); - inp <- vector() - outp <- vector() - - changesInLogml <- writeMixtureInfo( - logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, - popnames, fixedK - ) # FIXME: broken - - # viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder - - talle <- questdlg( - quest = paste( - 'Do you want to save the mixture populations', - 'so that you can use them later in admixture analysis?' - ), - dlgtitle = 'Save results?', - defbtn = 'y' - ) - if (talle %in% c('Yes', 'y')) { - filename_pathname <- uiputfile('.mat','Save results as') - - # ====================================================================== - cond <- (sum(filename_pathname$name) == 0) | - (sum(filename_pathname$path) == 0) - if (cond) { - # Cancel was pressed - return() - } else { - # copy 'baps4_output.baps' into the text file with the same name. - if (file.exists('baps4_output.baps')) { - file.copy( - from = 'baps4_output.baps', - to = paste0( - filename_pathname$path, filename_pathname$name, '.txt' - ) - ) - file.remove('baps4_output.baps') - } - } - # ====================================================================== - - c$PARTITION <- PARTITION - c$COUNTS <- COUNTS - c$SUMCOUNTS <- SUMCOUNTS - c$alleleCodes <- alleleCodes - c$adjprior <- adjprior - c$popnames <- popnames - c$rowsFromInd <- rowsFromInd - c$data <- data - c$npops <- npops - c$noalle <- noalle - c$mixtureType <- 'mix' - c$logml <- logml - c$changesInLogml <- changesInLogml - - save(c, file = paste0(filename_pathname$path, filename_pathname$name)) - } else { - if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps') - } + stop( + "greedyMix() has been superseded by load_fasta().", + " Please change your code to use the latter instead of the former.", + " If you believe the error is internal to rBAPS, please open", + " a new issue (link on the package DESCRIPTION file)." + ) } diff --git a/man/load_fasta.Rd b/man/load_fasta.Rd new file mode 100644 index 0000000..037c49b --- /dev/null +++ b/man/load_fasta.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_fasta.R +\name{load_fasta} +\alias{load_fasta} +\title{load_fasta} +\usage{ +load_fasta(msa, keep.singletons = FALSE) +} +\arguments{ +\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered} + +\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters} +} +\value{ +A character matrix with filtered SNP data +} +\description{ +Loads a fasta file into matrix format ready for +running the hierBAPS algorithm. +} +\examples{ +msa <- system.file("extdata", "seqs.fa", package = "rhierbaps") +snp.matrix <- load_fasta(msa) +}