From 862a91febcc382ece993ba036042aeeff761639d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 15:16:13 +0200 Subject: [PATCH 1/7] Added bare-bones lueGenePopData --- NAMESPACE | 1 + R/greedyMix.R | 64 ----------------------------------------- R/lueGenePopData.R | 66 +++++++++++++++++++++++++++++++++++++++++++ man/lueGenePopData.Rd | 17 +++++++++++ 4 files changed, 84 insertions(+), 64 deletions(-) create mode 100644 R/lueGenePopData.R create mode 100644 man/lueGenePopData.Rd diff --git a/NAMESPACE b/NAMESPACE index d478d16..f6edaf8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(inputdlg) export(isfield) export(laskeMuutokset4) export(learn_simple_partition) +export(lueGenePopData) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) diff --git a/R/greedyMix.R b/R/greedyMix.R index 88c9004..ade1cb0 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -1580,70 +1580,6 @@ greedyMix <- function( # %------------------------------------------------------ - -# function [data, popnames] = lueGenePopData(tiedostonNimi) - -# fid = fopen(tiedostonNimi); -# line = fgetl(fid); %ensimm�inen rivi -# line = fgetl(fid); %toinen rivi -# count = rivinSisaltamienMjonojenLkm(line); - -# line = fgetl(fid); -# lokusRiveja = 1; -# while (testaaPop(line)==0) -# lokusRiveja = lokusRiveja+1; -# line = fgetl(fid); -# end - -# if lokusRiveja>1 -# nloci = lokusRiveja; -# else -# nloci = count; -# end - -# popnames = cell(10,2); -# data = zeros(100, nloci+1); -# nimienLkm=0; -# ninds=0; -# poimiNimi=1; -# digitFormat = -1; -# while line ~= -1 -# line = fgetl(fid); - -# if poimiNimi==1 -# %Edellinen rivi oli 'pop' -# nimienLkm = nimienLkm+1; -# ninds = ninds+1; -# if nimienLkm>size(popnames,1); -# popnames = [popnames; cell(10,2)]; -# end -# nimi = lueNimi(line); -# if digitFormat == -1 -# digitFormat = selvitaDigitFormat(line); -# divider = 10^digitFormat; -# end -# popnames{nimienLkm, 1} = {nimi}; %N�in se on greedyMix:iss�kin?!? -# popnames{nimienLkm, 2} = ninds; -# poimiNimi=0; - -# data = addAlleles(data, ninds, line, divider); - -# elseif testaaPop(line) -# poimiNimi = 1; - -# elseif line ~= -1 -# ninds = ninds+1; -# data = addAlleles(data, ninds, line, divider); -# end -# end - -# data = data(1:ninds*2,:); -# popnames = popnames(1:nimienLkm,:); -# fclose(fid); - -# %-------------------------------------------------------- - - # function data = addAlleles(data, ind, line, divider) # % Lisaa BAPS-formaatissa olevaan datataulukkoon # % yksil�� ind vastaavat rivit. Yksil�n alleelit diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R new file mode 100644 index 0000000..2c79d03 --- /dev/null +++ b/R/lueGenePopData.R @@ -0,0 +1,66 @@ +#' @title Read GenePop Data +#' @description Reads GenePop-formatted data +#' @param tiedostonNimi Name of the file +#' @return list containing data and popnames +#' @export +lueGenePopData <- function (tiedostonNimi) { + + fid <- load(tiedostonNimi) + line1 <- readLines(fid)[1] # ensimmäinen rivi + line2 <- readLines(fid)[2] # toinen rivi + count <- rivinSisaltamienMjonojenLkm(line) + + line <- readLines(fid)[3] + lokusRiveja <- 1 + while (testaaPop(line) == 0) { + lokusRiveja <- lokusRiveja + 1 # locus row + line <- readLines(fid)[3 + lokusRiveja] + } + + if (lokusRiveja > 1) { + nloci <- lokusRiveja + } else { + nloci <- count + } + + popnames <- cell(10, 2) + data <- zeros(100, nloci + 1) + nimienLkm <- 0 + ninds <- 0 + poimiNimi <- 1 + digitFormat <- -1 + while (line != -1) { + line <- readLines(fid)[lokusRiveja + 1] + lokusRiveja <- lokusRiveja + 1 + + if (poimiNimi == 1) { + # Edellinen rivi oli 'pop' + nimienLkm <- nimienLkm + 1 + ninds <- ninds + 1 + if (nimienLkm > size(popnames, 1)) { + popnames <- c(popnames, cell(10, 2)) + } + nimi <- lueNimi(line) + if (digitFormat == -1) { + digitFormat <- selvitaDigitFormat(line) + divider <- 10 ^ digitFormat + } + popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? + popnames[nimienLkm, 2] <- ninds + poimiNimi <- 0 + + data <- addAlleles(data, ninds, line, divider) + + } else if (testaaPop(line)) { + poimiNimi <- 1 + + } else if (line != -1) { + ninds <- ninds + 1 + data <- addAlleles(data, ninds, line, divider) + } + } + + data <- data[1:(ninds * 2),] + popnames <- popnames[seq_len(nimienLkm),] + return(list(data = data, popnames = popnames)) +} \ No newline at end of file diff --git a/man/lueGenePopData.Rd b/man/lueGenePopData.Rd new file mode 100644 index 0000000..02c0612 --- /dev/null +++ b/man/lueGenePopData.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lueGenePopData.R +\name{lueGenePopData} +\alias{lueGenePopData} +\title{Read GenePop Data} +\usage{ +lueGenePopData(tiedostonNimi) +} +\arguments{ +\item{tiedostonNimi}{Name of the file} +} +\value{ +list containing data and popnames +} +\description{ +Reads GenePop-formatted data +} From 31aeb99257a93db4309fb78258242efcce6045fa Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:07:13 +0200 Subject: [PATCH 2/7] Translated cell(), added tests --- R/cell.R | 14 ++++++++++++++ man/cell.Rd | 19 +++++++++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 8 ++++++++ 3 files changed, 41 insertions(+) create mode 100644 R/cell.R create mode 100644 man/cell.Rd diff --git a/R/cell.R b/R/cell.R new file mode 100644 index 0000000..4bcf072 --- /dev/null +++ b/R/cell.R @@ -0,0 +1,14 @@ +#' @title Cell array +#' @description Creates an array of zeros +#' @param n a the first dimension (or both, if sz is not passed) +#' @param sz the second dimension (or 1st and 2nd, if not passed) +#' @return An array of zeroes with the dimensions passed on call +cell <- function(n, sz = c(n, n), ...) { + if (length(sz) == 1 & missing(...)) { + return(array(dim = c(n, sz))) + } else if (length(sz) == 2) { + return(array(dim = sz)) + } else { + return(array(dim = c(n, sz, ...))) + } +} \ No newline at end of file diff --git a/man/cell.Rd b/man/cell.Rd new file mode 100644 index 0000000..428527c --- /dev/null +++ b/man/cell.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cell.R +\name{cell} +\alias{cell} +\title{Cell array} +\usage{ +cell(n, sz = c(n, n), ...) +} +\arguments{ +\item{n}{a the first dimension (or both, if sz is not passed)} + +\item{sz}{the second dimension (or 1st and 2nd, if not passed)} +} +\value{ +An array of zeroes with the dimensions passed on call +} +\description{ +Creates an array of zeros +} diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 63fa4bb..ca945c3 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -161,3 +161,11 @@ test_that("sortrows works as expected", { expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) + +test_that("cell works as expected", { + expect_equal(cell(0), array(dim = c(0, 0))) + expect_equal(cell(1), array(dim = c(1, 1))) + expect_equal(cell(2), array(dim = c(2, 2))) + expect_equal(cell(3, 4), array(dim = c(3, 4))) + expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) +}) \ No newline at end of file From fc1b947ca9402799f9a45b7c69c1d2eb1e8b88f4 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:10:14 +0200 Subject: [PATCH 3/7] Translated lueNimi --- NAMESPACE | 1 + R/lueNimi.R | 17 +++++++++++++++++ man/lueNimi.Rd | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 R/lueNimi.R create mode 100644 man/lueNimi.Rd diff --git a/NAMESPACE b/NAMESPACE index f6edaf8..ef1906f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(isfield) export(laskeMuutokset4) export(learn_simple_partition) export(lueGenePopData) +export(lueNimi) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) diff --git a/R/lueNimi.R b/R/lueNimi.R new file mode 100644 index 0000000..a5bbb3f --- /dev/null +++ b/R/lueNimi.R @@ -0,0 +1,17 @@ +#' @title Read the Name +#' @description Reads the line name +#' @param line line +#' @return nimi +#' @export +lueNimi <- function(line) { + # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. + n <- 1 + merkki <- line[n] + nimi <- '' + while (merkki != ',') { + nimi <- c(nimi, merkki) + n <- n + 1 + merkki <- line[n] + } + return(nimi) +} \ No newline at end of file diff --git a/man/lueNimi.Rd b/man/lueNimi.Rd new file mode 100644 index 0000000..74cd665 --- /dev/null +++ b/man/lueNimi.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lueNimi.R +\name{lueNimi} +\alias{lueNimi} +\title{Read the Name} +\usage{ +lueNimi(line) +} +\arguments{ +\item{line}{line} +} +\value{ +nimi +} +\description{ +Reads the line name +} From ec650cc6f6c95256c018f5855e39adc3b25e4470 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:22:25 +0200 Subject: [PATCH 4/7] Translated selvitaDigitFormat --- NAMESPACE | 1 + R/selvitaDigitFormat.R | 31 +++++++++++++++++++++++++++++++ man/selvitaDigitFormat.Rd | 17 +++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 R/selvitaDigitFormat.R create mode 100644 man/selvitaDigitFormat.Rd diff --git a/NAMESPACE b/NAMESPACE index ef1906f..d864e27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(rand) export(randdir) export(repmat) export(rivinSisaltamienMjonojenLkm) +export(selvitaDigitFormat) export(simulateAllFreqs) export(simulateIndividuals) export(simuloiAlleeli) diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R new file mode 100644 index 0000000..81b3519 --- /dev/null +++ b/R/selvitaDigitFormat.R @@ -0,0 +1,31 @@ +#' @title Find out the Digit Format +#' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers. +#' @return +#' @export +selvitaDigitFormat <- function(line) { + # line on ensimm�inen pop-sanan j�lkeinen rivi + # Genepop-formaatissa olevasta datasta. funktio selvitt�� + # rivin muodon perusteella, ovatko datan alleelit annettu + # 2 vai 3 numeron avulla. + + n <- 1 + merkki <- line[n] + while (merkki != ',') { + n <- n + 1 + merkki <- line[n] + } + + while (!any(merkki == '0123456789')) { + n <- n + 1 + merkki <- line[n] + } + numeroja <- 0 + while (any(merkki == '0123456789')) { + numeroja <- numeroja + 1 + n <- n + 1 + merkki <- line[n] + } + + df <- numeroja / 2 + return(df) +} \ No newline at end of file diff --git a/man/selvitaDigitFormat.Rd b/man/selvitaDigitFormat.Rd new file mode 100644 index 0000000..7d89daa --- /dev/null +++ b/man/selvitaDigitFormat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/selvitaDigitFormat.R +\name{selvitaDigitFormat} +\alias{selvitaDigitFormat} +\title{Find out the Digit Format} +\usage{ +selvitaDigitFormat(line) +} +\arguments{ +\item{line}{the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.} +} +\value{ + +} +\description{ +Find out the Digit Format +} From a671895caa5163f4fcd9b8bd6dd723994199d7ff Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:24:10 +0200 Subject: [PATCH 5/7] Removed commented-out Matlab code --- R/greedyMix.R | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index ade1cb0..de6b70c 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -1622,47 +1622,6 @@ greedyMix <- function( # %------------------------------------------------------ -# function nimi = lueNimi(line) -# %Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -# n = 1; -# merkki = line(n); -# nimi = ''; -# while ~isequal(merkki,',') -# nimi = [nimi merkki]; -# n = n+1; -# merkki = line(n); -# end - -# %------------------------------------------------------- - -# function df = selvitaDigitFormat(line) -# % line on ensimm�inen pop-sanan j�lkeinen rivi -# % Genepop-formaatissa olevasta datasta. funktio selvitt�� -# % rivin muodon perusteella, ovatko datan alleelit annettu -# % 2 vai 3 numeron avulla. - -# n = 1; -# merkki = line(n); -# while ~isequal(merkki,',') -# n = n+1; -# merkki = line(n); -# end - -# while ~any(merkki == '0123456789'); -# n = n+1; -# merkki = line(n); -# end -# numeroja = 0; -# while any(merkki == '0123456789'); -# numeroja = numeroja+1; -# n = n+1; -# merkki = line(n); -# end - -# df = numeroja/2; - - - # function loggis = laskeLoggis(counts, sumcounts, adjprior) # npops = size(counts,3); From d074de07f4a93cc0fcf8d436c5b247a21e7d2d4d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:47:39 +0200 Subject: [PATCH 6/7] Translated addAlleles --- NAMESPACE | 1 + R/addAlleles.R | 49 +++++++++++++++++++++++++++++++++++++++++++++++ R/greedyMix.R | 42 ---------------------------------------- man/addAlleles.Rd | 23 ++++++++++++++++++++++ 4 files changed, 73 insertions(+), 42 deletions(-) create mode 100644 R/addAlleles.R create mode 100644 man/addAlleles.Rd diff --git a/NAMESPACE b/NAMESPACE index d864e27..81c1201 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(addAlleles) export(admix1) export(calculatePopLogml) export(colon) diff --git a/R/addAlleles.R b/R/addAlleles.R new file mode 100644 index 0000000..c51e367 --- /dev/null +++ b/R/addAlleles.R @@ -0,0 +1,49 @@ +#' @title Add Alleles +#' @param data data +#' @param ind ind +#' @param line line +#' @param divider divider +#' @return data (after alleles were added) +#' @export +addAlleles <- function(data, ind, line, divider) { + # Lisaa BAPS-formaatissa olevaan datataulukkoon + # yksil�� ind vastaavat rivit. Yksil�n alleelit + # luetaan genepop-formaatissa olevasta rivist? + # line. Jos data on 3 digit formaatissa on divider=1000. + # Jos data on 2 digit formaatissa on divider=100. + + nloci <- size(data, 2) - 1 + if (size(data, 1) < (2 * ind)) { + data <- c(data, zeros(100, nloci + 1)) + } + + k <- 1 + merkki <- line[k] + while (merkki != ',') { + k <- k + 1 + merkki <- line[k] + } + line <- line[k + 1:length(line)] + # clear k; clear merkki; + + alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) + + + if (length(alleeliTaulu) != nloci) { + stop('Incorrect data format.') + } + + for (j in seq_len(nloci)) { + ekaAlleeli <- floor(alleeliTaulu[j] / divider) + if (ekaAlleeli == 0) ekaAlleeli <- -999 + tokaAlleeli <- alleeliTaulu[j] %% divider + if (tokaAlleeli == 0) tokaAlleeli <- -999 + + data[2 * ind - 1, j] <- ekaAlleeli + data[2 * ind, j] <- tokaAlleeli + } + + data[2 * ind - 1,end] <- ind + data[2 * ind, end] <- ind + return(data) +} \ No newline at end of file diff --git a/R/greedyMix.R b/R/greedyMix.R index de6b70c..8ea16e0 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -1580,48 +1580,6 @@ greedyMix <- function( # %------------------------------------------------------ -# function data = addAlleles(data, ind, line, divider) -# % Lisaa BAPS-formaatissa olevaan datataulukkoon -# % yksil�� ind vastaavat rivit. Yksil�n alleelit -# % luetaan genepop-formaatissa olevasta rivist? -# % line. Jos data on 3 digit formaatissa on divider=1000. -# % Jos data on 2 digit formaatissa on divider=100. - -# nloci = size(data,2)-1; -# if size(data,1) < 2*ind -# data = [data; zeros(100,nloci+1)]; -# end - -# k=1; -# merkki=line(k); -# while ~isequal(merkki,',') -# k=k+1; -# merkki=line(k); -# end -# line = line(k+1:end); -# clear k; clear merkki; - -# alleeliTaulu = sscanf(line,'%d'); - -# if length(alleeliTaulu)~=nloci -# disp('Incorrect data format.'); -# end - -# for j=1:nloci -# ekaAlleeli = floor(alleeliTaulu(j)/divider); -# if ekaAlleeli==0 ekaAlleeli=-999; end; -# tokaAlleeli = rem(alleeliTaulu(j),divider); -# if tokaAlleeli==0 tokaAlleeli=-999; end - -# data(2*ind-1,j) = ekaAlleeli; -# data(2*ind,j) = tokaAlleeli; -# end - -# data(2*ind-1,end) = ind; -# data(2*ind,end) = ind; - -# %------------------------------------------------------ - # function loggis = laskeLoggis(counts, sumcounts, adjprior) # npops = size(counts,3); diff --git a/man/addAlleles.Rd b/man/addAlleles.Rd new file mode 100644 index 0000000..4c24818 --- /dev/null +++ b/man/addAlleles.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/addAlleles.R +\name{addAlleles} +\alias{addAlleles} +\title{Add Alleles} +\usage{ +addAlleles(data, ind, line, divider) +} +\arguments{ +\item{data}{data} + +\item{ind}{ind} + +\item{line}{line} + +\item{divider}{divider} +} +\value{ +data (after alleles were added) +} +\description{ +Add Alleles +} From 01909102444eff3c050c43f407e0cc257383396c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 24 Jun 2020 16:49:31 +0200 Subject: [PATCH 7/7] Minor adjustments for clean check --- R/addAlleles.R | 4 ++-- R/cell.R | 1 + R/selvitaDigitFormat.R | 2 +- man/cell.Rd | 2 ++ man/selvitaDigitFormat.Rd | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/addAlleles.R b/R/addAlleles.R index c51e367..a1c87bd 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -43,7 +43,7 @@ addAlleles <- function(data, ind, line, divider) { data[2 * ind, j] <- tokaAlleeli } - data[2 * ind - 1,end] <- ind - data[2 * ind, end] <- ind + data[2 * ind - 1, ncol(data)] <- ind + data[2 * ind, ncol(data)] <- ind return(data) } \ No newline at end of file diff --git a/R/cell.R b/R/cell.R index 4bcf072..beac532 100644 --- a/R/cell.R +++ b/R/cell.R @@ -2,6 +2,7 @@ #' @description Creates an array of zeros #' @param n a the first dimension (or both, if sz is not passed) #' @param sz the second dimension (or 1st and 2nd, if not passed) +#' @param ... Other dimensions #' @return An array of zeroes with the dimensions passed on call cell <- function(n, sz = c(n, n), ...) { if (length(sz) == 1 & missing(...)) { diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R index 81b3519..4492ea8 100644 --- a/R/selvitaDigitFormat.R +++ b/R/selvitaDigitFormat.R @@ -1,6 +1,6 @@ #' @title Find out the Digit Format #' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers. -#' @return +#' @return df #' @export selvitaDigitFormat <- function(line) { # line on ensimm�inen pop-sanan j�lkeinen rivi diff --git a/man/cell.Rd b/man/cell.Rd index 428527c..1123477 100644 --- a/man/cell.Rd +++ b/man/cell.Rd @@ -10,6 +10,8 @@ cell(n, sz = c(n, n), ...) \item{n}{a the first dimension (or both, if sz is not passed)} \item{sz}{the second dimension (or 1st and 2nd, if not passed)} + +\item{...}{Other dimensions} } \value{ An array of zeroes with the dimensions passed on call diff --git a/man/selvitaDigitFormat.Rd b/man/selvitaDigitFormat.Rd index 7d89daa..9350082 100644 --- a/man/selvitaDigitFormat.Rd +++ b/man/selvitaDigitFormat.Rd @@ -10,7 +10,7 @@ selvitaDigitFormat(line) \item{line}{the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.} } \value{ - +df } \description{ Find out the Digit Format