diff --git a/NAMESPACE b/NAMESPACE index d478d16..81c1201 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(addAlleles) export(admix1) export(calculatePopLogml) export(colon) @@ -13,6 +14,8 @@ export(inputdlg) export(isfield) export(laskeMuutokset4) export(learn_simple_partition) +export(lueGenePopData) +export(lueNimi) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) @@ -22,6 +25,7 @@ export(rand) export(randdir) export(repmat) export(rivinSisaltamienMjonojenLkm) +export(selvitaDigitFormat) export(simulateAllFreqs) export(simulateIndividuals) export(simuloiAlleeli) diff --git a/R/addAlleles.R b/R/addAlleles.R new file mode 100644 index 0000000..a1c87bd --- /dev/null +++ b/R/addAlleles.R @@ -0,0 +1,49 @@ +#' @title Add Alleles +#' @param data data +#' @param ind ind +#' @param line line +#' @param divider divider +#' @return data (after alleles were added) +#' @export +addAlleles <- function(data, ind, line, divider) { + # Lisaa BAPS-formaatissa olevaan datataulukkoon + # yksil�� ind vastaavat rivit. Yksil�n alleelit + # luetaan genepop-formaatissa olevasta rivist? + # line. Jos data on 3 digit formaatissa on divider=1000. + # Jos data on 2 digit formaatissa on divider=100. + + nloci <- size(data, 2) - 1 + if (size(data, 1) < (2 * ind)) { + data <- c(data, zeros(100, nloci + 1)) + } + + k <- 1 + merkki <- line[k] + while (merkki != ',') { + k <- k + 1 + merkki <- line[k] + } + line <- line[k + 1:length(line)] + # clear k; clear merkki; + + alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) + + + if (length(alleeliTaulu) != nloci) { + stop('Incorrect data format.') + } + + for (j in seq_len(nloci)) { + ekaAlleeli <- floor(alleeliTaulu[j] / divider) + if (ekaAlleeli == 0) ekaAlleeli <- -999 + tokaAlleeli <- alleeliTaulu[j] %% divider + if (tokaAlleeli == 0) tokaAlleeli <- -999 + + data[2 * ind - 1, j] <- ekaAlleeli + data[2 * ind, j] <- tokaAlleeli + } + + data[2 * ind - 1, ncol(data)] <- ind + data[2 * ind, ncol(data)] <- ind + return(data) +} \ No newline at end of file diff --git a/R/cell.R b/R/cell.R new file mode 100644 index 0000000..beac532 --- /dev/null +++ b/R/cell.R @@ -0,0 +1,15 @@ +#' @title Cell array +#' @description Creates an array of zeros +#' @param n a the first dimension (or both, if sz is not passed) +#' @param sz the second dimension (or 1st and 2nd, if not passed) +#' @param ... Other dimensions +#' @return An array of zeroes with the dimensions passed on call +cell <- function(n, sz = c(n, n), ...) { + if (length(sz) == 1 & missing(...)) { + return(array(dim = c(n, sz))) + } else if (length(sz) == 2) { + return(array(dim = sz)) + } else { + return(array(dim = c(n, sz, ...))) + } +} \ No newline at end of file diff --git a/R/greedyMix.R b/R/greedyMix.R index 88c9004..8ea16e0 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -1580,153 +1580,6 @@ greedyMix <- function( # %------------------------------------------------------ - -# function [data, popnames] = lueGenePopData(tiedostonNimi) - -# fid = fopen(tiedostonNimi); -# line = fgetl(fid); %ensimm�inen rivi -# line = fgetl(fid); %toinen rivi -# count = rivinSisaltamienMjonojenLkm(line); - -# line = fgetl(fid); -# lokusRiveja = 1; -# while (testaaPop(line)==0) -# lokusRiveja = lokusRiveja+1; -# line = fgetl(fid); -# end - -# if lokusRiveja>1 -# nloci = lokusRiveja; -# else -# nloci = count; -# end - -# popnames = cell(10,2); -# data = zeros(100, nloci+1); -# nimienLkm=0; -# ninds=0; -# poimiNimi=1; -# digitFormat = -1; -# while line ~= -1 -# line = fgetl(fid); - -# if poimiNimi==1 -# %Edellinen rivi oli 'pop' -# nimienLkm = nimienLkm+1; -# ninds = ninds+1; -# if nimienLkm>size(popnames,1); -# popnames = [popnames; cell(10,2)]; -# end -# nimi = lueNimi(line); -# if digitFormat == -1 -# digitFormat = selvitaDigitFormat(line); -# divider = 10^digitFormat; -# end -# popnames{nimienLkm, 1} = {nimi}; %N�in se on greedyMix:iss�kin?!? -# popnames{nimienLkm, 2} = ninds; -# poimiNimi=0; - -# data = addAlleles(data, ninds, line, divider); - -# elseif testaaPop(line) -# poimiNimi = 1; - -# elseif line ~= -1 -# ninds = ninds+1; -# data = addAlleles(data, ninds, line, divider); -# end -# end - -# data = data(1:ninds*2,:); -# popnames = popnames(1:nimienLkm,:); -# fclose(fid); - -# %-------------------------------------------------------- - - -# function data = addAlleles(data, ind, line, divider) -# % Lisaa BAPS-formaatissa olevaan datataulukkoon -# % yksil�� ind vastaavat rivit. Yksil�n alleelit -# % luetaan genepop-formaatissa olevasta rivist? -# % line. Jos data on 3 digit formaatissa on divider=1000. -# % Jos data on 2 digit formaatissa on divider=100. - -# nloci = size(data,2)-1; -# if size(data,1) < 2*ind -# data = [data; zeros(100,nloci+1)]; -# end - -# k=1; -# merkki=line(k); -# while ~isequal(merkki,',') -# k=k+1; -# merkki=line(k); -# end -# line = line(k+1:end); -# clear k; clear merkki; - -# alleeliTaulu = sscanf(line,'%d'); - -# if length(alleeliTaulu)~=nloci -# disp('Incorrect data format.'); -# end - -# for j=1:nloci -# ekaAlleeli = floor(alleeliTaulu(j)/divider); -# if ekaAlleeli==0 ekaAlleeli=-999; end; -# tokaAlleeli = rem(alleeliTaulu(j),divider); -# if tokaAlleeli==0 tokaAlleeli=-999; end - -# data(2*ind-1,j) = ekaAlleeli; -# data(2*ind,j) = tokaAlleeli; -# end - -# data(2*ind-1,end) = ind; -# data(2*ind,end) = ind; - -# %------------------------------------------------------ - -# function nimi = lueNimi(line) -# %Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -# n = 1; -# merkki = line(n); -# nimi = ''; -# while ~isequal(merkki,',') -# nimi = [nimi merkki]; -# n = n+1; -# merkki = line(n); -# end - -# %------------------------------------------------------- - -# function df = selvitaDigitFormat(line) -# % line on ensimm�inen pop-sanan j�lkeinen rivi -# % Genepop-formaatissa olevasta datasta. funktio selvitt�� -# % rivin muodon perusteella, ovatko datan alleelit annettu -# % 2 vai 3 numeron avulla. - -# n = 1; -# merkki = line(n); -# while ~isequal(merkki,',') -# n = n+1; -# merkki = line(n); -# end - -# while ~any(merkki == '0123456789'); -# n = n+1; -# merkki = line(n); -# end -# numeroja = 0; -# while any(merkki == '0123456789'); -# numeroja = numeroja+1; -# n = n+1; -# merkki = line(n); -# end - -# df = numeroja/2; - - - # function loggis = laskeLoggis(counts, sumcounts, adjprior) # npops = size(counts,3); diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R new file mode 100644 index 0000000..2c79d03 --- /dev/null +++ b/R/lueGenePopData.R @@ -0,0 +1,66 @@ +#' @title Read GenePop Data +#' @description Reads GenePop-formatted data +#' @param tiedostonNimi Name of the file +#' @return list containing data and popnames +#' @export +lueGenePopData <- function (tiedostonNimi) { + + fid <- load(tiedostonNimi) + line1 <- readLines(fid)[1] # ensimmäinen rivi + line2 <- readLines(fid)[2] # toinen rivi + count <- rivinSisaltamienMjonojenLkm(line) + + line <- readLines(fid)[3] + lokusRiveja <- 1 + while (testaaPop(line) == 0) { + lokusRiveja <- lokusRiveja + 1 # locus row + line <- readLines(fid)[3 + lokusRiveja] + } + + if (lokusRiveja > 1) { + nloci <- lokusRiveja + } else { + nloci <- count + } + + popnames <- cell(10, 2) + data <- zeros(100, nloci + 1) + nimienLkm <- 0 + ninds <- 0 + poimiNimi <- 1 + digitFormat <- -1 + while (line != -1) { + line <- readLines(fid)[lokusRiveja + 1] + lokusRiveja <- lokusRiveja + 1 + + if (poimiNimi == 1) { + # Edellinen rivi oli 'pop' + nimienLkm <- nimienLkm + 1 + ninds <- ninds + 1 + if (nimienLkm > size(popnames, 1)) { + popnames <- c(popnames, cell(10, 2)) + } + nimi <- lueNimi(line) + if (digitFormat == -1) { + digitFormat <- selvitaDigitFormat(line) + divider <- 10 ^ digitFormat + } + popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? + popnames[nimienLkm, 2] <- ninds + poimiNimi <- 0 + + data <- addAlleles(data, ninds, line, divider) + + } else if (testaaPop(line)) { + poimiNimi <- 1 + + } else if (line != -1) { + ninds <- ninds + 1 + data <- addAlleles(data, ninds, line, divider) + } + } + + data <- data[1:(ninds * 2),] + popnames <- popnames[seq_len(nimienLkm),] + return(list(data = data, popnames = popnames)) +} \ No newline at end of file diff --git a/R/lueNimi.R b/R/lueNimi.R new file mode 100644 index 0000000..a5bbb3f --- /dev/null +++ b/R/lueNimi.R @@ -0,0 +1,17 @@ +#' @title Read the Name +#' @description Reads the line name +#' @param line line +#' @return nimi +#' @export +lueNimi <- function(line) { + # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. + n <- 1 + merkki <- line[n] + nimi <- '' + while (merkki != ',') { + nimi <- c(nimi, merkki) + n <- n + 1 + merkki <- line[n] + } + return(nimi) +} \ No newline at end of file diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R new file mode 100644 index 0000000..4492ea8 --- /dev/null +++ b/R/selvitaDigitFormat.R @@ -0,0 +1,31 @@ +#' @title Find out the Digit Format +#' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers. +#' @return df +#' @export +selvitaDigitFormat <- function(line) { + # line on ensimm�inen pop-sanan j�lkeinen rivi + # Genepop-formaatissa olevasta datasta. funktio selvitt�� + # rivin muodon perusteella, ovatko datan alleelit annettu + # 2 vai 3 numeron avulla. + + n <- 1 + merkki <- line[n] + while (merkki != ',') { + n <- n + 1 + merkki <- line[n] + } + + while (!any(merkki == '0123456789')) { + n <- n + 1 + merkki <- line[n] + } + numeroja <- 0 + while (any(merkki == '0123456789')) { + numeroja <- numeroja + 1 + n <- n + 1 + merkki <- line[n] + } + + df <- numeroja / 2 + return(df) +} \ No newline at end of file diff --git a/man/addAlleles.Rd b/man/addAlleles.Rd new file mode 100644 index 0000000..4c24818 --- /dev/null +++ b/man/addAlleles.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/addAlleles.R +\name{addAlleles} +\alias{addAlleles} +\title{Add Alleles} +\usage{ +addAlleles(data, ind, line, divider) +} +\arguments{ +\item{data}{data} + +\item{ind}{ind} + +\item{line}{line} + +\item{divider}{divider} +} +\value{ +data (after alleles were added) +} +\description{ +Add Alleles +} diff --git a/man/cell.Rd b/man/cell.Rd new file mode 100644 index 0000000..1123477 --- /dev/null +++ b/man/cell.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cell.R +\name{cell} +\alias{cell} +\title{Cell array} +\usage{ +cell(n, sz = c(n, n), ...) +} +\arguments{ +\item{n}{a the first dimension (or both, if sz is not passed)} + +\item{sz}{the second dimension (or 1st and 2nd, if not passed)} + +\item{...}{Other dimensions} +} +\value{ +An array of zeroes with the dimensions passed on call +} +\description{ +Creates an array of zeros +} diff --git a/man/lueGenePopData.Rd b/man/lueGenePopData.Rd new file mode 100644 index 0000000..02c0612 --- /dev/null +++ b/man/lueGenePopData.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lueGenePopData.R +\name{lueGenePopData} +\alias{lueGenePopData} +\title{Read GenePop Data} +\usage{ +lueGenePopData(tiedostonNimi) +} +\arguments{ +\item{tiedostonNimi}{Name of the file} +} +\value{ +list containing data and popnames +} +\description{ +Reads GenePop-formatted data +} diff --git a/man/lueNimi.Rd b/man/lueNimi.Rd new file mode 100644 index 0000000..74cd665 --- /dev/null +++ b/man/lueNimi.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lueNimi.R +\name{lueNimi} +\alias{lueNimi} +\title{Read the Name} +\usage{ +lueNimi(line) +} +\arguments{ +\item{line}{line} +} +\value{ +nimi +} +\description{ +Reads the line name +} diff --git a/man/selvitaDigitFormat.Rd b/man/selvitaDigitFormat.Rd new file mode 100644 index 0000000..9350082 --- /dev/null +++ b/man/selvitaDigitFormat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/selvitaDigitFormat.R +\name{selvitaDigitFormat} +\alias{selvitaDigitFormat} +\title{Find out the Digit Format} +\usage{ +selvitaDigitFormat(line) +} +\arguments{ +\item{line}{the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.} +} +\value{ +df +} +\description{ +Find out the Digit Format +} diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 63fa4bb..ca945c3 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -161,3 +161,11 @@ test_that("sortrows works as expected", { expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) + +test_that("cell works as expected", { + expect_equal(cell(0), array(dim = c(0, 0))) + expect_equal(cell(1), array(dim = c(1, 1))) + expect_equal(cell(2), array(dim = c(2, 2))) + expect_equal(cell(3, 4), array(dim = c(3, 4))) + expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) +}) \ No newline at end of file