From 2c3d40c9a719502c4e105a4422b9650634cb6954 Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <waldir.leoncio@gmail.com>
Date: Tue, 28 Jul 2020 12:50:54 +0200
Subject: [PATCH 1/4] Fixed behavior of lueNimi and selvitaDigitFormat

---
 R/lueNimi.R            |  8 ++++----
 R/selvitaDigitFormat.R | 13 ++++++-------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/R/lueNimi.R b/R/lueNimi.R
index a5bbb3f..7e0811d 100644
--- a/R/lueNimi.R
+++ b/R/lueNimi.R
@@ -1,17 +1,17 @@
 #' @title Read the Name
-#' @description Reads the line name
+#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
 #' @param line line
 #' @return nimi
 #' @export
 lueNimi <- function(line) {
 	# Palauttaa line:n alusta sen osan, joka on ennen pilkkua.
 	n <- 1
-	merkki <- line[n]
+	merkki <- substring(line, n, n)
 	nimi <- ''
 	while (merkki != ',') {
 		nimi <- c(nimi, merkki)
 		n <- n + 1
-		merkki <- line[n]
+		merkki <- substring(line, n, n)
 	}
-	return(nimi)
+	return(paste(nimi, collapse=""))
 }
\ No newline at end of file
diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R
index 4492ea8..32af592 100644
--- a/R/selvitaDigitFormat.R
+++ b/R/selvitaDigitFormat.R
@@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) {
 	# Genepop-formaatissa olevasta datasta. funktio selvitt��
 	# rivin muodon perusteella, ovatko datan alleelit annettu
 	# 2 vai 3 numeron avulla.
-
 	n <- 1
-	merkki <- line[n]
+	merkki <- substring(line, n, n)
 	while (merkki != ',') {
 		n <- n + 1
-		merkki <- line[n]
+		merkki <- substring(line, n, n)
 	}
 
-	while (!any(merkki == '0123456789')) {
+	while (!any(merkki %in% as.character(0:9))) {
 		n <- n + 1
-		merkki <- line[n]
+		merkki <- substring(line, n, n)
 	}
 	numeroja <- 0
-	while (any(merkki == '0123456789')) {
+	while (any(merkki %in% as.character(0:9))) {
 		numeroja <- numeroja + 1
 		n <- n + 1
-		merkki <- line[n]
+		merkki <- substring(line, n, n)
 	}
 
 	df <- numeroja / 2

From 88fc203c3dcfe81303b09278e924a73bd10d154f Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <waldir.leoncio@gmail.com>
Date: Tue, 28 Jul 2020 15:35:04 +0200
Subject: [PATCH 2/4] Fixed addAlleles and lueGenePopData

---
 R/addAlleles.R     | 21 ++++++++++++---------
 R/lueGenePopData.R | 33 ++++++++++++++-------------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/R/addAlleles.R b/R/addAlleles.R
index a1c87bd..33d0f38 100644
--- a/R/addAlleles.R
+++ b/R/addAlleles.R
@@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) {
 	# line. Jos data on 3 digit formaatissa on divider=1000.
 	# Jos data on 2 digit formaatissa on divider=100.
 
-	nloci <- size(data, 2) - 1
+	nloci <- size(data, 2) # added 1 from original code
 	if (size(data, 1) < (2 * ind)) {
-		data <- c(data, zeros(100, nloci + 1))
+		data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
 	}
 
 	k <- 1
-	merkki <- line[k]
+	merkki <- substring(line, k, k)
 	while (merkki != ',') {
 	   k <- k + 1
-	   merkki <- line[k]
+	   merkki <- substring(line, k, k)
 	}
-	line <- line[k + 1:length(line)]
+	line <- substring(line, k + 1)
 	# clear k; clear merkki;
 
-	alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
-
+	if (grepl(" ", line)) {
+		alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
+	} else if (grepl("\t", line)) {
+		alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]])
+	}
 
 	if (length(alleeliTaulu) != nloci) {
 		stop('Incorrect data format.')
@@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) {
 
 	for (j in seq_len(nloci)) {
 		ekaAlleeli <- floor(alleeliTaulu[j] / divider)
-		if (ekaAlleeli == 0) ekaAlleeli <- -999
+		if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999
 		tokaAlleeli <- alleeliTaulu[j] %% divider
-		if (tokaAlleeli == 0) tokaAlleeli <- -999
+		if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999
 
 		data[2 * ind - 1, j] <- ekaAlleeli
 		data[2 * ind, j] <- tokaAlleeli
diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R
index 2c79d03..7276537 100644
--- a/R/lueGenePopData.R
+++ b/R/lueGenePopData.R
@@ -4,17 +4,16 @@
 #' @return list containing data and popnames
 #' @export
 lueGenePopData <- function (tiedostonNimi) {
-
-	fid   <- load(tiedostonNimi)
-	line1 <- readLines(fid)[1] # ensimmäinen rivi
-	line2 <- readLines(fid)[2] # toinen rivi
+	fid   <- readLines(tiedostonNimi)
+	line <- fid[1] # ensimmäinen rivi
+	line <- fid[2] # toinen rivi
 	count <- rivinSisaltamienMjonojenLkm(line)
 
-	line <- readLines(fid)[3]
+	line <- fid[3]
 	lokusRiveja <- 1
 	while (testaaPop(line) == 0) {
 		lokusRiveja <- lokusRiveja + 1 # locus row
-		line <- readLines(fid)[3 + lokusRiveja]
+		line <- fid[2 + lokusRiveja]
 	}
 
 	if (lokusRiveja > 1) {
@@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) {
 	ninds <- 0
 	poimiNimi <- 1
 	digitFormat <- -1
-	while (line != -1) {
-		line <- readLines(fid)[lokusRiveja + 1]
-		lokusRiveja <- lokusRiveja + 1
-
+	while (lokusRiveja < length(fid) - 2) {
+		lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
+		line <- fid[lokusRiveja + 2]
 		if (poimiNimi == 1) {
-			# Edellinen rivi oli 'pop'
+			# Edellinen rivi oli 'pop' (previous line was pop)
 			nimienLkm <- nimienLkm + 1
 			ninds <- ninds + 1
 			if (nimienLkm > size(popnames, 1)) {
-				popnames <- c(popnames, cell(10, 2))
+				popnames <- rbind(popnames, cell(10, 2))
 			}
 			nimi <- lueNimi(line)
 			if (digitFormat == -1) {
 				digitFormat <- selvitaDigitFormat(line)
 				divider <- 10 ^ digitFormat
 			}
-			popnames[nimienLkm, 1] <- nimi   #N�in se on greedyMix:iss�kin?!?
+			popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!?
 			popnames[nimienLkm, 2] <- ninds
 			poimiNimi <- 0
-
 			data <- addAlleles(data, ninds, line, divider)
-
 		 } else if (testaaPop(line)) {
 			poimiNimi <- 1
-
-		 } else if (line != -1) {
+		 } else if (!is.na(line)) {
 			ninds <- ninds + 1
 			data <- addAlleles(data, ninds, line, divider)
 		}
 	}
 
-	data <- data[1:(ninds * 2),]
-	popnames <- popnames[seq_len(nimienLkm),]
+	data <- data[1:(ninds * 2), ]
+	popnames <- popnames[seq_len(nimienLkm), ]
 	return(list(data = data, popnames = popnames))
 }
\ No newline at end of file

From 2f809d01df29b58c9eb781f4881d0f38faae5eed Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <waldir.leoncio@gmail.com>
Date: Tue, 28 Jul 2020 15:35:17 +0200
Subject: [PATCH 3/4] Improved validation

---
 R/lueNimi.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/lueNimi.R b/R/lueNimi.R
index 7e0811d..377e791 100644
--- a/R/lueNimi.R
+++ b/R/lueNimi.R
@@ -4,6 +4,12 @@
 #' @return nimi
 #' @export
 lueNimi <- function(line) {
+	# ==========================================================================
+	# Validation
+	# ==========================================================================
+	if (!grepl(",", line)) {
+		stop("There are no commas in this line")
+	}
 	# Palauttaa line:n alusta sen osan, joka on ennen pilkkua.
 	n <- 1
 	merkki <- substring(line, n, n)

From c9ff8e76a962787a11bc5f3828eb2ab18f822134 Mon Sep 17 00:00:00 2001
From: Waldir Leoncio <waldir.leoncio@gmail.com>
Date: Tue, 28 Jul 2020 15:35:29 +0200
Subject: [PATCH 4/4] Changed base Matlab function behavior

---
 R/cell.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/cell.R b/R/cell.R
index beac532..746634e 100644
--- a/R/cell.R
+++ b/R/cell.R
@@ -6,10 +6,10 @@
 #' @return An array of zeroes with the dimensions passed on call
 cell <- function(n, sz = c(n, n), ...) {
 	if (length(sz) == 1 & missing(...)) {
-		return(array(dim = c(n, sz)))
+		return(array(0, dim = c(n, sz)))
 	} else if (length(sz) == 2) {
-		return(array(dim = sz))
+		return(array(0, dim = sz))
 	} else {
-		return(array(dim = c(n, sz, ...)))
+		return(array(0, dim = c(n, sz, ...)))
 	}
 }
\ No newline at end of file