\documentclass{article}

\title{Voynich Manuscript---Paragraph-Initial Gallows}
\author{Sarah Goslee}
\date{2006-10-22}

\begin{document}
\maketitle

\section{Introduction}

One of the obviously ``different'' features of the VMS is the paragraph-initial gallows characters.
My first thought looking at them was that they might represent some difference in encoding between those paragraphs, since they didn't seem to be part of the succeeding words.
I analyzed the paragraphs in the herbal A section for differences among paragraphs with the four different initial gallows characters: F, K, P, and T.
A few other characters appear occasionally.

% NOTE(review): the chunk headers in the received source were damaged ("<>=").
% They are restored here with default options for the data chunks and
% fig=TRUE, echo=FALSE for the chunks inside figure environments -- confirm
% against the author's original options.
<<>>=
library(ecodist)

# read in paragraph data: column 1 of newall.evt is the line identifier,
# column 2 is the transcribed text of that line
myevt <- read.table("newall.evt", sep="\t")
pageinfo <- read.table("pages.txt", sep="\t", header=TRUE)

# strip the first and last character of each identifier, then split the
# remainder on "." into page / paragraph / line fields;
# as.character() guards against the column having been read as a factor
lineid <- as.character(myevt[,1])
lineid <- substring(lineid, 2, nchar(lineid)-1)
lineinfo <- data.frame(do.call("rbind", strsplit(lineid, "\\.")))
colnames(lineinfo) <- c("page", "para", "line")
lines <- as.character(myevt[,2])
rm(lineid)

# pull out only herbal A information
pageinfo <- pageinfo[pageinfo$lang == "A" & pageinfo$section == "h",]
in.herbalA <- lineinfo$page %in% pageinfo$page
lines <- lines[in.herbalA]
lineinfo <- lineinfo[in.herbalA,]

# separate out paragraph text from labels, other text:
# keep lines whose paragraph field starts with "P" or "Q"
paratype <- substring(lineinfo$para, 1, 1)
is.para <- paratype == "P" | paratype == "Q"
paraevt <- lines[is.para]
paralineinfo <- lineinfo[is.para,]
rm(in.herbalA, paratype, is.para)

# merge lines into their paragraphs: a line starting with "=" opens a new
# paragraph; any other line is appended to the current paragraph without
# its first character (presumably a line-start marker -- TODO confirm)
paragraphs <- rep("", length(paraevt))
currline <- 0
for(i in seq_along(paraevt)) {
    if(substring(paraevt[i], 1, 1) == "=") {
        currline <- currline + 1
        paragraphs[currline] <- paraevt[i]
    } else {
        paragraphs[currline] <- paste(paragraphs[currline],
            substring(paraevt[i], 2, nchar(paraevt[i])), sep="")
    }
}
# seq_len() rather than 1:currline so that zero paragraphs gives an empty vector
paragraphs <- paragraphs[seq_len(currline)]
rm(i, currline)

# split paragraphs into characters
paragraph.chars <- strsplit(paragraphs, "")

# split paragraphs into words: drop the leading and trailing "=", turn the
# remaining "=" and "-" into ".", then split on "."
paragraph.words <- sub("^=", "", paragraphs)
paragraph.words <- sub("=$", "", paragraph.words)
paragraph.words <- gsub("[=-]", ".", paragraph.words)
paragraph.words <- strsplit(paragraph.words, "\\.")

# create chars by paragraph and words by paragraph tables:
# build one (paragraph index, token) pair per character / word in a single
# vectorised step, then cross-tabulate with a count of 1 per pair
chars.token <- unlist(paragraph.chars)
chars.para <- rep(seq_along(paragraph.chars),
    vapply(paragraph.chars, length, integer(1)))
words.token <- unlist(paragraph.words)
words.para <- rep(seq_along(paragraph.words),
    vapply(paragraph.words, length, integer(1)))

paragraph.chars.table <- crosstab(as.numeric(chars.para), chars.token,
    rep(1, length(chars.token)))
paragraph.words.table <- crosstab(as.numeric(words.para), words.token,
    rep(1, length(words.token)))
rm(chars.token, chars.para, words.token, words.para)

# drop non-letter characters - * . =
# NOTE(review): removed by position; this assumes those four symbols are the
# first four columns returned by crosstab() -- verify against the data
paragraph.chars.table <- paragraph.chars.table[,-c(1,2,3,4)]

# scale tables by rowsum, so each row holds relative frequencies
paragraph.chars.rowsum <- rowSums(paragraph.chars.table)
paragraph.words.rowsum <- rowSums(paragraph.words.table)
paragraph.chars.table <- sweep(paragraph.chars.table, 1, paragraph.chars.rowsum, "/")
paragraph.words.table <- sweep(paragraph.words.table, 1, paragraph.words.rowsum, "/")

# paragraph-initial letter: the character right after the leading "="
pinitial <- substring(paragraphs, 2, 2)
@

\section{Ordination}

<<>>=
# compute both ordinations once, outside the figure chunks, because Sweave
# re-runs figure chunks for each graphics device
paragraph.chars.pco <- pco(dist(paragraph.chars.table))
paragraph.words.pco <- pco(dist(paragraph.words.table))

# colour used for each of the four main paragraph-initial gallows
gallows.cols <- c(F="red", K="orange", P="green", T="blue")

# Plot the first two PCO axes, using each paragraph's initial letter as its
# plotting symbol, then overdraw the four main gallows in colour.
#   scores   - matrix of ordination scores, one row per paragraph
#   initials - character vector of paragraph-initial letters, same order
#   cols     - named vector: names are the letters, values the colours
plotGallowsPCO <- function(scores, initials, cols=gallows.cols) {
    par(mfrow=c(1,1))
    plot(scores[,1:2], xlab="PCO 1", ylab="PCO 2", main="VMS", pch=initials)
    for(g in names(cols)) {
        # drop=FALSE keeps a single matching paragraph as a 1 x 2 matrix;
        # without it the row collapses to a vector and is plotted wrongly
        points(scores[initials == g, 1:2, drop=FALSE], pch=g, col=cols[[g]])
    }
    invisible(NULL)
}
@

\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
plotGallowsPCO(paragraph.chars.pco$vectors, pinitial)
@
\caption{Ordination of character frequencies for Voynich herbal A section, by paragraph, labelled with paragraph-initial letter.}
\label{fig:charPCO}
\end{figure}

\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
plotGallowsPCO(paragraph.words.pco$vectors, pinitial)
@
\caption{Ordination of word frequencies for Voynich herbal A section, by paragraph, labelled with paragraph-initial letter.}
\label{fig:wordPCO}
\end{figure}

Neither ordination on character frequencies nor on word frequencies shows any distinction between paragraphs beginning with the different
gallows characters (Figs.~\ref{fig:charPCO} and~\ref{fig:wordPCO}).
The gallows characters don't seem to relate to any difference in underlying language or encoding.

\end{document}