\documentclass{article}

\title{Voynich Manuscript---Patterns of Co-occurrence}
\author{Sarah Goslee}
\date{2006-10-22}

\begin{document}
\maketitle

\section{Introduction}

These analyses continue using the three discrete subsets described in the
first page: an herbal subset (H) in Currier language A, and the recipe
section (R) and the balneological section (B), both in Currier language B.

% Code chunk 1: adjacent character pairs and adjacent word pairs per set.
% The chunk options were lost from the source; echo=FALSE is a
% reconstruction (NOTE(review): confirm against the original document).
<<echo=FALSE>>=
# This chunk reads objects that are not created in this file and must
# already exist in the workspace:
#   seth.page.chars, setr.page.chars, setb.page.chars
#   seth.page.words, setr.page.words, setb.page.words
#   crosstab()
# Each *.page.* object is indexed with [[i]] and each element is treated
# as a vector of characters or words (presumably one element per page).

# Paste every element of each page onto its successor.
#   pages: list-like object; one vector of units per element
#   sep:   separator placed between the two units of a pair
# Returns a copy of `pages` whose elements hold the pairs. Elements with
# fewer than two units give an empty result instead of the NA-padded pairs
# that positive indexing (x[1:(n-1)], x[2:n]) produces when n < 2.
adjacent.pairs <- function(pages, sep) {
    pairs <- pages
    for (i in seq_along(pages)) {
        units <- pages[[i]]
        pairs[[i]] <- paste(units[-length(units)], units[-1], sep = sep)
    }
    pairs
}

# Cross-tabulate two-character pairs: first character by second character,
# cell value = number of occurrences.
#   pairs: result of adjacent.pairs(..., sep = "")
#   drop:  characters whose row and column are removed from the table
# Filtering with %in% leaves the table untouched when a symbol is absent,
# whereas negative indexing with match() would index by NA.
char.pair.table <- function(pairs, drop) {
    counts <- table(unlist(pairs))
    long <- data.frame(c1 = substring(names(counts), 1, 1),
                       c2 = substring(names(counts), 2, 2),
                       counts)
    wide <- crosstab(long$c1, long$c2, long$Freq)
    wide[!(rownames(wide) %in% drop), !(colnames(wide) %in% drop)]
}

## Character pairs ------------------------------------------------------

seth.char.pairs <- adjacent.pairs(seth.page.chars, sep = "")
setr.char.pairs <- adjacent.pairs(setr.page.chars, sep = "")
setb.char.pairs <- adjacent.pairs(setb.page.chars, sep = "")

# NOTE(review): set H drops upper-case "X" while set R drops lower-case
# "x"; confirm that the difference in case is intended.
seth.char.pairs.table <- char.pair.table(seth.char.pairs, drop = c("*", "X"))
setr.char.pairs.table <- char.pair.table(setr.char.pairs, drop = c("*", "x"))
setb.char.pairs.table <- char.pair.table(setb.char.pairs, drop = "*")

# Set B receives an all-zero row and column at position 10 and then takes
# its names from the set H table, presumably so that all three tables share
# one layout. The position is hard-coded.
# NOTE(review): confirm position 10 still matches the data.
setb.char.pairs.table <- rbind(setb.char.pairs.table[1:9, ],
                               rep(0, ncol(setb.char.pairs.table)),
                               setb.char.pairs.table[10:nrow(setb.char.pairs.table), ])
setb.char.pairs.table <- cbind(setb.char.pairs.table[, 1:9],
                               rep(0, nrow(setb.char.pairs.table)),
                               setb.char.pairs.table[, 10:ncol(setb.char.pairs.table)])
names(setb.char.pairs.table) <- names(seth.char.pairs.table)

write.table(seth.char.pairs.table, "hcharpair.csv", quote=FALSE)
write.table(setr.char.pairs.table, "rcharpair.csv", quote=FALSE)
write.table(setb.char.pairs.table, "bcharpair.csv", quote=FALSE)

## Word pairs -----------------------------------------------------------

seth.word.pairs <- adjacent.pairs(seth.page.words, sep = ".")
setr.word.pairs <- adjacent.pairs(setr.page.words, sep = ".")
setb.word.pairs <- adjacent.pairs(setb.page.words, sep = ".")

write.table(table(unlist(seth.word.pairs)), "hwordpair.csv", quote=FALSE)
write.table(table(unlist(setr.word.pairs)), "rwordpair.csv", quote=FALSE)
write.table(table(unlist(setb.word.pairs)), "bwordpair.csv", quote=FALSE)
@

Having found differences in character frequencies, I also wanted to look at
patterns of word beginnings and endings, and paragraph beginnings and
endings. (Note that ``capital gallows characters'' are by definition
paragraph-initial.)

% Code chunk 2: boundary frequencies for all three sets, followed by the
% word-beginning plot. fig=TRUE is required for Sweave to include the plot;
% the chunk options are reconstructed because the originals were lost.
\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
# Rows and columns 1-3 of each pair table are treated as boundary markers,
# and index 3 as the paragraph marker; this reading follows from the plot
# titles used for the results ("Word ...", "Paragraph ...").
boundary  <- 1:3
paragraph <- 3

# Rescale a set of counts so that they sum to one.
to.proportions <- function(x) x / sum(x)

# Share of each character among those that follow a boundary marker.
word.begin.freq <- function(tab) {
    to.proportions(apply(tab[boundary, -boundary], 2, sum))
}

# Share of each character among those that follow the paragraph marker.
para.begin.freq <- function(tab) {
    to.proportions(tab[paragraph, -boundary])
}

# Share of each character among those that precede a boundary marker.
word.end.freq <- function(tab) {
    to.proportions(apply(tab[-boundary, boundary], 1, sum))
}

# Share of each character among those that precede the paragraph marker.
# Names are set explicitly: extracting one column drops them when the
# table is a data frame, and the plots use the names as plotting symbols.
para.end.freq <- function(tab) {
    counts <- tab[-boundary, paragraph]
    names(counts) <- rownames(tab)[-boundary]
    to.proportions(counts)
}

seth.char.begin  <- word.begin.freq(seth.char.pairs.table)
seth.char.pbegin <- para.begin.freq(seth.char.pairs.table)
seth.char.end    <- word.end.freq(seth.char.pairs.table)
seth.char.pend   <- para.end.freq(seth.char.pairs.table)

setr.char.begin  <- word.begin.freq(setr.char.pairs.table)
setr.char.pbegin <- para.begin.freq(setr.char.pairs.table)
setr.char.end    <- word.end.freq(setr.char.pairs.table)
setr.char.pend   <- para.end.freq(setr.char.pairs.table)

setb.char.begin  <- word.begin.freq(setb.char.pairs.table)
setb.char.pbegin <- para.begin.freq(setb.char.pairs.table)
setb.char.end    <- word.end.freq(setb.char.pairs.table)
setb.char.pend   <- para.end.freq(setb.char.pairs.table)

# One line per set; each point is drawn as the character it stands for.
plot(seq_along(seth.char.begin), seth.char.begin, type="b",
     pch=names(seth.char.begin), col="green",
     xlab="Character", ylab="Frequency", main="Word beginnings")
lines(seq_along(setr.char.begin), setr.char.begin, type="b",
      pch=names(setr.char.begin), col="darkgoldenrod2")
lines(seq_along(setb.char.begin), setb.char.begin, type="b",
      pch=names(setb.char.begin), col="purple")
@
\caption{Frequencies of characters beginning words
  (green: set H; gold: set R; purple: set B).}
\label{fig:begin}
\end{figure}

Set H was the only group in which d was an extremely frequent word-initial
character, and t and y were also more frequent in this position in set H
than in the other two (Fig.~\ref{fig:begin}). The characters o, q and
especially l were less common as word-initials in set H. Sets R and B were
similar, although there are differences in the frequency of a and d between
the two sets.

% Code chunk 3: paragraph-beginning plot (chunk options reconstructed).
\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
plot(seq_along(seth.char.pbegin), seth.char.pbegin, type="b",
     pch=names(seth.char.pbegin), col="green",
     xlab="Character", ylab="Frequency", main="Paragraph beginnings",
     ylim=c(0, .7))
lines(seq_along(setr.char.pbegin), setr.char.pbegin, type="b",
      pch=names(setr.char.pbegin), col="darkgoldenrod2")
lines(seq_along(setb.char.pbegin), setb.char.pbegin, type="b",
      pch=names(setb.char.pbegin), col="purple")
@
\caption{Frequencies of characters beginning paragraphs
  (green: set H; gold: set R; purple: set B).}
\label{fig:pbegin}
\end{figure}

Paragraph-initial characters were mainly the gallows characters F, K, P and
T; q and o were also found (Fig.~\ref{fig:pbegin}). In set R, d was found
as a paragraph-initial character. F and K were more common in set H, and P
was less common.
% Code chunk: word-ending plot. The chunk reads seth.char.end,
% setr.char.end and setb.char.end, which are computed earlier in the
% document. fig=TRUE is required for Sweave to include the plot; the chunk
% options are reconstructed because the originals were lost
% (NOTE(review): confirm against the original document).
\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
# One line per set; each point is drawn as the character it stands for.
plot(seq_along(seth.char.end), seth.char.end, type="b",
     pch=names(seth.char.end), col="green",
     xlab="Character", ylab="Frequency", main="Word endings",
     ylim=c(0,.6))
lines(seq_along(setr.char.end), setr.char.end, type="b",
      pch=names(setr.char.end), col="darkgoldenrod2")
lines(seq_along(setb.char.end), setb.char.end, type="b",
      pch=names(setb.char.end), col="purple")
@
\caption{Frequencies of characters ending words
  (green: set H; gold: set R; purple: set B).}
\label{fig:end}
\end{figure}

Word-ending characters were similar in all three sets, and only four
characters were commonly found: l, n, r and y (Fig.~\ref{fig:end}). The
same characters were found in the paragraph-final position, but frequencies
among sets were more variable (Fig.~\ref{fig:pend}).

% Code chunk: paragraph-ending plot. The chunk reads seth.char.pend,
% setr.char.pend and setb.char.pend, which are computed earlier in the
% document (chunk options reconstructed, as above).
\begin{figure}
\centering
<<fig=TRUE, echo=FALSE>>=
plot(seq_along(seth.char.pend), seth.char.pend, type="b",
     pch=names(seth.char.pend), col="green",
     xlab="Character", ylab="Frequency", main="Paragraph endings",
     ylim=c(0, .7))
lines(seq_along(setr.char.pend), setr.char.pend, type="b",
      pch=names(setr.char.pend), col="darkgoldenrod2")
lines(seq_along(setb.char.pend), setb.char.pend, type="b",
      pch=names(setb.char.pend), col="purple")
@
\caption{Frequencies of characters ending paragraphs
  (green: set H; gold: set R; purple: set B).}
\label{fig:pend}
\end{figure}

\end{document}