Sanskrit R Statistics

http://www.rstudio.com/ide/download/desktop
http://www.youtube.com/watch?v=o0Y478jOjGk
http://google-styleguide.googlecode.com/svn/trunk/Rguide.xml
https://docs.google.com/document/d/1esDVxyWvH8AsX-VJa-8oqWaHLs4stGlIbk8kLc5VlII/edit

Nityanand Misra’s R Code https://groups.google.com/forum/#!topic/bvparishat/cNoHQNYriks

# Triple-conjunct frequency among Sanskrit syllables.
#
# Downloads the syllable frequency table, reduces every syllable to a
# consonant/vowel skeleton, and reports how much of the total syllable count
# falls on syllables containing three consonants in a row. Lines starting
# with "#>" are the console output shown in the original listing.

# Reduce transliterated syllables to a consonant/vowel skeleton.
#
# syllable: character vector of syllables (avagrahas already removed).
# Returns a character vector of the same length in which every consonant
# letter is replaced by "C" and every vowel letter by "V"; characters in
# neither class (e.g. "M", "H") are left unchanged.
to_cv_skeleton <- function(syllable) {
  # A stop followed by "h" (presumably an aspirated consonant) is collapsed
  # to a single "C" first; otherwise the single-letter pass below would turn
  # it into "CC" and inflate the conjunct length.
  skeleton <- gsub("[kgcjTDpbtd]h", "C", syllable)
  skeleton <- gsub("[kgcjTDpbBtdDGhJlmnNrsSvVyz]", "C", skeleton)
  # Same idea for two-letter vowels (presumably diphthongs such as "ai"):
  # they become one "V" before the remaining single vowels are mapped.
  skeleton <- gsub("[aA][iIuU]", "V", skeleton)
  gsub("[aAiIuURLeo]", "V", skeleton)
}

# Read file
# NOTE(review): with the default na.strings = "NA", a syllable spelled exactly
# "NA" is read as a missing value. The sums below are unaffected (such a row is
# kept by the filter and never matches the triple-conjunct pattern), but
# confirm before reusing the Syllable column for anything else.
data <- read.delim(
  "http://kjc-fs-cluster.kjc.uni-heidelberg.de/dcs/data/syllables/syllables.dat",
  sep = ";",
  header = TRUE,
  comment.char = "#",
  stringsAsFactors = FALSE
)

# Delete avagrahas
# NOTE(review): assumes the avagraha is encoded as an ASCII apostrophe in the
# data file -- confirm against the source data.
data$Syllable <- gsub("'", "", data$Syllable, fixed = TRUE)

# There are some rows with bad syllable data (< 0.02%) -- retain only rows
# whose syllable consists solely of the expected transliteration letters.
data <- data[!grepl("[^athAHsnevidmyMkuroSpzUjIbgcNDlTRGJL]", data$Syllable), ]

# Map to consonants and vowels
data$Syllable2 <- to_cv_skeleton(data$Syllable)

# Rows whose skeleton contains three consecutive consonants; computed once
# and reused for both the count and the ratio.
has_triple_conjunct <- grepl("C{3}", data$Syllable2)

# Count of triple conjuncts
sum(data$total[has_triple_conjunct])
#> [1] 116457

# Count of total syllables
sum(data$total)
#> [1] 7566142

# Fraction of syllables with triple conjuncts (about 1.54%)
sum(data$total[has_triple_conjunct]) / sum(data$total)
#> [1] 0.01539186