regioning <-
function(cghdata.called, threshold=0.00001){

################################################################################
#
# Function that constructs region data for call probabilities.
# First, CGHregions is applied to the hard calls.
# Second, regions and clones (features) are matched.
# Finally, the mode call probability signature of each region is calculated.
#
# Arguments:
#   cghdata.called : object holding called array CGH data. It is passed to
#                    CGHregions() and queried with featureData(), calls(),
#                    probloss(), probnorm(), probgain() and probamp().
#   threshold      : value handed to CGHregions() as its 'averror' argument.
#
# Value:
#   A list with three elements:
#     ann       : annotation of the regions (pData of the regions' featureData),
#     hardcalls : regions(<CGHregions result>),
#     softcalls : matrix with one row per region and, per sample, one column
#                 per call state ("probloss_", "probnorm_", "probgain_" and,
#                 when available, "probamp_", each followed by the sample name).
#
################################################################################

    find.reg.modus <- function(x){

    ############################################################################
    #
    # Function that finds the mode signature of a region's call probability
    # data: the rows of x are cut into runs of consecutive identical rows and
    # the row that starts the longest run is returned (the first such run in
    # case of ties).
    #
    # x is a matrix (clones in rows), already restricted to a single region.
    #
    ############################################################################

        n.rows <- nrow(x)
        if (n.rows == 0){
            stop("cannot determine a mode signature: no clones map to the region",
                 call. = FALSE)
        }

        # flag every row that differs from its predecessor. Exact comparison is
        # kept from the original implementation. A comparison involving NA is
        # not counted as a difference. For a single-row x the comparison is
        # empty, so that row forms the only run.
        changed <- rowSums(x[-1, , drop = FALSE] != x[-n.rows, , drop = FALSE],
                           na.rm = TRUE) > 0

        # first row and length of every run of identical signatures
        run.start <- c(1, which(changed) + 1)
        run.length <- diff(c(run.start, n.rows + 1))

        # return the signature of the longest run, i.e., the modus
        x[run.start[which.max(run.length)], ]
    }

    # perform regioning of the hard calls by CGHregions
    cat("CGHregions of hard call data...")
    cghdata.regions <- CGHregions(cghdata.called, averror = threshold)
    cat("...done", "\n")
    print(paste("threshold used:", threshold, sep=" "))

    # extract annotation information of the calls
    calls.annotation <- pData(featureData(cghdata.called))

    # extract annotation information of the regions
    regions.annotation <- pData(featureData(cghdata.regions))

    # sort out which features map to a region.
    # Column 1 of both annotations is used as chromosome, column 2 of the call
    # annotation as clone position, columns 2 and 3 of the region annotation
    # as region start and end.
    # The regions are visited in the row order of their annotation, so entry r
    # of the list always belongs to row r of the region data, also when a
    # chromosome number carries no region at all.
    cat("Map regions to clones...")
    clone.chr <- calls.annotation[, 1]
    clone.pos <- calls.annotation[, 2]
    reg.to.clones <- lapply(seq_len(nrow(regions.annotation)), function(r){
        which(clone.chr == regions.annotation[r, 1] &
              clone.pos >= regions.annotation[r, 2] &
              clone.pos <= regions.annotation[r, 3])
    })
    cat("...done", "\n")

    # extract call probability data, once per call state
    state.probs <- list(probloss(cghdata.called),
                        probnorm(cghdata.called),
                        probgain(cghdata.called))
    # NOTE(review): probamp() is presumed to return NULL when the data hold no
    # amplification probabilities (the original code supported both 3 and 4
    # states per sample) -- confirm against the accessor's documentation.
    amp.probs <- probamp(cghdata.called)
    if (!is.null(amp.probs)){
        state.probs[[4]] <- amp.probs
    }
    n.states <- length(state.probs)
    n.samples <- ncol(calls(cghdata.called))

    # interleave the states per sample:
    # loss_1, norm_1, gain_1, (amp_1,) loss_2, norm_2, ...
    cghdata.probs <- matrix(NA_real_, nrow = nrow(state.probs[[1]]),
                            ncol = n.states * n.samples)
    for (s in seq_len(n.states)){
        target.cols <- seq(s, by = n.states, length.out = n.samples)
        cghdata.probs[, target.cols] <-
            state.probs[[s]][, seq_len(n.samples), drop = FALSE]
    }

    # calculate mode call probability signature for each region
    cat("Calculate mode soft call signature for each region...")
    signatures <- lapply(reg.to.clones, function(clones){
        find.reg.modus(cghdata.probs[clones, , drop = FALSE])
    })
    cghdata.regprobs <- matrix(as.numeric(unlist(signatures, use.names = FALSE)),
                               nrow = length(signatures),
                               ncol = ncol(cghdata.probs), byrow = TRUE)
    cat("...done", "\n")

    # label the columns (state prefix + sample name) and the rows (region names)
    hard.calls <- regions(cghdata.regions)
    state.prefix <- c("probloss_", "probnorm_",
                      "probgain_", "probamp_")[seq_len(n.states)]
    sample.names <- colnames(hard.calls)[seq_len(n.samples)]
    colnames(cghdata.regprobs) <- paste0(rep(state.prefix, times = n.samples),
                                         rep(sample.names, each = n.states))
    rownames(cghdata.regprobs) <- rownames(hard.calls)

    # format and return region data (hard and soft calls)
    list(ann = regions.annotation,
         hardcalls = hard.calls,
         softcalls = cghdata.regprobs)
}

