# Single-cell Analysis

Goolam dataset is included with scDHA package as an example data.

```
library(SingleCellExperiment)
library(scDHA)
#Load example data (Goolam dataset)
data("Goolam")
#Get data matrix and label
data <- t(Goolam$data); label <- as.character(Goolam$label)
#Log transform the data
data <- log2(data + 1)
```

```
#Generate clustering result, the input matrix has rows as samples and columns as genes
result <- scDHA(data, seed = 1)
#The clustering result can be found here
cluster <- result$cluster
#Calculate adjusted Rand Index using mclust package
ari <- round(mclust::adjustedRandIndex(cluster,label), 2)
print(paste0("ARI = ", ari))
```

```
#Generate 2D representation, the input is the output from scDHA function
result <- scDHA.vis(result, seed = 1)
#Plot the representation of the dataset, different colors represent different cell types
plot(result$pred, col=factor(label), xlab = "scDHA1", ylab = "scDHA2")
```

```
#Cell stage order in Goolam dataset
cell.stages <- c("2cell", "4cell", "8cell", "16cell", "blast")
#Generate pseudo-time for each cell, the input is the output from scDHA function
result <- scDHA.pt(result, start.point = 1, seed = 1)
#Calculate R-squared value representing correlation between inferred pseudo-time and cell stage order
r2 <- round(cor(result$pt, as.numeric(factor(label, levels = cell.stages)))^2, digits = 2)
#Plot pseudo-temporal ordering of cells in Goolam dataset
plot(result$pt, factor(label, levels = cell.stages), xlab= "Pseudo Time", ylab = "Cell Stages", xaxt="n", yaxt="n")
axis(2, at=1:5,labels=cell.stages, las=2)
text(x = 1, y = 4.5, labels = paste0("R2 = ", r2))
```

```
#Split data into training and testing sets
set.seed(1)
idx <- sample.int(nrow(data), size = round(nrow(data)*0.75))
train.x <- data[idx, ]; train.y <- label[idx]
test.x <- data[-idx, ]; test.y <- label[-idx]
#Predict the labels of cells in testing set, the input matrices have rows as samples and columns as genes
prediction <- scDHA.class(train = train.x, train.label = train.y, test = test.x, seed = 1)
#Calculate accuracy of the predictions
acc <- round(sum(test.y == prediction)/length(test.y), 2)
print(paste0("Accuracy = ", acc))
```