import pickle
import scanpy as sc
from UNAGI import plotting
!gzip -d ../../tutorials/example_1/attribute.pkl.gz
adata = sc.read_h5ad('../../../tutorials/example_1/dataset.h5ad')
adata.uns = pickle.load(open('../../tutorials/example_1/attribute.pkl', 'rb'))
Visualize the cell type composition acorss stages
Visualize the cell embeddings colouring by cell types
plotting.plot_stages_latent_representation(adata,'ident','stage',dpi=100)
6750
ARI: 0.4217906931483671
NMIs: 0.44839254929652533
silhouette score: 0.059075164506930025
3152
ARI: 0.44552176148100026
NMIs: 0.5212305876478248
silhouette score: 0.08603391761741375
4195
ARI: 0.428937899921206
NMIs: 0.46303444258947984
silhouette score: 0.050308310743398345
13550
ARI: 0.6294182896062666
NMIs: 0.6546946927992846
silhouette score: 0.09578975742334347
ARIs: [0.48141716103921]
NMI: [0.5218380680832787]
silhouette score: 0.07280178757277139
Plot the dotplots for cell type markers at individual stages
plotted_genes = ['IGF1', 'NLGN1', 'ROBO2', 'SLIT2', 'EGFEM1P', 'LINC02388', 'DACH2', 'FGF14', 'ITGBL1', 'MMRN1', 'CCL21', 'ADARB2', 'LRRTM4', 'ZNF385D', 'PRUNE2', 'LDB2', 'AQP1', 'FLT1', 'SLCO2A1', 'EPAS1', 'PCDH17']
for i in adata.obs['stage'].unique():
stageadata = adata[adata.obs['stage']==i]
sc.pl.dotplot(stageadata, groupby='ident', var_names=plotted_genes,vmax=2,swap_axes=True,save='_stage%d.pdf'%int(i))
WARNING: saving figure to file figures/dotplot__stage0.pdf
WARNING: saving figure to file figures/dotplot__stage1.pdf
WARNING: saving figure to file figures/dotplot__stage2.pdf
WARNING: saving figure to file figures/dotplot__stage3.pdf
Plot dynamic markers for a specific track
Use the alveolar fibroblast as an example
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import os
import json
import numpy as np
import scanpy as sc
from cycler import cycler
def readIdremJson(path, filename):
# print('getting Target genes from ', filename)
path = os.path.join(path,filename,'DREM.json')
f=open(path,"r")
lf=f.readlines()
f.close()
lf="".join(lf)
lf=lf[5:-2]+']'
tt=json.loads(lf,strict=False)
return tt
def readIdremJson(path, filename):
# print('getting Target genes from ', filename)
path = os.path.join(path,filename,'DREM.json')
f=open(path,"r")
lf=f.readlines()
f.close()
lf="".join(lf)
lf=lf[5:-2]+']'
tt=json.loads(lf,strict=False)
return tt
def getvalueofMarkers(idrem,filename, gene):
tt = readIdremJson(idrem,filename)
temp = np.array(tt[8])
idrem_genes = np.array(temp[1:,0].tolist())
tendency = temp[1:,4].astype(float)* temp[1:,3].astype(float) * temp[1:,2].astype(float) * temp[1:,1].astype(float)
tendency[tendency <0] = 0
index = [i for i, x in enumerate(tendency) if x <= 0]
genenames = temp[1:,0].tolist()
gene_idx = genenames.index(gene)
change = temp[1:,4].astype(float) - temp[1:,1].astype(float)
stage0 = temp[1:,1].astype(float)
stage1 = temp[1:,2].astype(float)-stage0
stage2 = temp[1:,3].astype(float)-stage0
stage3 = temp[1:,4].astype(float)-stage0
# stage0 =
#return [stage1[gene_idx],stage2[gene_idx],stage3[gene_idx]]
return [0,stage1[gene_idx],stage2[gene_idx],stage3[gene_idx] ]#[change[gene_idx]]
def getvaluesFromIDREM(path,genes,tracks,target_track):
out = {}
for gene in genes:
out[gene] = []
filenames = tracks
for each in filenames:
if each == target_track:
name = each+'.txt_viz'
if each[0] != '.':
each = each.split('.')[0]#.split('-')[-1].split('n')
out[gene]+=getvalueofMarkers(path,name,gene.split('\\')[0])
return out
######
adata = sc.read_h5ad('PATH_TO_DOWNLOADED/mes_4/dataset.h5ad')
uns = pickle.load(open('PATH_TO_DOWNLOADED/mes_4/attribute.pkl', 'rb'))
pm = uns['progressionMarkers']
target_track = '4-3n6n9n10-1n5n7n13n17-2n6n8n9n12n15'
increasing = []
decreasing = []
genes_increasing = []
genes_decreasing = []
plt.rcParams['axes.prop_cycle'] = plt.cycler("color", plt.cm.tab20.colors)
for each in pm.keys():
if each == target_track:
tt = readIdremJson('PATH_TO_DOWNLOADED/mes_4/idrem',each+'.txt_viz')
tt = readIdremJson('PATH_TO_DOWNLOADED/mes_4/idrem',each+'.txt_viz')
scope = []
for i, gene in enumerate(tt[0][6]['genesInNode']):
if gene:
scope.append(tt[3][i])
track = pm[each]
df = pd.DataFrame.from_dict(track['increasing'])
print(scope)
temp = list(df['gene'].values)
go = []
for gene in scope:
if gene in list(df['gene'].values):
go.append(gene)
# go = np.array(go)
df = df.loc[df['gene'].isin(go)]
df.sort_values(by=['rank'], inplace=True)
increasing.append(df.values[:10])
genes_increasing+=list(df['gene'].values[:10]+'\\'+str(each))
df = pd.DataFrame.from_dict(track['decreasing'])
scope = []
for i, gene in enumerate(tt[0][8]['genesInNode']):
if gene:
scope.append(tt[3][i])
go = []
for gene in scope:
if gene in list(df['gene'].values):
go.append(gene)
# go = np.array(go)
df = df.loc[df['gene'].isin(go)]
df.sort_values(by=['rank'], inplace=True)
decreasing.append(df.values[:10])
genes_decreasing+=list(df['gene'].values[:10]+'\\'+str(each))
out_increasing = getvaluesFromIDREM('PATH_TO_DOWNLOADED/mes_4/idrem',genes_increasing,list(pm.keys()),target_track=target_track)
out_decreasing = getvaluesFromIDREM('PATH_TO_DOWNLOADED/mes_4/idrem',genes_decreasing,list(pm.keys()),target_track=target_track)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1,1,figsize=(4,4),dpi=300)
gene_names = list(out_increasing.keys())
increasing_gene_names = [x.split('\\')[0] for x in gene_names]
ax.tick_params(top=True, bottom=False,
labeltop=True, labelbottom=False)
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_yticklabels(), rotation=-30, ha="right",rotation_mode="anchor")
plt.setp(ax.get_xticklabels(), rotation=-45, ha="right",rotation_mode="anchor")
im0 = ax.plot(np.array([0,1,2,3]),np.array(list(out_increasing.values())+list(out_decreasing.values())).T)
gene_names = list(out_decreasing.keys())
gene_names = [x.split('\\')[0] for x in gene_names]
ax.legend(increasing_gene_names+gene_names,loc=2,fontsize='xx-small',bbox_to_anchor=(1.08, 1))
ax.tick_params(top=False, bottom=True,
labeltop=False, labelbottom=True)
ax.set_xticks(np.array([0,1,2,3])+0.25,('Control','Stage 1', 'Stage 2','Stage 3'))
ax.set_ylim(-1.5,1)
ax.set_title('Top Dynamic Genes in the FibAlv-4')
ax.set_ylabel('Expression Change (Log2FC)')
ax.set_xlabel('Disease Progression')
# plt.savefig('genes_in_fib4.pdf',bbox_inches='tight', pad_inches=0)
plt.show()
['SCGB1A1', 'BPIFB1', 'PKN2-AS1', 'SCGB3A1', 'SERPINE1', 'CDH2', 'HPSE2', 'PRB4', 'CEMIP', 'ITGA2', 'LYZ', 'ADAMTSL1', 'KIF26B', 'PRB3', 'KCND2', 'LINC00578', 'ITGB8', 'CASC15', 'MUC5B', 'MUC5AC', 'ZNF385D', 'BPIFA1', 'MYO16', 'ITGA11', 'COL1A1', 'POSTN', 'AC117453.1', 'ITGBL1', 'AC009570.2', 'LAMA2', 'NFATC2', 'CYP7B1', 'COL1A2', 'ESRRG', 'IGF1', 'ADAMTS16', 'WT1', 'BPIFB2', 'FBXO32', 'NKAIN2', 'KRT17', 'SUGCT', 'MIR31HG', 'FGF7', 'GREM1', 'COL3A1', 'DCLK1', 'EPHA3', 'MSMB', 'KLHL4', 'COMP', 'GLDN', 'THBS2', 'TFF3', 'AC021594.2', 'RUNX2', 'IGFN1', 'AGR2', 'TENM3', 'ALDH1A3', 'LDLRAD4', 'HECW1', 'PDGFD', 'LTF', 'DPYSL3', 'SULF1', 'VWC2', 'ISM1', 'FRMD6', 'TMEM178B', 'SLC24A2', 'PVT1', 'ACTA2', 'ZMAT4', 'VCAN', 'TMEM45A', 'NCAM1', 'PCDH7', 'DDAH1', 'TMEM132C', 'SEMA3D', 'TIMP1', 'AC079160.1', 'FBN1', 'EMP1', 'CORIN', 'NOX4', 'COL7A1', 'GREB1', 'HAS2', 'AC018697.1', 'TEX41', 'ANK2', 'COL6A3', 'AC099066.2', 'PAMR1', 'RUNX1', 'CCDC80', 'PPFIA2', 'IL1R1', 'NRCAM', 'GEM', 'CLMP', 'PLXNA4', 'COL14A1', 'IFNG-AS1', 'LTBP1', 'SMOC2', 'NAV3', 'GNGT1', 'MEDAG', 'SAMD3', 'NHS', 'GABRG3', 'DPYS', 'CCN2', 'TSPAN5', 'DEPTOR', 'SFRP2', 'FAM20C', 'BICC1', 'TENM4', 'FAP', 'LUM', 'AC092353.2', 'MMP16', 'COL24A1', 'IGFBP7', 'PDE7B', 'SYNDIG1', 'SLC44A3-AS1', 'MEG8', 'LINC02694', 'SPSB1', 'MMP19', 'PDE1A', 'AL136441.1', 'TSHZ2', 'SNAP25', 'LINC02398', 'AC093772.1', 'CADPS', 'LINC01515', 'BOC', 'MIR205HG', 'AC244205.1', 'ZG16B', 'DSG3', 'MAP1B', 'PCBP3', 'RFX2', 'LTBP2', 'ENTPD1', 'AP005212.4', 'AC079298.3', 'CSRNP3', 'PRRX1', 'MMP7', 'KCNE4', 'ADAMTS14', 'KLHL13', 'SNCAIP', 'MOXD1', 'CHL1', 'NALCN', 'RYR3', 'RASGRF2', 'AC090578.1', 'ANKRD36BP2', 'POT1-AS1', 'MSC-AS1', 'AL445250.1', 'COL15A1', 'AL356108.1', 'SYTL2', 'STEAP2-AS1', 'CYP1B1', 'FHL2', 'COL5A3', 'ROR2', 'CRACD', 'COL5A1', 'SPP1', 'IRAG1', 'PRH2', 'AL158835.1']
Plot the dendrogram of each stage data
Visualize hierarchical markers using heatmaps
Visualize the pathway perturabtion results
from UNAGI import perturbations
perturbations.get_top_pathways(adata, 0.5, top_n=20)
| pathways | perturbation score | pval_adjusted | regulated genes | idrem_suggestion | |
|---|---|---|---|---|---|
| 0 | REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES... | 0.857904 | 0.0 | [CFTR, BAIAP2L1, ARHGAP44, RHOBTB2, ANLN, CDH1... | [CFTR:-, BAIAP2L1:-, ARHGAP44:+, RHOBTB2:-, AN... |
| 1 | REACTOME_RESPONSE_TO_ELEVATED_PLATELET_CYTOSOL... | 0.832910 | 0.0 | [CD9, IGF1, RAB27B, TGFB2, TIMP3, TIMP1, SERPI... | [CD9:-, IGF1:-, RAB27B:-, TGFB2:+, TIMP3:+, TI... |
| 2 | REACTOME_RHO_GTPASE_CYCLE | 0.805893 | 0.0 | [CFTR, BAIAP2L1, ARHGAP44, RHOBTB2, ANLN, ARHG... | [CFTR:-, BAIAP2L1:-, ARHGAP44:+, RHOBTB2:-, AN... |
| 3 | REACTOME_ASPARAGINE_N_LINKED_GLYCOSYLATION | 0.789156 | 0.0 | [ST3GAL6, ST6GAL1, AREG, ST8SIA4, COL7A1, ST3G... | [ST3GAL6:+, ST6GAL1:-, AREG:-, ST8SIA4:-, COL7... |
| 4 | REACTOME_INFECTIOUS_DISEASE | 0.770471 | 0.0 | [CD9, LTF, CDH1, NEDD4L, CALCRL, SYT1, ST6GAL1... | [CD9:-, LTF:-, CDH1:-, NEDD4L:+, CALCRL:+, SYT... |
| 5 | REACTOME_SEMAPHORIN_INTERACTIONS | 0.757776 | 0.0 | [SEMA3A, PAK3, SEMA6A, MYH14, MET, SEMA5A, DPY... | [SEMA3A:-, PAK3:-, SEMA6A:+, MYH14:-, MET:-, S... |
| 6 | KEGG_VIRAL_MYOCARDITIS | 0.748296 | 0.0 | [ICAM1, MYH14, DMD, SGCD, CD55, LAMA2, CAV1, M... | [ICAM1:+, MYH14:-, DMD:-, SGCD:-, CD55:-, LAMA... |
| 7 | WP_VITAMIN_D_RECEPTOR_PATHWAY | 0.746727 | 0.0 | [TPM1, IGFBP3, TGFB2, CYP3A5, DUSP10, MX2, THB... | [TPM1:-, IGFBP3:+, TGFB2:+, CYP3A5:+, DUSP10:-... |
| 8 | REACTOME_SIGNALING_BY_GPCR | 0.734829 | 0.0 | [CX3CL1, CASR, PTGER3, NPFFR2, CAMK2B, DGKG, C... | [CX3CL1:-, CASR:-, PTGER3:-, NPFFR2:+, CAMK2B:... |
| 9 | REACTOME_PROGRAMMED_CELL_DEATH | 0.727195 | 0.0 | [BIRC3, DAPK2, CDH1, PRKCQ, TP63, CLSPN, BMX, ... | [BIRC3:-, DAPK2:+, CDH1:-, PRKCQ:-, TP63:-, CL... |
| 10 | WP_REGULATION_OF_ACTIN_CYTOSKELETON | 0.726818 | 0.0 | [RRAS2, PIK3C2G, PIP5K1B, MYH10, ITGA1, CHRM3,... | [RRAS2:+, PIK3C2G:+, PIP5K1B:+, MYH10:+, ITGA1... |
| 11 | REACTOME_SCAVENGING_BY_CLASS_A_RECEPTORS | 0.722579 | 0.0 | [FTL, COL1A1, APOE, COL4A2, COLEC12, SCGB3A2, ... | [FTL:-, COL1A1:-, APOE:-, COL4A2:-, COLEC12:+,... |
| 12 | REACTOME_BIOSYNTHESIS_OF_THE_N_GLYCAN_PRECURSO... | 0.722269 | 0.0 | [ST3GAL6, ST6GAL1, ST8SIA4, ST3GAL5, ST6GALNAC... | [ST3GAL6:+, ST6GAL1:-, ST8SIA4:-, ST3GAL5:+, S... |
| 13 | PID_SYNDECAN_1_PATHWAY | 0.719932 | 0.0 | [COL6A3, MET, COL7A1, MMP1, COL11A1, COL1A2, C... | [COL6A3:-, MET:-, COL7A1:-, MMP1:-, COL11A1:-,... |
| 14 | REACTOME_COLLAGEN_BIOSYNTHESIS_AND_MODIFYING_E... | 0.719879 | 0.0 | [TLL1, COL23A1, COL11A1, COL17A1, COL5A3, COL4... | [TLL1:+, COL23A1:-, COL11A1:-, COL17A1:-, COL5... |
| 15 | REACTOME_TOLL_LIKE_RECEPTOR_CASCADES | 0.718208 | 0.0 | [BIRC3, MAPK10, SFTPA1, SFTPD, CD36, CTSV, DUS... | [BIRC3:-, MAPK10:-, SFTPA1:+, SFTPD:+, CD36:+,... |
| 16 | REACTOME_COLLAGEN_FORMATION | 0.717334 | 0.0 | [TLL1, COL23A1, LAMA3, LAMC2, COL11A1, COL17A1... | [TLL1:+, COL23A1:-, LAMA3:+, LAMC2:-, COL11A1:... |
| 17 | WP_INSULIN_SIGNALING | 0.713507 | 0.0 | [RRAD, INSR, PIK3C2G, ENPP1, MAPK10, PRKCB, XB... | [RRAD:-, INSR:-, PIK3C2G:+, ENPP1:-, MAPK10:-,... |
| 18 | REACTOME_COLLAGEN_CHAIN_TRIMERIZATION | 0.710968 | 0.0 | [COL23A1, COL11A1, COL17A1, COL5A3, COL4A4, CO... | [COL23A1:-, COL11A1:-, COL17A1:-, COL5A3:-, CO... |
| 19 | NABA_COLLAGENS | 0.710968 | 0.0 | [COL10A1, COL11A1, COL12A1, COL14A1, COL15A1, ... | [COL10A1:-, COL11A1:-, COL12A1:-, COL14A1:-, C... |
Visualize the drug/compounds perturabtion results
from UNAGI import perturbations
perturbations.get_top_compounds(adata, 0.5, cutoff=0.05)
| compound | perturbation score | pval_adjusted | drug_regulation | idrem_suggestion | |
|---|---|---|---|---|---|
| 0 | KI-16425 | 0.622860 | 0.000000e+00 | [LPAR1:-, LPAR3:-] | [LPAR1:+, LPAR3:-] |
| 1 | 8-bromo-cGMP | 0.593094 | 0.000000e+00 | [PRKG1:+] | [PRKG1:+] |
| 2 | 1-monopalmitin,dofequidar,erythromycin-ethylsu... | 0.494398 | 1.154632e-15 | [ABCB1:-] | [ABCB1:-] |
| 3 | ibudilast | 0.435658 | 8.955820e-12 | [PDE10A:-, PDE3A:-, PDE4D:+] | [PDE10A:-, PDE3A:-, PDE4D:+] |
| 4 | pimobendan,amrinone,anagrelide,cilostamide,cil... | 0.434252 | 1.113823e-11 | [PDE3A:+] | [PDE3A:-] |
| 5 | ODQ,YC-1,LY-83583 | 0.432265 | 1.501685e-11 | [GUCY1A2:-] | [GUCY1A2:+] |
| 6 | felodipine,nimodipine | 0.404349 | 6.392553e-10 | [NR3C2:+, CACNA1C:-, CACNA1D:-] | [NR3C2:+, CACNA1C:-, CACNA1D:+] |
| 7 | amlodipine,cleviprex,isradipine,nifedipine,nil... | 0.387505 | 5.314587e-09 | [CACNA1C:+, CACNA1D:+] | [CACNA1C:-, CACNA1D:+] |
| 8 | vandetanib | 0.379303 | 1.446378e-08 | [EGFR:-, FLT1:-, KDR:+, TEK:+] | [EGFR:-, FLT1:+, KDR:+, TEK:+] |
| 9 | BMS-927711,MK-3207,telcagepant | 0.376201 | 2.127605e-08 | [CALCRL:+] | [CALCRL:+] |
| 10 | MK-2461 | 0.352000 | 3.232959e-07 | [FGFR2:+, FLT1:+, MET:-] | [FGFR2:-, FLT1:+, MET:-] |
| 11 | AS-601245,pyrazolanthrone | 0.337868 | 1.433529e-06 | [MAPK10:-] | [MAPK10:-] |
| 12 | afatinib,BMS-599626,canertinib,dacomitinib,ner... | 0.317526 | 1.050923e-05 | [EGFR:-, ERBB4:-] | [EGFR:-, ERBB4:-] |
| 13 | tetraethylenepentamine | 0.292696 | 9.500971e-05 | [SOD2:+] | [SOD2:+] |
| 14 | cabozantinib | 0.281300 | 2.438229e-04 | [KDR:+, KIT:+, MET:+, TEK:-] | [KDR:+, KIT:-, MET:-, TEK:+] |
| 15 | benidipine,diltiazem,lacidipine,manidipine,nic... | 0.272534 | 4.893755e-04 | [CACNA1C:+] | [CACNA1C:-] |
| 16 | dipyridamole | 0.257740 | 1.454910e-03 | [PDE10A:-, PDE7B:-] | [PDE10A:-, PDE7B:-] |
| 17 | topiramate | 0.251413 | 2.296113e-03 | [GABRB2:-, GABRG3:-, GRIA1:+, SCN1A:-] | [GABRB2:+, GABRG3:-, GRIA1:+, SCN1A:+] |
| 18 | apremilast,cilomilast,dyphylline,piclamilast,R... | 0.243585 | 3.928745e-03 | [PDE4D:-] | [PDE4D:+] |
| 19 | niguldipine | 0.239965 | 5.078949e-03 | [ADRA1A:-, CACNA1C:-] | [ADRA1A:+, CACNA1C:-] |
| 20 | triflusal,papaverine,PF-02545920 | 0.239697 | 5.325332e-03 | [PDE10A:-] | [PDE10A:-] |
| 21 | crizotinib,BMS-777607,MK-8033,PF-04217903,PHA-... | 0.237142 | 6.430805e-03 | [MET:-] | [MET:-] |
| 22 | regorafenib | 0.235685 | 7.259023e-03 | [FGFR2:+, FLT1:+, FRK:+, KDR:+, KIT:+, TEK:+] | [FGFR2:-, FLT1:+, FRK:-, KDR:+, KIT:-, TEK:+] |
| 23 | amuvatinib | 0.234486 | 8.070473e-03 | [KIT:-, MET:-] | [KIT:-, MET:-] |
| 24 | 2-iminobiotin,myricitrin,proadifen | 0.232254 | 9.545256e-03 | [NOS1:+] | [NOS1:+] |
| 25 | BRD-K19059335 | 0.226743 | 1.362506e-02 | [RUNX1:-] | [RUNX1:-] |
| 26 | PSB-069 | 0.226485 | 1.435799e-02 | [ENTPD1:+] | [ENTPD1:-] |
| 27 | furosemide,piretanide,bumetanide | 0.222332 | 1.884517e-02 | [SLC12A2:-] | [SLC12A2:-] |
| 28 | foretinib,golvatinib | 0.213019 | 3.230532e-02 | [KDR:+, MET:-] | [KDR:+, MET:-] |
| 29 | estramustine-phosphate | 0.205931 | 4.818654e-02 | [ESR2:+, MAP2:-] | [ESR2:-, MAP2:-] |