import pickle
import scanpy as sc
from UNAGI import plotting
!gzip -d ../../tutorials/example_1/attribute.pkl.gz
adata = sc.read_h5ad('../../../tutorials/example_1/dataset.h5ad')
adata.uns = pickle.load(open('../../tutorials/example_1/attribute.pkl', 'rb'))

Visualize the cell type composition acorss stages

plotting.cell_type_composition(adata,'ident','stage',dpi=100)

Visualize the cell embeddings colouring by cell types

plotting.plot_stages_latent_representation(adata,'ident','stage',dpi=100)
6750
ARI:  0.4217906931483671
NMIs:  0.44839254929652533
silhouette score:  0.059075164506930025
3152
ARI:  0.44552176148100026
NMIs:  0.5212305876478248
silhouette score:  0.08603391761741375
4195
ARI:  0.428937899921206
NMIs:  0.46303444258947984
silhouette score:  0.050308310743398345
13550
ARI:  0.6294182896062666
NMIs:  0.6546946927992846
silhouette score:  0.09578975742334347
../../_images/941078ec73b6e7ecc175a6a745eb512c57cbbd3b7bdf7c2f36b3cfc66df1f307.png
ARIs:  [0.48141716103921]
NMI:  [0.5218380680832787]
silhouette score:  0.07280178757277139

Plot the dotplots for cell type markers at individual stages

plotted_genes = ['IGF1', 'NLGN1', 'ROBO2', 'SLIT2', 'EGFEM1P', 'LINC02388', 'DACH2', 'FGF14', 'ITGBL1', 'MMRN1', 'CCL21', 'ADARB2', 'LRRTM4', 'ZNF385D', 'PRUNE2', 'LDB2', 'AQP1', 'FLT1', 'SLCO2A1', 'EPAS1', 'PCDH17']
for i in adata.obs['stage'].unique():
    stageadata = adata[adata.obs['stage']==i]
    sc.pl.dotplot(stageadata, groupby='ident', var_names=plotted_genes,vmax=2,swap_axes=True,save='_stage%d.pdf'%int(i))
WARNING: saving figure to file figures/dotplot__stage0.pdf
../../_images/1e19af760d9c5f81e60c36eabaa100790f75cb0b26537361e47190d51377b845.png
WARNING: saving figure to file figures/dotplot__stage1.pdf
../../_images/de9af464e7096a81dcdb93d864d748e810f41ded9538769ca37fc0655ea8f342.png
WARNING: saving figure to file figures/dotplot__stage2.pdf
../../_images/6b66abe62d8e1d65a5ad232c854f34d13a19d08eaf207671c0d412d2f43cc12e.png
WARNING: saving figure to file figures/dotplot__stage3.pdf
../../_images/962ab74efb1ea7e4e2087baa0d5db0b525b269625c6c729b86de4ba2562e54da.png

Plot dynamic markers for a specific track

Use the alveolar fibroblast as an example

import matplotlib.pyplot as plt
import pickle
import pandas as pd
import os
import json
import numpy as np
import scanpy as sc
from cycler import cycler
def readIdremJson(path, filename):
    # print('getting Target genes from ', filename)

    path = os.path.join(path,filename,'DREM.json')
    f=open(path,"r")
    lf=f.readlines()
    f.close()
    lf="".join(lf)
    lf=lf[5:-2]+']'
    tt=json.loads(lf,strict=False)
    return tt
def readIdremJson(path, filename):
    # print('getting Target genes from ', filename)

    path = os.path.join(path,filename,'DREM.json')
    f=open(path,"r")
    lf=f.readlines()
    f.close()
    lf="".join(lf)
    lf=lf[5:-2]+']'
    tt=json.loads(lf,strict=False)
    return tt
def getvalueofMarkers(idrem,filename, gene):
    tt = readIdremJson(idrem,filename)
    temp = np.array(tt[8])
    idrem_genes = np.array(temp[1:,0].tolist())
    tendency = temp[1:,4].astype(float)* temp[1:,3].astype(float) * temp[1:,2].astype(float) * temp[1:,1].astype(float)
    tendency[tendency <0] = 0
    index = [i for i, x in enumerate(tendency) if x <= 0]
    genenames = temp[1:,0].tolist()
    gene_idx = genenames.index(gene)
    change = temp[1:,4].astype(float) - temp[1:,1].astype(float)
    stage0 = temp[1:,1].astype(float)
    stage1 = temp[1:,2].astype(float)-stage0
    stage2 = temp[1:,3].astype(float)-stage0
    stage3 = temp[1:,4].astype(float)-stage0
    # stage0 = 
    #return [stage1[gene_idx],stage2[gene_idx],stage3[gene_idx]]
    return [0,stage1[gene_idx],stage2[gene_idx],stage3[gene_idx] ]#[change[gene_idx]]
def getvaluesFromIDREM(path,genes,tracks,target_track):
    out = {}
    for gene in genes:
        out[gene] = []
        filenames = tracks
        for each in filenames:
            if each == target_track:
                name = each+'.txt_viz'
                if each[0] != '.':
                    each = each.split('.')[0]#.split('-')[-1].split('n')
                    out[gene]+=getvalueofMarkers(path,name,gene.split('\\')[0])
    return out



######
adata = sc.read_h5ad('PATH_TO_DOWNLOADED/mes_4/dataset.h5ad')
uns = pickle.load(open('PATH_TO_DOWNLOADED/mes_4/attribute.pkl', 'rb'))
pm = uns['progressionMarkers']
target_track = '4-3n6n9n10-1n5n7n13n17-2n6n8n9n12n15'
increasing = []
decreasing = []
genes_increasing = []
genes_decreasing = []
plt.rcParams['axes.prop_cycle'] = plt.cycler("color", plt.cm.tab20.colors)
for each in pm.keys():
    if each == target_track:
        
        tt = readIdremJson('PATH_TO_DOWNLOADED/mes_4/idrem',each+'.txt_viz')
        tt = readIdremJson('PATH_TO_DOWNLOADED/mes_4/idrem',each+'.txt_viz')
        scope = []
        for i, gene in enumerate(tt[0][6]['genesInNode']):
            if gene:
                scope.append(tt[3][i])
        track = pm[each]
        df = pd.DataFrame.from_dict(track['increasing'])
        print(scope)
        temp = list(df['gene'].values)
        go = []
        for gene in scope:
            if gene in list(df['gene'].values):
                go.append(gene)
        # go = np.array(go)
        df = df.loc[df['gene'].isin(go)]
      
        df.sort_values(by=['rank'], inplace=True)
        increasing.append(df.values[:10])
        genes_increasing+=list(df['gene'].values[:10]+'\\'+str(each))
        df = pd.DataFrame.from_dict(track['decreasing'])
        scope = []
        for i, gene in enumerate(tt[0][8]['genesInNode']):
            if gene:
                scope.append(tt[3][i])
        
        go = []
        for gene in scope:
            if gene in list(df['gene'].values):
                go.append(gene)
        # go = np.array(go)
        df = df.loc[df['gene'].isin(go)]
        df.sort_values(by=['rank'], inplace=True)
        decreasing.append(df.values[:10])
        genes_decreasing+=list(df['gene'].values[:10]+'\\'+str(each))

out_increasing = getvaluesFromIDREM('PATH_TO_DOWNLOADED/mes_4/idrem',genes_increasing,list(pm.keys()),target_track=target_track)
out_decreasing = getvaluesFromIDREM('PATH_TO_DOWNLOADED/mes_4/idrem',genes_decreasing,list(pm.keys()),target_track=target_track)

import matplotlib.pyplot as plt
fig, ax = plt.subplots(1,1,figsize=(4,4),dpi=300)

gene_names = list(out_increasing.keys())
increasing_gene_names = [x.split('\\')[0] for x in gene_names]
ax.tick_params(top=True, bottom=False,
                   labeltop=True, labelbottom=False)

    # Rotate the tick labels and set their alignment.
plt.setp(ax.get_yticklabels(), rotation=-30, ha="right",rotation_mode="anchor")
plt.setp(ax.get_xticklabels(), rotation=-45, ha="right",rotation_mode="anchor")

im0 = ax.plot(np.array([0,1,2,3]),np.array(list(out_increasing.values())+list(out_decreasing.values())).T)
gene_names = list(out_decreasing.keys())
gene_names = [x.split('\\')[0] for x in gene_names]
ax.legend(increasing_gene_names+gene_names,loc=2,fontsize='xx-small',bbox_to_anchor=(1.08, 1))
ax.tick_params(top=False, bottom=True,
                   labeltop=False, labelbottom=True)
ax.set_xticks(np.array([0,1,2,3])+0.25,('Control','Stage 1', 'Stage 2','Stage 3'))
ax.set_ylim(-1.5,1)

ax.set_title('Top Dynamic Genes in the FibAlv-4')
ax.set_ylabel('Expression Change (Log2FC)')
ax.set_xlabel('Disease Progression')

# plt.savefig('genes_in_fib4.pdf',bbox_inches='tight', pad_inches=0)
plt.show()
['SCGB1A1', 'BPIFB1', 'PKN2-AS1', 'SCGB3A1', 'SERPINE1', 'CDH2', 'HPSE2', 'PRB4', 'CEMIP', 'ITGA2', 'LYZ', 'ADAMTSL1', 'KIF26B', 'PRB3', 'KCND2', 'LINC00578', 'ITGB8', 'CASC15', 'MUC5B', 'MUC5AC', 'ZNF385D', 'BPIFA1', 'MYO16', 'ITGA11', 'COL1A1', 'POSTN', 'AC117453.1', 'ITGBL1', 'AC009570.2', 'LAMA2', 'NFATC2', 'CYP7B1', 'COL1A2', 'ESRRG', 'IGF1', 'ADAMTS16', 'WT1', 'BPIFB2', 'FBXO32', 'NKAIN2', 'KRT17', 'SUGCT', 'MIR31HG', 'FGF7', 'GREM1', 'COL3A1', 'DCLK1', 'EPHA3', 'MSMB', 'KLHL4', 'COMP', 'GLDN', 'THBS2', 'TFF3', 'AC021594.2', 'RUNX2', 'IGFN1', 'AGR2', 'TENM3', 'ALDH1A3', 'LDLRAD4', 'HECW1', 'PDGFD', 'LTF', 'DPYSL3', 'SULF1', 'VWC2', 'ISM1', 'FRMD6', 'TMEM178B', 'SLC24A2', 'PVT1', 'ACTA2', 'ZMAT4', 'VCAN', 'TMEM45A', 'NCAM1', 'PCDH7', 'DDAH1', 'TMEM132C', 'SEMA3D', 'TIMP1', 'AC079160.1', 'FBN1', 'EMP1', 'CORIN', 'NOX4', 'COL7A1', 'GREB1', 'HAS2', 'AC018697.1', 'TEX41', 'ANK2', 'COL6A3', 'AC099066.2', 'PAMR1', 'RUNX1', 'CCDC80', 'PPFIA2', 'IL1R1', 'NRCAM', 'GEM', 'CLMP', 'PLXNA4', 'COL14A1', 'IFNG-AS1', 'LTBP1', 'SMOC2', 'NAV3', 'GNGT1', 'MEDAG', 'SAMD3', 'NHS', 'GABRG3', 'DPYS', 'CCN2', 'TSPAN5', 'DEPTOR', 'SFRP2', 'FAM20C', 'BICC1', 'TENM4', 'FAP', 'LUM', 'AC092353.2', 'MMP16', 'COL24A1', 'IGFBP7', 'PDE7B', 'SYNDIG1', 'SLC44A3-AS1', 'MEG8', 'LINC02694', 'SPSB1', 'MMP19', 'PDE1A', 'AL136441.1', 'TSHZ2', 'SNAP25', 'LINC02398', 'AC093772.1', 'CADPS', 'LINC01515', 'BOC', 'MIR205HG', 'AC244205.1', 'ZG16B', 'DSG3', 'MAP1B', 'PCBP3', 'RFX2', 'LTBP2', 'ENTPD1', 'AP005212.4', 'AC079298.3', 'CSRNP3', 'PRRX1', 'MMP7', 'KCNE4', 'ADAMTS14', 'KLHL13', 'SNCAIP', 'MOXD1', 'CHL1', 'NALCN', 'RYR3', 'RASGRF2', 'AC090578.1', 'ANKRD36BP2', 'POT1-AS1', 'MSC-AS1', 'AL445250.1', 'COL15A1', 'AL356108.1', 'SYTL2', 'STEAP2-AS1', 'CYP1B1', 'FHL2', 'COL5A3', 'ROR2', 'CRACD', 'COL5A1', 'SPP1', 'IRAG1', 'PRH2', 'AL158835.1']
../../_images/edd3bac9b20c798e8deba672d2f685efd864c7034118753126c03a016393badc.png

Plot the dendrogram of each stage data

plotting.plot_hc_dendrogram(adata,'stage','ident')

Visualize hierarchical markers using heatmaps

plotting.hierarchical_static_markers_heatmap(adata,stage=0,cluster=3,level=1,n_genes=20,stage_key='stage',celltype_key='ident')
WARNING: `var_names` is a dictionary. This will reset the current value of `var_group_labels` and `var_group_positions`.
../../_images/dc93019743878e5761046cae6a09c95ae38efb75d60e650121ff9084304357da.png

Visualize the pathway perturabtion results

from UNAGI import perturbations
perturbations.get_top_pathways(adata, 0.5, top_n=20)
pathways perturbation score pval_adjusted regulated genes idrem_suggestion
0 REACTOME_SIGNALING_BY_RHO_GTPASES_MIRO_GTPASES... 0.857904 0.0 [CFTR, BAIAP2L1, ARHGAP44, RHOBTB2, ANLN, CDH1... [CFTR:-, BAIAP2L1:-, ARHGAP44:+, RHOBTB2:-, AN...
1 REACTOME_RESPONSE_TO_ELEVATED_PLATELET_CYTOSOL... 0.832910 0.0 [CD9, IGF1, RAB27B, TGFB2, TIMP3, TIMP1, SERPI... [CD9:-, IGF1:-, RAB27B:-, TGFB2:+, TIMP3:+, TI...
2 REACTOME_RHO_GTPASE_CYCLE 0.805893 0.0 [CFTR, BAIAP2L1, ARHGAP44, RHOBTB2, ANLN, ARHG... [CFTR:-, BAIAP2L1:-, ARHGAP44:+, RHOBTB2:-, AN...
3 REACTOME_ASPARAGINE_N_LINKED_GLYCOSYLATION 0.789156 0.0 [ST3GAL6, ST6GAL1, AREG, ST8SIA4, COL7A1, ST3G... [ST3GAL6:+, ST6GAL1:-, AREG:-, ST8SIA4:-, COL7...
4 REACTOME_INFECTIOUS_DISEASE 0.770471 0.0 [CD9, LTF, CDH1, NEDD4L, CALCRL, SYT1, ST6GAL1... [CD9:-, LTF:-, CDH1:-, NEDD4L:+, CALCRL:+, SYT...
5 REACTOME_SEMAPHORIN_INTERACTIONS 0.757776 0.0 [SEMA3A, PAK3, SEMA6A, MYH14, MET, SEMA5A, DPY... [SEMA3A:-, PAK3:-, SEMA6A:+, MYH14:-, MET:-, S...
6 KEGG_VIRAL_MYOCARDITIS 0.748296 0.0 [ICAM1, MYH14, DMD, SGCD, CD55, LAMA2, CAV1, M... [ICAM1:+, MYH14:-, DMD:-, SGCD:-, CD55:-, LAMA...
7 WP_VITAMIN_D_RECEPTOR_PATHWAY 0.746727 0.0 [TPM1, IGFBP3, TGFB2, CYP3A5, DUSP10, MX2, THB... [TPM1:-, IGFBP3:+, TGFB2:+, CYP3A5:+, DUSP10:-...
8 REACTOME_SIGNALING_BY_GPCR 0.734829 0.0 [CX3CL1, CASR, PTGER3, NPFFR2, CAMK2B, DGKG, C... [CX3CL1:-, CASR:-, PTGER3:-, NPFFR2:+, CAMK2B:...
9 REACTOME_PROGRAMMED_CELL_DEATH 0.727195 0.0 [BIRC3, DAPK2, CDH1, PRKCQ, TP63, CLSPN, BMX, ... [BIRC3:-, DAPK2:+, CDH1:-, PRKCQ:-, TP63:-, CL...
10 WP_REGULATION_OF_ACTIN_CYTOSKELETON 0.726818 0.0 [RRAS2, PIK3C2G, PIP5K1B, MYH10, ITGA1, CHRM3,... [RRAS2:+, PIK3C2G:+, PIP5K1B:+, MYH10:+, ITGA1...
11 REACTOME_SCAVENGING_BY_CLASS_A_RECEPTORS 0.722579 0.0 [FTL, COL1A1, APOE, COL4A2, COLEC12, SCGB3A2, ... [FTL:-, COL1A1:-, APOE:-, COL4A2:-, COLEC12:+,...
12 REACTOME_BIOSYNTHESIS_OF_THE_N_GLYCAN_PRECURSO... 0.722269 0.0 [ST3GAL6, ST6GAL1, ST8SIA4, ST3GAL5, ST6GALNAC... [ST3GAL6:+, ST6GAL1:-, ST8SIA4:-, ST3GAL5:+, S...
13 PID_SYNDECAN_1_PATHWAY 0.719932 0.0 [COL6A3, MET, COL7A1, MMP1, COL11A1, COL1A2, C... [COL6A3:-, MET:-, COL7A1:-, MMP1:-, COL11A1:-,...
14 REACTOME_COLLAGEN_BIOSYNTHESIS_AND_MODIFYING_E... 0.719879 0.0 [TLL1, COL23A1, COL11A1, COL17A1, COL5A3, COL4... [TLL1:+, COL23A1:-, COL11A1:-, COL17A1:-, COL5...
15 REACTOME_TOLL_LIKE_RECEPTOR_CASCADES 0.718208 0.0 [BIRC3, MAPK10, SFTPA1, SFTPD, CD36, CTSV, DUS... [BIRC3:-, MAPK10:-, SFTPA1:+, SFTPD:+, CD36:+,...
16 REACTOME_COLLAGEN_FORMATION 0.717334 0.0 [TLL1, COL23A1, LAMA3, LAMC2, COL11A1, COL17A1... [TLL1:+, COL23A1:-, LAMA3:+, LAMC2:-, COL11A1:...
17 WP_INSULIN_SIGNALING 0.713507 0.0 [RRAD, INSR, PIK3C2G, ENPP1, MAPK10, PRKCB, XB... [RRAD:-, INSR:-, PIK3C2G:+, ENPP1:-, MAPK10:-,...
18 REACTOME_COLLAGEN_CHAIN_TRIMERIZATION 0.710968 0.0 [COL23A1, COL11A1, COL17A1, COL5A3, COL4A4, CO... [COL23A1:-, COL11A1:-, COL17A1:-, COL5A3:-, CO...
19 NABA_COLLAGENS 0.710968 0.0 [COL10A1, COL11A1, COL12A1, COL14A1, COL15A1, ... [COL10A1:-, COL11A1:-, COL12A1:-, COL14A1:-, C...

Visualize the drug/compounds perturabtion results

from UNAGI import perturbations
perturbations.get_top_compounds(adata, 0.5, cutoff=0.05)
compound perturbation score pval_adjusted drug_regulation idrem_suggestion
0 KI-16425 0.622860 0.000000e+00 [LPAR1:-, LPAR3:-] [LPAR1:+, LPAR3:-]
1 8-bromo-cGMP 0.593094 0.000000e+00 [PRKG1:+] [PRKG1:+]
2 1-monopalmitin,dofequidar,erythromycin-ethylsu... 0.494398 1.154632e-15 [ABCB1:-] [ABCB1:-]
3 ibudilast 0.435658 8.955820e-12 [PDE10A:-, PDE3A:-, PDE4D:+] [PDE10A:-, PDE3A:-, PDE4D:+]
4 pimobendan,amrinone,anagrelide,cilostamide,cil... 0.434252 1.113823e-11 [PDE3A:+] [PDE3A:-]
5 ODQ,YC-1,LY-83583 0.432265 1.501685e-11 [GUCY1A2:-] [GUCY1A2:+]
6 felodipine,nimodipine 0.404349 6.392553e-10 [NR3C2:+, CACNA1C:-, CACNA1D:-] [NR3C2:+, CACNA1C:-, CACNA1D:+]
7 amlodipine,cleviprex,isradipine,nifedipine,nil... 0.387505 5.314587e-09 [CACNA1C:+, CACNA1D:+] [CACNA1C:-, CACNA1D:+]
8 vandetanib 0.379303 1.446378e-08 [EGFR:-, FLT1:-, KDR:+, TEK:+] [EGFR:-, FLT1:+, KDR:+, TEK:+]
9 BMS-927711,MK-3207,telcagepant 0.376201 2.127605e-08 [CALCRL:+] [CALCRL:+]
10 MK-2461 0.352000 3.232959e-07 [FGFR2:+, FLT1:+, MET:-] [FGFR2:-, FLT1:+, MET:-]
11 AS-601245,pyrazolanthrone 0.337868 1.433529e-06 [MAPK10:-] [MAPK10:-]
12 afatinib,BMS-599626,canertinib,dacomitinib,ner... 0.317526 1.050923e-05 [EGFR:-, ERBB4:-] [EGFR:-, ERBB4:-]
13 tetraethylenepentamine 0.292696 9.500971e-05 [SOD2:+] [SOD2:+]
14 cabozantinib 0.281300 2.438229e-04 [KDR:+, KIT:+, MET:+, TEK:-] [KDR:+, KIT:-, MET:-, TEK:+]
15 benidipine,diltiazem,lacidipine,manidipine,nic... 0.272534 4.893755e-04 [CACNA1C:+] [CACNA1C:-]
16 dipyridamole 0.257740 1.454910e-03 [PDE10A:-, PDE7B:-] [PDE10A:-, PDE7B:-]
17 topiramate 0.251413 2.296113e-03 [GABRB2:-, GABRG3:-, GRIA1:+, SCN1A:-] [GABRB2:+, GABRG3:-, GRIA1:+, SCN1A:+]
18 apremilast,cilomilast,dyphylline,piclamilast,R... 0.243585 3.928745e-03 [PDE4D:-] [PDE4D:+]
19 niguldipine 0.239965 5.078949e-03 [ADRA1A:-, CACNA1C:-] [ADRA1A:+, CACNA1C:-]
20 triflusal,papaverine,PF-02545920 0.239697 5.325332e-03 [PDE10A:-] [PDE10A:-]
21 crizotinib,BMS-777607,MK-8033,PF-04217903,PHA-... 0.237142 6.430805e-03 [MET:-] [MET:-]
22 regorafenib 0.235685 7.259023e-03 [FGFR2:+, FLT1:+, FRK:+, KDR:+, KIT:+, TEK:+] [FGFR2:-, FLT1:+, FRK:-, KDR:+, KIT:-, TEK:+]
23 amuvatinib 0.234486 8.070473e-03 [KIT:-, MET:-] [KIT:-, MET:-]
24 2-iminobiotin,myricitrin,proadifen 0.232254 9.545256e-03 [NOS1:+] [NOS1:+]
25 BRD-K19059335 0.226743 1.362506e-02 [RUNX1:-] [RUNX1:-]
26 PSB-069 0.226485 1.435799e-02 [ENTPD1:+] [ENTPD1:-]
27 furosemide,piretanide,bumetanide 0.222332 1.884517e-02 [SLC12A2:-] [SLC12A2:-]
28 foretinib,golvatinib 0.213019 3.230532e-02 [KDR:+, MET:-] [KDR:+, MET:-]
29 estramustine-phosphate 0.205931 4.818654e-02 [ESR2:+, MAP2:-] [ESR2:-, MAP2:-]