-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblind-diff.pl
More file actions
executable file
·95 lines (85 loc) · 2.31 KB
/
blind-diff.pl
File metadata and controls
executable file
·95 lines (85 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
use strict;
use ProgramName;
use GffTranscriptReader;
my $name=ProgramName::get();
die "$name </path/to/individual>\n" unless @ARGV==1;
my ($path)=@ARGV;
my $BLIND_GFF="$path/RNA/blind/stringtie-blind.gff";
my $GFF="$path/RNA/stringtie.gff";
my $INPUT_GFF1="$path/1.gff";
my $INPUT_GFF2="$path/2.gff";
# Count how many ALT transcripts were provided to StringTie
my $reader=new GffTranscriptReader();
my $total=countALT($reader,$INPUT_GFF1)+countALT($reader,$INPUT_GFF2);
print "TOTAL=$total\n";
#exit;
# Load $BLIND_GFF
my %blind;
my $transcripts=$reader->loadGFF($BLIND_GFF);
my $n=@$transcripts;
for(my $i=0 ; $i<$n ; ++$i) {
my $transcript=$transcripts->[$i];
my $refID=getRefID($transcript);
#print "refID=$refID\.\n";
next if $refID=~/\S/;
my $key=hash($transcript);
#print "blind key: [$key]\n";
$blind{$key}=1;
}
# Load $GFF
my $reader=new GffTranscriptReader();
my $transcripts=$reader->loadGFF($GFF);
my $n=@$transcripts;
for(my $i=0 ; $i<$n ; ++$i) {
my $transcript=$transcripts->[$i];
my $refID=getRefID($transcript);
next unless $refID=~/ALT/;
my $key=hash($transcript);
#print "alt key: [$key]\n";
my $found=0+$blind{$key};
my $gene=$transcript->getSubstrate(); #$transcript->getGeneId();
print "$gene\t$refID\t$found\n";
}
print STDERR "[done]\n";
sub getRefID {
my ($transcript)=@_;
my $extra=$transcript->parseExtraFields();
my $hash=$transcript->hashExtraFields($extra);
my $refID=$hash->{"reference_id"};
return $refID;
}
sub hash {
my ($transcript)=@_;
my $substrate=$transcript->getSubstrate();
my $h="$substrate ";
my $exons=$transcript->getRawExons();
my $strand=$transcript->getStrand();
my $n=@$exons;
for(my $i=0 ; $i<$n ; ++$i) {
my $exon=$exons->[$i];
my $begin=$exon->getBegin(); my $end=$exon->getEnd();
if($i==0) {
if($strand eq "+") { $h.="$end " }
else { $h.="$begin " }
}
elsif($i==$n-1) {
if($strand eq "+") { $h.="$begin " }
else { $h.="$end " }
}
else { $h.="$begin\-$end " }
}
return $h;
}
sub countALT {
my ($reader,$infile)=@_;
my $transcripts=$reader->loadGFF($infile);
my $n=@$transcripts;
my $count=0;
for(my $i=0 ; $i<$n ; ++$i) {
my $transcript=$transcripts->[$i];
my $refID=$transcript->getTranscriptId();
if($refID=~/ALT/) { ++$count }
}
return $count;
}