-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze-broken-RVIS.pl
More file actions
executable file
·76 lines (68 loc) · 1.96 KB
/
analyze-broken-RVIS.pl
File metadata and controls
executable file
·76 lines (68 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/perl
use strict;
use ProgramName;
$|=1;
my $name=ProgramName::get();
die "$name <MIN_COUNT> <homozygotes_only:0/1> <inactivated.txt>\n" unless @ARGV==3;
my ($MIN_COUNT,$HOMOZYGOTES_ONLY,$BROKEN)=@ARGV;
my $RVIS="/home/bmajoros/intolerance/RVIS/RVIS.txt";
#my $RVIS="/home/bmajoros/intolerance/RVIS/ncRVIS-parsed.txt";
#my $BROKEN="/home/bmajoros/1000G/assembly/broken.txt";
my $NAMES="/home/bmajoros/ensembl/gene-names/combined.txt";
my %toEnsembl;
open(IN,$NAMES) || die $NAMES;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=2;
my ($ensembl,$id)=@fields;
$toEnsembl{$id}=$ensembl;
}
close(IN);
my %RVIS; # indexed by ensembl id
open(IN,$RVIS) || die $RVIS;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=3;
my ($id,$rvis,$percentile)=@fields;
my $ensembl=$toEnsembl{$id};
next unless $ensembl;
#$RVIS{$ensembl}=$rvis;
$RVIS{$ensembl}=$percentile;
}
close(IN);
my %alleles;
open(IN,$BROKEN) || die $BROKEN;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=4;
my ($indiv,$allele,$gene,$transcript)=@fields;
#next if $chr eq "chrX" || $chr eq "chrY";
if($gene=~/(\S+)\./) { $gene=$1 }
$alleles{$gene}->{$indiv}->{$allele}=1;
}
close(IN);
my %counts;
my @genes=keys %alleles;
foreach my $gene (@genes) {
my $alleles=$alleles{$gene};
my @indivs=keys %$alleles;
foreach my $indiv (@indivs) {
my $hash=$alleles->{$indiv};
my @keys=keys %$hash;
#if($HOMOZYGOTES_ONLY && @keys==2 || !$HOMOZYGOTES_ONLY && @keys>0) {
if($HOMOZYGOTES_ONLY && @keys==2 || !$HOMOZYGOTES_ONLY && @keys==1) {
my $rvis=$RVIS{$gene};
next unless defined $rvis;
++$counts{$gene};
}
}
}
open(CARSON,">carson.txt");
my @genes=keys %counts;
foreach my $gene (@genes) {
my $count=$counts{$gene};
my $rvis=$RVIS{$gene};
die unless defined $rvis;
next unless $count>=$MIN_COUNT;
next unless $rvis=~/\d/;
print "$count\t$rvis\n";
if($rvis<20) { print CARSON "$gene\t$count\t$rvis\n" }
}
close(CARSON);