DIR Return Create A Forum - Home
---------------------------------------------------------
nCoV_info
HTML https://ncovinfo.createaforum.com
---------------------------------------------------------
*****************************************************
DIR Return to: nCoV - discussion
*****************************************************
#Post#: 282--------------------------------------------------
covidcg.org's big file encoding Gisaid but hiding the names and
not showing the NNNNN
By: babar Date: January 17, 2021, 1:23 am
---------------------------------------------------------
Download and unzip
HTML https://storage.googleapis.com/ve-public/v1.4/data_package.json.gz
Then run the following with Node.js (on my laptop the file is
too big to load in Chrome):
[code]
var t = require('./data_package.json');
var fr_loc = {};
var c =t["geo_select_tree"].children[2].children[15].children;
// list of French locations
for (var i=0;i<c.length;i++) {
if (c[i]) {
fr_loc[c[i].location_id]=c[i].label;
for (var j=0;j<c[i].children.length;j++) {
if (c[i].children[j])
fr_loc[c[i].children[j].location_id]=c[i].label;
}
}
}
var fr_sequences=[];
for (var i = 0;i < t.case_data.length; i++) {
if (fr_loc[t.case_data[i].location_id])
fr_sequences.push(t.case_data[i]);
}
var lab_count = {};
for (var i =0;i < fr_sequences.length; i++) {
var l =
t.metadata_map.submitting_lab[fr_sequences[i].submitting_lab];
if (!lab_count[l]) lab_count[l]=0;
lab_count[l]++;
}
lab_count;
var SNP= {};
for (var n in t.metadata_map.dna_snp) {
if (t.metadata_map.dna_snp.hasOwnProperty(n)) {
SNP[t.metadata_map.dna_snp[n]]=n;
}
}
for (var i =0;i < fr_sequences.length; i++) {
var mut = [];
for (var j =0; j<fr_sequences[i].dna_snp_str.length;j++) {
mut.push(SNP[fr_sequences[i].dna_snp_str[j]]);
}
fr_sequences[i].mut = mut;
fr_sequences[i].loc = fr_loc[fr_sequences[i].location_id];
}
[/code]
Output: a table of 2753 sequences; the first one is:
[code]
> fr_sequences[0]
{ 'Accession ID': '2d58d2cd',
collection_date: '2020-02-26',
submission_date: '2020-03-14',
gender: 0,
age_start: 36,
age_end: 37,
patient_status: 0,
passage: 0,
specimen: 5,
lineage: 'A.2',
clade: 'S',
sequencing_tech: 7,
assembly_method: 23,
comment_type: -1,
authors: 39,
originating_lab: 51,
submitting_lab: 30,
dna_snp_str: [ 28245, 28855, 4217, 14839, 15498, 18708, 19358,
19677 ],
gene_aa_snp_str: [ 14838, 4890, 18663, 18297, 19910, 605 ],
protein_aa_snp_str: [ 11687, 13956, 13590, 15422, 605, 15820
],
location_id: 1315,
mut:
[ '8782|C|T',
'9477|T|A',
'14805|C|T',
'25553|C|T',
'25979|G|T',
'28144|T|C',
'28657|C|T',
'28863|C|T' ],
loc:'Grand-Est'}
[/code]
#Post#: 283--------------------------------------------------
Re: covidcg.org's big file encoding Gisaid but hiding the names
and not showing the NNNNN
By: gsgs Date: January 17, 2021, 4:49 am
---------------------------------------------------------
hi babar.
Thanks !
I could download and unzip that file.
HTML https://storage.googleapis.com/ve-public/v1.5/data_package.json.gz
231MB , 341354 lines ,
HTML https://storage.googleapis.com/ve-public/v1.6/data_package.json.gz
HTML https://storage.googleapis.com/ve-public/v1.7/data_package.json.gz
.
396 with B.1.351
Shall we use (part of) this forum to exchange sequencing info,
or make another forum?
Shall I make you an admin so you can reorganise it?
Shall we invite others?
*****************************************************