URI:
   DIR Return Create A Forum - Home
       ---------------------------------------------------------
       nCoV_info
  HTML https://ncovinfo.createaforum.com
       ---------------------------------------------------------
       *****************************************************
   DIR Return to: nCoV - discussion
       *****************************************************
       #Post#: 282--------------------------------------------------
       covidcg.org's big file encoding Gisaid but hiding the names and 
       not showing the NNNNN
       By: babar Date: January 17, 2021, 1:23 am
       ---------------------------------------------------------
       Download and unzip
  HTML https://storage.googleapis.com/ve-public/v1.4/data_package.json.gz
       Then, with Node.js (on my laptop the file is too big for
       Chrome):
       [code]
       // Load the unzipped covidcg.org data package. The whole file is parsed
       // into memory by require(), so run this in node rather than a browser.
       var t = require('./data_package.json');

       // fr_loc: location_id -> label, for the French locations.
       // NOTE(review): the indices [2] and [15] are hard-coded positions inside
       // geo_select_tree (presumably the continent and country nodes leading to
       // France in this version of the file) -- confirm them against the
       // version you downloaded, since they may shift between releases.
       var fr_loc = {};
       var c = t["geo_select_tree"].children[2].children[15].children;
       for (var i = 0; i < c.length; i++) {
         if (c[i]) {
           fr_loc[c[i].location_id] = c[i].label;
           // Sub-locations are filed under their parent's label, so every
           // sequence ends up tagged with the top-level entry of this list.
           // A node without a children array is treated as having none.
           var kids = c[i].children || [];
           for (var j = 0; j < kids.length; j++) {
             if (kids[j]) {
               fr_loc[kids[j].location_id] = c[i].label;
             }
           }
         }
       }

       // Keep only the case_data records whose location_id is in fr_loc.
       var fr_sequences = [];
       for (var i = 0; i < t.case_data.length; i++) {
         if (fr_loc[t.case_data[i].location_id]) {
           fr_sequences.push(t.case_data[i]);
         }
       }

       // Count the selected sequences per submitting lab. Each record stores
       // submitting_lab as a key into t.metadata_map.submitting_lab.
       var lab_count = {};
       for (var i = 0; i < fr_sequences.length; i++) {
         var l = t.metadata_map.submitting_lab[fr_sequences[i].submitting_lab];
         if (!lab_count[l]) {
           lab_count[l] = 0;
         }
         lab_count[l]++;
       }
       // In the node REPL this bare expression prints the per-lab counts.
       lab_count;

       // Invert t.metadata_map.dna_snp so that the numeric ids stored in each
       // record's dna_snp_str can be turned back into their string keys
       // (e.g. '8782|C|T').
       var SNP = {};
       for (var n in t.metadata_map.dna_snp) {
         if (t.metadata_map.dna_snp.hasOwnProperty(n)) {
           SNP[t.metadata_map.dna_snp[n]] = n;
         }
       }

       // Annotate every selected sequence with its decoded mutation list (mut)
       // and its location label (loc).
       for (var i = 0; i < fr_sequences.length; i++) {
         var mut = [];
         for (var j = 0; j < fr_sequences[i].dna_snp_str.length; j++) {
           mut.push(SNP[fr_sequences[i].dna_snp_str[j]]);
         }
         fr_sequences[i].mut = mut;
         fr_sequences[i].loc = fr_loc[fr_sequences[i].location_id];
       }
       [/code]
       Output: a table of 2753 sequences; the first one is:
       [code]
       > fr_sequences[0]
       { 'Accession ID': '2d58d2cd',
       collection_date: '2020-02-26',
       submission_date: '2020-03-14',
       gender: 0,
       age_start: 36,
       age_end: 37,
       patient_status: 0,
       passage: 0,
       specimen: 5,
       lineage: 'A.2',
       clade: 'S',
       sequencing_tech: 7,
       assembly_method: 23,
       comment_type: -1,
       authors: 39,
       originating_lab: 51,
       submitting_lab: 30,
       dna_snp_str: [ 28245, 28855, 4217, 14839, 15498, 18708, 19358,
       19677 ],
       gene_aa_snp_str: [ 14838, 4890, 18663, 18297, 19910, 605 ],
       protein_aa_snp_str: [ 11687, 13956, 13590, 15422, 605, 15820
       ],
       location_id: 1315,
       mut:
       [ '8782|C|T',
       '9477|T|A',
       '14805|C|T',
       '25553|C|T',
       '25979|G|T',
       '28144|T|C',
       '28657|C|T',
       '28863|C|T' ],
       loc: 'Grand-Est' }
       [/code]
       #Post#: 283--------------------------------------------------
       Re: covidcg.org's big file encoding Gisaid but hiding the names 
       and not showing the NNNNN
       By: gsgs Date: January 17, 2021, 4:49 am
       ---------------------------------------------------------
       hi babar.
       Thanks !
       I could download and unzip that file.
  HTML https://storage.googleapis.com/ve-public/v1.5/data_package.json.gz
       231MB , 341354 lines ,
  HTML https://storage.googleapis.com/ve-public/v1.6/data_package.json.gz
  HTML https://storage.googleapis.com/ve-public/v1.7/data_package.json.gz
       .
       396 with B.1.351
       shall we use (part of) this forum to exchange sequencing info ?
       or make another forum ?
       shall I make you admin for re-organisation ?
       shall we invite others ?
       *****************************************************