Prerequisites
First install MongoDB, node.js and csvkit.
For node.js, download node binary package (node-v0.10.33-linux-x64.tar.gz in my case) from its website, extract and add binary folder into $PATH. For example, add the following line into ~/.zshenv:
PATH=$HOME/apps/node-v0.10.33-linux-x64/bin:$PATH
For MongoDB, download its binary package (mongodb-linux-x86_64-2.6.5.tgz in my case) from its website, extract and add binary folder into $PATH:
PATH=$HOME/apps/mongodb-linux-x86_64-2.6.5/bin:$PATH
Install csvkit with sudo pip install csvkit
.
Preparation
-
Convert Excel (xls/xlsx) file to csv file:
in2csv data.xls > data.csv
; -
(Optional) Convert file encoding:
iconv -f gbk -t utf8 data20150218.csv > input.csv
; -
Examine data:
head input.csv | csvjson -i 4
; -
Start MongoDB server for importing data:
mongod
;
Import Data
In case you want to overwrite old data, you need backup old collection: start a mongodb client, make a copy of the old collection (the original collection is "fairs"):
$ mongo
> db.fairs.renameCollection('fairsBak')
P.S. You can make a copy of a collection with db.fairs.copyTo('fairsBak')
.
Now import new data to collection "fairs":
./importdata
Under the hood
importdata:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
|
checkTransform.js:
var data = require('./rawdata.json');
var fs = require('fs');
var result = []
data.forEach(function(elem) {
var aFair = JSON.parse( JSON.stringify(elem) );
aFair.indexStr = {};
aFair.indexStr['name'] = (elem.chnName + ' ' + elem.engName).trim();
aFair.indexStr['sponsor'] = elem.sponsor;
aFair.indexStr['undertaker'] = elem.undertaker;
aFair.indexStr['category'] = elem.category;
aFair.indexStr['simpleSearch'] = elem.chnName + ' ' + elem.engName + ' ' + elem.position + ' ' + elem.time + ' ' + elem.category;
if (elem.sponsor) {
aFair['sponsor'] = [];
elem.sponsor.split('|').forEach(function(spr) {
var aSponsor = {};
var sps = spr.split('$');
aSponsor['name'] = sps[0];
aSponsor['tel'] = sps[1];
aSponsor['fax'] = sps[2];
aSponsor['email'] = sps[3];
aSponsor['website'] = sps[4];
aFair.sponsor.push(aSponsor);
});
}
if (elem.undertaker) {
aFair['undertaker'] = [];
elem.undertaker.split('|').forEach(function(udt) {
var aUndertaker = {};
var uds = udt.split('$');
aUndertaker['name'] = uds[0];
aUndertaker['tel'] = uds[1];
aUndertaker['fax'] = uds[2];
aUndertaker['email'] = uds[3];
aUndertaker['website'] = uds[4];
aFair.undertaker.push(aUndertaker);
});
}
if (elem.category) {
aFair['category'] = [];
elem.category.split('|').forEach(function(catStr) {
var aCat = {};
var cats = catStr.split('$');
aCat['major'] = cats[0] === '?' ? '' : cats[0];
aCat['minor'] = cats.slice(1);
aFair.category.push(aCat);
});
}
result.push(aFair);
});
fs.writeFile('result.json', JSON.stringify(result));