Read dump
⚠️
As the decompressed dump size can exceed 20 GB:
- You have to read the dump line by line rather than loading the whole file into memory.
- Reading the whole dump might take a while.
import fs from 'fs';

// Readable stream over the decompressed dump; with an encoding set, chunks
// are delivered as UTF-8 strings rather than Buffers.
let stream = fs.createReadStream(newFileName, { flags: 'r', encoding: 'utf-8' });
// Text received from the stream that has not yet been split into full lines.
let buffer = '';
// Most recently parsed record; assigned by processLine.
let hotel;
6
/**
 * Parses one line of the dump as JSON and stores the result in `hotel`.
 *
 * A single trailing carriage return (left over from CRLF line endings) is
 * removed before parsing. Lines that are empty after that are skipped.
 *
 * @param {string} line - The current processed line, without its '\n'.
 * @returns {Promise<*>} The parsed record, or undefined for an empty line.
 * @throws {SyntaxError} When a non-empty line is not valid JSON.
 */
async function processLine(line) {
  // slice/endsWith replace the deprecated String.prototype.substr and avoid
  // reassigning the parameter.
  const text = line.endsWith('\r') ? line.slice(0, -1) : line;
  if (text.length === 0) {
    return undefined;
  }
  hotel = JSON.parse(text);
  // TODO: your logic
  return hotel;
}
17
/**
 * Drains every complete line currently held in `buffer`, handing each
 * non-empty line to processLine. Text after the last '\n' stays in `buffer`
 * until more data is appended.
 *
 * @returns {Promise<void>} Resolves once no complete line is left in `buffer`.
 */
async function readLineByLine() {
  let position;
  while ((position = buffer.indexOf('\n')) >= 0) {
    const line = buffer.slice(0, position);
    // Detach the line from the buffer BEFORE awaiting: if another call starts
    // while processLine is pending, it must not find the same line again, and
    // this call must not later slice the buffer at a stale position.
    buffer = buffer.slice(position + 1);
    if (line.length > 0) {
      await processLine(line);
    }
  }
}
29
/**
 * Reads the whole dump from `stream`, processing it line by line.
 *
 * Chunks are consumed through the stream's async iterator, so the next chunk
 * is not requested until every complete line of the current one has been
 * processed, and stream errors reject the returned promise.
 *
 * @returns {Promise<void>} Resolves after the last line has been processed.
 */
async function readDump() {
  for await (const chunk of stream) {
    buffer += chunk.toString();
    await readLineByLine();
  }
  // The final line may not end with '\n'; flush whatever is left over.
  if (buffer.length > 0) {
    const lastLine = buffer;
    buffer = '';
    await processLine(lastLine);
  }
}
36
// Entry point: top-level await is only valid in an ES module.
await readDump();
# decompressed_file_name is the decompressed dump file name
def read_dump(decompressed_file_name):
    """Read the decompressed dump line by line, parsing each line as JSON.

    The file is iterated lazily, so only one line is held in memory at a
    time. Lines containing only whitespace are skipped.

    Args:
        decompressed_file_name: Path of the decompressed dump file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    import json  # local import keeps this snippet self-contained

    # Explicit UTF-8: the platform default encoding is not UTF-8 everywhere.
    with open(decompressed_file_name, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            hotel = json.loads(line)
            # TODO: your logic
9
10
if __name__ == "__main__":
    # NOTE(review): decompressed_dump_name is not defined in this snippet;
    # set it to the path of the decompressed dump before running.
    read_dump(decompressed_dump_name)