다음 쿼리를 사용하여 neo4j 데이터베이스를 만들었습니다. Google CSV 파일에는 50,000 개의 행이 있습니다.neo4j 기본 데이터베이스의 성능을 향상시키는 방법
// Query1
CREATE CONSTRAINT ON (p:PR) ASSERT p.prId IS UNIQUE;
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_PRData.csv' AS line WITH line,
SPLIT(SPLIT(line.`Open Date`, ' ')[0], '/') AS opnDateList,
SPLIT(SPLIT(line.`Closed Date`, ' ')[0], '/') AS clsDateList
MERGE (prNode:PR{prId:TOINT(line.prId)})
MERGE (app:Application{appName:line.Application})
MERGE (func:Function{funName:line.Function})
MERGE (subfunc:SubFunction{subFunName:line.Subfunction})
MERGE (cat:Category{catName:line.Category})
MERGE (rel:Release{relName:line.Release})
MERGE (custNode:Customer{customerName:line.`Server Name`})
MERGE (prOpenDate:PROpenDate{openDate:SPLIT(line.`Open Date`, ' ')[0]})
SET prOpenDate.day = TOINT(opnDateList[1]),prOpenDate.month = TOINT(opnDateList[0]),prOpenDate.year = opnDateList[2]
MERGE (prClosedDate:PRClosedDate{closedDate:SPLIT(line.`Closed Date`, ' ')[0]})
SET prClosedDate.day = TOINT(clsDateList[1]),prClosedDate.month = TOINT(clsDateList[0]),prClosedDate.year = clsDateList[2]
MERGE (app)-[:PART_OF_APPLN]->(func)
MERGE (func)-[:PART_OF_FUNCTION]->(subfunc)
MERGE (subfunc)-[:PART_OF_SUBFUNCTION]->(cat)
MERGE (prNode)-[:CATEGORY]->(cat)
MERGE (prNode)-[:REPORTED_BY]->(custNode)
MERGE (prNode)-[:OPEN_ON]->(prOpenDate)
MERGE (prNode)-[:CLOSED_ON]->(prClosedDate)
MERGE (prNode)-[:REPORTED_IN]->(rel)
Query 2:
//change year for open date nodes
MERGE (q:PROpenDate) SET q.year=SPLIT(q.year,' ')[0] return q;
Query 3:
//change year for closed date nodes
MERGE (q:PRClosedDate) SET q.year=SPLIT(q.year,' ')[0] return q;
Query 4:
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_PR_WithCP.csv' AS line WITH line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (prnode:PR{prId:TOINT(SPLIT(line.prRefId, 'PR')[1])})
CREATE (prnode)-[:FIXED_BY]->(cpNode)
Query 5:
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_CPWithFilename.csv' AS line WITH line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (cpFile:FILE{fileName:line.fileName})
CREATE (cpNode)-[:CONTAINS]->(cpFile)
Query 6:
USING PERIODIC COMMIT 100
LOAD CSV WITH HEADERS FROM
'file:///2015_CPcomments.csv' AS line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (fileNode:FILE{fileName:line.fileName})
MERGE (owner:DougUser{userId:line.cpOwner})
MERGE (reviewer:DougUser{userId:line.cpReviewer})
MERGE (cpNode)-[:SUBMITTED_BY]->(owner)
WITH line WHERE line.reviewComment IS NOT NULL
MERGE (comment:ReviewComment{commentText:line.reviewComment,contextCP:line.cpId})
MERGE (comment)-[:GIVEN_BY]->(reviewer)
MERGE (comment)-[:COMMENT_FOR]->(fileNode)
그것은 neo4j에 데이터를 업로드하는 데 더 많은 시간이 걸립니다. 첫 번째 쿼리에 대해 7 시간 이상.
또한 서버에서 데이터를 가져 오는 데 더 많은 시간이 걸립니다.
MATCH (pr:PR)-[:FIXED_BY]-(cp)
MATCH (cp)-[:CONTAINS]-(file)
MATCH (pr)-[:CLOSED_ON]-(cls)
MATCH (pr)-[:REPORTED_BY]-(custs)
MATCH (pr)-[:CATEGORY]-(cats)
WHERE file.fileName STARTS WITH 'xyz' AND NOT(cls.closedDate = '')AND
apoc.date.parse(cls.closedDate,'s', 'MM/dd/yyyy') >= apoc.date.parse('01/01/2014','s', 'MM/dd/yyyy') AND apoc.date.parse(cls.closedDate,'s', 'MM/dd/yyyy') <= apoc.date.parse('06/13/2017','s', 'MM/dd/yyyy')
RETURN collect(DISTINCT custs.customerName) AS customers, collect(DISTINCT cats.catName) AS categories
위의 쿼리는 데이터를 가져 오는 데 5 분 이상 걸립니다. 이 문제를 해결하도록 도와주세요. 성능이 매우 나쁩니다.
어떤 색인 및 제약 사항이 있습니까? (브라우저의': schema') neo4j 설정을 했습니까? – logisima