2012-11-14 4 views
1

공통 열 이름을 기준으로 R에서 병합하려는 두 개의 데이터 프레임이 다음과 같이 있습니다.

dput(x)

structure(list(ICTO = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "ICTO-6335", class = "factor"), Application = structure(c(5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("AUS-PSOFT", 
"DBA-GL-ORA-PRD", "JPN-PSOFT", "LDN-PSOFT", "LNBCV_GL", "NYBCV_GL", 
"NYK-PSOFT", "SGBCV_GL", "SNG-PSOFT", "02-PEOPLESOFT", "11-SLR-PROC", 
"AP-CIT-BATCH-STATUS", "FCIT-GARS", "GBL-EXPENSE", "GLAD", "HRDMART-MAINT", 
"MISC-PSOFT", "NYK-LATE", "NYK-WKND", "REP_PSOFT"), class = "factor"), 
    Group = structure(c(58L, 58L, 58L, 58L, 58L, 58L, 58L, 58L, 
    58L, 58L), .Label = c("AUS-AP", "AUS-CHF", "AUS-CHK", "AUS-DATE", 
    "AUS-DE", "AUS-DST", "AUS-ESS", "AUS-GL", "AUS-GLI", "AUS-GLR", 
    "AUS-LATE", "AUS-SL", "AUS-SLI", "AUS-SLR", "AUS-SM", "AUS-SMR", 
    "JPN-AM", "JPN-AP", "JPN-CHF", "JPN-CHK", "JPN-DE", "JPN-GL", 
    "JPN-GLI", "JPN-GLR", "JPN-SL", "JPN-SLI", "JPN-SLR", "LDN-AP", 
    "LDN-CHF", "LDN-ESS", "LDN-GBM", "LDN-GL", "LDN-GL-BUD", 
    "LDN-GL-CPM", "LDN-GL-CPM-FULL", "LDN-GL-EIT", "LDN-GL-ITR", 
    "LDN-GLR", "LDN-PSOFT", "LDN-SMR", "NYK-AM", "NYK-AP", "NYK-BO", 
    "NYK-BRANCH", "NYK-CHF", "NYK-ESS", "NYK-GBM", "NYK-GL", 
    "NYK-GL-BUD", "NYK-GL-BUD-HC", "NYK-GL-FOR", "NYK-GLR", "NYK-SM", 
    "NYK-SMR", "PDCGL06", "PDCGL30", "PNYPSGL1", "RFCS", "SGP-GLR", 
    "SNG-AM", "SNG-AP", "SNG-BOK", "SNG-CHF", "SNG-CHK", "SNG-DE", 
    "SNG-GBM", "SNG-GL", "SNG-GL-BUD", "SNG-GLI", "SNG-GLR", 
    "SNG-MAS", "SNG-SHB", "SNG-SL", "SNG-SLI", "SNG-SLR", "SNG-SM", 
    "SNG-SMR", "TIS", "LNBCV", "NYBCV", "NYK-WKND-MAINT", "RECYCLE_APPSERV", 
    "RECYCLE_WEBSERV", "SGBCV", "02-REP-PEOPLESOFT", "11-001-HOUSEKEEP", 
    "11-001-RCL-CHK", "11-SLR-PROC-AU", "11-SLR-PROC-HK", "11-SLR-PROC-IN", 
    "11-SLR-PROC-INT", "11-SLR-PROC-JL", "11-SLR-PROC-KR", "11-SLR-PROC-SG", 
    "11-SLR-REG-RPT", "AUS", "BREAK-GLASS", "CLOAKWARE", "CONV", 
    "EMAIL-ALERT-MONITOR", "FCIT-GLI-GARS", "GLAD-AUS", "GLAD-LON", 
    "GLAD-NYK", "HKG", "HRDMART-MON", "JPN", "LDN", "LedgerLastFeed", 
    "LON_PEOPLESOFT", "NYK", "NYK-LATE", "RECYCLE_PRCSSKED", 
    "SGP", "SGS60A-080", "SPD", "SYNCH-PROD-DR"), class = "factor"), 
    JobName = c("EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", 
    "EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", "EXBCV06D", 
    "EXBCV06D"), Date = c(120820L, 120817L, 120816L, 120815L, 
    120814L, 120813L, 120810L, 120809L, 120808L, 120807L), Status = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Ended Not OK", 
    "Ended OK", "Executing", "Wait Condition", "Wait Resource" 
    ), class = "factor"), StartTime = c(20120821015845, 20120819024725, 
    20120817010722, 20120816010512, 20120815013233, 20120814005343, 
    20120811004005, 20120810004613, 20120809012701, 20120808005116 
    ), EndTime = c(20120821015854, 20120819024734, 20120817010733, 
    20120816010521, 20120815013242, 20120814005354, 20120811004015, 
    20120810004623, 20120809012710, 20120808005126), ElapseSecond = c(9L, 
    9L, 11L, 9L, 9L, 11L, 10L, 10L, 9L, 10L)), .Names = c("ICTO", 
"Application", "Group", "JobName", "Date", "Status", "StartTime", 
"EndTime", "ElapseSecond"), row.names = 2689:2698, class = "data.frame") 

dput(y)

structure(list(JobName = c("XAPSJCDC0D", "XHPSJCD0HD", "XSPSJCD03D", 
"EXBCV06D", "EXESS120D", "EXGL008D", "EXGL027D", "EXGL028D", 
"EXGL035D", "EXGL042S"), EntryDesc = structure(c(59L, 60L, 61L, 
64L, 53L, 71L, 37L, 70L, 35L, 41L), .Label = c("AFINA FEED", 
"Arrival of All Australia Feeds", "Arrival of All Japan Feeds", 
"Arrival of All Singapore Feeds", "Arrival of Endur Feed", "Basel II Balance Sheet Extract - Pacific", 
"Billing Manager Feed", "BOK Reg Reports Availability", "CD GL Balance Extract J11 AYE to CARAT", 
"CD GL Balance Extract SGP to CARAT", "CD Taiwan GL Extract to SYSTEX", 
"CIF Affiliate Feed", "End of Endur Feed Processing", "End of Spectal BDLite Feed Processing", 
"FTP Carat LCYBS Daily Extract", "FX Shredder Currency upload", 
"GFX FXOps Interface", "GL Balance Extract A48 to CARAT", "GLOBAL MONEY MARKET FEED", 
"Glosub interface", "GMI Feed Load", "Inspire Journal Feed", 
"Intellimatch Feed Sent", "Intellimatch Feed Sent - Australia", 
"Intellimatch Feed Sent - Japan", "Intellimatch Feed Sent - Singapore", 
"Ledger Available - Australia", "Ledger Available - HK/KR/SG", 
"Ledger Available - Japan", "Load GERS Feed", "LOAD GERS FEED", 
"Load of the VATSET Feed file to staging", "Loan IQ feed", "MAS MERP Reports Availability", 
"MONTHLY SUMMARISED JOURNAL FEED", "MyHR feed for HRMS and HR4U (prev. Headcount feed)", 
"NTPA-LOAD TO STAGING USD CCY", "NY NTPA Journal Feed", "OLD WORLD 80 ps_tipsj", 
"OLD WORLD 80 ps_tipzs", "OPC IT - Arrival of GMI Feed", "Opera Exchange Rate Extract - AUS", 
"Opera Exchange Rate Extract - SNG", "PCIT - Arrival of Spectral Feed", 
"Peoplesoft - Basel II Balance Sheet Extract - NY", "Peoplesoft - BDLite Extract", 
"Peoplesoft - End of GMI Feed Processing", "Peoplesoft - End of NTPA GLI Feed Processing", 
"Peoplesoft - FSR fcdb transactions delivered", "Peoplesoft - FSR fclonae delivered", 
"Peoplesoft - FSR gmmbal delivered", "Peoplesoft - FTP Phase II Completion", 
"PeopleSoft - FTP Phase II Completion", "Peoplesoft - FX Rates feed to Opera", 
"Peoplesoft - GL Extract feed for Hong Kong to CARAT", "Peoplesoft - GL Extract feed for India to CARAT", 
"Peoplesoft - GL Extract for Korea Branch to CARAT", "Peoplesoft - NYK Alt YE Close", 
"Poets-GLI Feed to Peoplesoft For AUS", "Poets-GLI Feed to Peoplesoft For HKG", 
"Poets-GLI Feed to Peoplesoft For SNG", "PROCESS INPUT FEED FROM FEPS GE", 
"Project Accounting upload", "Reporting Server Available", "Run application engine to process Endur feed.", 
"SL Period Balance Extract for T15 FTP", "SL YTD Balance Extract for T14 FTP", 
"SL YTD Balance Extract for T15 FTP", "SPECTRAL Feeds", "SPHERE FEED UPLOAD", 
"SUMMIT LOAD TO STAGING", "TPW Sub-ledger extract ftp to CARAT", 
"Peoplesoft - BDLite Extract ", "Peoplesoft - End of GMI Feed Processing " 
), class = "factor"), SLAType = structure(c(3L, 3L, 3L, 1L, 3L, 
3L, 3L, 3L, 3L, 2L), .Label = c("DDA", "Milestone", "OLA"), class = "factor"), 
    EntryType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = "Automated", class = "factor"), Active = structure(c(1L, 
    2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("OK", "ON" 
    ), class = "factor"), LastRun = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L), .Label = c("2012/08/01", "2012/09/06", 
    " 2012/10/08", " 2012/10/10", " 2012/10/12", " 2012/10/15" 
    ), class = "factor"), DataCenter = structure(c(2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("PNYSHCTM07", "PSGSHCTM03" 
    ), class = "factor"), ProviderReg = structure(c(2L, 2L, 2L, 
    1L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Americas IT View", 
    "Asia Pacific IT View", "EMEA IT View"), class = "factor"), 
    ProviderDiv = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("RF&CS IT", "BO IT"), class = "factor"), 
    ProviderSubDiv = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L), .Label = c("CFO IT - Product Control (KGK)", 
    "CFO IT – Financial Accounting (KGX)", "CFO IT - Financial Reporting [KGFX]", 
    "CFO IT ? Financial Accounting (KGX)"), class = "factor"), 
    ReceiverReg = structure(c(2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L), .Label = c("Americas Business View", "Asia Pacific Business View", 
    "EMEA Business View"), class = "factor"), ReceiverDiv = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Finance", 
    "Back Office"), class = "factor"), ReceiverSubDiv = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("CFO IT – Financial Accounting (KGX)", 
    "Financial Accounting", "Product Control", "CFO - Financial Reporting", 
    "CFO IT ? Financial Accounting (KGX)"), class = "factor"), 
    Service = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L), .Label = c("Accounting Reporting", "Ledger Processing", 
    "Product Control", "Regional Financial Accounting"), class = "factor"), 
    ICTO = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L 
    ), .Label = "ICTO-6335", class = "factor"), SLAHour = c(4, 
    4, 4, 8.3, 7.3, 3, 3, 3, 4, 4), TargetDate = c(-1L, -1L, 
    -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L)), .Names = c("JobName", 
"EntryDesc", "SLAType", "EntryType", "Active", "LastRun", "DataCenter", 
"ProviderReg", "ProviderDiv", "ProviderSubDiv", "ReceiverReg", 
"ReceiverDiv", "ReceiverSubDiv", "Service", "ICTO", "SLAHour", 
"TargetDate"), row.names = c(NA, 10L), class = "data.frame") 

예를 들어, 출력은 다음과 같이 보입니다:

JobName Date 
EXBCV06D 120417 
EXBCV06D 120417 
EXBCV06D 120417 
EXBCV06D 120417 

작업 이름(JobName)과 날짜(Date)의 조합마다 한 줄만 있어야 합니다:

head(subset(xx, JobName=="EXBCV06D"),10) 
     JobName  ICTO Application Group Date Status StartTime  EndTime ElapseSecond     EntryDesc SLAType SLAHour TargetDate 
35076 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120417 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35077 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120417 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35078 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120417 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35079 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120417 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35080 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120419 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35081 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120419 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35082 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120419 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35083 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120419 Ended OK 2.012042e+13 2.012042e+13   9 Reporting Server Available  DDA  8.3   -1 
35084 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120412 Ended OK 2.012041e+13 2.012041e+13   9 Reporting Server Available  DDA  8.3   -1 
35085 EXBCV06D ICTO-6335 LNBCV_GL RFCS 120412 Ended OK 2.012041e+13 2.012041e+13   9 Reporting Server Available  DDA  8.3   -1 

같은 날짜에 동일한 작업 이름이 4번씩 나타나고 있습니다. 예를 들어,

EXBCV06D 120417 

위와 같은 행이 4개씩 있어서는 안 됩니다.

이것이 올바른 방법인가요? 동일한 Date에 대해 같은 JobName이 여러 번 나오는 것 같습니다. 각 Date마다 JobName은 하나씩만 있어야 합니다.

+0

`?merge`를 보세요. `merge(x, y, by = 'ProcessName')`. 데이터가 크면 `data.table` 패키지의 `merge` 함수를 살펴볼 수 있습니다. – Justin

+0

ProcessName이 하나만 있어야 하는데 여러 개가 표시됩니다. 제가 무엇을 잘못하고 있는지 아시나요? – user1471980

+0

`dput(head(x))`와 `dput(head(y))`를 실행하고, 그 결과를 질문에 추가해 주실 수 있나요? – Justin

답변

3

`merge`를 사용하세요:

xy <- merge(x, y, by='JobName') 

그러나 x 데이터 전체에 고유한 JobName이 하나뿐이기 때문에, 이 병합은 기대와 다른 결과를 줄 것입니다. 따라서 y의 나머지 행은 모두 결과에서 제외됩니다. 이 행들을 유지하려면 `all.y=TRUE`를 추가할 수 있지만, 그러면 Date가 NA인 행이 많이 생깁니다.

`?merge`를 주의 깊게 읽고 작은 예제 데이터 세트를 만들어 보시길 권합니다. 그 데이터 세트는 지금 겪고 있는 "문제"를 재현해야 하며, 기대하는 결과도 함께 제시해야 합니다. 그것 없이는... 더 이상 도와드리기 어렵습니다.

+0

문제를 찾은 것 같습니다. 제 데이터 프레임에 동일한 JobName이 여러 개 있습니다. 데이터 프레임 y의 행이 고유해야 합니다. 데이터 프레임 y에 고유한 행만 있도록 하려면 어떻게 해야 하나요? 그러면 제 문제가 해결될 것입니다. – user1471980

+0

`unique`. `foo = data.frame(x = 1:2, y = rep(1:2, each = 4)); unique(foo)`. – Justin

+0

모두 감사합니다. 이제 이해한 것 같습니다. – user1471980

관련 문제