0

텍스트 마이닝 프로젝트를 진행하고 있습니다. LDA 토픽 모델링을 수행했고, 현재 문서별 토픽 확률을 가지고 있습니다. 네트워크 그래프를 만들 수 있도록 cluster 패키지를 사용해 문서 간 유클리드 거리를 구하고 싶은데, 오류가 계속 발생합니다. 좋은 시각화 기법에 대한 추천도 언제든 환영합니다. 저는 이전에 dput() 함수를 사용하여 이 웹사이트에 재현 가능한 데이터를 올려 본 적이 없습니다. cluster 패키지를 사용하여 유클리드 거리를 계산할 때 오류가 발생했습니다.

# Load the cluster package, which provides daisy().
library(cluster) 
# Attempt to build the full pairwise dissimilarity matrix using standardized
# Euclidean distance. This is the call that raises the error shown below:
# EUTopicNetworks still contains its character column Filename (column 1).
FundDist <- as.matrix(daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE)) 

Error in daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE) : invalid type character for column numbers 1 
In addition: Warning messages: 
1: In data.matrix(x) : NAs introduced by coercion 
2: In daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE) : 
with mixed variables, metric "gower" is used automatically 
3: In min(x) : no non-missing arguments to min; returning Inf 
4: In max(x) : no non-missing arguments to max; returning -Inf 

그래서 제가 제대로 올린 것이기를 바랍니다. 아래에 출력 결과를 복사하여 붙여 넣었습니다. 제 질문을 읽어 주셔서 감사합니다.

# Reproducible example data in dput() form: a data.frame with 166 rows.
# Columns: Filename (character, one document name per row) and four numeric
# columns Funds, Biotech, Transfers, Collab (presumably the LDA topic
# probabilities described in the question -- confirm).
# NOTE: Filename is not unique ("bretagne_07.txt" and "bretagne2014-2020.txt"
# each appear twice), which matters if it is ever used as row names.
EUTopicNetworks <- structure(list(Filename = c("AT_Burenland_2007.txt", "AT21_Kaernten_07.txt", 
"AT12_LowerAustria_07_13.txt", "AT_Nat_2007.txt", "AT34_Salzburg_07.txt", 
"AT22_Steiermark_07.txt", "AT36_Tirol_07.txt", "UpperAustria2007.txt", 
"AT13_Vienna_07.txt", "vorarlberg2007.txt", "AT_Austria_1.txt", 
"AT11_Burgenland_1", "lowe austria 2014.txt", "AT13_Vienna2_14.txt", 
"AT21_Kaernten_14.txt", "AT22_Steiermark_14.txt", "AT31_UpperAustria_14.txt", 
"AT35_Salzburg_14.txt", "AT36_Tirol_14.txt", "AT37_Vorarlberg_14.txt", 
"abbruzzo2007-2013.txt", "calabria2007-2013.txt", "campania2007-2013.txt", 
"emiliaromagna2007-2013.txt", "sicily2007.txtt", "friuli2007-2013.txt", 
"lazio2007-2013.txt", "liguria2007.txt", "lombardy2007-2013.txt", 
"piemonte2007-2013.txt", "puglia2007-2013.txt", "sardinia2007-2013.txt", 
"Bolzano_07.txt", "umbria 2007-2013.txt", "valledaosta 2007-2013.txt", 
"tuscany2007.txt", "VENETO2007-2013.txt", "abruzzo2014-2020.txt", 
"basilicata2014-2020.txt", "calabria2014-2020.txt", "campania2014-2020.txt", 
"emiliaromagna2014-2020.txt", "sicily2014.txt", "friuli2014-2020.txt", 
"lazio2014.txt", "liguria2014.txt", "lombardia2014-2020.txt", 
"piemonte2014-2020.txt", "puglia_14.txt", "sardinia2014.txt", 
"Bolzano_14.txt", "umbria2014.txt", "valledaosta 2014-2020.txt", 
"tuscany2014.txt", "molise_14.txt", "molise_07.txt", "trento2007.txt", 
"trento2014.txt", "ITALIANSTRATEGICPLAN2007-2013.txt", "italyinnovationstrategy2014-2020.txt", 
"veneto2014-2020.txt", "aquitanie2014-2020.txt", "aquitanie2007.txt", 
"auvergne2014-2020.txt", "auvergne_07.txt", "bretagne2014-2020.txt", 
"bretagne_07.txt", "centre2014-2020.txt", "centre2007.txt", "champagne-ardenne 2007.txt", 
"champagne-ardenne 2014.txt", "PICARDIE2007.txt", "picardie2014.txt", 
"bassenormandie 2007.txt", "bassenormandie 2014.txt", "bourgogne2014.txt", 
"bourgogne_07.txt", "midi-pyrenees2007.txt", "midipyrennes14.txt", 
"franche-comte2014-2020.txt", "franche-comte_2007.txt", "hautenormandie2007.txt", 
"hautenormandie2014-2020.txt", "limousine2014-2020.txt", "limousine2007.txt", 
"loire2007.txt", "loire2014-2020.txt", "lorraine2014-2020.txt", 
"lorraine2007.txt", "nordpasdecalais2007.txt", "nordpasdecalais2014-2020.txt", 
"rhonealpes2014-2020.txt", "rhone-alpes2007.txt", "poitou-charenter2007.txt", 
"poituou-charentes2014.txt", "corse2007.txt", "corsica.txt", 
"bretagne_07.txt", "bretagne2014-2020.txt", "Baden-Wu_07.txt", 
"Baden-wu14.txt", "bavaria2007.txt", "BAVARIA_14.txt", "BERLIN2014-2020.txt", 
"Berlin_07.txt", "bradenburgh2014.txt", "Bradenburgh2007.txt", 
"bremen2007.txt", "bremen2014.txt", "hamburg_07.txt", "HAMBURGO2014-2020.txt", 
"Hessen_07.txt", "Hessian1.txt", "LowerSaxony2_07.txt", "LOWERSAXONY2014-2020.txt", 
"Mecklenburg_07.txt", "MECKPOMM2014-2020.txt", "rheinland2014-2020.txt", 
"RhinelanPlatz_07.txt", "saarland2014-2020.txt", "saarland_07.txt", 
"sachsen-anhalt2014-2020.txt", "sachsen-anhelt2007.txt", "saxony_07.txt", 
"saxony_14.txt", "Schleswig-Holstein2020.txt", "Schleswig-Holstein_07.txt", 
"thuringia2007.txt", "THURINGIA2014-2020.txt", "Andalucia_2007-2013.txt", 
"Andalusia_14.txt", "Aragon_14.txt", "Aragon_2007.txt", "Asturias_2007.txt", 
"ES12_Asturias.txt", "Baleares_2007.txt", "Balears_14.txt", "Canarias_07.txt", 
"Canaries_14.txt", "Cantabria_2007.txt", "ES13_Cantabria_14.txt", 
"Castillala_Mancha_2007.txt", "ES42_Castilla-la_mancha.txt", 
"CastillayLeon_dic_2007.txt", "ES41_Castilla-Leon.txt", "ES51_Catalonia_14.txt", 
"catalonia2007.txt", "Madrid_2007-13.txt", "Madrid_14.txt", "Murcia_14.txt", 
"murcia2007.txt", "Valencia_14.txt", "Valenciana_2007.txt", "laRioja2007.txt", 
"CombiEngland_07.txt", "EastWales_07.txt", "NorthernIreland_07.txt", 
"Scotland_07.txt", "WestWales_07.txt", "EastWales_14.txt", "England_14.txt", 
"Northern_Ireland14.txt", "Scotland14.txt", "Westwales_14.txt", 
"malta2007-2013.2.txt", "malta2014-2020.txt2.txt"), Funds = c(0.028649302, 
0.036198106, 0.041060412, 0.036543709, 0.047044295, 0.01659907, 
0.019221094, 0.056763265, 0.052615278, 0.045216842, 0.048176521, 
0.038976137, 0.027341846, 0.037721688, 0.049252945, 0.05918185, 
0.05440539, 0.017412537, 0.029307636, 0.022385126, 0.019737738, 
0.027626844, 0.0334503, 0.043976555, 0.042856083, 0.021046234, 
0.018061427, 0.014983543, 0.067145641, 0.019741648, 0.019018285, 
0.030614714, 0.019666862, 0.028158874, 0.026009936, 0.019330949, 
0.023088856, 0.044273539, 0.021168401, 0.017627883, 0.030486684, 
0.017509486, 0.034035728, 0.034106673, 0.043486846, 0.029087254, 
0.050564915, 0.047219925, 0.051437475, 0.029694445, 0.008588781, 
0.045469371, 0.060967658, 0.049260664, 0.015106536, 0.026186649, 
0.023254401, 0.053579943, 0.031056644, 0.045125396, 0.057680642, 
0.01125217, 0.042532521, 0.041545015, 0.047940862, 0.036641552, 
0.072252939, 0.035679102, 0.067488953, 0.008492444, 0.021052205, 
0.020152732, 0.040564092, 0.02921307, 0.018565646, 0.022775302, 
0.011711217, 0.019967731, 0.00877454, 0.022250866, 0.003696986, 
0.011277284, 0.007740289, 0.02790784, 0.008134596, 0.014931457, 
0.03269353, 0.041386999, 0.066164327, 0.011440048, 0.006215758, 
0.010688796, 0.003811851, 0.003303556, 0.023094521, 0.010550119, 
0.018023822, 0.022757839, 0.017667203, 0.02073341, 0.013537221, 
0.011950717, 0.009010298, 0.019796088, 0.011314152, 0.01098032, 
0.008832217, 0.040330019, 0.005822583, 0.006599734, 0.016338338, 
0.013906508, 0.010973094, 0.010448791, 0.003723683, 0.013769165, 
0.007583811, 0.009724543, 0.00237987, 0.005005899, 0.005048481, 
0.013000829, 0.012671508, 0.003054379, 0.03508621, 0.012981055, 
0.021982606, 0.009448894, 0.014883524, 0.018772709, 0.006068872, 
0.018122102, 0.020449118, 0.015102835, 0.005449833, 0.011014679, 
0.016602374, 0.006482356, 0.009969209, 0.002646448, 0.01205523, 
0.04659564, 0.010866707, 0.0144986, 0.046946229, 0.028629168, 
0.034634807, 0.059078927, 0.002919951, 0.016168915, 0.024403654, 
0.09171777, 0.009978063, 0.015196456, 0.015174811, 0.047399696, 
0.015303701, 0.011753077, 0.014862118, 0.01487099, 0.011742448, 
0.018346786, 0.010785336, 0.010421162, 0.013791872, 0.026389358 
), Biotech = c(0.024814541, 0.005668351, 0.017716491, 0.00853945, 
0.015916015, 0.03888657, 0.001333459, 0.017368849, 0.023781704, 
0.051278428, 0.005484117, 0.021759003, 0.027973849, 0.002774256, 
0.005744201, 0.004244159, 0.00468969, 0.000581776, 0.022734494, 
0.03445351, 0.000800523, 0.000362683, 0.026945766, 0.006823146, 
0.005847249, 0.000630851, 0.020794353, 0.035979974, 0.006165474, 
0.027793267, 0.00504312, 0.018927097, 0.000760576, 0.012289583, 
0.002109001, 0.000442817, 0.000594334, 0.00037428, 0.06596126, 
0.027988907, 0.019067461, 0.024872467, 0.015379713, 0.015295277, 
9.36e-05, 0.000117979, 4e-05, 0.031220784, 0.001357913, 0.040951957, 
0.000438858, 0.038880733, 0.00115553, 0.041152387, 0.042576251, 
0.002254845, 0.022345729, 0.002596388, 0.022562024, 0.000243528, 
0.000885187, 0.013339204, 0.001418329, 0.028089687, 0.002057198, 
0.000244579, 0.000140129, 0.051721762, 0.014989271, 0.001673642, 
0.04500578, 0.001615416, 0.00010688, 8.18e-05, 0.000526549, 0.024849247, 
0.032961749, 0.033875354, 0.032145136, 0.012619383, 0.003522134, 
0.012225185, 0.043464039, 0.077400519, 0.056308327, 0.020638077, 
0.049992043, 0.038864222, 0.039459316, 0.034937031, 0.037406742, 
0.029987413, 0.002413193, 0.000584526, 0.004584848, 0.012491496, 
0.031710331, 0.017858395, 0.030812232, 0.003435739, 0.02648106, 
0.006927007, 0.030785802, 0.044329986, 0.009838859, 0.002951219, 
0.030722621, 0.020511401, 0.013623405, 0.081263322, 0.029623712, 
0.003790876, 0.00335598, 0.018842609, 0.008430911, 0.032611226, 
0.057455638, 0.004304486, 0.015733474, 0.043981231, 7.95e-05, 
0.004054158, 0.045173701, 0.016378658, 0.015906368, 2.92e-05, 
0.00057313, 0.00079682, 0.013209159, 0.039911915, 0.000237856, 
0.022373161, 0.015821272, 0.026750309, 0.048698356, 0.041430357, 
0.00287091, 0.007965338, 0.034481633, 0.001543219, 0.022152119, 
0.041801127, 0.017463336, 0.038010604, 0.050393079, 0.045031199, 
0.043613378, 0.037411148, 0.00186188, 0.018962051, 0.043254408, 
0.018666636, 0.027696462, 0.024293257, 0.062711642, 0.000519461, 
0.001056595, 0.031300324, 0.024742217, 0.024718682, 0.000780182, 
0.01862668, 0.000973041, 0.000542227, 0.001011475, 0.011077226 
), Transfers = c(0.00473547, 0.00038783, 0.000424567, 0.000695775, 
0.000135175, 0.010334213, 0.000106781, 0.003008423, 0.000608193, 
0.010326284, 0.000934925, 0.031277279, 0.00572826, 0.000260722, 
0.001021529, 0.000154104, 0.000220061, 4.32e-05, 0.018335222, 
0.013011634, 2.49e-05, 4.83e-05, 0.021935677, 0.000390414, 0.000130749, 
3.77e-05, 0.009460382, 0.146681735, 7.44e-05, 0.082389135, 0.000592343, 
0.000562132, 1.53e-05, 0.020403948, 1.31e-05, 2.46e-05, 5.51e-05, 
0.000321357, 0.037377138, 0.006516009, 0.022055996, 0.041838049, 
0.002549792, 0.00271147, 8.55e-05, 0.001550897, 0.001094715, 
0.002059784, 2.73e-05, 0.012813067, 9.84e-06, 0.009924993, 8.74e-05, 
0.004619721, 0.013069859, 2.14e-05, 0.053722696, 5.79e-05, 0.006753522, 
1.18e-05, 0.005116721, 0.000108002, 2.73e-05, 0.003596542, 2.79e-05, 
0.00438903, 8.31e-05, 0.026310482, 0.001005592, 0.000428282, 
0.049529581, 1.93e-05, 8.57e-05, 0.001610554, 9.92e-06, 0.094923027, 
0.031919217, 0.13955002, 0.083229087, 0.000284159, 0.000267466, 
0.000349366, 0.056697448, 0.049064161, 0.075636951, 0.004204928, 
0.006115066, 0.007264789, 0.002044115, 0.043477142, 0.046506897, 
0.082070827, 0.00035585, 0.010126049, 0.000178782, 0.000133394, 
0.019258021, 9.19e-05, 0.069771158, 0.164961859, 0.030302868, 
0.008376654, 0.095394069, 0.069931231, 0.000553351, 0.000544636, 
0.095332857, 0.001748097, 0.000288915, 0.049584358, 0.095331287, 
0.000598831, 0.001574565, 0.124263691, 3.34e-05, 0.107925558, 
0.087354139, 0.000618826, 0.000110399, 0.035831715, 5.52e-06, 
0.003000538, 0.076722556, 0.001625612, 0.00057855, 2.15e-05, 
6.78e-05, 0.000268523, 0.000567245, 0.04113056, 1.71e-05, 0.03401376, 
0.001848523, 0.029357767, 0.078771496, 0.05552954, 0.068487283, 
0.001617493, 0.045003856, 0.000170027, 0.102169304, 0.033286348, 
0.000645582, 0.123061518, 0.024437451, 0.002628661, 0.013120533, 
0.002000205, 0.000545963, 0.103891281, 0.01547252, 0.004918401, 
0.032767954, 0.084638687, 0.093356166, 0.000156201, 0.000752217, 
0.109659324, 0.208642497, 0.208474925, 0.000404265, 0.078084401, 
0.000538784, 0.012066067, 0.018067282, 0.000205862), Collab = c(0.030001488, 
0.036707564, 0.01458121, 0.026231048, 0.018525526, 0.011553297, 
0.058634057, 0.001686141, 0.001348074, 0.006757227, 0.013508918, 
0.003715637, 0.002921306, 0.009278328, 0.004626478, 0.002879119, 
0.055770088, 0.095661212, 0.017193222, 0.004260887, 0.0994825, 
0.094794299, 0.00236101, 0.05708391, 0.070789976, 0.093534164, 
0.001109712, 0.009766358, 0.033402635, 0.011669702, 0.06682796, 
0.001608723, 0.076258585, 0.0177607, 0.081032098, 0.094412392, 
0.105163053, 0.000130001, 0.000308904, 0.000673957, 0.000108183, 
0.006185235, 0.001417778, 0.001392482, 0.001763266, 4.19e-05, 
0.000316372, 0.000538187, 0.057255911, 0.000888558, 0.117687659, 
0.002003037, 0.068194122, 0.000653657, 0.000152612, 0.089555908, 
0.002829031, 0.032391752, 0.000114824, 0.001213285, 0.000386851, 
0.015705495, 0.049863754, 0.000186015, 0.036288112, 0.000121075, 
0.001514642, 0.00150885, 0.000594681, 0.139375952, 0.002323917, 
0.075647519, 0.002870689, 3.77e-05, 0.077144908, 0.026437255, 
0.000115174, 0.00227099, 0.004700389, 0.041492391, 0.122675327, 
0.020817113, 6.89e-05, 0.000303617, 0.000137477, 0.001432608, 
0.000184365, 0.001050974, 0.000709209, 0.000270104, 0.000303001, 
0.018320147, 0.099247105, 0.082998488, 0.000888759, 0.016183068, 
0.006294048, 0.002853816, 0.019514895, 0.038458183, 0.002923949, 
0.106293548, 0.011739459, 0.000128574, 0.007004556, 0.114129525, 
0.012154148, 0.00942754, 0.009594396, 1.79e-05, 0.003734627, 
8.05e-06, 0.119908919, 0.018081544, 0.075305864, 0.008538072, 
0.000172614, 0.011539718, 0.001156176, 2.3e-05, 0.06492041, 0.12754611, 
0.00024379, 0.006267908, 0.00306844, 0.001193837, 0.013286424, 
0.113241894, 0.00550093, 0.000513184, 0.164987722, 0.008430982, 
0.01127053, 0.00073653, 0.000330426, 0.002238095, 0.104762755, 
0.010050252, 0.000469937, 0.145991698, 0.016278919, 0.000640692, 
0.005282822, 0.005445685, 0.00014593, 0.000589578, 0.003085291, 
0.003763146, 0.118843056, 0.019891671, 0.007112815, 0.004553507, 
0.014161345, 0.011043344, 1.65e-05, 0.05419503, 0.107074967, 
0.01952576, 0.015831838, 0.015618949, 0.133629759, 0.016718132, 
0.120940954, 0.072855599, 0.066799617, 0.006925232)), .Names = c("Filename", 
"Funds", "Biotech", "Transfers", "Collab"), class = "data.frame", row.names = c(NA, 
-166L)) 
+0

이 코드를 실행해 보면 제가 댓글에서 제기한 질문에 스스로 답할 수 있겠지만, 어쨌든 문자형 변수로는 유클리드 거리를 계산할 수 없습니다. – Cath

+0

감사합니다! 그리고 제 출력 결과를 정리해 주셔서 감사합니다. – Ricardo

답변

0

위 댓글에서 언급된 대로 문제는 텍스트(문자형) 열과 관련이 있습니다. 해당 열을 제거하면 정상적으로 동작합니다:

# Load the package that supplies daisy() before it is needed.
library(cluster)

# Discard the first column (the character Filename column) so that only the
# numeric columns remain, then show the class of the result.
EUTopicNetworks2 <- EUTopicNetworks[-1]
class(EUTopicNetworks2)

# Standardized Euclidean dissimilarities, expanded into a full square matrix.
FundDist <- as.matrix(
  daisy(x = EUTopicNetworks2, metric = "euclidean", stand = TRUE)
)
+0

시간을 내어 도와주셔서 대단히 감사합니다. 정말 고맙게 생각합니다. 잘 동작하는 것을 확인해서 기쁩니다 :). 텍스트 파일과 관련해서, 거리를 계산할 때 그 출력을 각 텍스트 파일과 연결하는 가장 좋은 방법은 무엇입니까? – Ricardo

0

기본 `R`의 `dist`로도 유클리드 거리를 계산할 수 있을 것 같습니다.

# Keep the file names as row labels so that each row/column of the distance
# matrix can be traced back to its document, then drop the text column.
# Filename contains repeated values (e.g. "bretagne_07.txt" occurs twice) and
# `row.names<-` rejects duplicates, so the labels are made unique first
# (repeats get a numeric suffix such as ".1").
row.names(EUTopicNetworks) <- make.unique(as.character(EUTopicNetworks[, 1]))
# drop = FALSE keeps a data.frame even if only one numeric column remains.
EUTopicNetworks <- EUTopicNetworks[, -1, drop = FALSE]

library(cluster)
# stand = TRUE standardizes the columns before the Euclidean dissimilarities
# are computed; as.matrix() expands the result into a full square matrix
# labelled with the row names assigned above.
FundDist <- as.matrix(
  daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE)
)
관련 문제