Olá pessoal!
Eu precisava fazer o scraping desse site para utilizar na minha tese.
A ideia seria modificar os valores de data e coordenadas e pegar os outputs gerados.
Eu até comecei, mas não tenho muita experiência com web scraping e requisições POST.
library(httr)
# Address of the solar spectrum calculator page. The same URL is used for the
# initial GET and for the form POST further down.
url <- "https://www2.pvlighthouse.com.au/calculators/solar%20spectrum%20calculator/solar%20spectrum%20calculator.aspx"

# Fetch the page once before posting to it.
page_initial <- GET(url)

# Fail fast on an HTTP error (4xx/5xx) instead of silently parsing an error
# page as if it were the calculator form.
stop_for_status(page_initial)

# Parsed body of the initial response; httr picks the parser from the
# response's content type.
content_initial <- httr::content(page_initial)
# Form fields sent with the POST request, as a named list of strings
# (field name -> value). The list is assembled from thematic pieces purely for
# readability; the resulting element order matches the original flat list.
#
# NOTE(review): decimal values below use a comma as the decimal separator
# (e.g. "-34,2833"). They are kept exactly as captured; confirm that the
# server accepts this format rather than a decimal point.
data_form <- c(
  # Script-manager field: "<update panel>|<control that triggered the post>".
  list(
    "smMain" = "TabContainer1$TabPanel1$UpdatePanel1|TabContainer1$TabPanel1$tbYear"
  ),
  # Date and time.
  list(
    "TabContainer1$TabPanel1$tbYear" = "2023",
    "TabContainer1$TabPanel1$tbHour" = "12",
    "TabContainer1$TabPanel1$tbMonth" = "1",
    "TabContainer1$TabPanel1$tbMinute" = "0",
    "TabContainer1$TabPanel1$tbDay" = "19",
    "TabContainer1$TabPanel1$tbSecond" = "0"
  ),
  # Location and module orientation.
  list(
    "TabContainer1$TabPanel1$tbLatitude" = "-34,2833",
    "TabContainer1$TabPanel1$tbLongitude" = "150,95",
    "TabContainer1$TabPanel1$tbModuleTiltAngle" = "0",
    "TabContainer1$TabPanel1$tbModuleAzimuthAngle" = "0"
  ),
  # Spectrum source and atmospheric inputs.
  list(
    "TabContainer1$TabPanel1$ddlSpectrumSource" = "AM0",
    "TabContainer1$TabPanel1$ddlAtmosphericTransmissionModel" = "SPCTRAL2 [Bir86]",
    "TabContainer1$TabPanel1$tbAtmosphericPressure" = "1013,25",
    "TabContainer1$TabPanel1$tbTurbidity" = "0,084",
    "TabContainer1$TabPanel1$tbPrecipitableWaterVapour" = "1,4164",
    "TabContainer1$TabPanel1$tbOzone" = "0,3438",
    "TabContainer1$TabPanel1$tbAlbedo" = "0,1"
  ),
  # Panel state and figure settings.
  list(
    "TabContainer1$TabPanel1$cpeInsolation_ClientState" = "false",
    "TabContainer1$TabPanel1$cpeFigure_ClientState" = "false",
    "TabContainer1$TabPanel1$ddlFigureYaxis" = "Spectral irradiance",
    "TabContainer1$TabPanel1$ddlSelectedPlane" = "Module",
    "TabContainer1$TabPanel1$ddlFigureXaxis" = "Wavelength",
    "TabContainer1$TabPanel1$cbPlotExtraterrestrialSpectrum" = "on",
    "TabContainer1$TabPanel1$cbPlotDirect" = "on",
    "TabContainer1$TabPanel1$cbPlotDiffuse" = "on",
    "TabContainer1$TabPanel1$cbPlotGlobal" = "on"
  ),
  # "Options" tab: solar position, algorithm and wavelength limits.
  list(
    "TabContainer1$tabOptions$ddlSolarPosition" = "Enter module location and time",
    "TabContainer1$tabOptions$ddlSolarVectorAlgorithm" = "PSA algorithm [Bla01]",
    "TabContainer1$tabOptions$ddlSpectrumManipulation" = "Impose wavelength limits",
    "TabContainer1$tabOptions$tbOptionWavelengthMin" = "280",
    "TabContainer1$tabOptions$tbOptionWavelengthMax" = "4000",
    "TabContainer1$tabOptions$tbOptionWavelengthInterval" = "10"
  ),
  # Export tab and feedback box (sent empty).
  list(
    "TabContainer1$TabPanel2$ddlOptionsFileType" = "CSV US/UK (comma delimited)",
    "TabContainer1$TabPanel2$tbStoreXData" = "",
    "TabContainer1$TabPanel2$tbStoreYData" = "",
    "tbFeedback" = ""
  )
)
# The target appears to be an ASP.NET WebForms page (.aspx URL, "$"-separated
# control names). Such pages normally expect each postback to echo the hidden
# server-state inputs they rendered (e.g. __VIEWSTATE, __EVENTVALIDATION).
# The original request omitted them, so the server could not treat the POST as
# a valid postback of the form. Recover them from the initial GET response.
initial_html <- httr::content(page_initial, as = "text", encoding = "UTF-8")

# All <input type="hidden" ...> tags found in the initial page.
hidden_tags <- regmatches(
  initial_html,
  gregexpr(
    "<input\\b[^>]*\\btype=\"hidden\"[^>]*>",
    initial_html,
    ignore.case = TRUE,
    perl = TRUE
  )
)[[1]]

# Value of a double-quoted attribute for each tag ("" when it is absent).
# NOTE(review): values are taken verbatim, without HTML-entity decoding;
# this assumes the state fields contain no entities -- confirm.
hidden_attr <- function(tags, attr) {
  pattern <- paste0("\\b", attr, "=\"([^\"]*)\"")
  found <- regmatches(tags, regexec(pattern, tags, perl = TRUE))
  vapply(
    found,
    function(m) if (length(m) == 2L) m[[2L]] else "",
    character(1)
  )
}

hidden_names <- hidden_attr(hidden_tags, "name")
hidden_fields <- as.list(
  setNames(hidden_attr(hidden_tags, "value"), hidden_names)
)
hidden_fields <- hidden_fields[nzchar(hidden_names)]

# Explicit entries in data_form take precedence over page-provided values.
post_body <- c(
  hidden_fields[setdiff(names(hidden_fields), names(data_form))],
  data_form
)

# Submit the form as application/x-www-form-urlencoded.
search_page <- POST(
  url = url,
  body = post_body,
  encode = "form"
)

# Surface HTTP errors (4xx/5xx) instead of continuing with an error page.
stop_for_status(search_page)

# Decoded HTML of the response, ready for parsing the calculator outputs.
search_html <- httr::content(search_page, as = "text", encoding = "UTF-8")

# Raw response bytes (kept as the script's final printed value).
search_page$content
#> [1] 0d 0a 0d 0a 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 3e 0d 0a 0d 0a 3c
#> [25] 68 74 6d 6c 3e 0d 0a 3c 68 65 61 64 3e 3c 74 69 74 6c 65 3e 0d 0a 09 53
#> [49] 6f 6c 61 72 20 73 70 65 63 74 72 75 6d 20 63 61 6c 63 75 6c 61 74 6f 72
#> [73] 0d 0a 3c 2f 74 69 74 6c 65 3e 3c 6d 65 74 61 20 69 64 3d 22 4d 65 74 61
#> [97] 31 22 20 6e 61 6d 65 3d 22 64 65 73 63 72 69 70 74 69 6f 6e 22 20 63 6f
#> [ reached getOption("max.print") -- omitted 299085 entries ]
Created on 2023-01-19 with reprex v2.0.2