The following code shows one approach to scraping a website using the browser's JavaScript console. This method is a slight improvement on https://controllingtheinter.net/2022/06/13/screenscrape-a-website-with-powershell-3-0/ : I now collect the data into a JavaScript variable and then write it out as a text dump. I can then save the text dump and read it from another language, since JSON parsing is available in .NET through the Newtonsoft (Json.NET) library.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
/**
 * Collects every question panel found under `root` into plain records.
 *
 * Each record always carries the same six keys, so the JSON produced from it
 * has a stable shape: an answer option that is not present on the page is
 * stored as "" instead of being left out of the record.
 *
 * @param {Document|Element} root - node whose descendants are searched.
 * @returns {Array<{Question: string, AnswerA: string, AnswerB: string,
 *   AnswerC: string, AnswerD: string, Explanation: string}>}
 */
function scrapeQuizResults(root) {
  // Text of the option at `index`, or "" when there is no such option.
  const readAnswerText = (answerNodes, index) => {
    const node = answerNodes[index];
    return node && typeof node.innerText === "string" ? node.innerText : "";
  };

  const results = [];
  const panels = root.getElementsByClassName("detailed-result-panel--panel-row--2aE8z detailed-result-panel--question-container--7NyiS");
  for (const panel of panels) {
    // Query the option list once per panel instead of once per answer.
    const answerNodes = panel.getElementsByClassName("unstyled-list")[0].childNodes;
    results.push({
      Question: panel.getElementsByClassName("udlite-text-bold mc-quiz-question--question-prompt--2_dlz rt-scaffolding")[0].innerText,
      AnswerA: readAnswerText(answerNodes, 0),
      AnswerB: readAnswerText(answerNodes, 1),
      AnswerC: readAnswerText(answerNodes, 2),
      AnswerD: readAnswerText(answerNodes, 3),
      Explanation: panel.getElementsByClassName("mc-quiz-question--explanation--Q5KHQ")[0].innerText,
    });
  }
  return results;
}

// `var` keeps MyArray a global that can be inspected (and re-declared) in the
// console. Outside a browser there is no `document`, so nothing is scraped.
var MyArray = typeof document === "undefined" ? [] : scrapeQuizResults(document);

// Last expression of the snippet: the console echoes the JSON text dump.
JSON.stringify(MyArray);
The following code was my failed attempt at storing the data in an HTML element and then using JSON.stringify to write it out, but I was not able to get the generation of XYZ to work correctly. It may be that I need to convert it to a class instead, but for now the method above worked just fine and was a few lines shorter.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
// Illustrative record only; it is not used by the code below. It was
// originally named `xyz`, the same name as the constructor: the assignment
// ran after the function declaration was hoisted, replaced the constructor
// with this plain object, and made `new xyz(...)` throw
// "xyz is not a constructor".
var xyzSample = {
  Question: "hello world",
  AnswerA: "hello world",
  AnswerB: "hello world",
  AnswerC: "hello world",
  AnswerD: "hello world",
  Explanation: "hello world",
};

/**
 * One scraped quiz question.
 */
class xyz {
  /**
   * @param {string} Question - question text.
   * @param {string[]|string} Answers - answer option texts.
   * @param {string} Explanation - explanation text.
   */
  constructor(Question, Answers, Explanation) {
    this.Question = Question;
    this.Answers = Answers;
    this.Explanation = Explanation;
  }

  /**
   * @returns {string} the current question text (reflects later assignments
   *   to `Question`, not just the constructor argument).
   */
  getQuestion() {
    return this.Question;
  }
}

/**
 * Builds one `xyz` per question panel found under `root`, logging each value
 * as it is read.
 *
 * @param {Document|Element} root - node whose descendants are searched.
 * @returns {xyz[]} the collected questions, in page order.
 */
function collectQuizQuestions(root) {
  const collected = [];
  const panels = root.getElementsByClassName("detailed-result-panel--panel-row--2aE8z detailed-result-panel--question-container--7NyiS");
  for (const panel of panels) {
    const questionText = panel.getElementsByClassName("udlite-text-bold mc-quiz-question--question-prompt--2_dlz rt-scaffolding")[0].innerText;
    const answerNodes = panel.getElementsByClassName("unstyled-list")[0].childNodes;
    // A question may have fewer than four options; a missing one becomes ""
    // instead of throwing on `undefined.innerText`.
    const answers = [0, 1, 2, 3].map((index) => {
      const node = answerNodes[index];
      return node && typeof node.innerText === "string" ? node.innerText : "";
    });
    const explanation = panel.getElementsByClassName("mc-quiz-question--explanation--Q5KHQ")[0].innerText;

    console.log(questionText);
    for (const answer of answers) {
      console.log(answer);
    }
    console.log(explanation);

    collected.push(new xyz(questionText, answers, explanation));
  }
  return collected;
}

// Outside a browser there is no `document`, so nothing is scraped.
var xyzarray = typeof document === "undefined" ? [] : collectQuizQuestions(document);
Limitations of JSON stringify: https://stackoverflow.com/questions/11171746/reverse-of-json-stringify
Now the trick is to get PowerShell to read the object back into memory and to parse it properly. After dumping the data, it seems this site confirms that the output does not conform to the RFC.
So, in order to output the data in a nicely formatted way, I used:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
/**
 * Collects every question panel found under `root` into plain records.
 * An answer option that is not present on the page is stored as "".
 *
 * @param {Document|Element} root - node whose descendants are searched.
 * @returns {Array<{Question: string, AnswerA: string, AnswerB: string,
 *   AnswerC: string, AnswerD: string, Explanation: string}>}
 */
function scrapeQuizQuestions(root) {
  // Text of the option at `index`, or "" when there is no such option.
  const readAnswerText = (answerNodes, index) => {
    const node = answerNodes[index];
    return node && typeof node.innerText === "string" ? node.innerText : "";
  };

  const results = [];
  const panels = root.getElementsByClassName("detailed-result-panel--panel-row--2aE8z detailed-result-panel--question-container--7NyiS");
  for (const panel of panels) {
    // Query the option list once per panel instead of once per answer.
    const answerNodes = panel.getElementsByClassName("unstyled-list")[0].childNodes;
    results.push({
      Question: panel.getElementsByClassName("udlite-text-bold mc-quiz-question--question-prompt--2_dlz rt-scaffolding")[0].innerText,
      AnswerA: readAnswerText(answerNodes, 0),
      AnswerB: readAnswerText(answerNodes, 1),
      AnswerC: readAnswerText(answerNodes, 2),
      AnswerD: readAnswerText(answerNodes, 3),
      Explanation: panel.getElementsByClassName("mc-quiz-question--explanation--Q5KHQ")[0].innerText,
    });
  }
  return results;
}

/**
 * Renders one scraped record as a readable text block: the question, its
 * lettered options with the "(Correct)"/"(Incorrect)" markers removed, a line
 * naming the correct option, and the explanation with runs of blank lines
 * collapsed to a single blank line.
 *
 * Options whose text is empty are skipped. When no option contains "Correct"
 * the block says so instead of printing the literal text "null".
 *
 * @param {{Question: string, AnswerA: string, AnswerB: string,
 *   AnswerC: string, AnswerD: string, Explanation: string}} question
 * @returns {string} the formatted block, ending in a newline.
 */
function formatQuizQuestion(question) {
  // The separators are kept exactly as they have always been emitted
  // (including the trailing space before some of the newlines).
  const options = [
    { letter: "A", text: question.AnswerA || "", prefix: " \nA. " },
    { letter: "B", text: question.AnswerB || "", prefix: " \nB. " },
    { letter: "C", text: question.AnswerC || "", prefix: " \nC. " },
    { letter: "D", text: question.AnswerD || "", prefix: "\nD. " },
  ];

  // First option (in A-D order) whose text contains "Correct".
  const correctOption = options.find((option) => option.text.includes("Correct"));
  const verdict = correctOption
    ? `Correct answer is ${correctOption.letter}.`
    : "Correct answer is unknown.";

  const answerLines = options
    .filter((option) => option.text !== "")
    .map((option) => option.prefix + option.text.replace("(Incorrect)", "").replace("(Correct)", "").trim())
    .join("");

  const explanation = question.Explanation.replace(/\n\s*\n/g, "\n\n").trim();
  return `${question.Question.trim()}${answerLines}\n${verdict} \n${explanation}\n`;
}

// `var` keeps MyArray a global that can be inspected (and re-declared) in the
// console. Outside a browser there is no `document`, so nothing is scraped.
var MyArray = typeof document === "undefined" ? [] : scrapeQuizQuestions(document);

for (const question of MyArray) {
  console.log(formatQuizQuestion(question));
}

// Last expression of the snippet: the console echoes the JSON text dump.
JSON.stringify(MyArray);
Here is another quick example of pulling XPaths and sleeping as it rolls through the divs.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
/**
 * Returns the first node matching an XPath expression in the current page.
 *
 * @param {string} path - XPath expression, evaluated against `document`.
 * @returns {Node|null} the first matching node, or null when nothing matches.
 */
function getElementByXpath(path) {
  return document.evaluate(path, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
}

/**
 * Reads and logs the text of the first node matching `path`.
 *
 * @param {string} path - XPath expression.
 * @returns {string} the node's `innerText`, or "" when no node matches (for
 *   example a question with fewer answer options), so one missing element
 *   does not abort the whole run.
 */
function readTextByXpath(path) {
  const node = getElementByXpath(path);
  const text = node && typeof node.innerText === "string" ? node.innerText : "";
  console.log(text);
  return text;
}

// Resolves after `ms` milliseconds.
const delay = ms => new Promise(res => setTimeout(res, ms));

/**
 * Clicks through every entry of the quiz sidebar, waits for the page to show
 * that question, and appends one record per question to `results`.
 *
 * Records are appended as they are read, so whatever was collected before an
 * unexpected error is still available in `results`.
 *
 * @param {Array<object>} [results=[]] - array the records are pushed onto.
 * @param {number} [waitMs=4000] - pause after each click, in milliseconds.
 * @returns {Promise<Array<object>>} resolves to `results`.
 */
async function scrapeSidebarQuiz(results = [], waitMs = 4000) {
  const questionRoot = "//html/body/main/div[5]/section/div[2]/div[2]/div/div[1]/div";
  const explanationPath = "/html/body/main/div[5]/section/div[2]/div[2]/div/div[2]/div";
  const answerPath = (position) => `${questionRoot}/div[2]/div[${position}]/label/div[2]/span/div[1]`;

  // Looked up again on every use rather than cached, exactly as before:
  // whether the sidebar element survives a click is not known from here.
  const sidebarItems = () => {
    const sidebar = document.getElementsByClassName('c-quiz-sidebar_questions')[0];
    return sidebar ? sidebar.getElementsByTagName("li") : [];
  };

  for (let i = 0; i < sidebarItems().length; i++) {
    console.log(i);
    sidebarItems()[i].getElementsByTagName("button")[0].click();
    await delay(waitMs);

    results.push({
      Index: i,
      Question: readTextByXpath(`${questionRoot}/div[1]`).replace("You left this question blank, so we scored it as incorrect.\n\n", ""),
      AnswerA: readTextByXpath(answerPath(1)),
      AnswerB: readTextByXpath(answerPath(2)),
      AnswerC: readTextByXpath(answerPath(3)),
      AnswerD: readTextByXpath(answerPath(4)),
      Explanation: readTextByXpath(explanationPath).replace("\n\nLet us know what you think about this answer explanation", ""),
    });
  }
  return results;
}

/**
 * Renders the scraped records as one plain-text dump, logging each value as
 * it is appended.
 *
 * NOTE(review): the answer lines are labelled "Question A." etc.; that looks
 * like it may have been meant as "Answer A." - left unchanged, confirm.
 *
 * @param {Array<{Index: number, Question: string, AnswerA: string,
 *   AnswerB: string, AnswerC: string, AnswerD: string, Explanation: string}>} questions
 * @returns {string} the text dump ("" for an empty list).
 */
function buildExtract(questions) {
  let text = '';
  for (const val of questions) {
    console.log(val.Question);
    text += `Question ${val.Index + 1}.\n\n`;
    text += `${val.Question}.\n\n`;
    console.log(val.AnswerA);
    text += `Question A. ${val.AnswerA}.\n\n`;
    console.log(val.AnswerB);
    text += `Question B. ${val.AnswerB}.\n\n`;
    console.log(val.AnswerC);
    text += `Question C. ${val.AnswerC}.\n\n`;
    console.log(val.AnswerD);
    text += `Question D. ${val.AnswerD}.\n\n`;
    console.log(val.Explanation);
    text += `${val.Explanation}.\n\n`;
  }
  return text;
}

// `var` keeps MyArray / Extract globals that can be inspected in the console.
// Outside a browser there is no `document`, so nothing is scraped.
var MyArray = [];
if (typeof document !== "undefined") {
  await scrapeSidebarQuiz(MyArray);
}
JSON.stringify(MyArray);

//Used to Dump the data for text format.
//https://masteringjs.io/tutorials/fundamentals/foreach-object
var Extract = buildExtract(MyArray);