Exploratory data analysis

1 Glimpse

# Structural overview of the survey export: one line per column with its
# type and the first few values.
form_csv %>%
  glimpse()
## Observations: 1,854
## Variables: 74
## $ ec5_uuid                <chr> "254c5116-c203-453a-b3b3-d9a95979a256", …
## $ created_at              <dttm> 2019-09-20 23:39:54, 2019-09-20 23:37:0…
## $ title                   <chr> "Helen Quinn 36 3600065", "Helen Quinn 3…
## $ `1_Surveyor_Nickname`   <chr> "Helen Quinn", "Helen Quinn", "Helen Qui…
## $ `2_Map_sheet_number`    <dbl> 36, 36, 36, 36, 36, 36, 103, 103, 103, 1…
## $ `3_Functional_Unit_Co`  <dbl> 3600065, 3600064, 3600063, 3600062, 3600…
## $ `5_11_Name_of_organis`  <chr> NA, NA, NA, NA, NA, NA, "Fashtag", "Pyra…
## $ `6_12_Description_of_`  <chr> "Artist studio", "Artist studio", "Artis…
## $ `7_13_When_was_the_or`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `8_14_Do_you_regard_t`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `9_15_If_no_what_type`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `10_16_If_other_pleas`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `12_21_Total_number_o`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `13_22_Observed_or_Ve`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `14_23_How_many_FTEs_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `15_24_Observed_or_Ve`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `16_25_Is_the_organis`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `17_26_How_many_peopl`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `19_31_Floor_levels_t`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `20_32_How_many_store`  <dbl> 1, 1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA…
## $ `21_33_Unit_size_Inse`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `22_34_Square_metres_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `23_35_Observed_or_Ve`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `24_36_Predominant_he`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `25_37_Premises_type_`  <chr> "an office building, Other", "an office …
## $ `26_38_If_other_pleas`  <chr> "Offices in council housing block, conve…
## $ `27_39_Type_of_associ`  <chr> "No associated yard space", "No associat…
## $ `28_310_Car_parking_S`  <chr> "No car parking on site", "No car parkin…
## $ `29_311_Goods_access_`  <chr> "Goods lift access", "Goods lift access"…
## $ `30_312_Are_these_pre`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `31_313_If_premises_a`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `32_314_When_did_the_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `33_315_Is_the_premis`  <chr> "Rented/leased", "Rented/leased", "Rente…
## $ `34_316_If_rentedleas`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `35_317_Will_the_leas`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `37_41_Is_the_localit`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `38_42_If_so_why_sele`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `39_43_If_locality_is`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `40_44_What_is_your_v`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `41_45_What_are_the_t`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `42_46_Where_are_the_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `43_47_Where_are_the_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `44_48_Is_the_organis`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `45_49_Does_the_organ`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `46_410_If_yes_where_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `47_411_If_yes_when`    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `48_412_If_yes_why`     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `49_413_Are_the_organ`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `50_414_If_yes_please`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `51_415_Are_you_aware`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `52_416_If_yes_how_ha`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `53_417_If_other_plea`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `54_418_What_is_your_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `56_51_Contact_willin`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `57_52_Name_of_contac`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `58_53_Contact_teleph`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `59_54_Contact_email_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `60_55_Organisation_t`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `61_56_Organisation_w`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `62_57_Organisation_e`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `63_58_Street_name_eg`  <chr> "Thurlow Street", "Thurlow street", "Thu…
## $ `64_59_Street_number_`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `65_510_Postcode_Incl`  <chr> "SE17 2dg", "SE17 2dg", "SE17 2dg", "Se1…
## $ `66_511_Unit_number_i`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `68_61_Additional_not`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `69_62_Internal_photo`  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `71_71_Would_the_acti`  <chr> "no", "no", "no", "no", "no", "no", NA, …
## $ lat_72_72_Location      <dbl> NA, NA, NA, NA, NA, 51.48835, 51.47103, …
## $ long_72_72_Location     <dbl> NA, NA, NA, NA, NA, -0.086646, -0.066961…
## $ accuracy_72_72_Location <dbl> NA, NA, NA, NA, NA, 4, 65, 65, 65, 65, 6…
## $ `73_73_External_photo`  <chr> NA, NA, NA, NA, NA, NA, "https://five.ep…
## $ `74_74_SIC_Code`        <chr> "90030", "90030", "90030", "90030", "900…
## $ `75_75_Site_ID`         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ `76_76_Notes_on_any_o`  <chr> "Other ASC studios, Turps, Bainbridge", …

2 Completeness

Count of NAs per question

# Missing-value count for every column: build the logical NA mask, then sum
# each column of that mask.
form_csv %>%
  is.na() %>%
  colSums()
##                ec5_uuid              created_at                   title 
##                       0                       0                       0 
##     1_Surveyor_Nickname      2_Map_sheet_number    3_Functional_Unit_Co 
##                       1                       1                       0 
##    5_11_Name_of_organis    6_12_Description_of_    7_13_When_was_the_or 
##                     720                     131                    1341 
##    8_14_Do_you_regard_t    9_15_If_no_what_type    10_16_If_other_pleas 
##                    1074                    1756                    1843 
##    12_21_Total_number_o    13_22_Observed_or_Ve    14_23_How_many_FTEs_ 
##                    1282                    1284                    1423 
##    15_24_Observed_or_Ve    16_25_Is_the_organis    17_26_How_many_peopl 
##                    1420                    1417                    1491 
##    19_31_Floor_levels_t    20_32_How_many_store    21_33_Unit_size_Inse 
##                     837                     931                    1253 
##    22_34_Square_metres_    23_35_Observed_or_Ve    24_36_Predominant_he 
##                    1246                    1255                    1141 
##    25_37_Premises_type_    26_38_If_other_pleas    27_39_Type_of_associ 
##                     888                    1740                     978 
##    28_310_Car_parking_S    29_311_Goods_access_    30_312_Are_these_pre 
##                    1059                    1067                    1293 
##    31_313_If_premises_a    32_314_When_did_the_    33_315_Is_the_premis 
##                    1804                    1542                    1228 
##    34_316_If_rentedleas    35_317_Will_the_leas    37_41_Is_the_localit 
##                    1661                    1537                    1425 
##    38_42_If_so_why_sele    39_43_If_locality_is    40_44_What_is_your_v 
##                    1470                    1510                    1604 
##    41_45_What_are_the_t    42_46_Where_are_the_    43_47_Where_are_the_ 
##                    1533                    1513                    1449 
##    44_48_Is_the_organis    45_49_Does_the_organ    46_410_If_yes_where_ 
##                    1460                    1473                    1806 
##      47_411_If_yes_when       48_412_If_yes_why    49_413_Are_the_organ 
##                    1805                    1802                    1500 
##    50_414_If_yes_please    51_415_Are_you_aware    52_416_If_yes_how_ha 
##                    1741                    1486                    1624 
##    53_417_If_other_plea    54_418_What_is_your_    56_51_Contact_willin 
##                    1824                    1613                    1605 
##    57_52_Name_of_contac    58_53_Contact_teleph    59_54_Contact_email_ 
##                    1533                    1702                    1720 
##    60_55_Organisation_t    61_56_Organisation_w    62_57_Organisation_e 
##                    1457                    1314                    1489 
##    63_58_Street_name_eg    64_59_Street_number_    65_510_Postcode_Incl 
##                     796                    1034                     849 
##    66_511_Unit_number_i    68_61_Additional_not    69_62_Internal_photo 
##                    1270                    1035                    1647 
##    71_71_Would_the_acti      lat_72_72_Location     long_72_72_Location 
##                     986                     244                     244 
## accuracy_72_72_Location    73_73_External_photo          74_74_SIC_Code 
##                     244                     534                    1080 
##           75_75_Site_ID    76_76_Notes_on_any_o 
##                    1849                    1639
# Per-surveyor completeness: for each surveyor nickname, count the missing
# answers in every other column and render the result as an HTML table.
# A formula lambda is passed to summarise_all() because funs() is deprecated
# (dplyr >= 0.8.0).
# NOTE(review): the rendered table lists near-identical nicknames as separate
# groups ("Nei C", "Neil", "Neil C" twice) plus one unnamed group; these look
# like typos / stray whitespace / a missing nickname -- confirm and consider
# normalising the nickname column before grouping.
form_csv %>%
  group_by(`1_Surveyor_Nickname`) %>%
  summarise_all(~ sum(is.na(.))) %>%
  htmlTable(
    caption = "NA count by Question and Surveyor",
    col.columns = c("none", "#F7F7F7"),
    css.cell = "padding-left:1em; padding-right:1em;",
    rnames = FALSE,
    align = "lr"
  )
NA count by Question and Surveyor
1_Surveyor_Nickname ec5_uuid created_at title 2_Map_sheet_number 3_Functional_Unit_Co 5_11_Name_of_organis 6_12_Description_of_ 7_13_When_was_the_or 8_14_Do_you_regard_t 9_15_If_no_what_type 10_16_If_other_pleas 12_21_Total_number_o 13_22_Observed_or_Ve 14_23_How_many_FTEs_ 15_24_Observed_or_Ve 16_25_Is_the_organis 17_26_How_many_peopl 19_31_Floor_levels_t 20_32_How_many_store 21_33_Unit_size_Inse 22_34_Square_metres_ 23_35_Observed_or_Ve 24_36_Predominant_he 25_37_Premises_type_ 26_38_If_other_pleas 27_39_Type_of_associ 28_310_Car_parking_S 29_311_Goods_access_ 30_312_Are_these_pre 31_313_If_premises_a 32_314_When_did_the_ 33_315_Is_the_premis 34_316_If_rentedleas 35_317_Will_the_leas 37_41_Is_the_localit 38_42_If_so_why_sele 39_43_If_locality_is 40_44_What_is_your_v 41_45_What_are_the_t 42_46_Where_are_the_ 43_47_Where_are_the_ 44_48_Is_the_organis 45_49_Does_the_organ 46_410_If_yes_where_ 47_411_If_yes_when 48_412_If_yes_why 49_413_Are_the_organ 50_414_If_yes_please 51_415_Are_you_aware 52_416_If_yes_how_ha 53_417_If_other_plea 54_418_What_is_your_ 56_51_Contact_willin 57_52_Name_of_contac 58_53_Contact_teleph 59_54_Contact_email_ 60_55_Organisation_t 61_56_Organisation_w 62_57_Organisation_e 63_58_Street_name_eg 64_59_Street_number_ 65_510_Postcode_Incl 66_511_Unit_number_i 68_61_Additional_not 69_62_Internal_photo 71_71_Would_the_acti lat_72_72_Location long_72_72_Location accuracy_72_72_Location 73_73_External_photo 74_74_SIC_Code 75_75_Site_ID 76_76_Notes_on_any_o
Adam 0 0 0 0 0 47 0 128 71 168 171 99 99 145 145 139 144 70 81 111 111 112 113 97 171 104 111 109 143 162 143 134 161 149 141 143 145 149 143 146 144 143 143 171 171 171 143 168 144 151 170 145 143 117 155 156 110 89 107 36 53 39 103 37 145 84 14 14 14 17 88 175 131
Aga 0 0 0 0 0 165 37 269 212 276 293 235 234 238 237 227 260 209 220 231 230 230 232 215 296 220 221 222 221 293 278 231 268 236 227 233 228 294 266 242 232 230 231 292 286 293 228 292 227 277 296 287 272 295 292 289 239 233 258 219 226 222 269 254 295 233 85 85 85 208 250 294 288
Caroline 0 0 0 0 0 1 1 4 1 3 4 4 4 4 4 4 4 1 2 4 4 4 4 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 1 3 4 1 2 3 4 4 3 0 4 4 4 1 4 4 4
Dominika Piotrowska 0 0 0 0 0 0 5 34 33 36 36 33 33 33 33 33 33 33 33 33 33 33 34 33 36 33 33 33 33 36 34 33 35 33 33 34 34 35 34 34 34 33 33 36 36 36 34 36 34 36 36 35 33 36 36 36 35 35 35 35 35 35 36 2 35 33 1 1 1 3 14 36 35
Helen Quinn 0 0 0 0 0 116 17 181 179 208 212 150 150 185 185 185 187 178 94 185 185 185 187 86 141 90 92 96 185 211 190 102 199 191 185 188 187 187 188 187 185 187 187 205 205 205 187 205 187 192 200 188 186 193 203 198 181 162 168 86 190 89 204 151 197 99 18 18 18 89 84 212 110
Jess 0 0 0 0 0 0 0 1 0 3 3 0 0 0 0 0 0 0 0 2 2 2 1 0 3 0 0 0 0 3 0 0 0 0 0 0 0 1 2 1 0 0 0 2 3 2 0 1 0 1 3 1 1 1 1 2 3 3 3 1 1 2 2 3 1 0 1 1 1 1 3 3 3
Jessica 0 0 0 0 0 42 8 107 89 158 166 100 101 102 103 103 105 74 93 121 122 123 116 100 155 102 102 111 104 150 114 108 129 121 106 109 113 119 113 111 110 111 112 150 152 149 124 142 120 139 166 130 118 125 131 141 133 122 149 115 117 112 129 138 113 113 27 27 27 51 100 166 163
Joe 0 0 0 0 0 48 4 58 51 103 106 94 93 94 95 95 100 8 9 24 23 27 9 10 106 8 8 10 84 106 97 93 106 98 93 101 102 103 97 101 96 97 96 105 105 103 99 105 98 103 106 106 102 103 106 105 82 55 64 7 7 7 7 3 101 82 0 0 0 62 58 105 106
Max 0 0 0 0 0 73 4 133 119 196 203 118 118 156 156 155 166 81 79 89 89 91 90 73 184 87 116 123 158 197 165 143 187 186 162 169 185 181 176 167 162 168 175 197 197 198 177 198 174 189 199 174 181 159 175 189 158 143 175 43 53 90 178 32 163 103 78 78 78 53 85 205 164
Nei C 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1
Neil 0 0 0 0 0 57 23 79 75 125 126 79 81 82 80 79 83 75 82 99 95 96 92 87 126 91 89 89 82 121 86 79 101 93 81 86 98 113 100 82 81 81 81 121 124 122 89 108 87 103 126 108 115 112 112 119 116 122 123 115 118 115 96 119 110 63 2 2 2 7 122 126 124
Neil C 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1
Neil C 0 0 0 0 0 35 0 59 57 80 82 59 59 59 59 59 59 59 59 62 61 62 65 60 82 60 60 60 61 78 60 60 74 69 60 61 63 69 62 62 60 59 62 82 82 82 64 75 61 70 80 72 72 71 72 78 70 78 74 59 63 59 75 74 73 43 0 0 0 1 66 82 82
Nicolas 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Rashi 0 0 0 0 0 135 30 284 183 396 437 307 308 321 319 334 346 45 175 288 287 286 194 121 432 175 219 206 214 439 367 237 393 353 329 339 347 345 344 372 337 343 345 437 436 433 347 404 346 355 434 359 374 313 411 399 325 265 325 75 165 72 163 214 408 131 13 13 13 40 203 437 425
0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 1 1 1

3 Classes ‘Activity’ and ‘SIC’ codes

“1.2 Description of organisations activity”

# Frequency table of the free-text activity description (question 1.2),
# most frequent value first. Missing descriptions form their own row.
# NOTE(review): the rendered top-35 shows the same label in different
# spellings/cases ("vacant"/"Vacant", "residential"/"Residential",
# "Car repair"/"Car repairs"); counts are split across those variants --
# consider normalising the text before tallying if merged counts are wanted.
descr_class <- form_csv %>%
  count(`6_12_Description_of_`, sort = TRUE)

# Render the 35 most common descriptions as an HTML table.
descr_class %>%
  head(35) %>%
  htmlTable(
    caption = "1.2 Description of organisations activity (top 35)",
    col.columns = c("none", "#F7F7F7"),
    css.cell = "padding-left:1em; padding-right:1em;",
    rnames = FALSE,
    align = "lr"
  )
1.2 Description of organisations activity (top 35)
6_12_Description_of_ n
vacant 216
131
residential 112
Residential 95
Artist studio 56
demolished 46
unidentified 41
Artist 40
unidentifiable 32
Vacant 26
Derelict 18
Demolished 17
development 15
Unidentifiable 14
Architecture Practice 13
derelict 13
Development 12
Car repair 9
Car repairs 9
Office block 8
Photography 8
Non industrial 6
Storage 6
Architecture 5
Artistic creation 5
Pub 5
Under development 5
Brewery 4
Cafe 4
Photography studio 4
Vehicle repairs 4
Vehicle Repairs and Servicing 4
Architecture design 3
Art Gallery 3
Artists studios 3

Unique values in “1.2 Description of organisations activity”

# Number of distinct values in the activity description column
# (a missing value counts as one distinct value).
length(unique(form_csv[["6_12_Description_of_"]]))
## [1] 798

Frequency of frequencies in “1.2 Description of organisations activity”: how many distinct values occur exactly once (one of a kind), twice, and so on (top 5)

# Frequency of frequencies: group the description tally by its count `n`
# and record how many distinct descriptions share each count, keeping the
# first five rows of that summary.
# NOTE(review): the "Number of species" label looks carried over from a
# different analysis; here the column holds the number of occurrences of a
# description -- confirm the intended wording.
descr_class %>%
  group_by(n) %>%
  summarise(kind = n()) %>%
  head(n = 5) %>%
  select(`Number of species` = n, Count = kind) %>%
  pander()
Number of species Count
1 681
2 65
3 20
4 5
5 4

Download full 1.2 Description of organisations activity table

“7.4 SIC Code”

# Frequency table of the recorded SIC code (question 7.4), most frequent
# value first. Records without a code form their own row.
# NOTE(review): the rendered top-35 contains codes in two notations
# ("74.10" next to "74100", "62.02" next to "62020"); these are tallied as
# different values -- confirm whether they should be normalised first.
SIC_class <- form_csv %>%
  count(`74_74_SIC_Code`, sort = TRUE)

# Render the 35 most common SIC codes as an HTML table.
SIC_class %>%
  head(35) %>%
  htmlTable(
    caption = "7.4 SIC Code (top 35)",
    col.columns = c("none", "#F7F7F7"),
    css.cell = "padding-left:1em; padding-right:1em;",
    rnames = FALSE,
    align = "lr"
  )
7.4 SIC Code (top 35)
74_74_SIC_Code n
1080
90030 162
45200 56
71111 25
52103 16
96090 11
32120 10
74100 10
46342 9
82990 9
18129 8
23410 7
70229 7
74202 7
94910 7
11050 6
31090 6
96010 6
96020 6
16230 5
32990 5
46900 5
56101 5
56302 5
61100 5
74.10 5
77320 5
93290 5
47760 4
56102 4
56103 4
56210 4
59200 4
62.02 4
62020 4

Unique values in “7.4 SIC Code”

# Number of distinct values in the SIC code column
# (a missing value counts as one distinct value).
length(unique(form_csv[["74_74_SIC_Code"]]))
## [1] 277

Download full 7.4 SIC Code table