## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
## GDAL version >= 3.1.0 | setting mapviewOptions(fgb = TRUE)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
oct18_N_u <- read_csv("~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv")
## Warning: Missing column names filled in: 'X111' [111]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   fid = col_double(),
##   field_1.x = col_double(),
##   created_at = col_datetime(format = ""),
##   query_code = col_double(),
##   `2_Map_sheet_number` = col_double(),
##   `3_Functional_Unit_Co` = col_double(),
##   `7_13_When_was_the_or` = col_double(),
##   `17_26_How_many_peopl` = col_double(),
##   `20_32_How_many_store` = col_double(),
##   `21_33_Unit_size_Inse` = col_double(),
##   `24_36_Predominant_he` = col_double(),
##   `34_316_If_rentedleas` = col_double(),
##   `66_511_Unit_number_i` = col_double(),
##   lat_72_72_Location = col_double(),
##   long_72_72_Location = col_double(),
##   accuracy_72_72_Location = col_double(),
##   `75_75_Site_ID` = col_logical(),
##   sD_RS_field_1 = col_double(),
##   b_sheet = col_double(),
##   FUC = col_double()
##   # ... with 14 more columns
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: 260 parsing failures.
##  row           col           expected    actual                                                       file
## 1284 75_75_Site_ID 1/0/T/F/TRUE/FALSE 7311      '~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv'
## 1306 Latitude_mi   1/0/T/F/TRUE/FALSE 51.505419 '~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv'
## 1306 Longitude_mi  1/0/T/F/TRUE/FALSE -0.102403 '~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv'
## 1307 sheet_no_2    1/0/T/F/TRUE/FALSE 4         '~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv'
## 1330 sheet_no_2    1/0/T/F/TRUE/FALSE 19        '~/Documents/SINDA_local/Final_data_from_gd/oct18_N_u.csv'
## .... ............. .................. ......... ..........................................................
## See problems(...) for more details.
oct18_N <- oct18_N_u %>%
  select(83, 92, 93, 96, 97, 98, 105:110, 112)
colnames(oct18_N)
##  [1] "FUC"                 "mb_comment"          "comment_1"          
##  [4] "description_M"       "description_updated" "category"           
##  [7] "name"                "query_resolved"      "action_spatial"     
## [10] "action_spreadsheet"  "parcels_actions"     "action_other"       
## [13] "resolution_details"
oct18_N %>%
  select(1) %>%
  filter(duplicated(.))
oct18_N <- oct18_N %>%
  mutate(FUC = as.character(FUC),
         FUC = ifelse(is.na(FUC), paste0('oct18-', 1:length(FUC[is.na(FUC)])), FUC))
oct2020_v0 <- st_read("/Users/nicolas/Documents/SINDA_local/Final_data_from_gd/oct2020_v0.gpkg")
## Reading layer `oct2020_v0' from data source `/Users/nicolas/Documents/SINDA_local/Final_data_from_gd/oct2020_v0.gpkg' using driver `GPKG'
## Simple feature collection with 2622 features and 119 fields (with 458 geometries empty)
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 531305.5 ymin: 171277 xmax: 536322.5 ymax: 180434.8
## projected CRS:  OSGB 1936 / British National Grid
colnames(oct2020_v0)
##   [1] "origin"                          "id"                             
##   [3] "field_1.x"                       "ec5_uuid"                       
##   [5] "created_at"                      "query_code"                     
##   [7] "title"                           "X1_surveyor_nickname"           
##   [9] "X2_map_sheet_number"             "X5_11_name_of_organis"          
##  [11] "X6_12_description_of_"           "X6_12b_description_category"    
##  [13] "X7_13_when_was_the_or"           "X8_14_do_you_regard_t"          
##  [15] "X9_15_if_no_what_type"           "X10_16_if_other_pleas"          
##  [17] "X12_21_total_number_o"           "X13_22_observed_or_ve"          
##  [19] "X14_23_how_many_ftes_"           "X15_24_observed_or_ve"          
##  [21] "X16_25_is_the_organis"           "X17_26_how_many_peopl"          
##  [23] "X19_31_floor_levels_t"           "X20_32_how_many_store"          
##  [25] "X21_33_unit_size_inse"           "X22_34_square_metres_"          
##  [27] "X23_35_observed_or_ve"           "X24_36_predominant_he"          
##  [29] "X25_37_premises_type_"           "X26_38_if_other_pleas"          
##  [31] "X27_39_type_of_associ"           "X28_310_car_parking_s"          
##  [33] "X29_311_goods_access_"           "X30_312_are_these_pre"          
##  [35] "X31_313_if_premises_a"           "X32_314_when_did_the_"          
##  [37] "X33_315_is_the_premis"           "X34_316_if_rentedleas"          
##  [39] "X35_317_will_the_leas"           "X37_41_is_the_localit"          
##  [41] "X38_42_if_so_why_sele"           "X39_43_if_locality_is"          
##  [43] "X40_44_what_is_your_v"           "X41_45_what_are_the_t"          
##  [45] "X42_46_where_are_the_"           "X43_47_where_are_the_"          
##  [47] "X44_48_is_the_organis"           "X45_49_does_the_organ"          
##  [49] "X46_410_if_yes_where_"           "X47_411_if_yes_when"            
##  [51] "X48_412_if_yes_why"              "X49_413_are_the_organ"          
##  [53] "X50_414_if_yes_please"           "X51_415_are_you_aware"          
##  [55] "X52_416_if_yes_how_ha"           "X53_417_if_other_plea"          
##  [57] "X54_418_what_is_your_"           "X56_51_contact_willin"          
##  [59] "X57_52_name_of_contac"           "X58_53_contact_teleph"          
##  [61] "X59_54_contact_email_"           "X60_55_organisation_t"          
##  [63] "X61_56_organisation_w"           "X62_57_organisation_e"          
##  [65] "X63_58_street_name_eg"           "X64_59_street_number_"          
##  [67] "X65_510_postcode_incl"           "X66_511_unit_number_i"          
##  [69] "X68_61_additional_not"           "X69_62_internal_photo"          
##  [71] "X71_71_would_the_acti"           "lat_72_72_location"             
##  [73] "long_72_72_location"             "accuracy_72_72_location"        
##  [75] "X73_73_external_photo"           "X74_74_sic_code"                
##  [77] "X75_75_site_id"                  "X76_76_notes_on_any_o"          
##  [79] "sd_rs_field_1"                   "sd_rs_6_12_description_of_...10"
##  [81] "polygon"                         "b_sheet"                        
##  [83] "fuc_s"                           "field_1.y"                      
##  [85] "fuc_2"                           "lat"                            
##  [87] "long"                            "sheet_no"                       
##  [89] "sheet_no_2"                      "mb_note_no"                     
##  [91] "mb_comment"                      "comment_1"                      
##  [93] "comment_2"                       "comment_3"                      
##  [95] "description_m"                   "description_updated"            
##  [97] "category"                        "map.sheet.number"               
##  [99] "latitude_mi"                     "longitude_mi"                   
## [101] "note_mi"                         "px"                             
## [103] "py"                              "name"                           
## [105] "query_resolved"                  "action_spatial"                 
## [107] "action_spreadsheet"              "parcels_actions"                
## [109] "action_other"                    "field_111"                      
## [111] "resolution_details"              "field_1."                       
## [113] "X3_functional_unit_co"           "X73_73_eternal_photo"           
## [115] "n"                               "desc_org"                       
## [117] "class"                           "n_class"                        
## [119] "class1"                          "geom"
oct2020_v0 %>%
  select(83) %>%
  filter(duplicated(.))
to_join_2020 <- oct2020_v0 %>%
  select(83, 10, 11, 117, 118, 119)
to_join_2020
to_join_2020 <- to_join_2020 %>% 
  mutate(fuc_s = as.character(fuc_s),
         fuc_s = ifelse(is.na(fuc_s), paste0('oct20-', 1:length(fuc_s[is.na(fuc_s)])), fuc_s))
join_summ <- to_join_2020 %>%
  full_join(., oct18_N, by = c("fuc_s" = "FUC"), keep = T)
mapview(join_summ)
join_summ <- join_summ %>%
  mutate(newFUC = fuc_s,
         newFUC = as.character(newFUC),
         newFUC = ifelse(is.na(newFUC), paste0('oct18-', 1:length(newFUC[is.na(newFUC)])), newFUC))
bbase <- read_excel("~/Documents/SINDA_local/data_from_southwark/WMT OKR Business Survey 2019/REDACTED_164_OKR Survey_WMT 2019 business base_191104.xlsx", 
    col_types = c("text", "numeric", "text", 
        "text", "text", "text", "text", "numeric", 
        "text", "text", "text", "text", "text", 
        "text", "text", "text", "text", "text", 
        "text", "text", "text", "text", "text", 
        "text", "text", "numeric", "numeric", 
        "numeric", "numeric", "text", "text", 
        "text", "text", "text", "text", "text", 
        "text", "numeric", "text", "numeric", 
        "text", "text", "text", "numeric", 
        "numeric", "numeric", "numeric"))
bbase_sf <- bbase %>%
  select(1:37) %>%
  st_as_sf(., coords = c("Xcoord", "Ycoord"), na.fail = F, crs = 27700) %>%
  filter(Join != 234) %>%
  mutate(newFUC = paste0('OKR-', 1:nrow(.)))
## Warning in lapply(x[coords], as.numeric): NAs introduced by coercion

## Warning in lapply(x[coords], as.numeric): NAs introduced by coercion
qtm(bbase_sf)
## Warning: The shape bbase_sf contains empty units.

bbase_sf1 <- bbase_sf %>%
  select(c(`Site allocation`, `Sub-area`, `SIC CODE`, Sector, `Building use category`, `Building type`)) %>%
  clean_names() %>%
  rename(geom = geometry)
bbase_sf %>%
  group_by(Sector) %>%
  summarise(n = n()) %>%
  select(Sector) %>%
  st_drop_geometry()
## `summarise()` ungrouping output (override with `.groups` argument)
colnames(join_summ)
##  [1] "fuc_s"                 "X5_11_name_of_organis" "X6_12_description_of_"
##  [4] "class"                 "n_class"               "class1"               
##  [7] "FUC"                   "mb_comment"            "comment_1"            
## [10] "description_M"         "description_updated"   "category"             
## [13] "name"                  "query_resolved"        "action_spatial"       
## [16] "action_spreadsheet"    "parcels_actions"       "action_other"         
## [19] "resolution_details"    "geom"                  "newFUC"
colnames(bbase_sf1)
## [1] "site_allocation"       "sub_area"              "sic_code"             
## [4] "sector"                "building_use_category" "building_type"        
## [7] "geom"
join_summ %>%
  st_drop_geometry() %>%
  select(action_spatial) %>%
  as.factor() %>%
  levels()
## [1] "c(NA, \"FUC polygon created\", \"FUC polygon edited\", \"FUC polygon needs deleting\", \"None\")"
join_summ %>%
  group_by(action_spatial) %>%
  summarise(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
join_summ %>%
  group_by(action_spreadsheet) %>%
  summarise(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
j1 <- join_summ %>%
  filter(action_spreadsheet != "Record needs deleting" | is.na(action_spreadsheet)) # keep NAs
j2 <- j1 %>%
  filter(action_spatial != "FUC polygon needs deleting" | is.na(action_spatial)) # keep NAs
bind <- bind_rows(j2, bbase_sf1) %>%
  mutate(U_ID = row_number(),
         source = case_when(is.na(newFUC) ~ "WMT",
                            TRUE ~ "CASS")) %>%
  select(28,29, 1:27)
st_write(bind, "bind.gpkg", append=FALSE)
## Deleting layer `bind' using driver `GPKG'
## Writing layer `bind' to data source `bind.gpkg' using driver `GPKG'
## Writing 3992 features with 28 fields and geometry type Unknown (any).
write_xlsx(bind, path = "bind.xlsx", col_names = TRUE)
oct18gpkg <- st_read("/Users/nicolas/Documents/SINDA_local/Final_data_from_gd/oct18_N.gpkg")
## Reading layer `masterGJ' from data source `/Users/nicolas/Documents/SINDA_local/Final_data_from_gd/oct18_N.gpkg' using driver `GPKG'
## Simple feature collection with 2515 features and 100 fields (with 458 geometries empty)
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 531305.5 ymin: 171277 xmax: 536322.5 ymax: 180434.8
## projected CRS:  unnamed
colnames(oct18gpkg)
##   [1] "field_1.x"                       "ec5_uuid"                       
##   [3] "created_at"                      "title"                          
##   [5] "X1_Surveyor_Nickname"            "X2_Map_sheet_number"            
##   [7] "X3_Functional_Unit_Co"           "X5_11_Name_of_organis"          
##   [9] "X6_12_Description_of_"           "X7_13_When_was_the_or"          
##  [11] "X8_14_Do_you_regard_t"           "X9_15_If_no_what_type"          
##  [13] "X10_16_If_other_pleas"           "X12_21_Total_number_o"          
##  [15] "X13_22_Observed_or_Ve"           "X14_23_How_many_FTEs_"          
##  [17] "X15_24_Observed_or_Ve"           "X16_25_Is_the_organis"          
##  [19] "X17_26_How_many_peopl"           "X19_31_Floor_levels_t"          
##  [21] "X20_32_How_many_store"           "X21_33_Unit_size_Inse"          
##  [23] "X22_34_Square_metres_"           "X23_35_Observed_or_Ve"          
##  [25] "X24_36_Predominant_he"           "X25_37_Premises_type_"          
##  [27] "X26_38_If_other_pleas"           "X27_39_Type_of_associ"          
##  [29] "X28_310_Car_parking_S"           "X29_311_Goods_access_"          
##  [31] "X30_312_Are_these_pre"           "X31_313_If_premises_a"          
##  [33] "X32_314_When_did_the_"           "X33_315_Is_the_premis"          
##  [35] "X34_316_If_rentedleas"           "X35_317_Will_the_leas"          
##  [37] "X37_41_Is_the_localit"           "X38_42_If_so_why_sele"          
##  [39] "X39_43_If_locality_is"           "X40_44_What_is_your_v"          
##  [41] "X41_45_What_are_the_t"           "X42_46_Where_are_the_"          
##  [43] "X43_47_Where_are_the_"           "X44_48_Is_the_organis"          
##  [45] "X45_49_Does_the_organ"           "X46_410_If_yes_where_"          
##  [47] "X47_411_If_yes_when"             "X48_412_If_yes_why"             
##  [49] "X49_413_Are_the_organ"           "X50_414_If_yes_please"          
##  [51] "X51_415_Are_you_aware"           "X52_416_If_yes_how_ha"          
##  [53] "X53_417_If_other_plea"           "X54_418_What_is_your_"          
##  [55] "X56_51_Contact_willin"           "X57_52_Name_of_contac"          
##  [57] "X58_53_Contact_teleph"           "X59_54_Contact_email_"          
##  [59] "X60_55_Organisation_t"           "X61_56_Organisation_w"          
##  [61] "X62_57_Organisation_e"           "X63_58_Street_name_eg"          
##  [63] "X64_59_Street_number_"           "X65_510_Postcode_Incl"          
##  [65] "X66_511_Unit_number_i"           "X68_61_Additional_not"          
##  [67] "X69_62_Internal_photo"           "X71_71_Would_the_acti"          
##  [69] "lat_72_72_Location"              "long_72_72_Location"            
##  [71] "accuracy_72_72_Location"         "X73_73_External_photo"          
##  [73] "X74_74_SIC_Code"                 "X75_75_Site_ID"                 
##  [75] "X76_76_Notes_on_any_o"           "sD_RS_field_1"                  
##  [77] "sD_RS_6_12_Description_of_...10" "Polygon"                        
##  [79] "b_sheet"                         "FUC"                            
##  [81] "FUC_s"                           "field_1.y"                      
##  [83] "query_code"                      "fuc_2"                          
##  [85] "lat"                             "long"                           
##  [87] "sheet_no"                        "sheet_no_2"                     
##  [89] "mb_note_no"                      "mb_comment"                     
##  [91] "comment_1"                       "comment_2"                      
##  [93] "comment_3"                       "description_M"                  
##  [95] "description_updated"             "category"                       
##  [97] "Map.Sheet.Number"                "Latitude_mi"                    
##  [99] "Longitude_mi"                    "Note_mi"                        
## [101] "geom"
oct18gpkg %>%
  select(7, 80, 81, 84)