FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); /** * @file * * NBD Protocol (NBD) * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NBD_STATE_RX_FLAG 0x100 #define NBD_STATE_TX_FLAG 0x200 #define NBD_STATE_IDLE 0x0 #define NBD_STATE_RX_NEGOTIATION (NBD_STATE_RX_FLAG|0x01) #define NBD_STATE_TX_NEGOTIATION (NBD_STATE_TX_FLAG|0x02) #define NBD_STATE_TX_OPTION (NBD_STATE_TX_FLAG|0x03) #define NBD_STATE_RX_OPTION (NBD_STATE_RX_FLAG|0x04) #define NBD_STATE_RX_OPTION_EXPORT_NAME (NBD_STATE_RX_FLAG|0x05) #define NBD_STATE_TX_TRANSMISSION_READ (NBD_STATE_TX_FLAG|0x06) #define NBD_STATE_RX_TRANSMISSION_READ (NBD_STATE_TX_FLAG|0x07) #define NBD_STATE_TX_TRANSMISSION_WRITE (NBD_STATE_RX_FLAG|0x08) #define NBD_STATE_RX_TRANSMISSION_WRITE (NBD_STATE_RX_FLAG|0x09) #define NBD_FLAG_C_FIXED_NEWSTYLE 0x01 #define NBD_FLAG_C_NO_ZEROES 0x02 #define NBD_OPT_EXPORT_NAME 0x01 #define NBD_CMD_READ 0x00 #define NBD_CMD_WRITE 0x01 #define NBD_CMD_DISC 0x02 #define NBD_CMD_FLUSH 0x03 #define NBD_CMD_TRIM 0x04 #define NBD_CMD_WRITE_ZEROES 0x06 static __inline uint16_t get_uint16(char *buf, int offset) { return ntohs(*(uint16_t *)&buf[offset]); } static __inline uint32_t get_uint32(char *buf, int offset) { return ntohl(*(uint32_t *)&buf[offset]); } static __inline uint64_t get_uint64(char *buf, int offset) { return ntohll(*(uint64_t *)&buf[offset]); } static __inline void put_uint16(char *buf, int offset, uint16_t value) { *(uint16_t *)&buf[offset] = htons(value); } static __inline void put_uint32(char *buf, int offset, uint32_t value) { *(uint32_t *)&buf[offset] = htonl(value); } static __inline void put_uint64(char *buf, int offset, uint64_t value) { *(uint64_t *)&buf[offset] = htonll(value); } FEATURE (FEATURE_PROTOCOL, "NBD", DHCP_EB_FEATURE_NBD, 1); #define NBD_PORT 10809 /** NBD boot firmware table signature */ #define NBDFT_SIG ACPI_SIGNATURE ( 'n', 'B', 'F', 'T' ) /* windows only seems to make a very small number of write */ #define WRITE_CACHE_MAX 16 struct write_cache { uint64_t lba; char data[512]; }; /** * NBD Boot Firmware Table (nBFT) */ struct nbft_table { /** ACPI header */ union { struct acpi_description_header acpi; char nbft_header_bytes[48]; }; uint8_t mac[ETH_ALEN]; uint16_t vlan; struct in6_addr ip_address; int8_t ip_prefix_length; struct in6_addr ip_gateway; struct in6_addr ip_dns_server[2]; struct in6_addr ip_dhcp_server; uint16_t pci_bus_dev_func; uint16_t port; uint16_t server_length; uint16_t path_length; uint16_t write_cache_count; char strings[1]; } __attribute__ (( packed )); struct nbd_cmd { struct refcnt refcnt; struct interface block; struct interface nbd; uint64_t lba; uint32_t count; userptr_t buffer; size_t len; }; struct nbd_dev { struct refcnt refcnt; struct net_device *netdev; struct interface block; struct interface nbd; struct interface socket; int state; struct nbd_cmd *capacity_cmd; struct nbd_cmd *rw_cmd; char *server; uint16_t port; char *export; uint64_t total_sectors; uint16_t sector_size; char tx_buf[16384]; char rx_buf[16384]; uint16_t rx_count; struct write_cache *write_cache; struct nbft_table *nbft; }; static void nbd_dev_tx_resume(struct nbd_dev *nbddev); static void nbd_dev_complete_capacity(struct nbd_dev *nbddev) { struct block_device_capacity capacity; capacity.blksize = nbddev->sector_size; capacity.blocks = nbddev->total_sectors; capacity.max_count = 31; block_capacity(&nbddev->capacity_cmd->block, &capacity); dbg_printf("capacity is set\n"); intf_shutdown(&nbddev->capacity_cmd->block, 0); dbg_printf("intf_shutdown is called\n"); nbddev->capacity_cmd = NULL; } static int nbd_get_rx_data(struct nbd_dev *nbddev, struct io_buffer *iobuf, int required) { int copy_length; if (nbddev->rx_count >= required) { return 1; } if (required - nbddev->rx_count < (int)iob_len(iobuf)) { copy_length = required - nbddev->rx_count; } else { copy_length = iob_len(iobuf); } memcpy(nbddev->rx_buf + nbddev->rx_count, iobuf->data, copy_length); nbddev->rx_count += copy_length; iob_pull(iobuf, copy_length); return (nbddev->rx_count >= required); } static int nbd_dev_socket_deliver(struct nbd_dev *nbddev, struct io_buffer *iobuf, struct xfer_metadata *meta) { uint16_t rx_required; int rc = 0; (void)meta; while (1) { switch(nbddev->state) { case NBD_STATE_RX_NEGOTIATION: rx_required = 18; if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } if (get_uint64(nbddev->rx_buf, 0) != 0x4e42444d41474943) { dbg_printf("bad checksum 1\n"); rc = 1; goto done; } nbddev->state = NBD_STATE_TX_NEGOTIATION; nbd_dev_tx_resume(nbddev); break; case NBD_STATE_RX_OPTION: rx_required = 20; if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } rx_required = 20 + get_uint64(nbddev->rx_buf, 16); if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } nbddev->state = NBD_STATE_TX_OPTION; break; case NBD_STATE_RX_OPTION_EXPORT_NAME: rx_required = 10; if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } nbddev->sector_size = 512; nbddev->total_sectors = get_uint64(nbddev->rx_buf, 0) / nbddev->sector_size; dbg_printf("total_sectors = %d\n", (int)nbddev->total_sectors); dbg_printf("flags = %04x\n", get_uint16(nbddev->rx_buf, 8)); if (nbddev->capacity_cmd) { nbd_dev_complete_capacity(nbddev); } nbddev->state = NBD_STATE_IDLE; break; case NBD_STATE_RX_TRANSMISSION_READ: rx_required = 16; if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } if (get_uint32(nbddev->rx_buf, 4) == 0) { rx_required = 16 + nbddev->rw_cmd->count * nbddev->sector_size; if (!nbd_get_rx_data(nbddev, iobuf, rx_required)) { goto done; } } memcpy((void *)nbddev->rw_cmd->buffer, nbddev->rx_buf + 16, nbddev->rw_cmd->count * nbddev->sector_size); nbddev->state = NBD_STATE_IDLE; intf_shutdown(&nbddev->rw_cmd->block, 0); free(nbddev->rw_cmd); nbddev->rw_cmd = NULL; break; default: goto done; } if (nbddev->rx_count > rx_required) { dbg_printf("excess data ??\n"); } nbddev->rx_count = 0; } done: free_iob(iobuf); return rc; } static void nbd_dev_tx_resume(struct nbd_dev *nbddev) { struct io_buffer *iobuf; userptr_t ptr; int rc; char *data; int length; int next_state; while (1) { if (!xfer_window(&nbddev->socket)) { return; } switch (nbddev->state) { case NBD_STATE_TX_NEGOTIATION: dbg_printf("NBD_STATE_TX_NEGOTIATION\n"); length = sizeof(uint32_t); data = malloc(length); put_uint32(data, 0, NBD_FLAG_C_FIXED_NEWSTYLE | NBD_FLAG_C_NO_ZEROES); ptr = (userptr_t)data; next_state = NBD_STATE_TX_OPTION; break; case NBD_STATE_TX_OPTION: dbg_printf("NBD_STATE_TX_OPTION\n"); length = 16 + strlen(nbddev->export); data = malloc(length); put_uint64(data, 0, 0x49484156454F5054); put_uint32(data, 8, NBD_OPT_EXPORT_NAME); put_uint32(data, 12, strlen(nbddev->export)); memcpy(data + 16, nbddev->export, strlen(nbddev->export)); ptr = (userptr_t)data; next_state = NBD_STATE_RX_OPTION_EXPORT_NAME; break; case NBD_STATE_TX_TRANSMISSION_READ: length = 28; data = malloc(length); put_uint32(data, 0, 0x25609513); put_uint16(data, 4, 0); put_uint16(data, 6, NBD_CMD_READ); put_uint64(data, 8, 0x1234123412341234); put_uint64(data, 16, nbddev->rw_cmd->lba * nbddev->sector_size); put_uint32(data, 24, nbddev->rw_cmd->count * nbddev->sector_size); ptr = (userptr_t)data; next_state = NBD_STATE_RX_TRANSMISSION_READ; break; default: return; } iobuf = xfer_alloc_iob(&nbddev->socket, length); if (!iobuf) { dbg_printf("xfer_alloc_iob failed\n"); } copy_from_user(iob_put(iobuf, length), ptr, 0, length); rc = xfer_deliver_iob(&nbddev->socket, iobuf); if (!rc) { nbddev->state = next_state; } else { dbg_printf("xfer_deliver_iob rc = %08x (%s)\n", rc, strerror(rc)); } } } static void nbd_dev_socket_close(struct nbd_dev *nbddev, int rc) { (void)nbddev; dbg_printf("nbd_dev_socket_close rc = %d (%s)\n", rc, strerror(rc)); intf_shutdown(&nbddev->socket, rc); } static struct interface_operation nbd_dev_socket_ops[] = { INTF_OP ( xfer_deliver, struct nbd_dev *, nbd_dev_socket_deliver ), INTF_OP ( xfer_window_changed, struct nbd_dev *, nbd_dev_tx_resume ), INTF_OP ( intf_close, struct nbd_dev *, nbd_dev_socket_close ), }; static struct interface_descriptor nbd_dev_socket_desc = INTF_DESC ( struct nbd_dev, socket, nbd_dev_socket_ops ); static void nbd_cmd_block_close(struct nbd_cmd *nbdcmd, int rc) { dbg_printf("nbdcmd_close rc = %d\n", rc); intf_shutdown(&nbdcmd->block, rc); } static struct interface_operation nbd_cmd_block_ops[] = { INTF_OP(intf_close, struct nbd_cmd *, nbd_cmd_block_close), }; static struct interface_descriptor nbd_cmd_block_desc = INTF_DESC_PASSTHRU(struct nbd_cmd, block, nbd_cmd_block_ops, nbd); static void nbd_set_ip4(struct in6_addr *dst, struct in_addr *src) { memset(dst, 0, sizeof(*dst)); dst->s6_addr16[6] = 0xFFFF; memcpy(&dst->s6_addr32[3], src, sizeof(*src)); } static void nbd_set_ip4_setting(struct settings *settings, struct in6_addr *addr, const struct setting *setting, int count) { struct in_addr in[count]; int i_count; fetch_ipv4_array_setting(settings, setting, in, count); for (i_count = 0; i_count < count; i_count++) { nbd_set_ip4(&addr[i_count], &in[i_count]); } } #define array_size(a) (sizeof((a)) / sizeof((a)[0])) static int nbd_dev_describe(struct nbd_dev *nbddev, struct acpi_description_header *acpi, size_t len) { struct nbft_table *nbft = container_of ( acpi, struct nbft_table, acpi ); struct settings *parent; struct settings *origin; uint32_t offset; char *base; dbg_printf("nbd_dev_describe\n"); nbddev->nbft = nbft; if (len < offsetof(struct nbft_table, strings) + strlen(nbddev->server)) { dbg_printf("bailing - not enough space\n"); return -ENOBUFS; } nbddev->netdev = last_opened_netdev(); parent = netdev_settings(nbddev->netdev); fetch_setting(parent, &ip_setting, &origin, NULL, NULL, 0); /* Populate table */ nbft->acpi.signature = cpu_to_le32(NBDFT_SIG); nbft->acpi.revision = 1; memcpy(nbft->mac, nbddev->netdev->ll_addr, sizeof(nbft->mac)); dbg_printf("mac = %02x:%02x:%02x:%02x:%02x:%02x\n", nbft->mac[0], nbft->mac[1], nbft->mac[2], nbft->mac[3], nbft->mac[4], nbft->mac[5]); nbft->vlan = vlan_tag(nbddev->netdev); nbd_set_ip4_setting(parent, &nbft->ip_address, &ip_setting, 1); dbg_printf("ip_address = %s\n", inet_ntoa(*(struct in_addr *)&nbft->ip_address.s6_addr32[3])); nbd_set_ip4_setting(parent, &nbft->ip_gateway, &gateway_setting, 1); nbd_set_ip4_setting(parent, &nbft->ip_dns_server[0], &dns_setting, array_size(nbft->ip_dns_server)); nbft->ip_prefix_length = 24; nbft->port = nbddev->port; nbft->server_length = strlen(nbddev->server); base = (char *)nbft; offset = offsetof(struct nbft_table, strings); memcpy(base + offset, nbddev->server, nbft->server_length); offset += nbft->server_length; nbddev->write_cache = (struct write_cache *)(base + offset); nbft->acpi.length = cpu_to_le32(offset); return 0; } static int nbd_dev_block_read(struct nbd_dev *nbddev, struct interface *block, uint64_t lba, unsigned int count, userptr_t buffer, size_t len) { nbddev->rw_cmd = zalloc(sizeof(*nbddev->rw_cmd)); if (!nbddev->rw_cmd) { dbg_printf("unable to allocate rw_cmd\n"); return 1; } nbddev->rw_cmd->lba = lba; nbddev->rw_cmd->count = count; nbddev->rw_cmd->buffer = buffer; nbddev->rw_cmd->len = len; ref_init(&nbddev->rw_cmd->refcnt, NULL); intf_init(&nbddev->rw_cmd->block, &nbd_cmd_block_desc, &nbddev->rw_cmd->refcnt); nbddev->state = NBD_STATE_TX_TRANSMISSION_READ; intf_plug_plug(&nbddev->rw_cmd->block, block); nbd_dev_tx_resume(nbddev); return 0; } static int nbd_dev_block_write(struct nbd_dev *nbddev, struct interface *block, uint64_t lba, unsigned int count, userptr_t buffer, size_t len) { int i_count; int i_cache; (void)len; dbg_printf("nbd_dev_block_write %d, %d\n", (int)lba, count); if (nbddev->rw_cmd || nbddev->state != NBD_STATE_IDLE) { dbg_printf("nbd_dev_block_write command already in progress??\n"); } nbddev->rw_cmd = zalloc(sizeof(*nbddev->rw_cmd)); intf_plug_plug(&nbddev->rw_cmd->block, block); intf_shutdown(&nbddev->rw_cmd->block, 0); nbddev->rw_cmd = NULL; return 0; } static int nbd_dev_block_read_capacity(struct nbd_dev *nbddev, struct interface *block) { dbg_printf("nbd_dev_block_read_capacity\n"); if (nbddev->capacity_cmd) { dbg_printf("duplicate capacity cmd???\n"); } nbddev->capacity_cmd = zalloc(sizeof(*nbddev->capacity_cmd)); ref_init(&nbddev->capacity_cmd->refcnt, NULL); intf_init(&nbddev->capacity_cmd->block, &nbd_cmd_block_desc, &nbddev->capacity_cmd->refcnt); intf_plug_plug(&nbddev->capacity_cmd->block, block); if (nbddev->total_sectors != 0) { nbd_dev_complete_capacity(nbddev); } return 0; } static void nbd_dev_block_close(struct nbd_dev *nbddev, int rc) { dbg_printf("nbd_dev_block_close rc = %08x (%s)\n", rc, strerror(rc)); intf_shutdown(&nbddev->block, rc); } static struct interface_operation nbd_dev_block_ops[] = { INTF_OP(acpi_describe, struct nbd_dev *, nbd_dev_describe), INTF_OP(block_read, struct nbd_dev *, nbd_dev_block_read), INTF_OP(block_write, struct nbd_dev *, nbd_dev_block_write), INTF_OP(block_read_capacity, struct nbd_dev *, nbd_dev_block_read_capacity), INTF_OP(intf_close, struct nbd_dev *, nbd_dev_block_close), }; static void nbd_dev_nbd_close(struct nbd_dev *nbddev, int rc) { dbg_printf("nbd_dev_nbd_close rc = %08x (%s)\n", rc, strerror(rc)); intf_shutdown(&nbddev->nbd, rc); } static struct interface_descriptor nbd_dev_block_desc = INTF_DESC_PASSTHRU(struct nbd_dev, block, nbd_dev_block_ops, nbd); static struct interface_operation nbd_dev_nbd_ops[] = { INTF_OP(intf_close, struct nbd_dev *, nbd_dev_nbd_close), }; static struct interface_descriptor nbd_dev_nbd_desc = INTF_DESC_PASSTHRU(struct nbd_dev, nbd, nbd_dev_nbd_ops, block); static void nbddev_free(struct refcnt *refcnt) { dbg_printf("nbddev_free\n"); struct nbd_dev *nbddev = container_of(refcnt, struct nbd_dev, refcnt); free(nbddev); } static int nbd_connect(struct nbd_dev *nbddev) { struct sockaddr_tcpip sa; int rc; dbg_printf("nbd_connect\n"); memset(&sa, 0, sizeof(sa)); sa.st_port = htons(nbddev->port); nbddev->state = NBD_STATE_RX_NEGOTIATION; rc = xfer_open_named_socket(&nbddev->socket, SOCK_STREAM, (struct sockaddr *)&sa, nbddev->server, NULL); if (!rc) { dbg_printf("xfer_open_named_socket rc = %d (%s)\n", rc, strerror(rc)); return rc; } return rc; } int nbd_open_uri(struct interface *parent, struct uri *uri) { int rc; struct nbd_dev *nbddev; dbg_printf("nbd_open_uri\n"); dbg_printf("host = %s\n", uri->host); nbddev = zalloc(sizeof(*nbddev)); nbddev->port = strtoul(uri->port, NULL, 10); if (!nbddev->port) { nbddev->port = NBD_PORT; } nbddev->server = strdup(uri->host); nbddev->export = strdup(uri->path + 1); ref_init(&nbddev->refcnt, nbddev_free); intf_init(&nbddev->block, &nbd_dev_block_desc, &nbddev->refcnt); intf_init(&nbddev->nbd, &nbd_dev_nbd_desc, &nbddev->refcnt); intf_init(&nbddev->socket, &nbd_dev_socket_desc, &nbddev->refcnt); rc = nbd_connect(nbddev); intf_plug_plug(&nbddev->block, parent); ref_put(&nbddev->refcnt); return rc; } struct uri_opener nbd_uri_opener __uri_opener = { .scheme = "nbd", .open = nbd_open_uri, };